Skip to content

Commit

Permalink
editor: support table captions (avoid processing them)
Browse files Browse the repository at this point in the history
  • Loading branch information
miltondp committed Jan 1, 2023
1 parent d595fcf commit 3f22b72
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 14 deletions.
21 changes: 21 additions & 0 deletions libs/manubot/ai_editor/editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,14 +168,35 @@ def revise_file(
# Initialize a temporary list to store the lines of the current paragraph
paragraph = []

current_table_paragraph = False

for line in infile:
# if the line starts an "image paragraph", a "table paragraph" or an "HTML comment paragraph",
# then copy all lines to the output unchanged (i.e., skip revising them) until the end of that paragraph
if self.line_is_not_part_of_paragraph(line, include_blank=False):
if line.startswith("|"):
current_table_paragraph = True

while line is not None and line.strip() != "":
outfile.write(line)
line = next(infile, None)

# for "table paragraphs", there is a blank line after the table
# and then the next paragraph is the table caption that starts
# with "Table: ". We want to include those lines as part of the
# "table paragraph"
if (
line is not None
and current_table_paragraph
and line.startswith("Table: ")
):
while line is not None and line.strip() != "":
outfile.write(line)
line = next(infile, None)

# we finished processing the "table paragraph"
current_table_paragraph = False

# stop if we reached the end of the file
if line is None:
break
Expand Down
33 changes: 19 additions & 14 deletions tests/test_editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from manubot.ai_editor import env_vars
from manubot.ai_editor.editor import ManuscriptEditor
from manubot.ai_editor.models import DummyManuscriptRevisionModel, GPT3CompletionModel
from manubot.ai_editor.models import GPT3CompletionModel, RandomManuscriptRevisionModel

MANUSCRIPTS_DIR = Path(__file__).parent / "manuscripts"

Expand Down Expand Up @@ -59,8 +59,8 @@ def _check_nonparagraph_lines_are_preserved(input_filepath, output_filepath):
@pytest.mark.parametrize(
"model",
[
DummyManuscriptRevisionModel(),
GPT3CompletionModel(None, None),
RandomManuscriptRevisionModel(),
# GPT3CompletionModel(None, None),
],
)
def test_revise_abstract(tmp_path, model):
Expand All @@ -82,8 +82,8 @@ def test_revise_abstract(tmp_path, model):
@pytest.mark.parametrize(
"model",
[
DummyManuscriptRevisionModel(),
GPT3CompletionModel(None, None),
RandomManuscriptRevisionModel(),
# GPT3CompletionModel(None, None),
],
)
def test_revise_introduction(tmp_path, model):
Expand Down Expand Up @@ -170,8 +170,8 @@ def test_get_section_from_filename_using_environment_variable_is_invalid():
@pytest.mark.parametrize(
"model",
[
DummyManuscriptRevisionModel(),
GPT3CompletionModel(None, None),
RandomManuscriptRevisionModel(),
# GPT3CompletionModel(None, None),
],
)
def test_revise_results_with_header_only(tmp_path, model):
Expand All @@ -193,8 +193,8 @@ def test_revise_results_with_header_only(tmp_path, model):
@pytest.mark.parametrize(
"model",
[
DummyManuscriptRevisionModel(),
GPT3CompletionModel(None, None),
RandomManuscriptRevisionModel(),
# GPT3CompletionModel(None, None),
],
)
def test_revise_results_intro_with_figure(tmp_path, model):
Expand Down Expand Up @@ -233,7 +233,7 @@ def test_revise_results_intro_with_figure(tmp_path, model):
@pytest.mark.parametrize(
"model",
[
DummyManuscriptRevisionModel(),
RandomManuscriptRevisionModel(),
# GPT3CompletionModel(None, None),
],
)
Expand Down Expand Up @@ -268,7 +268,7 @@ def test_revise_methods_with_equation(tmp_path, model):
@pytest.mark.parametrize(
"model",
[
DummyManuscriptRevisionModel(),
RandomManuscriptRevisionModel(),
# GPT3CompletionModel(None, None),
],
)
Expand All @@ -291,7 +291,7 @@ def test_revise_supplementary_material_with_tables_and_multiline_html_comments(
output_filepath=tmp_path / "20.00.supplementary_material.md",
)

# make sure the "image paragraph" was exactly copied to the output file
# make sure the "table paragraph" was exactly copied to the output file
assert (
r"""
| | **Interaction confidence** <!-- $colspan="7" --> | | | | | | |
Expand All @@ -315,11 +315,16 @@ def test_revise_supplementary_material_with_tables_and_multiline_html_comments(
| <!-- $colspan="7" --> |||||||
| *PYGM* | 0.02 | 0.04 | 0.14 | Skeletal muscle<!-- $rowspan="2" --> | 0.01 | 0.02 | 0.04 |
| *TPM2* | 0.05 | 0.56 | 0.80 | 0.01 | 0.28 | 0.47<!-- $removenext="2" --> |
Table: Network statistics of six gene pairs shown in Figure @fig:upsetplot_coefs b for blood and predicted cell types.
Only gene pairs present in GIANT models are listed.
For each gene in the pair (first column), the minimum, average and maximum interaction coefficients with the other genes in the network are shown.
{#tbl:giant:weights}
""".strip()
in open(tmp_path / "20.00.supplementary_material.md").read()
)

# make sure the "image paragraph" was exactly copied to the output file
# make sure the "HTML comment paragraph" was exactly copied to the output file
assert (
r"""
<!-- ![
Expand All @@ -346,7 +351,7 @@ def test_revise_supplementary_material_with_tables_and_multiline_html_comments(
@pytest.mark.parametrize(
"model",
[
DummyManuscriptRevisionModel(),
RandomManuscriptRevisionModel(),
# GPT3CompletionModel(None, None),
],
)
Expand Down

0 comments on commit 3f22b72

Please sign in to comment.