Skip to content

Commit

Permalink
editor: better support for HTML comments
Browse files Browse the repository at this point in the history
  • Loading branch information
miltondp committed Jan 1, 2023
1 parent 02b7fb5 commit af9e699
Show file tree
Hide file tree
Showing 4 changed files with 256 additions and 3 deletions.
19 changes: 16 additions & 3 deletions libs/manubot/ai_editor/editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,22 @@ def revise_file(
current_table_paragraph = False

for line in infile:
# if line is starting either an "image paragraph", a "table paragraph" or a "html comment paragraph",
# then skip all lines until the end of that paragraph
if self.line_is_not_part_of_paragraph(line, include_blank=False):
if line.startswith("<!--"):
# This is an HTML comment.
while line is not None and not line.strip().endswith("-->"):
outfile.write(line)
line = next(infile, None)

if line is not None and line.strip().endswith("-->"):
outfile.write(line)
line = next(infile, None)

# if the line starts an "image paragraph", a "table paragraph" or an
# "HTML comment paragraph", then skip all lines until the end of that
# paragraph
if line is not None and self.line_is_not_part_of_paragraph(
line, include_blank=False
):
if line.startswith("|"):
current_table_paragraph = True

Expand Down
32 changes: 32 additions & 0 deletions tests/manuscripts/phenoplier/50.00.supplementary_material.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
### Latent variables (gene modules) information

#### LV603

<!-- LV603:multiplier_pathways:start -->
| Pathway | AUC | FDR |
|:------------------------------------|:------|:---------|
| IRIS Neutrophil-Resting | 0.91 | 4.51e-35 |
| SVM Neutrophils | 0.98 | 1.43e-09 |
| PID IL8CXCR2 PATHWAY | 0.81 | 7.04e-03 |
| SIG PIP3 SIGNALING IN B LYMPHOCYTES | 0.77 | 1.95e-02 |

Table: Pathways aligned to LV603 from the MultiPLIER models. {#tbl:sup:multiplier_pathways:lv603}
<!-- LV603:multiplier_pathways:end -->

<!-- LV603:phenomexcan_traits_assocs:start -->
| Trait description | Sample size | Cases | FDR |
|:------------------------------------------|:--------------|:--------|:---------------|
| Basophill percentage | 349,861 | | 1.19e&#8209;10 |
| Basophill count | 349,856 | | 1.89e&#8209;05 |
| Treatment/medication code: ispaghula husk | 361,141 | 327 | 1.36e&#8209;02 |

Table: Significant trait associations of LV603 in PhenomeXcan. {#tbl:sup:phenomexcan_assocs:lv603}
<!-- LV603:phenomexcan_traits_assocs:end -->

<!-- LV603:emerge_traits_assocs:start -->
| Phecode | Trait description | Sample size | Cases | FDR |
|:----------------------------|:--------------------|:--------------|:--------|:------|
| No significant associations | | | | |

Table: Significant trait associations of LV603 in eMERGE. {#tbl:sup:emerge_assocs:lv603}
<!-- LV603:emerge_traits_assocs:end -->
44 changes: 44 additions & 0 deletions tests/manuscripts/phenoplier/50.01.supplementary_material.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
### Latent variables (gene modules) information

#### LV603

<!-- LV603:multiplier_pathways:start
this is a more complex multiline html comment -->
| Pathway | AUC | FDR |
|:------------------------------------|:------|:---------|
| IRIS Neutrophil-Resting | 0.91 | 4.51e-35 |
| SVM Neutrophils | 0.98 | 1.43e-09 |
| PID IL8CXCR2 PATHWAY | 0.81 | 7.04e-03 |
| SIG PIP3 SIGNALING IN B LYMPHOCYTES | 0.77 | 1.95e-02 |

Table: Pathways aligned to LV603 from the MultiPLIER models. {#tbl:sup:multiplier_pathways:lv603}
<!-- LV603:multiplier_pathways:end -->

<!-- LV603:phenomexcan_traits_assocs:start
and this html comments is multiline but
also has an empty line in the middle-->
| Trait description | Sample size | Cases | FDR |
|:------------------------------------------|:--------------|:--------|:---------------|
| Basophill percentage | 349,861 | | 1.19e&#8209;10 |
| Basophill count | 349,856 | | 1.89e&#8209;05 |
| Treatment/medication code: ispaghula husk | 361,141 | 327 | 1.36e&#8209;02 |

Table: Significant trait associations of LV603 in PhenomeXcan. {#tbl:sup:phenomexcan_assocs:lv603}
<!-- LV603:phenomexcan_traits_assocs:end
-->

<!--
and this html multiline comment has a space
LV603:emerge_traits_assocs:start
-->
| Phecode | Trait description | Sample size | Cases | FDR |
|:----------------------------|:--------------------|:--------------|:--------|:------|
| No significant associations | | | | |

Table: Significant trait associations of LV603 in eMERGE. {#tbl:sup:emerge_assocs:lv603}
<!-- LV603:emerge_traits_assocs:end -->
164 changes: 164 additions & 0 deletions tests/test_editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,170 @@ def test_revise_supplementary_material_with_tables_and_multiline_html_comments(
)


@pytest.mark.parametrize(
    "model",
    [
        RandomManuscriptRevisionModel(),
        # GPT3CompletionModel(None, None),
    ],
)
def test_revise_supplementary_material_from_phenoplier_with_many_tables(
    tmp_path, model
):
    """Revise a supplementary file holding several comment-delimited tables.

    The input file ("50.00.supplementary_material.md") contains three
    tables, each wrapped between single-line HTML comments and followed by
    a "Table:" caption. After running ``revise_file`` the test checks that
    non-paragraph lines are preserved and that each table block appears
    verbatim in the output file.
    """
    print(f"\n{str(tmp_path)}\n")

    me = ManuscriptEditor(
        content_dir=MANUSCRIPTS_DIR / "phenoplier",
    )

    model.title = me.title
    model.keywords = me.keywords

    me.revise_file("50.00.supplementary_material.md", tmp_path, model)

    _check_nonparagraph_lines_are_preserved(
        input_filepath=MANUSCRIPTS_DIR
        / "phenoplier"
        / "50.00.supplementary_material.md",
        output_filepath=tmp_path / "50.00.supplementary_material.md",
    )

    # Read the revised file a single time and close the handle right away,
    # instead of leaking one open file object per assertion.
    with open(tmp_path / "50.00.supplementary_material.md") as revised_file:
        revised_content = revised_file.read()

    # make sure the "table paragraph" was exactly copied to the output file
    assert (
        r"""
<!-- LV603:multiplier_pathways:start -->
| Pathway | AUC | FDR |
|:------------------------------------|:------|:---------|
| IRIS Neutrophil-Resting | 0.91 | 4.51e-35 |
| SVM Neutrophils | 0.98 | 1.43e-09 |
| PID IL8CXCR2 PATHWAY | 0.81 | 7.04e-03 |
| SIG PIP3 SIGNALING IN B LYMPHOCYTES | 0.77 | 1.95e-02 |
Table: Pathways aligned to LV603 from the MultiPLIER models. {#tbl:sup:multiplier_pathways:lv603}
<!-- LV603:multiplier_pathways:end -->
""".strip()
        in revised_content
    )

    # make sure the "table paragraph" was exactly copied to the output file
    assert (
        r"""
<!-- LV603:phenomexcan_traits_assocs:start -->
| Trait description | Sample size | Cases | FDR |
|:------------------------------------------|:--------------|:--------|:---------------|
| Basophill percentage | 349,861 | | 1.19e&#8209;10 |
| Basophill count | 349,856 | | 1.89e&#8209;05 |
| Treatment/medication code: ispaghula husk | 361,141 | 327 | 1.36e&#8209;02 |
Table: Significant trait associations of LV603 in PhenomeXcan. {#tbl:sup:phenomexcan_assocs:lv603}
<!-- LV603:phenomexcan_traits_assocs:end -->
""".strip()
        in revised_content
    )

    # make sure the "table paragraph" was exactly copied to the output file
    assert (
        r"""
<!-- LV603:emerge_traits_assocs:start -->
| Phecode | Trait description | Sample size | Cases | FDR |
|:----------------------------|:--------------------|:--------------|:--------|:------|
| No significant associations | | | | |
Table: Significant trait associations of LV603 in eMERGE. {#tbl:sup:emerge_assocs:lv603}
<!-- LV603:emerge_traits_assocs:end -->
""".strip()
        in revised_content
    )


@pytest.mark.parametrize(
    "model",
    [
        RandomManuscriptRevisionModel(),
        # GPT3CompletionModel(None, None),
    ],
)
def test_revise_supplementary_material_from_phenoplier_with_many_tables_and_complex_html_comments(
    tmp_path, model
):
    """Revise a supplementary file whose tables use multiline HTML comments.

    The input file ("50.01.supplementary_material.md") contains three
    tables delimited by HTML comments that span several lines, including
    comments whose opening or closing marker sits on its own line. After
    running ``revise_file`` the test checks that non-paragraph lines are
    preserved and that each table block, together with its surrounding
    comments, appears verbatim in the output file.
    """
    print(f"\n{str(tmp_path)}\n")

    me = ManuscriptEditor(
        content_dir=MANUSCRIPTS_DIR / "phenoplier",
    )

    model.title = me.title
    model.keywords = me.keywords

    me.revise_file("50.01.supplementary_material.md", tmp_path, model)

    _check_nonparagraph_lines_are_preserved(
        input_filepath=MANUSCRIPTS_DIR
        / "phenoplier"
        / "50.01.supplementary_material.md",
        output_filepath=tmp_path / "50.01.supplementary_material.md",
    )

    # Read the revised file a single time and close the handle right away,
    # instead of leaking one open file object per assertion.
    with open(tmp_path / "50.01.supplementary_material.md") as revised_file:
        revised_content = revised_file.read()

    # make sure the "table paragraph" was exactly copied to the output file
    assert (
        r"""
<!-- LV603:multiplier_pathways:start
this is a more complex multiline html comment -->
| Pathway | AUC | FDR |
|:------------------------------------|:------|:---------|
| IRIS Neutrophil-Resting | 0.91 | 4.51e-35 |
| SVM Neutrophils | 0.98 | 1.43e-09 |
| PID IL8CXCR2 PATHWAY | 0.81 | 7.04e-03 |
| SIG PIP3 SIGNALING IN B LYMPHOCYTES | 0.77 | 1.95e-02 |
Table: Pathways aligned to LV603 from the MultiPLIER models. {#tbl:sup:multiplier_pathways:lv603}
<!-- LV603:multiplier_pathways:end -->
""".strip()
        in revised_content
    )

    # make sure the "table paragraph" was exactly copied to the output file
    assert (
        r"""
<!-- LV603:phenomexcan_traits_assocs:start
and this html comments is multiline but
also has an empty line in the middle-->
| Trait description | Sample size | Cases | FDR |
|:------------------------------------------|:--------------|:--------|:---------------|
| Basophill percentage | 349,861 | | 1.19e&#8209;10 |
| Basophill count | 349,856 | | 1.89e&#8209;05 |
| Treatment/medication code: ispaghula husk | 361,141 | 327 | 1.36e&#8209;02 |
Table: Significant trait associations of LV603 in PhenomeXcan. {#tbl:sup:phenomexcan_assocs:lv603}
<!-- LV603:phenomexcan_traits_assocs:end
-->
""".strip()
        in revised_content
    )

    # make sure the "table paragraph" was exactly copied to the output file
    assert (
        r"""
<!--
and this html multiline comment has a space
LV603:emerge_traits_assocs:start
-->
| Phecode | Trait description | Sample size | Cases | FDR |
|:----------------------------|:--------------------|:--------------|:--------|:------|
| No significant associations | | | | |
Table: Significant trait associations of LV603 in eMERGE. {#tbl:sup:emerge_assocs:lv603}
<!-- LV603:emerge_traits_assocs:end -->
""".strip()
        in revised_content
    )


@pytest.mark.parametrize(
"model",
[
Expand Down

0 comments on commit af9e699

Please sign in to comment.