Comments, revisited (#375)

* feat: revisit comments, close #348 * chore: improve unit testing and docs * fix: allow merging if lines end in trailing inline comment * fix: allow merging ahead of inline comments' * fix: allow merging of blank lines after inline comments * fix: keep inline comments inline * fix: strip whitespace when splitting comments onto multiple lines * fix: merge through comments if directly after standalone operator * chore: bump primer refs * fix: rename split param to no_tail * fix: improve comment placement with trailing operators; self-review * chore: bump primer ref for http
tconbeer · Jan 27, 2023 · 0f99ef6 · 0f99ef6
1 parent dd485df
commit 0f99ef6
Show file tree

Hide file tree

Showing 22 changed files with 506 additions and 137 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file.
 
 ### Formatting Changes + Bug Fixes
 
+-   sqlfmt no longer merges lines that contain comments, unless the position of those comments can be preserved ([#348](https://github.com/tconbeer/sqlfmt/issues/348) - thank you, [@rileyschack](https://github.com/rileyschack) and [@IanEdington](https://github.com/IanEdington)!). Accordingly, comments that are inline will stay inline, even if they are too long to fit.
 -   sqlfmt no longer merges together lines containing multiline jinja blocks unless those lines start with an operator or comma ([#365](https://github.com/tconbeer/sqlfmt/issues/365) - thank you, [@gavlt](https://github.com/gavlt)!).
 -   fixed a bug where adding a jinja end tag (e.g., `{% endif %}`) to a line could cause bad formatting of everything on that line
 

diff --git a/src/sqlfmt/comment.py b/src/sqlfmt/comment.py
@@ -2,7 +2,6 @@
 from dataclasses import dataclass
 from typing import ClassVar, Iterator, Tuple
 
-from sqlfmt.exception import InlineCommentException
 from sqlfmt.token import Token
 
 
@@ -68,20 +67,16 @@ def is_multiline(self) -> bool:
         """
         return "\n" in self.token.token
 
-    def render_inline(self, max_length: int, content_length: int) -> str:
+    @property
+    def is_inline(self) -> bool:
+        return not self.is_standalone and not self.is_multiline
+
+    def render_inline(self) -> str:
         """
-        For a Comment, returns the string for properly formatting this Comment
-        inline, after content_length characters of non-comment Nodes
+        Renders a comment as an inline comment, assuming it'll fit.
         """
-        if self.is_standalone:
-            raise InlineCommentException("Can't inline standalone comment")
-        else:
-            inline_prefix = " " * 2
-            rendered = inline_prefix + str(self)
-            if content_length + len(rendered) > max_length:
-                raise InlineCommentException("Comment too long to be inlined")
-            else:
-                return inline_prefix + str(self)
+        inline_prefix = " " * 2
+        return f"{inline_prefix}{self}"
 
     def render_standalone(self, max_length: int, prefix: str) -> str:
         """
@@ -100,7 +95,7 @@ def render_standalone(self, max_length: int, prefix: str) -> str:
                     available_length = max_length - len(prefix) - len(marker) - 2
                     line_gen = self._split_before(comment_text, available_length)
                     return "".join(
-                        [prefix + marker + " " + txt + "\n" for txt in line_gen]
+                        [prefix + marker + " " + txt.strip() + "\n" for txt in line_gen]
                     )
                 else:  # block-style or jinja comment. Don't wrap long lines for now
                     return prefix + str(self)

diff --git a/src/sqlfmt/exception.py b/src/sqlfmt/exception.py
@@ -64,15 +64,6 @@ class SqlfmtControlFlowException(Exception):
     pass
 
 
-class InlineCommentException(SqlfmtControlFlowException):
-    """
-    Raised by the Line class if we try to render a comment
-    inline that is too long
-    """
-
-    pass
-
-
 class StopRulesetLexing(SqlfmtControlFlowException):
     """
     Raised by the Analyzer or one of its actions to indicate

diff --git a/src/sqlfmt/jinjafmt.py b/src/sqlfmt/jinjafmt.py
@@ -367,9 +367,11 @@ def format_line(self, line: Line) -> List[Line]:
                         chain(
                             *[
                                 self.format_line(new_line)
-                                for new_line in [
+                                for new_line, _ in [
                                     splitter.split_at_index(line, 0, i, line.comments),
-                                    splitter.split_at_index(line, i, -1, []),
+                                    splitter.split_at_index(
+                                        line, i, -1, [], no_tail=True
+                                    ),
                                 ]
                             ]
                         )

diff --git a/src/sqlfmt/line.py b/src/sqlfmt/line.py
@@ -2,7 +2,6 @@
 from typing import List, Optional, Tuple
 
 from sqlfmt.comment import Comment
-from sqlfmt.exception import InlineCommentException
 from sqlfmt.node import Node
 from sqlfmt.token import Token
 
@@ -88,39 +87,40 @@ def prefix(self) -> str:
         prefix = INDENT * self.depth[0]
         return prefix
 
+    @property
+    def has_inline_comment(self) -> bool:
+        """
+        Returns true if the line has an attached comment that was parsed
+        as an inline comment.
+        """
+        if self.comments and self.comments[-1].is_inline:
+            return True
+        else:
+            return False
+
     def render_with_comments(self, max_length: int) -> str:
         """
         Returns a string that represents the properly-formatted Line,
         including associated comments
         """
-        content = str(self)
-        rendered = content
-        if len(self.comments) == 1:
-            # standalone or multiline comment
-            if self.nodes[0].is_newline:
-                rendered = f"{self.prefix}{self.comments[0]}"
-            # inline comment
+        content = str(self).rstrip()
+        rendered_lines: List[str] = []
+        for comment in self.comments:
+            if comment.is_multiline or comment.is_standalone:
+                rendered_lines.append(
+                    comment.render_standalone(max_length=max_length, prefix=self.prefix)
+                )
             else:
-                try:
-                    comment = self.comments[0].render_inline(
-                        max_length=max_length, content_length=len(content.rstrip())
-                    )
-                    rendered = f"{content.rstrip()}{comment}"
-                except InlineCommentException:
-                    comment = self.comments[0].render_standalone(
-                        max_length=max_length, prefix=self.prefix
-                    )
-                    rendered = f"{comment}{content}"
-        # wrap comments above; note that str(comment) is newline-terminated
-        elif len(self.comments) > 1:
-            comment = "".join(
-                [
-                    c.render_standalone(max_length=max_length, prefix=self.prefix)
-                    for c in self.comments
-                ]
-            )
-            rendered = f"{comment}{content}"
-        return rendered
+                # comment is potentially an inline comment, and we'll handle
+                # that below. Inline comments must be the last comment
+                pass
+
+        if self.has_inline_comment:
+            rendered_lines.append(f"{content}{self.comments[-1].render_inline()}")
+        else:
+            rendered_lines.append(f"{self}")
+
+        return "".join(rendered_lines)
 
     @classmethod
     def from_nodes(

diff --git a/src/sqlfmt/merger.py b/src/sqlfmt/merger.py
@@ -67,7 +67,41 @@ def _extract_components(
         final_newline: Optional[Node] = None
         allow_multiline_jinja = True
         has_multiline_jinja = False
+        has_inline_comment_above = False
         for line in lines:
+            # only merge lines with comments if it's a standalone comment
+            # above the first line or an inline comment after the last
+            # line
+            if line.comments:
+                if has_inline_comment_above:
+                    raise CannotMergeException(
+                        "Can't merge lines with inline comments and other comments"
+                    )
+                elif len(line.comments) == 1 and line.has_inline_comment:
+                    # this is a comment that must be rendered inline,
+                    # so it'll probably block merging unless the
+                    # next line is just a comma
+                    has_inline_comment_above = True
+                elif len(nodes) == 1 and nodes[0].is_operator:
+                    # if source has standalone operators, we can merge
+                    # the operator into the contents, even if there is
+                    # a comment in the way
+                    pass
+                elif nodes:
+                    raise CannotMergeException(
+                        "Can't merge lines with standalone comments unless the "
+                        "comments are above the first line"
+                    )
+            # make an exception for inline comments followed by
+            # a lonely comma (e.g., leading commas with inline comments)
+            elif has_inline_comment_above:
+                if not (line.is_standalone_comma or line.is_blank_line):
+                    raise CannotMergeException(
+                        "Can't merge lines with inline comments unless "
+                        "the following line is a single standalone comma "
+                        "or a blank line"
+                    )
+
             if has_multiline_jinja and not (
                 line.starts_with_operator or line.starts_with_comma
             ):

diff --git a/src/sqlfmt/splitter.py b/src/sqlfmt/splitter.py
@@ -32,25 +32,32 @@ def maybe_split(self, line: Line) -> List[Line]:
                 if head == 0:
                     new_lines.append(line)
                 else:
-                    new_lines.append(self.split_at_index(line, head, i, comments))
+                    new_line, comments = self.split_at_index(
+                        line, head, i, comments, no_tail=True
+                    )
+                    assert (
+                        not comments
+                    ), "Comments must be empty here or we'll drop them"
+                    new_lines.append(new_line)
                 return new_lines
             elif (
                 i > head
                 and not never_split_after
                 and not node.formatting_disabled
                 and (always_split_after or self.maybe_split_before(node))
             ):
-                new_line = self.split_at_index(line, head, i, comments)
+                new_line, comments = self.split_at_index(line, head, i, comments)
                 new_lines.append(new_line)
-                comments = []  # only first split gets original comments
                 head = i
                 # node now follows a new newline node, so we need to update
                 # its previous node (this can impact its depth)
                 node.previous_node = new_line.nodes[-1]
 
             always_split_after, never_split_after = self.maybe_split_after(node)
 
-        new_lines.append(self.split_at_index(line, head, -1, comments))
+        new_line, comments = self.split_at_index(line, head, -1, comments, no_tail=True)
+        assert not comments, "Comments must be empty here or we'll drop them"
+        new_lines.append(new_line)
         return new_lines
 
     def maybe_split_before(self, node: Node) -> bool:
@@ -118,23 +125,52 @@ def maybe_split_after(self, node: Node) -> Tuple[bool, bool]:
             return False, False
 
     def split_at_index(
-        self, line: Line, head: int, index: int, comments: List[Comment]
-    ) -> Line:
+        self,
+        line: Line,
+        head: int,
+        index: int,
+        comments: List[Comment],
+        no_tail: bool = False,
+    ) -> Tuple[Line, List[Comment]]:
         """
-        Return a new line comprised of the nodes line[head:index], plus a newline node
+        Return a new line comprised of the nodes line[head:index], plus a newline node.
+
+        Also return a list of comments that need to be included in the tail
+        of the split line. This includes inline comments and if the head
+        is just a comma, all comments.
         """
         if index == -1:
             new_nodes = line.nodes[head:]
         else:
             assert index > head, "Cannot split at start of line!"
             new_nodes = line.nodes[head:index]
 
+        assert new_nodes, "Cannot split a line without nodes!"
+
+        if no_tail:
+            head_comments, tail_comments = comments, []
+
+        elif comments:
+            if new_nodes[0].is_comma:
+                head_comments, tail_comments = [], comments
+            elif index == len(line.nodes) - 2 and line.nodes[index].is_operator:
+                # the only thing after the split is an operator + \n, so keep the
+                # comments with the stuff before the operator
+                head_comments, tail_comments = comments, []
+            elif comments[-1].is_inline:
+                head_comments, tail_comments = comments[:-1], [comments[-1]]
+            else:
+                head_comments, tail_comments = comments, []
+
+        else:  # no comments
+            head_comments, tail_comments = [], []
+
         new_line = Line.from_nodes(
             previous_node=new_nodes[0].previous_node,
             nodes=new_nodes,
-            comments=comments,
+            comments=head_comments,
         )
         if not new_line.nodes[-1].is_newline:
             self.node_manager.append_newline(new_line)
 
-        return new_line
+        return new_line, tail_comments
diff --git a/src/sqlfmt_primer/primer.py b/src/sqlfmt_primer/primer.py
@@ -30,16 +30,16 @@ def get_projects() -> List[SQLProject]:
         SQLProject(
             name="gitlab",
             git_url="https://github.com/tconbeer/gitlab-analytics-sqlfmt.git",
-            git_ref="c786f5c",  # sqlfmt d97af4e
-            expected_changed=3,
-            expected_unchanged=2414,
+            git_ref="7b0001e",  # sqlfmt 125a964
+            expected_changed=1,
+            expected_unchanged=2416,
             expected_errored=0,
             sub_directory=Path("transform/snowflake-dbt/"),
         ),
         SQLProject(
             name="rittman",
             git_url="https://github.com/tconbeer/rittman_ra_data_warehouse.git",
-            git_ref="067c14b",  # sqlfmt d97af4e
+            git_ref="803b933",  # sqlfmt 125a964
             expected_changed=0,
             expected_unchanged=307,
             expected_errored=4,  # true mismatching brackets
@@ -48,7 +48,7 @@ def get_projects() -> List[SQLProject]:
         SQLProject(
             name="http_archive",
             git_url="https://github.com/tconbeer/http_archive_almanac.git",
-            git_ref="2aad62b",  # sqlfmt d620ac8
+            git_ref="414b535",  # sqlfmt faaf71b
             expected_changed=0,
             expected_unchanged=1729,
             expected_errored=0,
@@ -66,7 +66,7 @@ def get_projects() -> List[SQLProject]:
         SQLProject(
             name="jaffle_shop",
             git_url="https://github.com/tconbeer/jaffle_shop.git",
-            git_ref="1466afa",  # sqlfmt d620ac8
+            git_ref="894e5d4",  # sqlfmt 124890d
             expected_changed=0,
             expected_unchanged=5,
             expected_errored=0,
@@ -75,7 +75,7 @@ def get_projects() -> List[SQLProject]:
         SQLProject(
             name="dbt_utils",
             git_url="https://github.com/tconbeer/dbt-utils.git",
-            git_ref="8d81899",  # sqlfmt d97af4e
+            git_ref="ab55e10",  # sqlfmt 124890d
             expected_changed=0,
             expected_unchanged=131,
             expected_errored=0,

diff --git a/tests/data/unformatted/101_multiline.sql b/tests/data/unformatted/101_multiline.sql
@@ -66,7 +66,6 @@ with
  # And this is a nice multiline jinja comment
  # that we will also handle.
 #}
-/* what!?! */
 select *
 from renamed
-where true
+where true  /* what!?! */