Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
7c0a8fe
Base version of automated test marker
ShaharNaveh Aug 11, 2025
69ed76b
Remove debug code
ShaharNaveh Aug 11, 2025
dc3a6de
Add newline
ShaharNaveh Aug 11, 2025
5c09ef0
Apply RustPython patch
ShaharNaveh Aug 11, 2025
ad061b2
Convert to tool with args
ShaharNaveh Aug 20, 2025
a6f2324
Apply patch
ShaharNaveh Aug 22, 2025
302b3d1
Remove old script
ShaharNaveh Aug 22, 2025
db74d2f
Add textwrap.py
ShaharNaveh Aug 22, 2025
11694e3
ruff fmt
ShaharNaveh Aug 22, 2025
d8b4e26
Add more modules
ShaharNaveh Aug 22, 2025
2308a5a
Merge remote-tracking branch 'upstream/main' into auto-updater
ShaharNaveh Aug 25, 2025
2fb6842
Add `gen` subcommand
ShaharNaveh Aug 29, 2025
db5eb4b
Use `_generate_next_value_`
ShaharNaveh Aug 29, 2025
af1c28d
Gen & patch
ShaharNaveh Aug 30, 2025
42365d2
Remove old tool
ShaharNaveh Aug 30, 2025
fdce40b
Merge remote-tracking branch 'upstream/main' into auto-updater
ShaharNaveh Aug 30, 2025
32baa80
Revert changes under `Lib/`
ShaharNaveh Aug 30, 2025
fb7324d
Don't crash if cls renamed/moved
ShaharNaveh Aug 30, 2025
f4056ac
Update `Lib/test/test_os.py` with tool
ShaharNaveh Aug 30, 2025
4296b59
apply patch
ShaharNaveh Aug 30, 2025
e2aa220
Fix double assignment
ShaharNaveh Aug 30, 2025
74b47a0
Better args
ShaharNaveh Aug 30, 2025
51c6ad9
Update `test_list.py` as well
ShaharNaveh Aug 30, 2025
01a90ef
Less complex print
ShaharNaveh Aug 30, 2025
a0e56ae
Improve expectedFailure match
ShaharNaveh Aug 30, 2025
43a63a8
fix list slice
ShaharNaveh Aug 30, 2025
3e2c1f1
Add __doc__ and to help
ShaharNaveh Sep 2, 2025
789cf6e
Merge remote-tracking branch 'upstream/main' into auto-updater
ShaharNaveh Sep 2, 2025
3e9872c
Update scripts/lib_updater.py
ShaharNaveh Sep 4, 2025
d0763e4
Clearer output arg
ShaharNaveh Sep 5, 2025
e00bb28
Don't crash on missing id
ShaharNaveh Sep 5, 2025
8a5875e
Merge remote-tracking branch 'upstream/main' into auto-updater
ShaharNaveh Sep 5, 2025
6c615cc
Merge remote-tracking branch 'origin/auto-updater' into auto-updater
ShaharNaveh Sep 5, 2025
c0e90cc
Fix comment regex
ShaharNaveh Sep 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add gen subcommand
  • Loading branch information
ShaharNaveh committed Aug 29, 2025
commit 2fb6842f877840a278d85a8d5b77d6e84d656e19
199 changes: 199 additions & 0 deletions scripts/lib_updater.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#!/usr/bin/env python
import argparse
import ast
import dataclasses
import enum
import json
import re
import sys
from typing import TYPE_CHECKING, Self

if TYPE_CHECKING:
from collections.abc import Iterator

# Marker text that identifies a RustPython-specific test patch. It is searched
# for in the source lines around a decorator and in decorator string arguments.
COMMENT = "TODO: RUSTPYTHON"


@enum.unique
class ProgName(enum.StrEnum):
Gen = enum.auto()
Patch = enum.auto()


@enum.unique
class UtMethod(enum.StrEnum):
"""
UnitTest Method
"""

ExpectedFailure = "expectedFailure"
ExpectedFailureIf = "expectedFailureIf"
ExpectedFailureIfWindows = "expectedFailureIfWindows"
SkipUnless = "skipUnless"
Skip = enum.auto()
SkipIf = "skipIf"


@dataclasses.dataclass(frozen=True, slots=True)
class PatchEntry:
"""
Stores patch metadata.

Attributes
----------
parent_class : str
Parent class of test.
test_name : str
Test name.
ut_method : UtMethod
unittest method.
cond : str, optional
`ut_method` condition. Relevant only for UtMethod.{expectedFailureIf,skipIf}.
reason : str, optional
Reason for why the test is patched in this way.
"""

parent_class: str
test_name: str
ut_method: UtMethod
cond: str | None = None
reason: str = ""

@classmethod
def iter_patch_entires(cls, tree: ast.Module, lines: list[str]) -> "Iterator[Self]":
for cls_node, fn_node in iter_tests(tree):
parent_class = cls_node.name
for dec_node in fn_node.decorator_list:
if not isinstance(dec_node, (ast.Attribute, ast.Call)):
continue

attr_node = (
dec_node if isinstance(dec_node, ast.Attribute) else dec_node.func
)

if isinstance(attr_node, ast.Name) or attr_node.value.id != "unittest":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is attr_node.value expected to be a Name? Then

Suggested change
if isinstance(attr_node, ast.Name) or attr_node.value.id != "unittest":
if isinstance(attr_node, ast.Name):
continue
if isinstance(attr_node.value, ast.Name) and attr_node.value.id != "unittest":
$ python ./scripts/lib_updater.py --from Lib/test/test_property.py --to ../cpython/Lib/test/test_property.py  
Attribute(value=Name(id='spam', ctx=Load()), attr='setter', ctx=Load())
Attribute(value=Name(id='spam', ctx=Load()), attr='deleter', ctx=Load())
Attribute(value=Attribute(value=Name(id='BaseClass', ctx=Load()), attr='spam', ctx=Load()), attr='getter', ctx=Load())
Traceback (most recent call last):
  File "/Users/user/Projects/rustpython2/./scripts/lib_updater.py", line 314, in <module>
    patches = build_patch_dict(iter_patches(args.gather_from.read_text()))
  File "/Users/user/Projects/rustpython2/./scripts/lib_updater.py", line 213, in build_patch_dict
    for entry in it:
                 ^^
  File "/Users/user/Projects/rustpython2/./scripts/lib_updater.py", line 208, in iter_patches
    yield from PatchEntry.iter_patch_entires(tree, lines)
  File "/Users/user/Projects/rustpython2/./scripts/lib_updater.py", line 131, in iter_patch_entires
    if isinstance(attr_node, ast.Name) or attr_node.value.id != "unittest":
                                          ^^^^^^^^^^^^^^^^^^
AttributeError: 'Attribute' object has no attribute 'id'

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a bug 🎉
Will use getattr instead.

And attr_node can be of type Name, but if that's the case then we skip as it's not a unittest patch

continue

cond = None
match attr_node.attr:
case UtMethod.ExpectedFailure:
for line in lines[dec_node.lineno - 2 : dec_node.lineno]:
if COMMENT not in line:
continue
reason = "".join(re.findall(rf"{COMMENT} (.*)", line))
break
else:
continue
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

expectedFailure reason extraction misses same-line comments and regex is too strict

  • You’re scanning the two lines above the decorator, but fmt() renders the comment on the same line.
  • The regex requires a space after the marker; fmt() uses a semicolon.
-                        for line in lines[dec_node.lineno - 2 : dec_node.lineno]:
-                            if COMMENT not in line:
-                                continue
-                            reason = "".join(re.findall(rf"{COMMENT} (.*)", line))
+                        # Look at the decorator line itself and accept :, ; or space after the marker
+                        line = lines[dec_node.lineno - 1]
+                        if COMMENT in line:
+                            m = re.search(rf"{re.escape(COMMENT)}\s*[:;]?\s*(.*)$", line)
+                            reason = m.group(1) if m else ""
                             break
                         else:
                             continue

Also applies to: 118-118

🤖 Prompt for AI Agents
In scripts/lib_updater.py around lines 115 to 121 (and similarly at 118), the
current loop only checks the two lines before the decorator and uses a regex
that requires a space after the COMMENT marker, missing comments placed on the
same line or separated by a semicolon; update the slice to include the decorator
line (e.g., lines[dec_node.lineno - 2 : dec_node.lineno + 1]) so same-line
comments are scanned, and replace the strict regex with one that allows optional
separators and flexible spacing (e.g., use re.search with a pattern like
rf"{COMMENT}\s*(?:[:;]\s*)?(.*)" and extract group(1).strip() ), breaking when a
hit is found.

case (
UtMethod.Skip
| UtMethod.SkipIf
| UtMethod.ExpectedFailureIf
| UtMethod.ExpectedFailureIfWindows
):
reason = next(
(
node.value
for node in ast.walk(dec_node)
if isinstance(node, ast.Constant)
and isinstance(node.value, str)
and node.value.startswith(COMMENT)
),
None,
)

# If we didn't find a constant with the COMMENT, then we didn't put this decorator
if not reason:
continue

if attr_node.attr not in (
UtMethod.Skip,
UtMethod.ExpectedFailureIfWindows,
):
cond = ast.unparse(dec_node.args[0])
case _:
continue

yield cls(
parent_class,
fn_node.name,
UtMethod(attr_node.attr),
cond,
reason.replace(COMMENT, "").strip().lstrip(";").lstrip(":").strip(),
)


def iter_tests(
    tree: ast.Module,
) -> "Iterator[tuple[ast.ClassDef, ast.FunctionDef | ast.AsyncFunctionDef]]":
    """
    Yield every function defined directly inside a top-level class.

    Parameters
    ----------
    tree : ast.Module
        Parsed module to scan.

    Yields
    ------
    tuple[ast.ClassDef, ast.FunctionDef | ast.AsyncFunctionDef]
        The class node together with one of its (sync or async) function
        nodes, in source order. Module-level functions and classes nested
        inside other classes or functions are not visited.
    """
    for cls_node in tree.body:
        if not isinstance(cls_node, ast.ClassDef):
            continue

        for fn_node in cls_node.body:
            if isinstance(fn_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                yield (cls_node, fn_node)


def iter_patches(contents: str) -> "Iterator[PatchEntry]":
    """
    Parse `contents` as Python source and yield the patch entries found in it.

    A `SyntaxError` raised by `ast.parse` is deliberately left to propagate.
    """
    module = ast.parse(contents)
    source_lines = contents.splitlines()
    yield from PatchEntry.iter_patch_entires(module, source_lines)

Comment on lines +198 to +202
Copy link
Contributor

@coderabbitai coderabbitai bot Sep 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Handle unparseable files gracefully (bad-syntax tests)

ast.parse can raise SyntaxError on intentionally invalid test files; fail gracefully with a clear message.

-def iter_patches(contents: str) -> "Iterator[PatchEntry]":
-    lines = contents.splitlines()
-    tree = ast.parse(contents)
-    yield from PatchEntry.iter_patch_entires(tree, lines)
+def iter_patches(contents: str) -> "Iterator[PatchEntry]":
+    lines = contents.splitlines()
+    try:
+        tree = ast.parse(contents)
+    except SyntaxError as e:
+        print(f"ERROR: could not parse source for patch extraction: {e}", file=sys.stderr)
+        return
+    yield from PatchEntry.iter_patch_entires(tree, lines)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def iter_patches(contents: str) -> "Iterator[PatchEntry]":
lines = contents.splitlines()
tree = ast.parse(contents)
yield from PatchEntry.iter_patch_entires(tree, lines)
def iter_patches(contents: str) -> "Iterator[PatchEntry]":
lines = contents.splitlines()
try:
tree = ast.parse(contents)
except SyntaxError as e:
print(f"ERROR: could not parse source for patch extraction: {e}", file=sys.stderr)
return
yield from PatchEntry.iter_patch_entires(tree, lines)
🤖 Prompt for AI Agents
In scripts/lib_updater.py around lines 198 to 202, ast.parse(contents) can raise
SyntaxError for intentionally invalid test files; wrap the parse call in a
try/except that catches SyntaxError and handle it gracefully by raising a
clearer ValueError (or RuntimeError) with a human-readable message that includes
the original exception details (or alternatively return an empty iterator),
preserving the function signature and ensuring the original exception is chained
for debugging.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine and intended. You can ignore it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ShaharNaveh Understood! I'll ignore the error handling suggestion. The current behavior of letting ast.parse() raise SyntaxError directly is intentional for your use case.


✏️ Learnings added
Learnt from: ShaharNaveh
PR: RustPython/RustPython#6089
File: scripts/lib_updater.py:198-202
Timestamp: 2025-09-07T05:38:31.644Z
Learning: In scripts/lib_updater.py, the iter_patches function intentionally does not handle SyntaxError from ast.parse(contents). The author confirmed this behavior is fine and intended - the tool should fail fast on unparseable files rather than silently skip processing.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


🧠 Learnings used
Learnt from: CR
PR: RustPython/RustPython#0
File: .github/copilot-instructions.md:0-0
Timestamp: 2025-07-19T03:16:56.511Z
Learning: Applies to Lib/test/**/*.py : When tests fail due to unsupported syntax, keep the test as `unittest.expectedFailure`, document that it requires PEP 695 support, and focus on tests that can be fixed through Rust code changes only

Learnt from: ShaharNaveh
PR: RustPython/RustPython#6089
File: scripts/lib_updater.py:292-297
Timestamp: 2025-08-30T14:40:05.858Z
Learning: In scripts/lib_updater.py, the --inplace flag intentionally writes to orig_file (not remote_file) even though patches are applied to remote_file content. This workflow allows updating the original RustPython test file with patches applied to new upstream CPython content.


def read_infile(infile: str) -> str:
    """
    Return the full text of `infile`, where "-" means standard input.

    Regular paths are opened as UTF-8 text.
    """
    if infile != "-":
        with open(infile, encoding="utf-8") as stream:
            return stream.read()

    return sys.stdin.read()


def build_argparse() -> argparse.ArgumentParser:
    """
    Build the command-line parser.

    A subcommand is required and its name is stored under `pname`. Both
    subcommands take an optional positional `infile` that defaults to "-";
    the patch subcommand additionally takes a positional `src`.
    """
    root = argparse.ArgumentParser(
        description="Helper tool for updating files under Lib/"
    )
    commands = root.add_subparsers(dest="pname", required=True)

    # Gen
    gen_cmd = commands.add_parser(ProgName.Gen)
    gen_cmd.add_argument(
        "infile",
        nargs="?",
        default="-",
        help="File path to generate patches from, can get from stdin",
    )

    # Patch
    patch_cmd = commands.add_parser(ProgName.Patch)
    patch_cmd.add_argument("src", help="File path to apply patches for")
    patch_cmd.add_argument(
        "infile",
        nargs="?",
        default="-",
        help="File path containing patches, can get from stdin",
    )

    return root


if __name__ == "__main__":
parser = build_argparse()
args = parser.parse_args()

contents = read_infile(args.infile)
match args.pname:
case ProgName.Gen:
patches = list(map(dataclasses.asdict, iter_patches(contents)))
output = json.dumps(patches, indent=4)
case ProgName.Patch:
pass # TODO

sys.stdout.write(f"{output}\n")
sys.stdout.flush()