Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Lib/test/test_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,7 +1012,7 @@ def return_genexp():
code_lines = self.get_code_lines(genexp_code)
self.assertEqual(genexp_lines, code_lines)

# TODO: RUSTPYTHON
# TODO: RUSTPYTHON; implicit return line number after async for
@unittest.expectedFailure
def test_line_number_implicit_return_after_async_for(self):

Expand Down
8 changes: 3 additions & 5 deletions Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,6 @@ def test_setrecursionlimit_to_depth(self):
finally:
sys.setrecursionlimit(old_limit)

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_getwindowsversion(self):
# Raise SkipTest if sys doesn't have getwindowsversion attribute
test.support.get_attribute(sys, "getwindowsversion")
Expand Down Expand Up @@ -851,7 +850,6 @@ def test_subinterp_intern_singleton(self):
'''))
self.assertTrue(sys._is_interned(s))

@unittest.expectedFailure # TODO: RUSTPYTHON; needs update for context_aware_warnings
def test_sys_flags(self):
self.assertTrue(sys.flags)
attrs = ("debug",
Expand Down Expand Up @@ -880,7 +878,7 @@ def test_sys_flags_no_instantiation(self):
def test_sys_version_info_no_instantiation(self):
self.assert_raise_on_new_sys_type(sys.version_info)

@unittest.expectedFailure # TODO: RUSTPYTHON
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError not raised for getwindowsversion instantiation
def test_sys_getwindowsversion_no_instantiation(self):
# Skip if not being run on Windows.
test.support.get_attribute(sys, "getwindowsversion")
Expand Down Expand Up @@ -1057,12 +1055,12 @@ def check_locale_surrogateescape(self, locale):
'stdout: surrogateescape\n'
'stderr: backslashreplace\n')

@unittest.expectedFailure # TODO: RUSTPYTHON; stderr: backslashreplace
@unittest.expectedFailure # TODO: RUSTPYTHON; iso8859_1 codec not registered
@support.requires_subprocess()
def test_c_locale_surrogateescape(self):
self.check_locale_surrogateescape('C')

@unittest.expectedFailure # TODO: RUSTPYTHON; stderr: backslashreplace
@unittest.expectedFailure # TODO: RUSTPYTHON; iso8859_1 codec not registered
@support.requires_subprocess()
def test_posix_locale_surrogateescape(self):
self.check_locale_surrogateescape('POSIX')
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

122 changes: 120 additions & 2 deletions crates/vm/src/stdlib/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2607,6 +2607,92 @@ mod _io {
}
}

// Incremental-encoder stand-in that wraps a codec's plain `encode` callable.
// It carries no state between calls: `reset` and `setstate` do nothing and
// `getstate` always returns 0.
#[pyclass(module = "_io", name, no_attr)]
#[derive(Debug, PyPayload)]
struct StatelessIncrementalEncoder {
// Callable invoked by the `encode` method; it must return a 2-tuple.
encode: PyObjectRef,
// Error-handler name, passed as a second positional argument when present.
errors: Option<PyStrRef>,
// Value handed back by the `name` getter.
name: Option<PyStrRef>,
}

#[pyclass]
impl StatelessIncrementalEncoder {
#[pymethod]
fn encode(
&self,
input: PyObjectRef,
_final: OptionalArg<bool>,
vm: &VirtualMachine,
) -> PyResult {
let mut args: Vec<PyObjectRef> = vec![input];
if let Some(errors) = &self.errors {
args.push(errors.to_owned().into());
}
let res = self.encode.call(args, vm)?;
let tuple: PyTupleRef = res.try_into_value(vm)?;
if tuple.len() != 2 {
return Err(vm.new_type_error("encoder must return a tuple (object, integer)"));
}
Ok(tuple[0].clone())
}

// No-op: this encoder keeps no state that could be cleared.
#[pymethod]
fn reset(&self) {}

// Accepts `_state` and discards it; there is no internal state to restore.
#[pymethod]
fn setstate(&self, _state: PyObjectRef) {}

// Always reports the integer 0 as the encoder state.
#[pymethod]
fn getstate(&self, vm: &VirtualMachine) -> PyObjectRef {
vm.ctx.new_int(0).into()
}

// Getter for the `name` attribute: returns a clone of the stored name,
// or `None` when no name was recorded.
#[pygetset]
fn name(&self) -> Option<PyStrRef> {
self.name.clone()
}
Comment on lines +2610 to +2653
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Validate the consumed-length element from codec output.

The wrapper ignores the second tuple item entirely; if it’s not an integer, malformed codecs won’t surface errors. Consider validating it to match the codec contract.

🛠️ Suggested guard for the consumed-length slot
             let tuple: PyTupleRef = res.try_into_value(vm)?;
             if tuple.len() != 2 {
                 return Err(vm.new_type_error("encoder must return a tuple (object, integer)"));
             }
+            let _consumed: isize = isize::try_from_object(vm, tuple[1].clone()).map_err(|_| {
+                vm.new_type_error("encoder must return a tuple (object, integer)")
+            })?;
             Ok(tuple[0].clone())
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
#[pyclass(module = "_io", name, no_attr)]
#[derive(Debug, PyPayload)]
struct StatelessIncrementalEncoder {
encode: PyObjectRef,
errors: Option<PyStrRef>,
name: Option<PyStrRef>,
}
#[pyclass]
impl StatelessIncrementalEncoder {
#[pymethod]
fn encode(
&self,
input: PyObjectRef,
_final: OptionalArg<bool>,
vm: &VirtualMachine,
) -> PyResult {
let mut args: Vec<PyObjectRef> = vec![input];
if let Some(errors) = &self.errors {
args.push(errors.to_owned().into());
}
let res = self.encode.call(args, vm)?;
let tuple: PyTupleRef = res.try_into_value(vm)?;
if tuple.len() != 2 {
return Err(vm.new_type_error("encoder must return a tuple (object, integer)"));
}
Ok(tuple[0].clone())
}
#[pymethod]
fn reset(&self) {}
#[pymethod]
fn setstate(&self, _state: PyObjectRef) {}
#[pymethod]
fn getstate(&self, vm: &VirtualMachine) -> PyObjectRef {
vm.ctx.new_int(0).into()
}
#[pygetset]
fn name(&self) -> Option<PyStrRef> {
self.name.clone()
}
#[pyclass(module = "_io", name, no_attr)]
#[derive(Debug, PyPayload)]
struct StatelessIncrementalEncoder {
encode: PyObjectRef,
errors: Option<PyStrRef>,
name: Option<PyStrRef>,
}
#[pyclass]
impl StatelessIncrementalEncoder {
#[pymethod]
fn encode(
&self,
input: PyObjectRef,
_final: OptionalArg<bool>,
vm: &VirtualMachine,
) -> PyResult {
let mut args: Vec<PyObjectRef> = vec![input];
if let Some(errors) = &self.errors {
args.push(errors.to_owned().into());
}
let res = self.encode.call(args, vm)?;
let tuple: PyTupleRef = res.try_into_value(vm)?;
if tuple.len() != 2 {
return Err(vm.new_type_error("encoder must return a tuple (object, integer)"));
}
let _consumed: isize = isize::try_from_object(vm, tuple[1].clone()).map_err(|_| {
vm.new_type_error("encoder must return a tuple (object, integer)")
})?;
Ok(tuple[0].clone())
}
#[pymethod]
fn reset(&self) {}
#[pymethod]
fn setstate(&self, _state: PyObjectRef) {}
#[pymethod]
fn getstate(&self, vm: &VirtualMachine) -> PyObjectRef {
vm.ctx.new_int(0).into()
}
#[pygetset]
fn name(&self) -> Option<PyStrRef> {
self.name.clone()
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/vm/src/stdlib/io.rs` around lines 2610 - 2653, The encode wrapper in
StatelessIncrementalEncoder currently ignores the second tuple item from the
codec result; update the encode method to validate that res.try_into_value(vm)?
yields a 2-tuple where the second element is an integer (non-negative if
appropriate to your codec contract) and raise a TypeError via vm.new_type_error
when it is missing or not an int (reusing the existing error message "encoder
must return a tuple (object, integer)"). Locate the encode method and the tuple
variable, convert/validate tuple[1] as an integer (e.g., via try_into_value or
checking .isinstance), and only then return tuple[0].clone(), ensuring malformed
codec outputs surface as errors.

}

#[pyclass(module = "_io", name, no_attr)]
#[derive(Debug, PyPayload)]
struct StatelessIncrementalDecoder {
decode: PyObjectRef,
errors: Option<PyStrRef>,
}

#[pyclass]
impl StatelessIncrementalDecoder {
#[pymethod]
fn decode(
&self,
input: PyObjectRef,
_final: OptionalArg<bool>,
vm: &VirtualMachine,
) -> PyResult {
let mut args: Vec<PyObjectRef> = vec![input];
if let Some(errors) = &self.errors {
args.push(errors.to_owned().into());
}
let res = self.decode.call(args, vm)?;
let tuple: PyTupleRef = res.try_into_value(vm)?;
if tuple.len() != 2 {
return Err(vm.new_type_error("decoder must return a tuple (object, integer)"));
}
Ok(tuple[0].clone())
}

#[pymethod]
fn getstate(&self, vm: &VirtualMachine) -> (PyBytesRef, u64) {
(vm.ctx.empty_bytes.to_owned(), 0)
}

#[pymethod]
fn setstate(&self, _state: PyTupleRef, _vm: &VirtualMachine) {}

#[pymethod]
fn reset(&self) {}
}
Comment on lines +2656 to +2694
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Validate the consumed-length element from codec output.

Same concern as the encoder wrapper: the second tuple element should be an integer; otherwise invalid codec implementations pass silently.

🛠️ Suggested guard for the consumed-length slot
             let tuple: PyTupleRef = res.try_into_value(vm)?;
             if tuple.len() != 2 {
                 return Err(vm.new_type_error("decoder must return a tuple (object, integer)"));
             }
+            let _consumed: isize = isize::try_from_object(vm, tuple[1].clone()).map_err(|_| {
+                vm.new_type_error("decoder must return a tuple (object, integer)")
+            })?;
             Ok(tuple[0].clone())
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/vm/src/stdlib/io.rs` around lines 2656 - 2694, In
StatelessIncrementalDecoder::decode, validate that the second tuple element is
an integer and raise a TypeError if not; after converting res to PyTupleRef and
checking tuple.len(), try to convert tuple[1] to an integer (using the VM's
integer/index conversion helpers) and return Err(vm.new_type_error("decoder must
return a tuple (object, integer)")) when conversion fails, otherwise proceed to
return tuple[0].clone() as before. Ensure this check is applied in the decode
method to mirror the encoder wrapper's guard.


#[pyattr]
#[pyclass(name = "TextIOWrapper", base = _TextIOBase)]
#[derive(Debug, Default)]
Expand Down Expand Up @@ -2830,7 +2916,25 @@ mod _io {

let encoder = if vm.call_method(buffer, "writable", ())?.try_to_bool(vm)? {
let incremental_encoder =
codec.get_incremental_encoder(Some(errors.to_owned()), vm)?;
match codec.get_incremental_encoder(Some(errors.to_owned()), vm) {
Ok(encoder) => encoder,
Err(err)
if err.fast_isinstance(vm.ctx.exceptions.type_error)
|| err.fast_isinstance(vm.ctx.exceptions.attribute_error) =>
{
let name = vm
.get_attribute_opt(codec.as_tuple().to_owned().into(), "name")?
.and_then(|obj| obj.downcast::<PyStr>().ok());
StatelessIncrementalEncoder {
encode: codec.get_encode_func().to_owned(),
errors: Some(errors.to_owned()),
name,
}
.into_ref(&vm.ctx)
.into()
}
Err(err) => return Err(err),
};
let encoding_name = vm.get_attribute_opt(incremental_encoder.clone(), "name")?;
let encode_func = encoding_name.and_then(|name| {
let name = name.downcast_ref::<PyStr>()?;
Expand All @@ -2845,7 +2949,21 @@ mod _io {
};

let decoder = if vm.call_method(buffer, "readable", ())?.try_to_bool(vm)? {
let decoder = codec.get_incremental_decoder(Some(errors.to_owned()), vm)?;
let decoder = match codec.get_incremental_decoder(Some(errors.to_owned()), vm) {
Ok(decoder) => decoder,
Err(err)
if err.fast_isinstance(vm.ctx.exceptions.type_error)
|| err.fast_isinstance(vm.ctx.exceptions.attribute_error) =>
{
StatelessIncrementalDecoder {
decode: codec.get_decode_func().to_owned(),
errors: Some(errors.to_owned()),
}
.into_ref(&vm.ctx)
.into()
}
Err(err) => return Err(err),
};
if let Newlines::Universal | Newlines::Passthrough = newline {
let args = IncrementalNewlineDecoderArgs {
decoder,
Expand Down
27 changes: 18 additions & 9 deletions crates/vm/src/stdlib/sys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ mod sys {
RUST_MULTIARCH.replace("-unknown", "")
}

#[pyclass(no_attr, name = "_BootstrapStderr", module = "sys")]
#[pyclass(no_attr, name = "_BootstrapStderr")]
#[derive(Debug, PyPayload)]
pub(super) struct BootstrapStderr;

Expand All @@ -95,7 +95,7 @@ mod sys {

/// Lightweight stdio wrapper for sandbox mode (no host_env).
/// Directly uses Rust's std::io for stdin/stdout/stderr without FileIO.
#[pyclass(no_attr, name = "_SandboxStdio", module = "sys")]
#[pyclass(no_attr, name = "_SandboxStdio")]
#[derive(Debug, PyPayload)]
pub struct SandboxStdio {
pub fd: i32,
Expand Down Expand Up @@ -1565,10 +1565,6 @@ mod sys {
safe_path: bool,
/// -X warn_default_encoding, PYTHONWARNDEFAULTENCODING
warn_default_encoding: u8,
/// -X thread_inherit_context, whether new threads inherit context from parent
thread_inherit_context: bool,
/// -X context_aware_warnings, whether warnings are context aware
context_aware_warnings: bool,
}

impl FlagsData {
Expand All @@ -1592,13 +1588,11 @@ mod sys {
int_max_str_digits: settings.int_max_str_digits,
safe_path: settings.safe_path,
warn_default_encoding: settings.warn_default_encoding as u8,
thread_inherit_context: settings.thread_inherit_context,
context_aware_warnings: settings.context_aware_warnings,
}
}
}

#[pystruct_sequence(name = "flags", module = "sys", data = "FlagsData", no_attr)]
#[pystruct_sequence(name = "flags", data = "FlagsData", no_attr)]
pub(super) struct PyFlags;

#[pyclass(with(PyStructSequence))]
Expand All @@ -1607,6 +1601,16 @@ mod sys {
// Constructor slot that unconditionally fails: every call returns a
// TypeError, ignoring the class and arguments it receives.
fn slot_new(_cls: PyTypeRef, _args: FuncArgs, vm: &VirtualMachine) -> PyResult {
Err(vm.new_type_error("cannot create 'sys.flags' instances"))
}

// Getter for the `context_aware_warnings` flag. The value is read from the
// VM's configured settings on each access rather than from the flags data.
#[pygetset]
fn context_aware_warnings(&self, vm: &VirtualMachine) -> bool {
vm.state.config.settings.context_aware_warnings
}

// Getter for the `thread_inherit_context` flag. The value is read from the
// VM's configured settings on each access rather than from the flags data.
#[pygetset]
fn thread_inherit_context(&self, vm: &VirtualMachine) -> bool {
vm.state.config.settings.thread_inherit_context
}
}

#[cfg(feature = "threading")]
Expand Down Expand Up @@ -1776,10 +1780,15 @@ mod sys {
build: u32,
platform: u32,
service_pack: String,
#[pystruct_sequence(skip)]
service_pack_major: u16,
#[pystruct_sequence(skip)]
service_pack_minor: u16,
#[pystruct_sequence(skip)]
suite_mask: u16,
#[pystruct_sequence(skip)]
product_type: u8,
#[pystruct_sequence(skip)]
platform_version: (u32, u32, u32),
}

Expand Down
32 changes: 24 additions & 8 deletions crates/vm/src/vm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,21 @@ impl VirtualMachine {
self.state
.codec_registry
.register_manual("utf8", utf8_codec)?;

// Register latin-1 / iso8859-1 aliases needed very early for stdio
// bootstrap (e.g. PYTHONIOENCODING=latin-1).
if cfg!(feature = "freeze-stdlib") {
self.import("encodings.latin_1", 0)?;
let latin1_module = sys_modules.get_item("encodings.latin_1", self)?;
let getregentry = latin1_module.get_attr("getregentry", self)?;
let codec_info = getregentry.call((), self)?;
let latin1_codec: crate::codecs::PyCodec = codec_info.try_into_value(self)?;
for name in ["latin-1", "latin_1", "latin1", "iso8859-1", "iso8859_1"] {
self.state
.codec_registry
.register_manual(name, latin1_codec.clone())?;
}
}
Ok(())
}

Expand Down Expand Up @@ -367,8 +382,8 @@ impl VirtualMachine {
let io = import::import_builtin(self, "_io")?;

// Full stdio: FileIO → BufferedWriter → TextIOWrapper
#[cfg(feature = "host_env")]
let make_stdio = |name: &str, fd: i32, write: bool| {
#[cfg(all(feature = "host_env", feature = "stdio"))]
let make_stdio = |name: &str, fd: i32, write: bool| -> PyResult<PyObjectRef> {
let buffered_stdio = self.state.config.settings.buffered_stdio;
let unbuffered = write && !buffered_stdio;
let buf = crate::stdlib::io::open(
Expand Down Expand Up @@ -397,12 +412,13 @@ impl VirtualMachine {
let errors = if fd == 2 {
Some("backslashreplace")
} else {
self.state
.config
.settings
.stdio_errors
.as_deref()
.or(Some("surrogateescape"))
self.state.config.settings.stdio_errors.as_deref().or(
if self.state.config.settings.stdio_encoding.is_some() {
Some("strict")
} else {
Some("surrogateescape")
},
)
};

let stdio = self.call_method(
Expand Down
Loading