RustPython · youknowone · Feb 6, 2026 · Feb 4, 2026 · Feb 5, 2026 · Feb 5, 2026
diff --git a/Lib/_pycodecs.py b/Lib/_pycodecs.py
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
@@ -172,3 +172,23 @@ def _alias_mbcs(encoding):
             pass
 
     codecs.register(_alias_mbcs)
+
+    from ._win_cp_codecs import create_win32_code_page_codec
+
+    def win32_code_page_search_function(encoding):  # codec search function for names like 'cp1252'
+        encoding = encoding.lower()
+        if not encoding.startswith('cp'):
+            return None  # not a 'cp'-prefixed name
+        try:
+            cp = int(encoding[2:])
+        except ValueError:
+            return None  # the text after 'cp' is not an integer
+        # Test if the code page is supported by encoding a probe character
+        try:
+            codecs.code_page_encode(cp, 'x')
+        except (OverflowError, OSError):
+            return None  # code_page_encode rejected this code page number
+
+        return create_win32_code_page_codec(cp)
+
+    codecs.register(win32_code_page_search_function)
diff --git a/Lib/encodings/_win_cp_codecs.py b/Lib/encodings/_win_cp_codecs.py
@@ -0,0 +1,48 @@
+import codecs
+
+def create_win32_code_page_codec(cp):
+    """Build a CodecInfo backed by the Windows code page number *cp*.
+
+    Every entry point of the returned codec delegates to
+    codecs.code_page_encode() / codecs.code_page_decode() with *cp* bound.
+    The returned CodecInfo is named 'cp<cp>'.
+    """
+    from codecs import code_page_encode, code_page_decode
+
+    def encode(input, errors='strict'):
+        return code_page_encode(cp, input, errors)
+
+    def decode(input, errors='strict'):
+        # Stateless decoding is given the complete input, hence final=True.
+        return code_page_decode(cp, input, errors, True)
+
+    class IncrementalEncoder(codecs.IncrementalEncoder):
+        def encode(self, input, final=False):
+            # Return only the encoded data, not the (data, length) pair.
+            return code_page_encode(cp, input, self.errors)[0]
+
+    class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+        def _buffer_decode(self, input, errors, final):
+            return code_page_decode(cp, input, errors, final)
+
+    class StreamWriter(codecs.StreamWriter):
+        def encode(self, input, errors='strict'):
+            return code_page_encode(cp, input, errors)
+
+    class StreamReader(codecs.StreamReader):
+        # codecs.StreamReader.read() calls self.decode(data, self.errors)
+        # with no third argument, so *final* must be optional.  It defaults
+        # to False so that a trailing incomplete sequence is left unconsumed
+        # for the next read instead of being reported as an error.
+        def decode(self, input, errors='strict', final=False):
+            return code_page_decode(cp, input, errors, final)
+
+    return codecs.CodecInfo(
+        name=f'cp{cp}',
+        encode=encode,
+        decode=decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py
@@ -730,7 +730,7 @@ def testOpenBytesFilename(self):
             self.assertEqual(f.read(), self.DATA)
             self.assertEqual(f.name, str_filename)
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
+    @unittest.expectedFailure  # TODO: RUSTPYTHON; AssertionError: <FakePath 'Z:\\TEMP\\tmphoipjcen'> != 'Z:\\TEMP\\tmphoipjcen'
     def testOpenPathLikeFilename(self):
         filename = FakePath(self.filename)
         with BZ2File(filename, "wb") as f:
@@ -1189,7 +1189,6 @@ def test_encoding_error_handler(self):
                 as f:
             self.assertEqual(f.read(), "foobar")
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
     def test_newline(self):
         # Test with explicit newline (universal newline mode disabled).
         text = self.TEXT.decode("ascii")

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
@@ -465,7 +465,6 @@ class UTF32Test(ReadTest, unittest.TestCase):
               b'\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
               b'\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
     def test_only_one_bom(self):
         _,_,reader,writer = codecs.lookup(self.encoding)
         # encode some stream
@@ -481,7 +480,6 @@ def test_only_one_bom(self):
         f = reader(s)
         self.assertEqual(f.read(), "spamspam")
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
     def test_badbom(self):
         s = io.BytesIO(4*b"\xff")
         f = codecs.getreader(self.encoding)(s)
@@ -491,7 +489,6 @@ def test_badbom(self):
         f = codecs.getreader(self.encoding)(s)
         self.assertRaises(UnicodeDecodeError, f.read)
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
     def test_partial(self):
         self.check_partial(
             "\x00\xff\u0100\uffff\U00010000",
@@ -523,7 +520,6 @@ def test_partial(self):
             ]
         )
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_handlers(self):
         self.assertEqual(('\ufffd', 1),
                          codecs.utf_32_decode(b'\x01', 'replace', True))
@@ -534,7 +530,6 @@ def test_errors(self):
         self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
                           b"\xff", "strict", True)
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
     def test_decoder_state(self):
         self.check_state_handling_decode(self.encoding,
                                          "spamspam", self.spamle)
@@ -551,35 +546,24 @@ def test_issue8941(self):
         self.assertEqual('\U00010000' * 1024,
                          codecs.utf_32_decode(encoded_be)[0])
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
-    def test_lone_surrogates(self):
-        return super().test_lone_surrogates()
-
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
     def test_bug1098990_a(self):
         return super().test_bug1098990_a()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
     def test_bug1098990_b(self):
         return super().test_bug1098990_b()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
     def test_bug1175396(self):
         return super().test_bug1175396()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
     def test_incremental_surrogatepass(self):
         return super().test_incremental_surrogatepass()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
     def test_mixed_readline_and_read(self):
         return super().test_mixed_readline_and_read()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
     def test_readline(self):
         return super().test_readline()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
     def test_readlinequeue(self):
         return super().test_readlinequeue()
 
@@ -636,10 +620,6 @@ def test_issue8941(self):
         self.assertEqual('\U00010000' * 1024,
                          codecs.utf_32_le_decode(encoded)[0])
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
-    def test_lone_surrogates(self):
-        return super().test_lone_surrogates()
-
 
 
 
@@ -693,10 +673,6 @@ def test_issue8941(self):
         self.assertEqual('\U00010000' * 1024,
                          codecs.utf_32_be_decode(encoded)[0])
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
-    def test_lone_surrogates(self):
-        return super().test_lone_surrogates()
-
 
 
 
@@ -739,7 +715,6 @@ def test_badbom(self):
         f = codecs.getreader(self.encoding)(s)
         self.assertRaises(UnicodeDecodeError, f.read)
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
     def test_partial(self):
         self.check_partial(
             "\x00\xff\u0100\uffff\U00010000",
@@ -761,7 +736,6 @@ def test_partial(self):
             ]
         )
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; IndexError: index out of range
     def test_handlers(self):
         self.assertEqual(('\ufffd', 1),
                          codecs.utf_16_decode(b'\x01', 'replace', True))
@@ -805,11 +779,6 @@ def test_invalid_modes(self):
             self.assertIn("can't have text and binary mode at once",
                           str(cm.exception))
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
-    def test_lone_surrogates(self):
-        return super().test_lone_surrogates()
-
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; IndexError: index out of range
     def test_incremental_surrogatepass(self):
         return super().test_incremental_surrogatepass()
 
@@ -819,7 +788,6 @@ class UTF16LETest(ReadTest, unittest.TestCase):
     encoding = "utf-16-le"
     ill_formed_sequence = b"\x80\xdc"
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
     def test_partial(self):
         self.check_partial(
             "\x00\xff\u0100\uffff\U00010000",
@@ -839,7 +807,6 @@ def test_partial(self):
             ]
         )
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_errors(self):
         tests = [
             (b'\xff', '\ufffd'),
@@ -861,11 +828,6 @@ def test_nonbmp(self):
         self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding),
                          "\U00010203")
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
-    def test_lone_surrogates(self):
-        return super().test_lone_surrogates()
-
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; IndexError: index out of range
     def test_incremental_surrogatepass(self):
         return super().test_incremental_surrogatepass()
 
@@ -874,7 +836,6 @@ class UTF16BETest(ReadTest, unittest.TestCase):
     encoding = "utf-16-be"
     ill_formed_sequence = b"\xdc\x80"
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
     def test_partial(self):
         self.check_partial(
             "\x00\xff\u0100\uffff\U00010000",
@@ -894,7 +855,6 @@ def test_partial(self):
             ]
         )
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_errors(self):
         tests = [
             (b'\xff', '\ufffd'),
@@ -916,11 +876,6 @@ def test_nonbmp(self):
         self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
                          "\U00010203")
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
-    def test_lone_surrogates(self):
-        return super().test_lone_surrogates()
-
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
     def test_incremental_surrogatepass(self):
         return super().test_incremental_surrogatepass()
 
@@ -970,7 +925,6 @@ def test_decode_error(self):
                 self.assertEqual(data.decode(self.encoding, error_handler),
                                  expected)
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_lone_surrogates(self):
         super().test_lone_surrogates()
         # not sure if this is making sense for
@@ -1023,7 +977,6 @@ def test_incremental_errors(self):
 class UTF7Test(ReadTest, unittest.TestCase):
     encoding = "utf-7"
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_ascii(self):
         # Set D (directly encoded characters)
         set_d = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
@@ -1050,7 +1003,6 @@ def test_ascii(self):
                          b'+AAAAAQACAAMABAAFAAYABwAIAAsADAAOAA8AEAARABIAEwAU'
                          b'ABUAFgAXABgAGQAaABsAHAAdAB4AHwBcAH4Afw-')
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; TypeError: expected at least 5 arguments, got 1
     def test_partial(self):
         self.check_partial(
             'a+-b\x00c\x80d\u0100e\U00010000f',
@@ -1090,7 +1042,6 @@ def test_partial(self):
             ]
         )
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_errors(self):
         tests = [
             (b'\xffb', '\ufffdb'),
@@ -1121,7 +1072,6 @@ def test_errors(self):
                                 raw, 'strict', True)
                 self.assertEqual(raw.decode('utf-7', 'replace'), expected)
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_nonbmp(self):
         self.assertEqual('\U000104A0'.encode(self.encoding), b'+2AHcoA-')
         self.assertEqual('\ud801\udca0'.encode(self.encoding), b'+2AHcoA-')
@@ -1137,7 +1087,6 @@ def test_nonbmp(self):
         self.assertEqual(b'+IKwgrNgB3KA'.decode(self.encoding),
                          '\u20ac\u20ac\U000104A0')
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_lone_surrogates(self):
         tests = [
             (b'a+2AE-b', 'a\ud801b'),
@@ -1158,15 +1107,9 @@ def test_lone_surrogates(self):
             with self.subTest(raw=raw):
                 self.assertEqual(raw.decode('utf-7', 'replace'), expected)
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
-    def test_bug1175396(self):
-        return super().test_bug1175396()
-
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; TypeError: expected at least 5 arguments, got 1
     def test_readline(self):
         return super().test_readline()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; TypeError: utf_7_decode() takes from 1 to 2 positional arguments but 3 were given
     def test_incremental_surrogatepass(self):
         return super().test_incremental_surrogatepass()
 
@@ -3062,7 +3005,6 @@ def test_latin1(self):
 
 
 class BomTest(unittest.TestCase):
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
     def test_seek0(self):
         data = "1234567890"
         tests = ("utf-16",
@@ -3457,7 +3399,6 @@ def test_invalid_code_page(self):
         self.assertRaises(OSError, codecs.code_page_encode, 123, 'a')
         self.assertRaises(OSError, codecs.code_page_decode, 123, b'a')
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
     def test_code_page_name(self):
         self.assertRaisesRegex(UnicodeEncodeError, 'cp932',
             codecs.code_page_encode, 932, '\xff')
@@ -3524,7 +3465,7 @@ def check_encode(self, cp, tests):
                 self.assertRaises(UnicodeEncodeError,
                     text.encode, f'cp{cp}', errors)
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
+    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_cp932(self):
         self.check_encode(932, (
             ('abc', 'strict', b'abc'),
@@ -3559,7 +3500,6 @@ def test_cp932(self):
             (b'\x81\x00abc', 'backslashreplace', '\\x81\x00abc'),
         ))
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
     def test_cp1252(self):
         self.check_encode(1252, (
             ('abc', 'strict', b'abc'),
@@ -3578,7 +3518,6 @@ def test_cp1252(self):
             (b'\xff', 'strict', '\xff'),
         ))
 
-    @unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
     def test_cp708(self):
         self.check_encode(708, (
             ('abc2%', 'strict', b'abc2%'),
@@ -3608,7 +3547,6 @@ def test_cp708(self):
             (b'[\xa0]', 'surrogatepass', None),
         ))
 
-    @unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
     def test_cp20106(self):
         self.check_encode(20106, (
             ('abc', 'strict', b'abc'),
@@ -3633,7 +3571,7 @@ def test_cp20106(self):
             (b'(\xbf)', 'surrogatepass', None),
         ))
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
+    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_cp_utf7(self):
         cp = 65000
         self.check_encode(cp, (
@@ -3654,7 +3592,6 @@ def test_cp_utf7(self):
             (b'[\xff]', 'strict', '[\xff]'),
         ))
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
     def test_multibyte_encoding(self):
         self.check_decode(932, (
             (b'\x84\xe9\x80', 'ignore', '\u9a3e'),
@@ -3688,7 +3625,6 @@ def test_code_page_decode_flags(self):
         self.assertEqual(codecs.code_page_decode(42, b'abc'),
                          ('\uf061\uf062\uf063', 3))
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
     def test_incremental(self):
         decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
         self.assertEqual(decoded, ('', 0))