Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
606 changes: 366 additions & 240 deletions Lib/_pycodecs.py

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions Lib/encodings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,23 @@ def _alias_mbcs(encoding):
pass

codecs.register(_alias_mbcs)

from ._win_cp_codecs import create_win32_code_page_codec

def win32_code_page_search_function(encoding):
    """Codec search function mapping ``cp<N>`` names to code page codecs.

    Intended to be passed to ``codecs.register()``.

    Args:
        encoding: The encoding name being looked up.

    Returns:
        The ``CodecInfo`` built by ``create_win32_code_page_codec(N)`` when
        the name has the form ``cp<N>`` and encoding a probe character with
        code page ``N`` succeeds; otherwise ``None`` so the codec machinery
        can move on to the next search function.
    """
    encoding = encoding.lower()
    if not encoding.startswith('cp'):
        return None
    try:
        cp = int(encoding[2:])
    except ValueError:
        return None
    # The code page functions are not guaranteed to exist in the codecs
    # module.  A search function must answer "not mine" with None; letting an
    # AttributeError escape would turn an ordinary failed lookup of an
    # unknown ``cp<N>`` name into the wrong exception type.
    code_page_encode = getattr(codecs, 'code_page_encode', None)
    if code_page_encode is None:
        return None
    # Test if the code page is supported
    try:
        code_page_encode(cp, 'x')
    except (OverflowError, OSError):
        return None

    return create_win32_code_page_codec(cp)

codecs.register(win32_code_page_search_function)
36 changes: 36 additions & 0 deletions Lib/encodings/_win_cp_codecs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import codecs

def create_win32_code_page_codec(cp):
    """Build a ``codecs.CodecInfo`` for the code page numbered *cp*.

    Every entry point of the returned codec delegates to
    ``codecs.code_page_encode`` / ``codecs.code_page_decode`` with *cp* bound
    as the code page argument.

    Args:
        cp: Integer code page identifier; it is also used to form the codec
            name ``cp<cp>``.

    Returns:
        A ``codecs.CodecInfo`` with stateless, incremental and stream
        encoder/decoder implementations.

    Raises:
        ImportError: If the ``codecs`` module does not provide the code page
            functions.
    """
    from codecs import code_page_encode, code_page_decode

    def encode(input, errors='strict'):
        return code_page_encode(cp, input, errors)

    def decode(input, errors='strict'):
        # Stateless decoding always sees the complete input, hence final=True.
        return code_page_decode(cp, input, errors, True)

    class IncrementalEncoder(codecs.IncrementalEncoder):
        def encode(self, input, final=False):
            # code_page_encode returns (bytes, consumed); only the bytes are
            # part of the incremental encoder contract.
            return code_page_encode(cp, input, self.errors)[0]

    class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
        def _buffer_decode(self, input, errors, final):
            return code_page_decode(cp, input, errors, final)

    class StreamWriter(codecs.StreamWriter):
        def encode(self, input, errors='strict'):
            return code_page_encode(cp, input, errors)

    class StreamReader(codecs.StreamReader):
        # codecs.StreamReader.read() calls self.decode(data, self.errors)
        # with only two arguments, so ``final`` needs a default; without one
        # every stream read fails with TypeError.  final=False lets the
        # reader keep undecoded trailing bytes for the next read.
        def decode(self, input, errors='strict', final=False):
            return code_page_decode(cp, input, errors, final)

    return codecs.CodecInfo(
        name=f'cp{cp}',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
3 changes: 1 addition & 2 deletions Lib/test/test_bz2.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,7 @@ def testOpenBytesFilename(self):
self.assertEqual(f.read(), self.DATA)
self.assertEqual(f.name, str_filename)

@unittest.expectedFailure # TODO: RUSTPYTHON
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: <FakePath 'Z:\\TEMP\\tmphoipjcen'> != 'Z:\\TEMP\\tmphoipjcen'
def testOpenPathLikeFilename(self):
filename = FakePath(self.filename)
with BZ2File(filename, "wb") as f:
Expand Down Expand Up @@ -1189,7 +1189,6 @@ def test_encoding_error_handler(self):
as f:
self.assertEqual(f.read(), "foobar")

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_newline(self):
# Test with explicit newline (universal newline mode disabled).
text = self.TEXT.decode("ascii")
Expand Down
68 changes: 2 additions & 66 deletions Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,6 @@ class UTF32Test(ReadTest, unittest.TestCase):
b'\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
b'\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')

@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
def test_only_one_bom(self):
_,_,reader,writer = codecs.lookup(self.encoding)
# encode some stream
Expand All @@ -481,7 +480,6 @@ def test_only_one_bom(self):
f = reader(s)
self.assertEqual(f.read(), "spamspam")

@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
def test_badbom(self):
s = io.BytesIO(4*b"\xff")
f = codecs.getreader(self.encoding)(s)
Expand All @@ -491,7 +489,6 @@ def test_badbom(self):
f = codecs.getreader(self.encoding)(s)
self.assertRaises(UnicodeDecodeError, f.read)

@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
def test_partial(self):
self.check_partial(
"\x00\xff\u0100\uffff\U00010000",
Expand Down Expand Up @@ -523,7 +520,6 @@ def test_partial(self):
]
)

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_handlers(self):
self.assertEqual(('\ufffd', 1),
codecs.utf_32_decode(b'\x01', 'replace', True))
Expand All @@ -534,7 +530,6 @@ def test_errors(self):
self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
b"\xff", "strict", True)

@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
def test_decoder_state(self):
self.check_state_handling_decode(self.encoding,
"spamspam", self.spamle)
Expand All @@ -551,35 +546,24 @@ def test_issue8941(self):
self.assertEqual('\U00010000' * 1024,
codecs.utf_32_decode(encoded_be)[0])

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_lone_surrogates(self):
return super().test_lone_surrogates()

@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
def test_bug1098990_a(self):
return super().test_bug1098990_a()

@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
def test_bug1098990_b(self):
return super().test_bug1098990_b()

@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
def test_bug1175396(self):
return super().test_bug1175396()

@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
def test_incremental_surrogatepass(self):
return super().test_incremental_surrogatepass()

@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
def test_mixed_readline_and_read(self):
return super().test_mixed_readline_and_read()

@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
def test_readline(self):
return super().test_readline()

@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
def test_readlinequeue(self):
return super().test_readlinequeue()

Expand Down Expand Up @@ -636,10 +620,6 @@ def test_issue8941(self):
self.assertEqual('\U00010000' * 1024,
codecs.utf_32_le_decode(encoded)[0])

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_lone_surrogates(self):
return super().test_lone_surrogates()




Expand Down Expand Up @@ -693,10 +673,6 @@ def test_issue8941(self):
self.assertEqual('\U00010000' * 1024,
codecs.utf_32_be_decode(encoded)[0])

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_lone_surrogates(self):
return super().test_lone_surrogates()




Expand Down Expand Up @@ -739,7 +715,6 @@ def test_badbom(self):
f = codecs.getreader(self.encoding)(s)
self.assertRaises(UnicodeDecodeError, f.read)

@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
def test_partial(self):
self.check_partial(
"\x00\xff\u0100\uffff\U00010000",
Expand All @@ -761,7 +736,6 @@ def test_partial(self):
]
)

@unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range
def test_handlers(self):
self.assertEqual(('\ufffd', 1),
codecs.utf_16_decode(b'\x01', 'replace', True))
Expand Down Expand Up @@ -805,11 +779,6 @@ def test_invalid_modes(self):
self.assertIn("can't have text and binary mode at once",
str(cm.exception))

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_lone_surrogates(self):
return super().test_lone_surrogates()

@unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range
def test_incremental_surrogatepass(self):
return super().test_incremental_surrogatepass()

Expand All @@ -819,7 +788,6 @@ class UTF16LETest(ReadTest, unittest.TestCase):
encoding = "utf-16-le"
ill_formed_sequence = b"\x80\xdc"

@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
def test_partial(self):
self.check_partial(
"\x00\xff\u0100\uffff\U00010000",
Expand All @@ -839,7 +807,6 @@ def test_partial(self):
]
)

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_errors(self):
tests = [
(b'\xff', '\ufffd'),
Expand All @@ -861,11 +828,6 @@ def test_nonbmp(self):
self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding),
"\U00010203")

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_lone_surrogates(self):
return super().test_lone_surrogates()

@unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range
def test_incremental_surrogatepass(self):
return super().test_incremental_surrogatepass()

Expand All @@ -874,7 +836,6 @@ class UTF16BETest(ReadTest, unittest.TestCase):
encoding = "utf-16-be"
ill_formed_sequence = b"\xdc\x80"

@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
def test_partial(self):
self.check_partial(
"\x00\xff\u0100\uffff\U00010000",
Expand All @@ -894,7 +855,6 @@ def test_partial(self):
]
)

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_errors(self):
tests = [
(b'\xff', '\ufffd'),
Expand All @@ -916,11 +876,6 @@ def test_nonbmp(self):
self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
"\U00010203")

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_lone_surrogates(self):
return super().test_lone_surrogates()

@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
def test_incremental_surrogatepass(self):
return super().test_incremental_surrogatepass()

Expand Down Expand Up @@ -970,7 +925,6 @@ def test_decode_error(self):
self.assertEqual(data.decode(self.encoding, error_handler),
expected)

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_lone_surrogates(self):
super().test_lone_surrogates()
# not sure if this is making sense for
Expand Down Expand Up @@ -1023,7 +977,6 @@ def test_incremental_errors(self):
class UTF7Test(ReadTest, unittest.TestCase):
encoding = "utf-7"

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_ascii(self):
# Set D (directly encoded characters)
set_d = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
Expand All @@ -1050,7 +1003,6 @@ def test_ascii(self):
b'+AAAAAQACAAMABAAFAAYABwAIAAsADAAOAA8AEAARABIAEwAU'
b'ABUAFgAXABgAGQAaABsAHAAdAB4AHwBcAH4Afw-')

@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: expected at least 5 arguments, got 1
def test_partial(self):
self.check_partial(
'a+-b\x00c\x80d\u0100e\U00010000f',
Expand Down Expand Up @@ -1090,7 +1042,6 @@ def test_partial(self):
]
)

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_errors(self):
tests = [
(b'\xffb', '\ufffdb'),
Expand Down Expand Up @@ -1121,7 +1072,6 @@ def test_errors(self):
raw, 'strict', True)
self.assertEqual(raw.decode('utf-7', 'replace'), expected)

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_nonbmp(self):
self.assertEqual('\U000104A0'.encode(self.encoding), b'+2AHcoA-')
self.assertEqual('\ud801\udca0'.encode(self.encoding), b'+2AHcoA-')
Expand All @@ -1137,7 +1087,6 @@ def test_nonbmp(self):
self.assertEqual(b'+IKwgrNgB3KA'.decode(self.encoding),
'\u20ac\u20ac\U000104A0')

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_lone_surrogates(self):
tests = [
(b'a+2AE-b', 'a\ud801b'),
Expand All @@ -1158,15 +1107,9 @@ def test_lone_surrogates(self):
with self.subTest(raw=raw):
self.assertEqual(raw.decode('utf-7', 'replace'), expected)

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_bug1175396(self):
return super().test_bug1175396()

@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: expected at least 5 arguments, got 1
def test_readline(self):
return super().test_readline()

@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: utf_7_decode() takes from 1 to 2 positional arguments but 3 were given
def test_incremental_surrogatepass(self):
return super().test_incremental_surrogatepass()

Expand Down Expand Up @@ -3062,7 +3005,6 @@ def test_latin1(self):


class BomTest(unittest.TestCase):
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'?
def test_seek0(self):
data = "1234567890"
tests = ("utf-16",
Expand Down Expand Up @@ -3457,7 +3399,6 @@ def test_invalid_code_page(self):
self.assertRaises(OSError, codecs.code_page_encode, 123, 'a')
self.assertRaises(OSError, codecs.code_page_decode, 123, b'a')

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_code_page_name(self):
self.assertRaisesRegex(UnicodeEncodeError, 'cp932',
codecs.code_page_encode, 932, '\xff')
Expand Down Expand Up @@ -3524,7 +3465,7 @@ def check_encode(self, cp, tests):
self.assertRaises(UnicodeEncodeError,
text.encode, f'cp{cp}', errors)

@unittest.expectedFailure # TODO: RUSTPYTHON
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_cp932(self):
self.check_encode(932, (
('abc', 'strict', b'abc'),
Expand Down Expand Up @@ -3559,7 +3500,6 @@ def test_cp932(self):
(b'\x81\x00abc', 'backslashreplace', '\\x81\x00abc'),
))

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_cp1252(self):
self.check_encode(1252, (
('abc', 'strict', b'abc'),
Expand All @@ -3578,7 +3518,6 @@ def test_cp1252(self):
(b'\xff', 'strict', '\xff'),
))

@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
def test_cp708(self):
self.check_encode(708, (
('abc2%', 'strict', b'abc2%'),
Expand Down Expand Up @@ -3608,7 +3547,6 @@ def test_cp708(self):
(b'[\xa0]', 'surrogatepass', None),
))

@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
def test_cp20106(self):
self.check_encode(20106, (
('abc', 'strict', b'abc'),
Expand All @@ -3633,7 +3571,7 @@ def test_cp20106(self):
(b'(\xbf)', 'surrogatepass', None),
))

@unittest.expectedFailure # TODO: RUSTPYTHON
@unittest.expectedFailure # TODO: RUSTPYTHON
def test_cp_utf7(self):
cp = 65000
self.check_encode(cp, (
Expand All @@ -3654,7 +3592,6 @@ def test_cp_utf7(self):
(b'[\xff]', 'strict', '[\xff]'),
))

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_multibyte_encoding(self):
self.check_decode(932, (
(b'\x84\xe9\x80', 'ignore', '\u9a3e'),
Expand Down Expand Up @@ -3688,7 +3625,6 @@ def test_code_page_decode_flags(self):
self.assertEqual(codecs.code_page_decode(42, b'abc'),
('\uf061\uf062\uf063', 3))

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_incremental(self):
decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
self.assertEqual(decoded, ('', 0))
Expand Down
Loading
Loading