Skip to content

Commit c8cdbf7

Browse files
author
martin.v.loewis
committed
Issue #3811: The Unicode database was updated to 5.1.
Reviewed by Fredrik Lundh and Marc-Andre Lemburg. git-svn-id: http://svn.python.org/projects/python/trunk@66362 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent a2bfe60 commit c8cdbf7

9 files changed

Lines changed: 18540 additions & 15272 deletions

File tree

Doc/library/unicodedata.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616

1717
This module provides access to the Unicode Character Database which defines
1818
character properties for all Unicode characters. The data in this database is
19-
based on the :file:`UnicodeData.txt` file version 4.1.0 which is publicly
19+
based on the :file:`UnicodeData.txt` file version 5.1.0 which is publicly
2020
available from ftp://ftp.unicode.org/.
2121

2222
The module uses the same names and symbols as defined by the UnicodeData File
23-
Format 4.1.0 (see http://www.unicode.org/Public/4.1.0/ucd/UCD.html). It defines
23+
Format 5.1.0 (see http://www.unicode.org/Public/5.1.0/ucd/UCD.html). It defines
2424
the following functions:
2525

2626

Lib/test/test_unicodedata.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
class UnicodeMethodsTest(unittest.TestCase):
1717

1818
# update this, if the database changes
19-
expectedchecksum = 'c198ed264497f108434b3f576d4107237221cc8a'
19+
expectedchecksum = 'aef99984a58c8e1e5363a3175f2ff9608599a93e'
2020

2121
def test_method_checksum(self):
2222
h = hashlib.sha1()
@@ -75,7 +75,7 @@ def tearDown(self):
7575
class UnicodeFunctionsTest(UnicodeDatabaseTest):
7676

7777
# update this, if the database changes
78-
expectedchecksum = '4e389f97e9f88b8b7ab743121fd643089116f9f2'
78+
expectedchecksum = '3136d5afd787dc2bcb1bdcac95e385349fbebbca'
7979

8080
def test_function_checksum(self):
8181
data = []
@@ -225,6 +225,16 @@ def test_digit_numeric_consistent(self):
225225
def test_bug_1704793(self):
226226
self.assertEquals(self.db.lookup("GOTHIC LETTER FAIHU"), u'\U00010346')
227227

228+
def test_ucd_510(self):
229+
import unicodedata
230+
# In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
231+
self.assert_(unicodedata.mirrored(u"\u0f3a"))
232+
self.assert_(not unicodedata.ucd_3_2_0.mirrored(u"\u0f3a"))
233+
# Also, we now have two ways of representing
234+
# the upper-case mapping: as delta, or as absolute value
235+
self.assert_(u"a".upper()==u'A')
236+
self.assert_(u"\u1d79".upper()==u'\ua77d')
237+
228238
def test_main():
229239
test.test_support.run_unittest(
230240
UnicodeMiscTest,

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ C-API
6868
Library
6969
-------
7070

71+
- Issue #3811: The Unicode database was updated to 5.1.
72+
7173
- Issue #3809: Fixed spurious 'test.blah' file left behind by test_logging.
7274

7375
- Issue 3781: Clean up the API for warnings.catch_warnings() by having it

Modules/unicodedata.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
/* ------------------------------------------------------------------------
22
3-
unicodedata -- Provides access to the Unicode 4.1 data base.
3+
unicodedata -- Provides access to the Unicode 5.1 data base.
44
5-
Data was extracted from the Unicode 4.1 UnicodeData.txt file.
5+
Data was extracted from the Unicode 5.1 UnicodeData.txt file.
66
77
Written by Marc-Andre Lemburg (mal@lemburg.com).
88
Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
@@ -34,6 +34,7 @@ typedef struct change_record {
3434
const unsigned char bidir_changed;
3535
const unsigned char category_changed;
3636
const unsigned char decimal_changed;
37+
const unsigned char mirrored_changed;
3738
const int numeric_changed;
3839
} change_record;
3940

@@ -354,6 +355,8 @@ unicodedata_mirrored(PyObject *self, PyObject *args)
354355
const change_record *old = get_old_record(self, c);
355356
if (old->category_changed == 0)
356357
index = 0; /* unassigned */
358+
else if (old->mirrored_changed != 0xFF)
359+
index = old->mirrored_changed;
357360
}
358361
return PyInt_FromLong(index);
359362
}
@@ -1177,11 +1180,11 @@ PyDoc_STRVAR(unicodedata_docstring,
11771180
"This module provides access to the Unicode Character Database which\n\
11781181
defines character properties for all Unicode characters. The data in\n\
11791182
this database is based on the UnicodeData.txt file version\n\
1180-
4.1.0 which is publically available from ftp://ftp.unicode.org/.\n\
1183+
5.1.0 which is publically available from ftp://ftp.unicode.org/.\n\
11811184
\n\
11821185
The module uses the same names and symbols as defined by the\n\
1183-
UnicodeData File Format 4.1.0 (see\n\
1184-
http://www.unicode.org/Public/4.1.0/ucd/UCD.html).");
1186+
UnicodeData File Format 5.1.0 (see\n\
1187+
http://www.unicode.org/Public/5.1.0/ucd/UCD.html).");
11851188

11861189
PyMODINIT_FUNC
11871190
initunicodedata(void)

0 commit comments

Comments
 (0)