|
19 | 19 | # data from |
20 | 20 | # http://snoops.roy202.org/testerman/browser/trunk/plugins/codecs/gsm0338.py |
21 | 21 |
|
22 | | -def_regular_mapping = [ |
23 | | - ('\x00', u'\u0040'), # COMMERCIAL AT |
24 | | -# ('\x00', u'\u0000'), # NULL (see note above) |
25 | | - ('\x01', u'\u00A3'), # POUND SIGN |
26 | | - ('\x02', u'\u0024'), # DOLLAR SIGN |
27 | | - ('\x03', u'\u00A5'), # YEN SIGN |
28 | | - ('\x04', u'\u00E8'), # LATIN SMALL LETTER E WITH GRAVE |
29 | | - ('\x05', u'\u00E9'), # LATIN SMALL LETTER E WITH ACUTE |
30 | | - ('\x06', u'\u00F9'), # LATIN SMALL LETTER U WITH GRAVE |
31 | | - ('\x07', u'\u00EC'), # LATIN SMALL LETTER I WITH GRAVE |
32 | | - ('\x08', u'\u00F2'), # LATIN SMALL LETTER O WITH GRAVE |
33 | | - ('\x09', u'\u00C7'), # LATIN CAPITAL LETTER C WITH CEDILLA |
34 | | - # The Unicode page suggests this is a mistake, but |
35 | | - # it's still in the latest version of the spec and |
36 | | - # our implementation has to be exact. |
| 22 | +# default GSM 03.38 -> unicode |
| 23 | +def_regular_decode_dict = { |
| 24 | + '\x00': u'\u0040', # COMMERCIAL AT |
| 25 | + '\x01': u'\u00A3', # POUND SIGN |
| 26 | + '\x02': u'\u0024', # DOLLAR SIGN |
| 27 | + '\x03': u'\u00A5', # YEN SIGN |
| 28 | + '\x04': u'\u00E8', # LATIN SMALL LETTER E WITH GRAVE |
| 29 | + '\x05': u'\u00E9', # LATIN SMALL LETTER E WITH ACUTE |
| 30 | + '\x06': u'\u00F9', # LATIN SMALL LETTER U WITH GRAVE |
| 31 | + '\x07': u'\u00EC', # LATIN SMALL LETTER I WITH GRAVE |
| 32 | + '\x08': u'\u00F2', # LATIN SMALL LETTER O WITH GRAVE |
| 33 | + '\x09': u'\u00C7', # LATIN CAPITAL LETTER C WITH CEDILLA |
| 34 | + # The Unicode page suggests this is a mistake, but |
| 35 | + # it's still in the latest version of the spec and |
| 36 | + # our implementation has to be exact. |
37 | 37 |
|
38 | | - ('\x0A', u'\u000A'), # LINE FEED |
39 | | - ('\x0B', u'\u00D8'), # LATIN CAPITAL LETTER O WITH STROKE |
40 | | - ('\x0C', u'\u00F8'), # LATIN SMALL LETTER O WITH STROKE |
41 | | - ('\x0D', u'\u000D'), # CARRIAGE RETURN |
42 | | - ('\x0E', u'\u00C5'), # LATIN CAPITAL LETTER A WITH RING ABOVE |
43 | | - ('\x0F', u'\u00E5'), # LATIN SMALL LETTER A WITH RING ABOVE |
44 | | - ('\x10', u'\u0394'), # GREEK CAPITAL LETTER DELTA |
45 | | - ('\x11', u'\u005F'), # LOW LINE |
46 | | - ('\x12', u'\u03A6'), # GREEK CAPITAL LETTER PHI |
47 | | - ('\x13', u'\u0393'), # GREEK CAPITAL LETTER GAMMA |
48 | | - ('\x14', u'\u039B'), # GREEK CAPITAL LETTER LAMDA |
49 | | - ('\x15', u'\u03A9'), # GREEK CAPITAL LETTER OMEGA |
50 | | - ('\x16', u'\u03A0'), # GREEK CAPITAL LETTER PI |
51 | | - ('\x17', u'\u03A8'), # GREEK CAPITAL LETTER PSI |
52 | | - ('\x18', u'\u03A3'), # GREEK CAPITAL LETTER SIGMA |
53 | | - ('\x19', u'\u0398'), # GREEK CAPITAL LETTER THETA |
54 | | - ('\x1A', u'\u039E'), # GREEK CAPITAL LETTER XI |
55 | | - ('\x1C', u'\u00C6'), # LATIN CAPITAL LETTER AE |
56 | | - ('\x1D', u'\u00E6'), # LATIN SMALL LETTER AE |
57 | | - ('\x1E', u'\u00DF'), # LATIN SMALL LETTER SHARP S (German) |
58 | | - ('\x1F', u'\u00C9'), # LATIN CAPITAL LETTER E WITH ACUTE |
59 | | - ('\x20', u'\u0020'), # SPACE |
60 | | - ('\x21', u'\u0021'), # EXCLAMATION MARK |
61 | | - ('\x22', u'\u0022'), # QUOTATION MARK |
62 | | - ('\x23', u'\u0023'), # NUMBER SIGN |
63 | | - ('\x24', u'\u00A4'), # CURRENCY SIGN |
64 | | - ('\x25', u'\u0025'), # PERCENT SIGN |
65 | | - ('\x26', u'\u0026'), # AMPERSAND |
66 | | - ('\x27', u'\u0027'), # APOSTROPHE |
67 | | - ('\x28', u'\u0028'), # LEFT PARENTHESIS |
68 | | - ('\x29', u'\u0029'), # RIGHT PARENTHESIS |
69 | | - ('\x2A', u'\u002A'), # ASTERISK |
70 | | - ('\x2B', u'\u002B'), # PLUS SIGN |
71 | | - ('\x2C', u'\u002C'), # COMMA |
72 | | - ('\x2D', u'\u002D'), # HYPHEN-MINUS |
73 | | - ('\x2E', u'\u002E'), # FULL STOP |
74 | | - ('\x2F', u'\u002F'), # SOLIDUS |
75 | | - ('\x30', u'\u0030'), # DIGIT ZERO |
76 | | - ('\x31', u'\u0031'), # DIGIT ONE |
77 | | - ('\x32', u'\u0032'), # DIGIT TWO |
78 | | - ('\x33', u'\u0033'), # DIGIT THREE |
79 | | - ('\x34', u'\u0034'), # DIGIT FOUR |
80 | | - ('\x35', u'\u0035'), # DIGIT FIVE |
81 | | - ('\x36', u'\u0036'), # DIGIT SIX |
82 | | - ('\x37', u'\u0037'), # DIGIT SEVEN |
83 | | - ('\x38', u'\u0038'), # DIGIT EIGHT |
84 | | - ('\x39', u'\u0039'), # DIGIT NINE |
85 | | - ('\x3A', u'\u003A'), # COLON |
86 | | - ('\x3B', u'\u003B'), # SEMICOLON |
87 | | - ('\x3C', u'\u003C'), # LESS-THAN SIGN |
88 | | - ('\x3D', u'\u003D'), # EQUALS SIGN |
89 | | - ('\x3E', u'\u003E'), # GREATER-THAN SIGN |
90 | | - ('\x3F', u'\u003F'), # QUESTION MARK |
91 | | - ('\x40', u'\u00A1'), # INVERTED EXCLAMATION MARK |
92 | | - ('\x41', u'\u0041'), # LATIN CAPITAL LETTER A |
93 | | - ('\x42', u'\u0042'), # LATIN CAPITAL LETTER B |
94 | | - ('\x43', u'\u0043'), # LATIN CAPITAL LETTER C |
95 | | - ('\x44', u'\u0044'), # LATIN CAPITAL LETTER D |
96 | | - ('\x45', u'\u0045'), # LATIN CAPITAL LETTER E |
97 | | - ('\x46', u'\u0046'), # LATIN CAPITAL LETTER F |
98 | | - ('\x47', u'\u0047'), # LATIN CAPITAL LETTER G |
99 | | - ('\x48', u'\u0048'), # LATIN CAPITAL LETTER H |
100 | | - ('\x49', u'\u0049'), # LATIN CAPITAL LETTER I |
101 | | - ('\x4A', u'\u004A'), # LATIN CAPITAL LETTER J |
102 | | - ('\x4B', u'\u004B'), # LATIN CAPITAL LETTER K |
103 | | - ('\x4C', u'\u004C'), # LATIN CAPITAL LETTER L |
104 | | - ('\x4D', u'\u004D'), # LATIN CAPITAL LETTER M |
105 | | - ('\x4E', u'\u004E'), # LATIN CAPITAL LETTER N |
106 | | - ('\x4F', u'\u004F'), # LATIN CAPITAL LETTER O |
107 | | - ('\x50', u'\u0050'), # LATIN CAPITAL LETTER P |
108 | | - ('\x51', u'\u0051'), # LATIN CAPITAL LETTER Q |
109 | | - ('\x52', u'\u0052'), # LATIN CAPITAL LETTER R |
110 | | - ('\x53', u'\u0053'), # LATIN CAPITAL LETTER S |
111 | | - ('\x54', u'\u0054'), # LATIN CAPITAL LETTER T |
112 | | - ('\x55', u'\u0055'), # LATIN CAPITAL LETTER U |
113 | | - ('\x56', u'\u0056'), # LATIN CAPITAL LETTER V |
114 | | - ('\x57', u'\u0057'), # LATIN CAPITAL LETTER W |
115 | | - ('\x58', u'\u0058'), # LATIN CAPITAL LETTER X |
116 | | - ('\x59', u'\u0059'), # LATIN CAPITAL LETTER Y |
117 | | - ('\x5A', u'\u005A'), # LATIN CAPITAL LETTER Z |
118 | | - ('\x5B', u'\u00C4'), # LATIN CAPITAL LETTER A WITH DIAERESIS |
119 | | - ('\x5C', u'\u00D6'), # LATIN CAPITAL LETTER O WITH DIAERESIS |
120 | | - ('\x5D', u'\u00D1'), # LATIN CAPITAL LETTER N WITH TILDE |
121 | | - ('\x5E', u'\u00DC'), # LATIN CAPITAL LETTER U WITH DIAERESIS |
122 | | - ('\x5F', u'\u00A7'), # SECTION SIGN |
123 | | - ('\x60', u'\u00BF'), # INVERTED QUESTION MARK |
124 | | - ('\x61', u'\u0061'), # LATIN SMALL LETTER A |
125 | | - ('\x62', u'\u0062'), # LATIN SMALL LETTER B |
126 | | - ('\x63', u'\u0063'), # LATIN SMALL LETTER C |
127 | | - ('\x64', u'\u0064'), # LATIN SMALL LETTER D |
128 | | - ('\x65', u'\u0065'), # LATIN SMALL LETTER E |
129 | | - ('\x66', u'\u0066'), # LATIN SMALL LETTER F |
130 | | - ('\x67', u'\u0067'), # LATIN SMALL LETTER G |
131 | | - ('\x68', u'\u0068'), # LATIN SMALL LETTER H |
132 | | - ('\x69', u'\u0069'), # LATIN SMALL LETTER I |
133 | | - ('\x6A', u'\u006A'), # LATIN SMALL LETTER J |
134 | | - ('\x6B', u'\u006B'), # LATIN SMALL LETTER K |
135 | | - ('\x6C', u'\u006C'), # LATIN SMALL LETTER L |
136 | | - ('\x6D', u'\u006D'), # LATIN SMALL LETTER M |
137 | | - ('\x6E', u'\u006E'), # LATIN SMALL LETTER N |
138 | | - ('\x6F', u'\u006F'), # LATIN SMALL LETTER O |
139 | | - ('\x70', u'\u0070'), # LATIN SMALL LETTER P |
140 | | - ('\x71', u'\u0071'), # LATIN SMALL LETTER Q |
141 | | - ('\x72', u'\u0072'), # LATIN SMALL LETTER R |
142 | | - ('\x73', u'\u0073'), # LATIN SMALL LETTER S |
143 | | - ('\x74', u'\u0074'), # LATIN SMALL LETTER T |
144 | | - ('\x75', u'\u0075'), # LATIN SMALL LETTER U |
145 | | - ('\x76', u'\u0076'), # LATIN SMALL LETTER V |
146 | | - ('\x77', u'\u0077'), # LATIN SMALL LETTER W |
147 | | - ('\x78', u'\u0078'), # LATIN SMALL LETTER X |
148 | | - ('\x79', u'\u0079'), # LATIN SMALL LETTER Y |
149 | | - ('\x7A', u'\u007A'), # LATIN SMALL LETTER Z |
150 | | - ('\x7B', u'\u00E4'), # LATIN SMALL LETTER A WITH DIAERESIS |
151 | | - ('\x7C', u'\u00F6'), # LATIN SMALL LETTER O WITH DIAERESIS |
152 | | - ('\x7D', u'\u00F1'), # LATIN SMALL LETTER N WITH TILDE |
153 | | - ('\x7E', u'\u00FC'), # LATIN SMALL LETTER U WITH DIAERESIS |
154 | | - ('\x7F', u'\u00E0'), # LATIN SMALL LETTER A WITH GRAVE |
155 | | -] |
| 38 | + '\x0A': u'\u000A', # LINE FEED |
| 39 | + '\x0B': u'\u00D8', # LATIN CAPITAL LETTER O WITH STROKE |
| 40 | + '\x0C': u'\u00F8', # LATIN SMALL LETTER O WITH STROKE |
| 41 | + '\x0D': u'\u000D', # CARRIAGE RETURN |
| 42 | + '\x0E': u'\u00C5', # LATIN CAPITAL LETTER A WITH RING ABOVE |
| 43 | + '\x0F': u'\u00E5', # LATIN SMALL LETTER A WITH RING ABOVE |
| 44 | + '\x10': u'\u0394', # GREEK CAPITAL LETTER DELTA |
| 45 | + '\x11': u'\u005F', # LOW LINE |
| 46 | + '\x12': u'\u03A6', # GREEK CAPITAL LETTER PHI |
| 47 | + '\x13': u'\u0393', # GREEK CAPITAL LETTER GAMMA |
| 48 | + '\x14': u'\u039B', # GREEK CAPITAL LETTER LAMDA |
| 49 | + '\x15': u'\u03A9', # GREEK CAPITAL LETTER OMEGA |
| 50 | + '\x16': u'\u03A0', # GREEK CAPITAL LETTER PI |
| 51 | + '\x17': u'\u03A8', # GREEK CAPITAL LETTER PSI |
| 52 | + '\x18': u'\u03A3', # GREEK CAPITAL LETTER SIGMA |
| 53 | + '\x19': u'\u0398', # GREEK CAPITAL LETTER THETA |
| 54 | + '\x1A': u'\u039E', # GREEK CAPITAL LETTER XI |
| 55 | + '\x1C': u'\u00C6', # LATIN CAPITAL LETTER AE |
| 56 | + '\x1D': u'\u00E6', # LATIN SMALL LETTER AE |
| 57 | + '\x1E': u'\u00DF', # LATIN SMALL LETTER SHARP S (German) |
| 58 | + '\x1F': u'\u00C9', # LATIN CAPITAL LETTER E WITH ACUTE |
| 59 | + '\x20': u'\u0020', # SPACE |
| 60 | + '\x21': u'\u0021', # EXCLAMATION MARK |
| 61 | + '\x22': u'\u0022', # QUOTATION MARK |
| 62 | + '\x23': u'\u0023', # NUMBER SIGN |
| 63 | + '\x24': u'\u00A4', # CURRENCY SIGN |
| 64 | + '\x25': u'\u0025', # PERCENT SIGN |
| 65 | + '\x26': u'\u0026', # AMPERSAND |
| 66 | + '\x27': u'\u0027', # APOSTROPHE |
| 67 | + '\x28': u'\u0028', # LEFT PARENTHESIS |
| 68 | + '\x29': u'\u0029', # RIGHT PARENTHESIS |
| 69 | + '\x2A': u'\u002A', # ASTERISK |
| 70 | + '\x2B': u'\u002B', # PLUS SIGN |
| 71 | + '\x2C': u'\u002C', # COMMA |
| 72 | + '\x2D': u'\u002D', # HYPHEN-MINUS |
| 73 | + '\x2E': u'\u002E', # FULL STOP |
| 74 | + '\x2F': u'\u002F', # SOLIDUS |
| 75 | + '\x30': u'\u0030', # DIGIT ZERO |
| 76 | + '\x31': u'\u0031', # DIGIT ONE |
| 77 | + '\x32': u'\u0032', # DIGIT TWO |
| 78 | + '\x33': u'\u0033', # DIGIT THREE |
| 79 | + '\x34': u'\u0034', # DIGIT FOUR |
| 80 | + '\x35': u'\u0035', # DIGIT FIVE |
| 81 | + '\x36': u'\u0036', # DIGIT SIX |
| 82 | + '\x37': u'\u0037', # DIGIT SEVEN |
| 83 | + '\x38': u'\u0038', # DIGIT EIGHT |
| 84 | + '\x39': u'\u0039', # DIGIT NINE |
| 85 | + '\x3A': u'\u003A', # COLON |
| 86 | + '\x3B': u'\u003B', # SEMICOLON |
| 87 | + '\x3C': u'\u003C', # LESS-THAN SIGN |
| 88 | + '\x3D': u'\u003D', # EQUALS SIGN |
| 89 | + '\x3E': u'\u003E', # GREATER-THAN SIGN |
| 90 | + '\x3F': u'\u003F', # QUESTION MARK |
| 91 | + '\x40': u'\u00A1', # INVERTED EXCLAMATION MARK |
| 92 | + '\x41': u'\u0041', # LATIN CAPITAL LETTER A |
| 93 | + '\x42': u'\u0042', # LATIN CAPITAL LETTER B |
| 94 | + '\x43': u'\u0043', # LATIN CAPITAL LETTER C |
| 95 | + '\x44': u'\u0044', # LATIN CAPITAL LETTER D |
| 96 | + '\x45': u'\u0045', # LATIN CAPITAL LETTER E |
| 97 | + '\x46': u'\u0046', # LATIN CAPITAL LETTER F |
| 98 | + '\x47': u'\u0047', # LATIN CAPITAL LETTER G |
| 99 | + '\x48': u'\u0048', # LATIN CAPITAL LETTER H |
| 100 | + '\x49': u'\u0049', # LATIN CAPITAL LETTER I |
| 101 | + '\x4A': u'\u004A', # LATIN CAPITAL LETTER J |
| 102 | + '\x4B': u'\u004B', # LATIN CAPITAL LETTER K |
| 103 | + '\x4C': u'\u004C', # LATIN CAPITAL LETTER L |
| 104 | + '\x4D': u'\u004D', # LATIN CAPITAL LETTER M |
| 105 | + '\x4E': u'\u004E', # LATIN CAPITAL LETTER N |
| 106 | + '\x4F': u'\u004F', # LATIN CAPITAL LETTER O |
| 107 | + '\x50': u'\u0050', # LATIN CAPITAL LETTER P |
| 108 | + '\x51': u'\u0051', # LATIN CAPITAL LETTER Q |
| 109 | + '\x52': u'\u0052', # LATIN CAPITAL LETTER R |
| 110 | + '\x53': u'\u0053', # LATIN CAPITAL LETTER S |
| 111 | + '\x54': u'\u0054', # LATIN CAPITAL LETTER T |
| 112 | + '\x55': u'\u0055', # LATIN CAPITAL LETTER U |
| 113 | + '\x56': u'\u0056', # LATIN CAPITAL LETTER V |
| 114 | + '\x57': u'\u0057', # LATIN CAPITAL LETTER W |
| 115 | + '\x58': u'\u0058', # LATIN CAPITAL LETTER X |
| 116 | + '\x59': u'\u0059', # LATIN CAPITAL LETTER Y |
| 117 | + '\x5A': u'\u005A', # LATIN CAPITAL LETTER Z |
| 118 | + '\x5B': u'\u00C4', # LATIN CAPITAL LETTER A WITH DIAERESIS |
| 119 | + '\x5C': u'\u00D6', # LATIN CAPITAL LETTER O WITH DIAERESIS |
| 120 | + '\x5D': u'\u00D1', # LATIN CAPITAL LETTER N WITH TILDE |
| 121 | + '\x5E': u'\u00DC', # LATIN CAPITAL LETTER U WITH DIAERESIS |
| 122 | + '\x5F': u'\u00A7', # SECTION SIGN |
| 123 | + '\x60': u'\u00BF', # INVERTED QUESTION MARK |
| 124 | + '\x61': u'\u0061', # LATIN SMALL LETTER A |
| 125 | + '\x62': u'\u0062', # LATIN SMALL LETTER B |
| 126 | + '\x63': u'\u0063', # LATIN SMALL LETTER C |
| 127 | + '\x64': u'\u0064', # LATIN SMALL LETTER D |
| 128 | + '\x65': u'\u0065', # LATIN SMALL LETTER E |
| 129 | + '\x66': u'\u0066', # LATIN SMALL LETTER F |
| 130 | + '\x67': u'\u0067', # LATIN SMALL LETTER G |
| 131 | + '\x68': u'\u0068', # LATIN SMALL LETTER H |
| 132 | + '\x69': u'\u0069', # LATIN SMALL LETTER I |
| 133 | + '\x6A': u'\u006A', # LATIN SMALL LETTER J |
| 134 | + '\x6B': u'\u006B', # LATIN SMALL LETTER K |
| 135 | + '\x6C': u'\u006C', # LATIN SMALL LETTER L |
| 136 | + '\x6D': u'\u006D', # LATIN SMALL LETTER M |
| 137 | + '\x6E': u'\u006E', # LATIN SMALL LETTER N |
| 138 | + '\x6F': u'\u006F', # LATIN SMALL LETTER O |
| 139 | + '\x70': u'\u0070', # LATIN SMALL LETTER P |
| 140 | + '\x71': u'\u0071', # LATIN SMALL LETTER Q |
| 141 | + '\x72': u'\u0072', # LATIN SMALL LETTER R |
| 142 | + '\x73': u'\u0073', # LATIN SMALL LETTER S |
| 143 | + '\x74': u'\u0074', # LATIN SMALL LETTER T |
| 144 | + '\x75': u'\u0075', # LATIN SMALL LETTER U |
| 145 | + '\x76': u'\u0076', # LATIN SMALL LETTER V |
| 146 | + '\x77': u'\u0077', # LATIN SMALL LETTER W |
| 147 | + '\x78': u'\u0078', # LATIN SMALL LETTER X |
| 148 | + '\x79': u'\u0079', # LATIN SMALL LETTER Y |
| 149 | + '\x7A': u'\u007A', # LATIN SMALL LETTER Z |
| 150 | + '\x7B': u'\u00E4', # LATIN SMALL LETTER A WITH DIAERESIS |
| 151 | + '\x7C': u'\u00F6', # LATIN SMALL LETTER O WITH DIAERESIS |
| 152 | + '\x7D': u'\u00F1', # LATIN SMALL LETTER N WITH TILDE |
| 153 | + '\x7E': u'\u00FC', # LATIN SMALL LETTER U WITH DIAERESIS |
| 154 | + '\x7F': u'\u00E0', # LATIN SMALL LETTER A WITH GRAVE |
| 155 | +} |
156 | 156 |
|
157 | | -# Escaped characters |
158 | | -def_escaped_mapping = [ |
159 | | - ('\x0A', u'\u000C'), # FORM FEED |
160 | | - ('\x14', u'\u005E'), # CIRCUMFLEX ACCENT |
161 | | - ('\x28', u'\u007B'), # LEFT CURLY BRACKET |
162 | | - ('\x29', u'\u007D'), # RIGHT CURLY BRACKET |
163 | | - ('\x2F', u'\u005C'), # REVERSE SOLIDUS |
164 | | - ('\x3C', u'\u005B'), # LEFT SQUARE BRACKET |
165 | | - ('\x3D', u'\u007E'), # TILDE |
166 | | - ('\x3E', u'\u005D'), # RIGHT SQUARE BRACKET |
167 | | - ('\x40', u'\u007C'), # VERTICAL LINE |
168 | | - ('\x65', u'\u20AC'), # EURO SIGN |
169 | | -] |
| 157 | +# default GSM 03.38 escaped characters -> unicode |
| 158 | +def_escape_decode_dict = { |
| 159 | + '\x0A': u'\u000C', # FORM FEED |
| 160 | + '\x14': u'\u005E', # CIRCUMFLEX ACCENT |
| 161 | + '\x28': u'\u007B', # LEFT CURLY BRACKET |
| 162 | + '\x29': u'\u007D', # RIGHT CURLY BRACKET |
| 163 | + '\x2F': u'\u005C', # REVERSE SOLIDUS |
| 164 | + '\x3C': u'\u005B', # LEFT SQUARE BRACKET |
| 165 | + '\x3D': u'\u007E', # TILDE |
| 166 | + '\x3E': u'\u005D', # RIGHT SQUARE BRACKET |
| 167 | + '\x40': u'\u007C', # VERTICAL LINE |
| 168 | + '\x65': u'\u20AC', # EURO SIGN |
| 169 | +} |
170 | 170 |
|
171 | 171 | # Replacement characters, default is question mark. Used when it is not too |
172 | 172 | # important to ensure exact UTF-8 -> GSM -> UTF-8 equivalence, such as when |
173 | 173 | # humans read and write SMS. But for USSD and other M2M applications it's |
174 | 174 | # important to ensure the conversion is exact. |
175 | | -def_replace_mapping = [ |
176 | | - ('\x09', u'\u00E7'), # LATIN SMALL LETTER C WITH CEDILLA |
| 175 | +def_replace_encode_dict = { |
| 176 | + u'\u00E7': '\x09', # LATIN SMALL LETTER C WITH CEDILLA |
177 | 177 |
|
178 | | - ('\x41', u'\u0391'), # GREEK CAPITAL LETTER ALPHA |
179 | | - ('\x42', u'\u0392'), # GREEK CAPITAL LETTER BETA |
180 | | - ('\x45', u'\u0395'), # GREEK CAPITAL LETTER EPSILON |
181 | | - ('\x48', u'\u0397'), # GREEK CAPITAL LETTER ETA |
182 | | - ('\x49', u'\u0399'), # GREEK CAPITAL LETTER IOTA |
183 | | - ('\x4B', u'\u039A'), # GREEK CAPITAL LETTER KAPPA |
184 | | - ('\x4D', u'\u039C'), # GREEK CAPITAL LETTER MU |
185 | | - ('\x4E', u'\u039D'), # GREEK CAPITAL LETTER NU |
186 | | - ('\x4F', u'\u039F'), # GREEK CAPITAL LETTER OMICRON |
187 | | - ('\x50', u'\u03A1'), # GREEK CAPITAL LETTER RHO |
188 | | - ('\x54', u'\u03A4'), # GREEK CAPITAL LETTER TAU |
189 | | - ('\x58', u'\u03A7'), # GREEK CAPITAL LETTER CHI |
190 | | - ('\x59', u'\u03A5'), # GREEK CAPITAL LETTER UPSILON |
191 | | - ('\x5A', u'\u0396'), # GREEK CAPITAL LETTER ZETA |
192 | | -] |
| 178 | + u'\u0391': '\x41', # GREEK CAPITAL LETTER ALPHA |
| 179 | + u'\u0392': '\x42', # GREEK CAPITAL LETTER BETA |
| 180 | + u'\u0395': '\x45', # GREEK CAPITAL LETTER EPSILON |
| 181 | + u'\u0397': '\x48', # GREEK CAPITAL LETTER ETA |
| 182 | + u'\u0399': '\x49', # GREEK CAPITAL LETTER IOTA |
| 183 | + u'\u039A': '\x4B', # GREEK CAPITAL LETTER KAPPA |
| 184 | + u'\u039C': '\x4D', # GREEK CAPITAL LETTER MU |
| 185 | + u'\u039D': '\x4E', # GREEK CAPITAL LETTER NU |
| 186 | + u'\u039F': '\x4F', # GREEK CAPITAL LETTER OMICRON |
| 187 | + u'\u03A1': '\x50', # GREEK CAPITAL LETTER RHO |
| 188 | + u'\u03A4': '\x54', # GREEK CAPITAL LETTER TAU |
| 189 | + u'\u03A7': '\x58', # GREEK CAPITAL LETTER CHI |
| 190 | + u'\u03A5': '\x59', # GREEK CAPITAL LETTER UPSILON |
| 191 | + u'\u0396': '\x5A', # GREEK CAPITAL LETTER ZETA |
| 192 | +} |
193 | 193 |
|
194 | 194 | QUESTION_MARK = chr(0x3f) |
195 | 195 |
|
196 | 196 | # unicode -> default GSM 03.38 |
197 | | -def_regular_encode_dict = dict([(u, g) for g, u in def_regular_mapping]) |
| 197 | +def_regular_encode_dict = \ |
| 198 | + dict((u, g) for g, u in def_regular_decode_dict.iteritems()) |
198 | 199 |
|
199 | 200 | # unicode -> default escaped GSM 03.38 characters |
200 | | -def_escape_encode_dict = dict([(u, g) for g, u in def_escaped_mapping]) |
201 | | - |
202 | | -# unicode -> default replacement characters |
203 | | -def_replace_encode_dict = dict([(u, g) for g, u in def_replace_mapping]) |
204 | | - |
205 | | -# default GSM 03.38 -> unicode |
206 | | -# Note: We've removed the duplicates to be strict TS23.038 compliant |
207 | | -def_regular_decode_dict = dict([(g, u) for g, u in def_regular_mapping]) |
208 | | -def_escape_decode_dict = dict([(g, u) for g, u in def_escaped_mapping]) |
| 201 | +def_escape_encode_dict = \ |
| 202 | + dict((u, g) for g, u in def_escape_decode_dict.iteritems()) |
209 | 203 |
|
210 | 204 |
|
211 | 205 | def encode(input_, errors='strict'): |
|
0 commit comments