Skip to content

Commit 1826f54

Browse files
committed
GSM add encoding / decoding tests
1 parent 4d5ce65 commit 1826f54

1 file changed

Lines changed: 271 additions & 0 deletions

File tree

Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2011 Sphere Systems Ltd
# Author: Andrew Bird
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""Unittests for the gsm encoding/decoding module"""

import unittest
import messaging.sms.gsm0338  # imports GSM7 codec

# Reversed from: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
MAP = {
    # Layout of every entry:
    #     unicode character: (unicode code point, GSM 03.38 code)
    # A GSM code above 0xff is a two-octet sequence written big-endian; in
    # this table the first octet of those is always 0x1b, the escape to the
    # extension table (e.g. 0x1b65 for the euro sign).
    #
    # unichr(0x0000): (0x0000, 0x00),  # Null
    u'@': (0x0040, 0x00),
    u'£': (0x00a3, 0x01),
    u'$': (0x0024, 0x02),
    u'¥': (0x00a5, 0x03),
    u'è': (0x00e8, 0x04),
    u'é': (0x00e9, 0x05),
    u'ù': (0x00f9, 0x06),
    u'ì': (0x00ec, 0x07),
    u'ò': (0x00f2, 0x08),
    u'ç': (0x00e7, 0x09),
    unichr(0x000a): (0x000a, 0x0a),  # Linefeed
    u'Ø': (0x00d8, 0x0b),
    u'ø': (0x00f8, 0x0c),
    unichr(0x000d): (0x000d, 0x0d),  # Carriage return
    u'Å': (0x00c5, 0x0e),
    u'å': (0x00e5, 0x0f),
    u'Δ': (0x0394, 0x10),
    u'_': (0x005f, 0x11),
    u'Φ': (0x03a6, 0x12),
    u'Γ': (0x0393, 0x13),
    u'Λ': (0x039b, 0x14),
    u'Ω': (0x03a9, 0x15),
    u'Π': (0x03a0, 0x16),
    u'Ψ': (0x03a8, 0x17),
    u'Σ': (0x03a3, 0x18),
    u'Θ': (0x0398, 0x19),
    u'Ξ': (0x039e, 0x1a),
    unichr(0x00a0): (0x00a0, 0x1b),  # Escape to extension table (displayed
                                     # as NBSP, on decode of invalid escape
                                     # sequence)
    u'Æ': (0x00c6, 0x1c),
    u'æ': (0x00e6, 0x1d),
    u'ß': (0x00df, 0x1e),
    u'É': (0x00c9, 0x1f),
    u' ': (0x0020, 0x20),
    u'!': (0x0021, 0x21),
    u'"': (0x0022, 0x22),
    u'#': (0x0023, 0x23),
    u'¤': (0x00a4, 0x24),
    u'%': (0x0025, 0x25),
    u'&': (0x0026, 0x26),
    u'\'': (0x0027, 0x27),
    u'{': (0x007b, 0x1b28),
    u'}': (0x007d, 0x1b29),
    u'*': (0x002a, 0x2a),
    u'+': (0x002b, 0x2b),
    u',': (0x002c, 0x2c),
    u'-': (0x002d, 0x2d),
    u'.': (0x002e, 0x2e),
    u'\\': (0x005c, 0x1b2f),
    u'0': (0x0030, 0x30),
    u'1': (0x0031, 0x31),
    u'2': (0x0032, 0x32),
    u'3': (0x0033, 0x33),
    u'4': (0x0034, 0x34),
    u'5': (0x0035, 0x35),
    u'6': (0x0036, 0x36),
    u'7': (0x0037, 0x37),
    u'8': (0x0038, 0x38),
    u'9': (0x0039, 0x39),
    u':': (0x003a, 0x3a),
    u';': (0x003b, 0x3b),
    u'[': (0x005b, 0x1b3c),
    unichr(0x000c): (0x000c, 0x1b0a),  # Formfeed
    u']': (0x005d, 0x1b3e),
    u'?': (0x003f, 0x3f),
    u'|': (0x007c, 0x1b40),
    u'A': (0x0041, 0x41),
    u'B': (0x0042, 0x42),
    u'C': (0x0043, 0x43),
    u'D': (0x0044, 0x44),
    u'E': (0x0045, 0x45),
    u'F': (0x0046, 0x46),
    u'G': (0x0047, 0x47),
    u'H': (0x0048, 0x48),
    u'I': (0x0049, 0x49),
    u'J': (0x004a, 0x4a),
    u'K': (0x004b, 0x4b),
    u'L': (0x004c, 0x4c),
    u'M': (0x004d, 0x4d),
    u'N': (0x004e, 0x4e),
    u'O': (0x004f, 0x4f),
    u'P': (0x0050, 0x50),
    u'Q': (0x0051, 0x51),
    u'R': (0x0052, 0x52),
    u'S': (0x0053, 0x53),
    u'T': (0x0054, 0x54),
    u'U': (0x0055, 0x55),
    u'V': (0x0056, 0x56),
    u'W': (0x0057, 0x57),
    u'X': (0x0058, 0x58),
    u'Y': (0x0059, 0x59),
    u'Z': (0x005a, 0x5a),
    u'Ä': (0x00c4, 0x5b),
    u'Ö': (0x00d6, 0x5c),
    u'Ñ': (0x00d1, 0x5d),
    u'Ü': (0x00dc, 0x5e),
    u'§': (0x00a7, 0x5f),
    u'¿': (0x00bf, 0x60),
    u'a': (0x0061, 0x61),
    u'b': (0x0062, 0x62),
    u'c': (0x0063, 0x63),
    u'd': (0x0064, 0x64),
    u'€': (0x20ac, 0x1b65),
    u'f': (0x0066, 0x66),
    u'g': (0x0067, 0x67),
    u'h': (0x0068, 0x68),
    u'<': (0x003c, 0x3c),
    u'j': (0x006a, 0x6a),
    u'k': (0x006b, 0x6b),
    u'l': (0x006c, 0x6c),
    u'm': (0x006d, 0x6d),
    u'n': (0x006e, 0x6e),
    u'~': (0x007e, 0x1b3d),
    u'p': (0x0070, 0x70),
    u'q': (0x0071, 0x71),
    u'r': (0x0072, 0x72),
    u's': (0x0073, 0x73),
    u't': (0x0074, 0x74),
    u'>': (0x003e, 0x3e),
    u'v': (0x0076, 0x76),
    u'i': (0x0069, 0x69),
    u'x': (0x0078, 0x78),
    u'^': (0x005e, 0x1b14),
    u'z': (0x007a, 0x7a),
    u'ä': (0x00e4, 0x7b),
    u'ö': (0x00f6, 0x7c),
    u'ñ': (0x00f1, 0x7d),
    u'ü': (0x00fc, 0x7e),
    u'à': (0x00e0, 0x7f),
    u'¡': (0x00a1, 0x40),
    u'/': (0x002f, 0x2f),
    u'o': (0x006f, 0x6f),
    u'u': (0x0075, 0x75),
    u'w': (0x0077, 0x77),
    u'y': (0x0079, 0x79),
    u'e': (0x0065, 0x65),
    u'=': (0x003d, 0x3d),
    u'(': (0x0028, 0x28),
    u')': (0x0029, 0x29),
}
GREEK_MAP = {  # Note: these might look like Latin uppercase, but they aren't
    # Greek capital letter: (unicode code point, GSM code)
    # Every GSM code here is also the code MAP records for an uppercase
    # Latin letter (0x41 is u'A' there, and so on).
    u'Α': (0x0391, 0x41),
    u'Β': (0x0392, 0x42),
    u'Ε': (0x0395, 0x45),
    u'Η': (0x0397, 0x48),
    u'Ι': (0x0399, 0x49),
    u'Κ': (0x039a, 0x4b),
    u'Μ': (0x039c, 0x4d),
    u'Ν': (0x039d, 0x4e),
    u'Ο': (0x039f, 0x4f),
    u'Ρ': (0x03a1, 0x50),
    u'Τ': (0x03a4, 0x54),
    u'Χ': (0x03a7, 0x58),
    u'Υ': (0x03a5, 0x59),
    u'Ζ': (0x0396, 0x5a),
}
QUIRK_MAP = {
    # unicode character: (unicode code point, GSM code)
    # 0x09 is the same GSM code that MAP records for the lowercase u'ç'.
    u'Ç': (0x00c7, 0x09),  # LATIN CAPITAL LETTER C WITH CEDILLA
}

# Sentinel for "no usable GSM code"; it can never equal a real code, so
# comparing against it makes assertEqual report the offending values.
BAD = -1
class TestEncodingFunctions(unittest.TestCase):
    """Exercise the 'gsm0338' codec and ``is_gsm_text``.

    Expected values come from the module-level MAP, GREEK_MAP and QUIRK_MAP
    tables; BAD is the sentinel used when no comparable GSM code exists.
    """

    @staticmethod
    def _gsm_to_int(s_gsm, allow_escape=False):
        """Fold an encoded GSM byte string into one comparable integer.

        A single octet yields its ordinal.  When ``allow_escape`` is true, a
        two-octet string is packed with the first octet in the high byte,
        which is how two-octet codes are written in MAP.  Any other length
        yields BAD, so the caller's assertEqual shows the mismatch instead
        of the test dying on an exception.
        """
        if len(s_gsm) == 1:
            return ord(s_gsm)
        if allow_escape and len(s_gsm) == 2:
            return (ord(s_gsm[0]) << 8) + ord(s_gsm[1])
        return BAD

    @staticmethod
    def _all_supported():
        """Return MAP, GREEK_MAP and QUIRK_MAP merged into one new dict."""
        merged = dict(MAP)
        merged.update(GREEK_MAP)
        merged.update(QUIRK_MAP)
        return merged

    def test_encoding_supported_unicode_gsm(self):
        """Each MAP key must encode to the GSM code recorded for it."""
        for key, (_, expected) in MAP.items():
            # Use 'ignore' so that we see the code tested, not an exception
            i_gsm = self._gsm_to_int(key.encode('gsm0338', 'ignore'),
                                     allow_escape=True)

            # We shouldn't generate an invalid escape sequence
            if key == unichr(0x00a0):
                self.assertEqual(BAD, i_gsm)
            else:
                self.assertEqual(expected, i_gsm)

    def test_encoding_supported_greek_unicode_gsm(self):
        """Each GREEK_MAP key must encode to its single-octet GSM code."""
        # Note: Conversion is one way, hence no corresponding decode test

        for key, (_, expected) in GREEK_MAP.items():
            # Use 'ignore' so that we see the code tested, not an exception
            i_gsm = self._gsm_to_int(key.encode('gsm0338', 'ignore'))
            self.assertEqual(expected, i_gsm)

    def test_encoding_supported_quirk_unicode_gsm(self):
        """Each QUIRK_MAP key must encode to its single-octet GSM code."""
        # Note: Conversion is one way, hence no corresponding decode test

        for key, (_, expected) in QUIRK_MAP.items():
            # Use 'ignore' so that we see the code tested, not an exception
            i_gsm = self._gsm_to_int(key.encode('gsm0338', 'ignore'))
            self.assertEqual(expected, i_gsm)

    def test_decoding_supported_unicode_gsm(self):
        """Each GSM code in MAP must decode to the recorded code point."""
        for key, (code_point, i_gsm) in MAP.items():
            if i_gsm <= 0xff:
                s_gsm = chr(i_gsm)
            elif i_gsm <= 0xffff:
                s_gsm = chr((i_gsm & 0xff00) >> 8) + chr(i_gsm & 0x00ff)
            else:
                # Without this branch a bad table entry would silently reuse
                # the previous iteration's s_gsm (or raise NameError).
                self.fail('GSM code 0x%x for %r does not fit in two octets'
                          % (i_gsm, key))

            s_unicode = s_gsm.decode('gsm0338', 'strict')
            self.assertEqual(code_point, ord(s_unicode))

    def test_is_gsm_text_true(self):
        """is_gsm_text must accept every table character except NBSP."""
        for key in self._all_supported():
            if key == unichr(0x00a0):
                continue
            self.assertEqual(messaging.sms.gsm0338.is_gsm_text(key), True)

    def test_is_gsm_text_false(self):
        """is_gsm_text must reject NBSP and every character not in a table."""
        supported = self._all_supported()

        self.assertEqual(
            messaging.sms.gsm0338.is_gsm_text(unichr(0x00a0)), False)

        # The scan starts at 1, so code point 0 (the Null entry that is
        # commented out of MAP) is not checked here.
        for i in xrange(1, 0xffff + 1):
            if unichr(i) in supported:
                continue
            # Note: it's a little odd, but on error we want to see values
            if messaging.sms.gsm0338.is_gsm_text(unichr(i)) is not False:
                self.assertEqual(BAD, i)

0 commit comments

Comments
 (0)