|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +# Copyright (C) 2011 Sphere Systems Ltd |
| 3 | +# Author: Andrew Bird |
| 4 | +# |
| 5 | +# This program is free software; you can redistribute it and/or modify |
| 6 | +# it under the terms of the GNU General Public License as published by |
| 7 | +# the Free Software Foundation; either version 2 of the License, or |
| 8 | +# (at your option) any later version. |
| 9 | +# |
| 10 | +# This program is distributed in the hope that it will be useful, |
| 11 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | +# GNU General Public License for more details. |
| 14 | +# |
| 15 | +# You should have received a copy of the GNU General Public License along |
| 16 | +# with this program; if not, write to the Free Software Foundation, Inc., |
| 17 | +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | +"""Unittests for the gsm encoding/decoding module""" |
| 19 | + |
| 20 | +import unittest |
| 21 | +import messaging.sms.gsm0338 # imports GSM7 codec |
| 22 | + |
# Reversed from: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
#
# Each key is a Unicode character and each value is the pair
# (Unicode code point, GSM code).  GSM codes above 0xff stand for a two byte
# sequence whose first byte is 0x1b, the escape to the extension table.
# Entries are listed in ascending GSM code order.
MAP = {
    # ---- Default alphabet: single byte codes 0x00 - 0x7f ----
#    unichr(0x0000): (0x0000, 0x00), # Null
    u'@': (0x0040, 0x00),
    u'£': (0x00a3, 0x01),
    u'$': (0x0024, 0x02),
    u'¥': (0x00a5, 0x03),
    u'è': (0x00e8, 0x04),
    u'é': (0x00e9, 0x05),
    u'ù': (0x00f9, 0x06),
    u'ì': (0x00ec, 0x07),
    u'ò': (0x00f2, 0x08),
    u'ç': (0x00e7, 0x09),
    unichr(0x000a): (0x000a, 0x0a),  # Linefeed
    u'Ø': (0x00d8, 0x0b),
    u'ø': (0x00f8, 0x0c),
    unichr(0x000d): (0x000d, 0x0d),  # Carriage return
    u'Å': (0x00c5, 0x0e),
    u'å': (0x00e5, 0x0f),
    u'Δ': (0x0394, 0x10),
    u'_': (0x005f, 0x11),
    u'Φ': (0x03a6, 0x12),
    u'Γ': (0x0393, 0x13),
    u'Λ': (0x039b, 0x14),
    u'Ω': (0x03a9, 0x15),
    u'Π': (0x03a0, 0x16),
    u'Ψ': (0x03a8, 0x17),
    u'Σ': (0x03a3, 0x18),
    u'Θ': (0x0398, 0x19),
    u'Ξ': (0x039e, 0x1a),
    # Escape to extension table (displayed as NBSP, on decode of invalid
    # escape sequence)
    unichr(0x00a0): (0x00a0, 0x1b),
    u'Æ': (0x00c6, 0x1c),
    u'æ': (0x00e6, 0x1d),
    u'ß': (0x00df, 0x1e),
    u'É': (0x00c9, 0x1f),
    u' ': (0x0020, 0x20),
    u'!': (0x0021, 0x21),
    u'"': (0x0022, 0x22),
    u'#': (0x0023, 0x23),
    u'¤': (0x00a4, 0x24),
    u'%': (0x0025, 0x25),
    u'&': (0x0026, 0x26),
    u'\'': (0x0027, 0x27),
    u'(': (0x0028, 0x28),
    u')': (0x0029, 0x29),
    u'*': (0x002a, 0x2a),
    u'+': (0x002b, 0x2b),
    u',': (0x002c, 0x2c),
    u'-': (0x002d, 0x2d),
    u'.': (0x002e, 0x2e),
    u'/': (0x002f, 0x2f),
    u'0': (0x0030, 0x30),
    u'1': (0x0031, 0x31),
    u'2': (0x0032, 0x32),
    u'3': (0x0033, 0x33),
    u'4': (0x0034, 0x34),
    u'5': (0x0035, 0x35),
    u'6': (0x0036, 0x36),
    u'7': (0x0037, 0x37),
    u'8': (0x0038, 0x38),
    u'9': (0x0039, 0x39),
    u':': (0x003a, 0x3a),
    u';': (0x003b, 0x3b),
    u'<': (0x003c, 0x3c),
    u'=': (0x003d, 0x3d),
    u'>': (0x003e, 0x3e),
    u'?': (0x003f, 0x3f),
    u'¡': (0x00a1, 0x40),
    u'A': (0x0041, 0x41),
    u'B': (0x0042, 0x42),
    u'C': (0x0043, 0x43),
    u'D': (0x0044, 0x44),
    u'E': (0x0045, 0x45),
    u'F': (0x0046, 0x46),
    u'G': (0x0047, 0x47),
    u'H': (0x0048, 0x48),
    u'I': (0x0049, 0x49),
    u'J': (0x004a, 0x4a),
    u'K': (0x004b, 0x4b),
    u'L': (0x004c, 0x4c),
    u'M': (0x004d, 0x4d),
    u'N': (0x004e, 0x4e),
    u'O': (0x004f, 0x4f),
    u'P': (0x0050, 0x50),
    u'Q': (0x0051, 0x51),
    u'R': (0x0052, 0x52),
    u'S': (0x0053, 0x53),
    u'T': (0x0054, 0x54),
    u'U': (0x0055, 0x55),
    u'V': (0x0056, 0x56),
    u'W': (0x0057, 0x57),
    u'X': (0x0058, 0x58),
    u'Y': (0x0059, 0x59),
    u'Z': (0x005a, 0x5a),
    u'Ä': (0x00c4, 0x5b),
    u'Ö': (0x00d6, 0x5c),
    u'Ñ': (0x00d1, 0x5d),
    u'Ü': (0x00dc, 0x5e),
    u'§': (0x00a7, 0x5f),
    u'¿': (0x00bf, 0x60),
    u'a': (0x0061, 0x61),
    u'b': (0x0062, 0x62),
    u'c': (0x0063, 0x63),
    u'd': (0x0064, 0x64),
    u'e': (0x0065, 0x65),
    u'f': (0x0066, 0x66),
    u'g': (0x0067, 0x67),
    u'h': (0x0068, 0x68),
    u'i': (0x0069, 0x69),
    u'j': (0x006a, 0x6a),
    u'k': (0x006b, 0x6b),
    u'l': (0x006c, 0x6c),
    u'm': (0x006d, 0x6d),
    u'n': (0x006e, 0x6e),
    u'o': (0x006f, 0x6f),
    u'p': (0x0070, 0x70),
    u'q': (0x0071, 0x71),
    u'r': (0x0072, 0x72),
    u's': (0x0073, 0x73),
    u't': (0x0074, 0x74),
    u'u': (0x0075, 0x75),
    u'v': (0x0076, 0x76),
    u'w': (0x0077, 0x77),
    u'x': (0x0078, 0x78),
    u'y': (0x0079, 0x79),
    u'z': (0x007a, 0x7a),
    u'ä': (0x00e4, 0x7b),
    u'ö': (0x00f6, 0x7c),
    u'ñ': (0x00f1, 0x7d),
    u'ü': (0x00fc, 0x7e),
    u'à': (0x00e0, 0x7f),

    # ---- Extension table: two byte codes, 0x1b followed by one byte ----
    unichr(0x000c): (0x000c, 0x1b0a),  # Formfeed
    u'^': (0x005e, 0x1b14),
    u'{': (0x007b, 0x1b28),
    u'}': (0x007d, 0x1b29),
    u'\\': (0x005c, 0x1b2f),
    u'[': (0x005b, 0x1b3c),
    u'~': (0x007e, 0x1b3d),
    u']': (0x005d, 0x1b3e),
    u'|': (0x007c, 0x1b40),
    u'€': (0x20ac, 0x1b65),
}
| 167 | + |
# Greek capitals expected to encode to the GSM code of a similar looking
# Latin capital.  Values are (Unicode code point, GSM code), listed in
# ascending code point order.
# Note: these might look like Latin uppercase, but they aren't
GREEK_MAP = {
    u'Α': (0x0391, 0x41),  # Alpha   -> code of 'A'
    u'Β': (0x0392, 0x42),  # Beta    -> code of 'B'
    u'Ε': (0x0395, 0x45),  # Epsilon -> code of 'E'
    u'Ζ': (0x0396, 0x5a),  # Zeta    -> code of 'Z'
    u'Η': (0x0397, 0x48),  # Eta     -> code of 'H'
    u'Ι': (0x0399, 0x49),  # Iota    -> code of 'I'
    u'Κ': (0x039a, 0x4b),  # Kappa   -> code of 'K'
    u'Μ': (0x039c, 0x4d),  # Mu      -> code of 'M'
    u'Ν': (0x039d, 0x4e),  # Nu      -> code of 'N'
    u'Ο': (0x039f, 0x4f),  # Omicron -> code of 'O'
    u'Ρ': (0x03a1, 0x50),  # Rho     -> code of 'P'
    u'Τ': (0x03a4, 0x54),  # Tau     -> code of 'T'
    u'Υ': (0x03a5, 0x59),  # Upsilon -> code of 'Y'
    u'Χ': (0x03a7, 0x58),  # Chi     -> code of 'X'
}

# Characters that share a GSM code with a different character in MAP.
QUIRK_MAP = {
    # LATIN CAPITAL LETTER C WITH CEDILLA, same code as the lowercase form
    u'Ç': (0x00c7, 0x09),
}

# Stand-in for a GSM code when the encoder output has an unexpected length.
# It is negative, so it never equals a code from the tables and the failing
# assertEqual shows both values instead of the test dying on an exception.
BAD = -1
| 190 | + |
| 191 | + |
class TestEncodingFunctions(unittest.TestCase):
    """Exercise the 'gsm0338' codec and is_gsm_text().

    Expected values come from the module-level MAP, GREEK_MAP and QUIRK_MAP
    tables, whose values are (Unicode code point, GSM code) pairs.
    """

    def _encode_to_int(self, char, max_len):
        """Encode char with the 'gsm0338' codec and fold the result to an int.

        The 'ignore' error handler is used so that the calling test reports
        a value comparison rather than an encoding exception.

        Returns the encoded bytes read as a big-endian integer, or BAD when
        the result is empty or longer than max_len bytes.
        """
        s_gsm = char.encode('gsm0338', 'ignore')
        if not 1 <= len(s_gsm) <= max_len:
            return BAD  # so we see the comparison, not an exception

        i_gsm = 0
        for byte in s_gsm:
            i_gsm = (i_gsm << 8) + ord(byte)
        return i_gsm

    def _all_supported(self):
        """Return MAP, GREEK_MAP and QUIRK_MAP merged into one new dict."""
        merged = {}
        for table in (MAP, GREEK_MAP, QUIRK_MAP):
            merged.update(table)
        return merged

    def test_encoding_supported_unicode_gsm(self):
        """Every MAP character encodes to its one or two byte GSM code."""
        for key, (_i_unicode, expected) in MAP.items():
            i_gsm = self._encode_to_int(key, 2)

            # We shouldn't generate an invalid escape sequence
            if key == unichr(0x00a0):
                self.assertEqual(BAD, i_gsm)
            else:
                self.assertEqual(expected, i_gsm)

    def test_encoding_supported_greek_unicode_gsm(self):
        """Every GREEK_MAP character encodes to its single byte GSM code."""
        # Note: Conversion is one way, hence no corresponding decode test
        for key, (_i_unicode, expected) in GREEK_MAP.items():
            self.assertEqual(expected, self._encode_to_int(key, 1))

    def test_encoding_supported_quirk_unicode_gsm(self):
        """Every QUIRK_MAP character encodes to its single byte GSM code."""
        # Note: Conversion is one way, hence no corresponding decode test
        for key, (_i_unicode, expected) in QUIRK_MAP.items():
            self.assertEqual(expected, self._encode_to_int(key, 1))

    def test_decoding_supported_unicode_gsm(self):
        """Every GSM code in MAP decodes back to its Unicode character."""
        for key, (i_unicode, i_gsm) in MAP.items():
            if i_gsm <= 0xff:
                s_gsm = chr(i_gsm)
            elif i_gsm <= 0xffff:
                s_gsm = chr((i_gsm & 0xff00) >> 8) + chr(i_gsm & 0x00ff)
            else:
                # Without this branch a wider code would silently be tested
                # with the byte string left over from the previous entry.
                self.fail('MAP[%r] holds a GSM code wider than two bytes: '
                          '0x%x' % (key, i_gsm))

            s_unicode = s_gsm.decode('gsm0338', 'strict')
            # Compare strings rather than ord(): a decode result that is not
            # exactly one character then shows up as a failed comparison
            # instead of a TypeError.
            self.assertEqual(unichr(i_unicode), s_unicode)

    def test_is_gsm_text_true(self):
        """is_gsm_text() returns True for every table character but NBSP."""
        is_gsm_text = messaging.sms.gsm0338.is_gsm_text

        for key in self._all_supported():
            if key == unichr(0x00a0):
                continue
            self.assertEqual(is_gsm_text(key), True)

    def test_is_gsm_text_false(self):
        """is_gsm_text() returns False for NBSP and all unlisted characters.

        Every code point from U+0001 to U+FFFF that is not a key of one of
        the tables is checked.
        """
        is_gsm_text = messaging.sms.gsm0338.is_gsm_text
        supported = self._all_supported()

        self.assertEqual(is_gsm_text(unichr(0x00a0)), False)

        for i in xrange(1, 0xffff + 1):
            char = unichr(i)
            if char in supported:
                continue

            result = is_gsm_text(char)
            if result is not False:
                # Name the offending code point and the value returned
                self.fail('is_gsm_text(U+%04X) returned %r, expected False'
                          % (i, result))
0 commit comments