-
Notifications
You must be signed in to change notification settings - Fork 196
/
Copy pathjp_mobile_carriers.py
193 lines (178 loc) · 7.46 KB
/
jp_mobile_carriers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# Copyright 2011 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import unicodedata
import urllib
import urllib2
# Docomo's message board endpoint. look_up_number() first scrapes this page
# and then POSTs the inquiry to the same address.
DOCOMO_URL = 'http://dengon.docomo.ne.jp/inoticelist.cgi'
# Captures (group 1) the value of the hidden "ep" form field in a page
# scraped from Docomo; matching is case-insensitive.
DOCOMO_HIDDEN_RE = re.compile(
r'\<INPUT TYPE\=\"HIDDEN\" NAME\=\"ep\" VALUE\=\"(\w+)\"\>', re.I)
# Characters that get_phone_number() strips out before testing a string as a
# phone number: parentheses, dots, the ASCII hyphen, whitespace, and a set of
# dash-like Unicode characters.
NUMBER_SEPARATOR_RE = re.compile(
ur'[\(\)\.\-\s\u2010-\u2015\u2212\u301c\u30fc\ufe58\ufe63\uff0d]')
# A whole string of 9 to 11 digits, optionally preceded by '+' and/or by a
# '01181' or '81' prefix. Group 1 is the prefix (None when absent) and group 2
# is the run of digits. '81' is the country code for Japan; '01181' presumably
# adds the 011 international dialing prefix -- TODO confirm.
PHONE_NUMBER_RE = re.compile(r'^\+?(01181|81)?(\d{9,11})$')
# An 11-digit number that starts with 070, 080 or 090.
MOBILE_NUMBER_RE = re.compile(r'^0(7|8|9)0\d{8}$')
# The *_URL_RE patterns below each match an <a href="..."> link pointing at
# one message board host and capture the linked url as group 1. All of them
# are case-insensitive.
AU_URL_RE = re.compile(
r'\<a href\=\"(http:\/\/dengon\.ezweb\.ne\.jp\/[^\"]+)"\>', re.I)
# NOTE(review): DOCOMO_URL_RE is not referenced by any function visible in
# this file -- confirm whether it is used elsewhere.
DOCOMO_URL_RE = re.compile(
r'\<a href\=\"(http:\/\/dengon\.docomo\.ne\.jp\/[^\"]+)"\>', re.I)
SOFT_BANK_URL_RE = re.compile(
r'\<a href\=\"(http:\/\/dengon\.softbank\.ne\.jp\/[^\"]+)"\>', re.I)
WILLCOM_URL_RE = re.compile(
r'\<a href\=\"(http:\/\/dengon\.willcom\-inc\.com\/[^\"]+)"\>', re.I)
EMOBILE_URL_RE = re.compile(
r'\<a href\=\"(http:\/\/dengon\.emnet\.ne\.jp\/[^\"]+)"\>', re.I)
# Unlike the carrier patterns above, this one matches an https url.
WEB171_URL_RE = re.compile(
r'<a href="(https://www\.web171\.jp/[^"]+)">', re.I)
# Matches a link to an actual message stored at Docomo, i.e. a link to
# inoticelist.cgi that carries a query string. Group 1 is the linked url.
DOCOMO_MESSAGE_RE = re.compile(
r'\<a href\=\"(http:\/\/dengon\.docomo\.ne\.jp\/' +
r'inoticelist\.cgi\?[^\"]+)".*\>', re.I)
def get_phone_number(string):
    """Return the normalized phone number held in the string, or None.

    The input is NFKC-normalized (which, for instance, turns full-width
    digits into ASCII digits) and then stripped of separator characters. If
    what remains is a phone number that carries a prefix for Japan ('81' or
    '01181'), the prefix is dropped and a leading '0' is put in its place.

    Args:
        string: unicode string to normalize.
    Returns:
        The normalized phone number if the input string is a phone number,
        or None otherwise.
    """
    compact = NUMBER_SEPARATOR_RE.sub(
        '', unicodedata.normalize('NFKC', string))
    matched = PHONE_NUMBER_RE.match(compact)
    if matched is None:
        return None
    country_prefix, digits = matched.groups()
    if country_prefix:
        # An international form omits the domestic leading zero; restore it.
        return '0' + digits
    return digits
def is_mobile_number(string):
    """Tell whether the string has the form of a Japanese mobile phone number.

    Args:
        string: unicode string that has already been stripped of phone
            number separators such as '(', ')', and '-' and converted into
            ascii numeric characters.
    Returns:
        True if the string is a Japanese mobile phone number, and False
        otherwise.
    """
    return MOBILE_NUMBER_RE.match(string) is not None
def extract_redirect_url(scrape):
    """Find a link to another message board in a page scraped from Docomo.

    The page is checked for a link to each of the other services in a fixed
    order (au, SoftBank, Willcom, EMOBILE, web171); the first service for
    which a link is present wins, and the first such link in the page is
    returned.

    Args:
        scrape: the scraped content from the url.
    Returns:
        url for further redirect to an appropriate message board page if one
        is found, otherwise None.
    """
    # The order of this tuple is the lookup priority.
    patterns_by_priority = (
        AU_URL_RE,
        SOFT_BANK_URL_RE,
        WILLCOM_URL_RE,
        EMOBILE_URL_RE,
        WEB171_URL_RE,
    )
    for pattern in patterns_by_priority:
        found = pattern.search(scrape)
        if found:
            return found.group(1)
    return None
def docomo_has_messages(scrape):
    """Tell whether a page scraped from Docomo links to stored messages.

    Docomo has messages for the inquired number in its own system when the
    scrape contains at least one url of a stored message.

    Args:
        scrape: the scraped content from Docomo.
    Returns:
        True if Docomo has messages, and False otherwise.
    """
    return DOCOMO_MESSAGE_RE.search(scrape) is not None
def get_docomo_post_data(number, hidden_param):
    """Build the POST data that asks Docomo's url for messages for a number.

    Args:
        number: a normalized mobile number; sent as the 'sm' field.
        hidden_param: the value of the hidden 'ep' field scraped from
            Docomo's page; sent back unchanged as the 'ep' field.
    Returns:
        a new mapping for the POST data.
    """
    # Fields whose values are the same for every inquiry.
    post_data = {'es': 1, 'si': 1, 'bi1': 1}
    # Fields that vary per inquiry.
    post_data['ep'] = hidden_param
    post_data['sm'] = number
    return post_data
def _read_url(url, data=None):
    """Fetch the url (POSTing data, if given) and return the response body.

    The response is closed even when reading it fails, so that the
    underlying connection is not left open.
    """
    response = urllib2.urlopen(url, data)
    try:
        return response.read()
    finally:
        response.close()


def look_up_number(number):
    """Look up messages for the number, registered in the Japanese mobile
    carriers-provided emergency message board services.

    The five Japanese mobile carriers maintain separate message indices, but
    their systems can talk to one another when they don't find messages for
    the given number in their own indices. This function talks to Docomo's
    system as the main entry point. Docomo returns urls of registered
    messages if it finds any in its own system. If it doesn't, Docomo's
    system talks to the other 4 carriers' and returns a url for an
    appropriate carrier if messages are found. If no messages are registered
    for the number, Docomo's system simply indicates so.

    Args:
        number: A mobile phone number.
    Returns:
        A url for messages found registered to some carrier (including
        Docomo), or None if none are found.
    Raises:
        IndexError: if the hidden 'ep' parameter cannot be found in Docomo's
            gateway page.
        Exception: whatever urllib2 raises when scraping fails.
    """
    # Scrape Docomo's gateway page and get the hidden 'ep' param, which has
    # to be sent back along with the inquiry.
    hidden_params = DOCOMO_HIDDEN_RE.findall(_read_url(DOCOMO_URL))
    if not hidden_params:
        # IndexError is what the unguarded [0] lookup used to raise here, so
        # existing handlers keep working; the message now names the cause.
        raise IndexError(
            'Hidden "ep" parameter not found in the page at ' + DOCOMO_URL)
    hidden_param = hidden_params[0]

    # Encode the number and the above param as POST data.
    encoded_data = urllib.urlencode(
        get_docomo_post_data(number, hidden_param))

    # Scrape Docomo's answer on the number.
    scrape = _read_url(DOCOMO_URL, encoded_data)

    # A link to another carrier's page takes precedence.
    url = extract_redirect_url(scrape)
    if url:
        return url
    if docomo_has_messages(scrape):
        # Docomo itself holds messages for the number: hand back Docomo's
        # url with the inquiry encoded as a query string.
        return DOCOMO_URL + '?' + encoded_data
    return None
def handle_phone_number(handler, query):
    """Handle a query that may be a phone number.

    A mobile phone number is looked up for registered messages in the mobile
    carriers-provided message board services: the handler redirects to the
    results page if there is one, and otherwise renders an empty result
    list. A non-mobile phone number gets a 171 suggestion instead. A query
    that is not a phone number is left alone.

    Args:
        handler: a request handler for this request.
        query: a query string to the Person Finder query page.
    Returns:
        True if the query string is a phone number and has been properly
        handled, and False otherwise.
    """
    phone_number = get_phone_number(unicode(query))
    if not phone_number:
        # Not a phone number; nothing has been rendered.
        return False

    if not is_mobile_number(phone_number):
        handler.render('query.html',
                       show_jp_171_suggestion=True)
        return True

    url = look_up_number(phone_number)
    if url:
        handler.redirect(url)
    else:
        handler.render('results.html',
                       results=[], jp_phone_number_query=True)
    return True