Loto6 の過去結果を取得してみた
ロト６買ったことは１度しかないシュンが作るロト６結果取得スクリプト。先日のものを一度崩してやりなおし。
過去のものも得られるようにしてみた。あと、 HTML の解析方法を正規表現から BeautifulSoup に変更。 HTML のことは HTML パーサーに任せてみた。
過去の結果を調べたところで当たるものではないので、役立ち度は低いのだけれど、まあ。
loto6.py
# coding: utf-8
u"""Fetch past Loto6 results from the Mizuho Bank web site.

Author: fgshun (http://d.hatena.ne.jp/fgshun/)
Original: http://d.hatena.ne.jp/CortYuming/20091024/p1
"""

import re
import datetime
import time

from BeautifulSoup import BeautifulSoup


# Patterns and markers used while scraping.  They are unicode literals with
# doubled backslashes (instead of ur'' raw strings) so that the values are
# exactly the regular expressions `\d` / `\.` without relying on unknown
# string escapes.
_RESULT_HREF = re.compile(u'^lt6-(?:new|h\\d*?)\\.html$')
_OLD_RESULT_HREF = re.compile(u'^loto6\\d*?\\.html$')
# Draw number heading, e.g. u'第469回'.
_DRAW_NUMBER = re.compile(u'第(?P<count>\\d+)回')
# Draw date as printed on the pages, e.g. u'2009年10月29日'.
_DATE_FORMAT = u'%Y年%m月%d日'
# Cell text used in place of a figure when a prize rank has no winner.
_NO_WINNER = u'該当なし'
# The class attribute of the tables that hold results.
_RESULT_TABLE = {u'class': u'mB12 number'}


def _iter_hrefs(html, pattern):
    u"""Yield the href of every <a> in *html* whose href matches *pattern*."""
    soup = BeautifulSoup(html)
    for a in soup.findAll(u'a', {u'href': pattern}):
        yield a[u'href']


def _parse_draw_number(text):
    u"""Return the integer draw number from text starting with u'第N回'."""
    return int(_DRAW_NUMBER.match(text).group(u'count'))


def _parse_date(text):
    u"""Return a datetime.date parsed from u'YYYY年MM月DD日' text."""
    return datetime.date(*time.strptime(text, _DATE_FORMAT)[0:3])


def _parse_amount(text):
    u"""Return the integer in *text*, ignoring thousands separators."""
    return int(text.replace(u',', u''))


def _parse_winner_amount(text):
    u"""Like _parse_amount, but the "no winner" marker counts as 0."""
    if text == _NO_WINNER:
        return 0
    return _parse_amount(text)


def iter_loto6_url(html):
    u"""Yield the relative URLs of the recent Loto6 result pages.

    *html* is the decoded text of the index page.  The hrefs yielded are
    those matching ``lt6-new.html`` / ``lt6-h<digits>.html``.

    As of 2009/10/30 that HTML could be obtained from
    http://www.takarakuji.mizuhobank.co.jp/miniloto/index.html
    """
    return _iter_hrefs(html, _RESULT_HREF)


def iter_loto6_old_url(html):
    u"""Yield the relative URLs of the older Loto6 result pages.

    *html* is the decoded text of the index page.  The hrefs yielded are
    those matching ``loto6<digits>.html``.

    As of 2009/10/30 that HTML could be obtained from
    http://www.takarakuji.mizuhobank.co.jp/miniloto/index.html
    """
    return _iter_hrefs(html, _OLD_RESULT_HREF)


def iter_loto6_result(html):
    u"""Yield one dict per draw found in a recent-results page.

    Each result table is read row by row; the keys of the yielded dict are:

    count        -- draw number (int), from the second <th> of row 0
    date         -- draw date (datetime.date), from row 1
    numbers      -- tuple of ints, one per <td> of row 2
    bonus_number -- int, from row 3
    items        -- tuple of five ints, first cell of rows 4-8
                    (0 where the page says "no winner")
    prize        -- tuple of five ints, third cell of rows 4-8
                    (0 where the page says "no winner")
    total        -- int, from row 9
    carry_over   -- int, from row 10

    NOTE(review): judging by the sample output, items/prize look like the
    winner count and prize money per rank and total like the sales
    amount -- confirm against the live page.
    """
    soup = BeautifulSoup(html)
    for table in soup.findAll(u'table', _RESULT_TABLE):
        tr = table.findAll(u'tr')
        count = _parse_draw_number(tr[0].findAll(u'th')[1].string)
        date = _parse_date(tr[1].findAll(u'td')[0].string)
        numbers = tuple(int(td.string) for td in tr[2].findAll(u'td'))
        bonus_number = int(tr[3].td.string)

        items = []
        prize = []
        for row in tr[4:9]:
            # Cells 0 and 2 carry the figures; cell 1 is skipped.
            item_cell, prize_cell = row.findAll(u'td')[0:3:2]
            items.append(_parse_winner_amount(item_cell.string))
            prize.append(_parse_winner_amount(prize_cell.string))

        yield dict(
            count=count,
            date=date,
            numbers=numbers,
            bonus_number=bonus_number,
            items=tuple(items),
            prize=tuple(prize),
            total=_parse_amount(tr[9].td.string),
            carry_over=_parse_amount(tr[10].td.string))


def iter_loto6_old_result(html):
    u"""Yield one dict per draw found in an older-results page.

    Older pages list one draw per table row, so only these keys are
    available: count, date, numbers (cells 2-7) and bonus_number (cell 8).
    """
    soup = BeautifulSoup(html)
    for table in soup.findAll(u'table', _RESULT_TABLE):
        for tr in table.tbody.findAll(u'tr'):
            td = tr.findAll(u'td')
            yield dict(
                count=_parse_draw_number(td[0].string),
                date=_parse_date(td[1].string),
                numbers=tuple(int(cell.string) for cell in td[2:8]),
                bonus_number=int(td[8].span.string))


def test():
    u"""Download every result page and pickle the results.

    The results are stored as a dict keyed by draw number in
    ``result.pickle`` next to this module.
    """
    import contextlib
    import urllib
    import urlparse
    import os
    import pickle

    base_url = u'http://www.takarakuji.mizuhobank.co.jp/miniloto/'
    index_url = urlparse.urljoin(base_url, u'index.html')

    def _get_html(url, encoding):
        # closing() makes sure the HTTP response is released even if
        # reading or decoding fails.
        with contextlib.closing(urllib.urlopen(url)) as reader:
            return reader.read().decode(encoding)

    path = os.path.join(
        os.path.dirname(__file__), u'result.pickle')

    index_html = _get_html(index_url, 'shift_jis')

    # Recent pages first, then the older ones: if a draw shows up twice the
    # entry parsed later replaces the earlier one.
    sources = (
        (iter_loto6_url, iter_loto6_result),
        (iter_loto6_old_url, iter_loto6_old_result),
    )
    result = {}
    for iter_url, iter_result in sources:
        for url in iter_url(index_html):
            page = _get_html(urlparse.urljoin(index_url, url), 'shift_jis')
            for r in iter_result(page):
                result[r[u'count']] = r

    with open(path, 'wb') as f:
        pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)


if __name__ == '__main__':
    test()
test 関数を実行すると、とりあえず result.pickle ファイルに結果の辞書を保存する。読み出すと、このような感じ。 numbers が本番号、 bonus_number がボーナス番号。
古いものには当たり金額や、販売実績額、キャリーオーバーの情報は付属していない。
>>> from pickle import load
>>> r = load(open(u'result.pickle', 'rb'))
>>> r[469]
{'count': 469, 'prize': (200000000, 14546600, 633700, 10200, 1000), 'items': (1, 22, 606, 33186, 502816), 'carry_over': 155586703, 'bonus_number': 28, 'numbers': (10, 12, 20, 35, 40, 43), 'date': datetime.date(2009, 10, 29), 'total': 4224938000L}
>>> r[1]
{'date': datetime.date(2000, 10, 5), 'count': 1, 'numbers': (2, 8, 10, 13, 27, 30), 'bonus_number': 39}
この Python 辞書を csv, json, sqlite3 データベースなどに変換すれば、他のツールや言語でも読めるデータを作ることができる。変換用関数はいずれにしても 20 行程度で書ける。
def _load_results():
    """Return the {draw number: result dict} mapping stored in result.pickle."""
    import pickle

    # NOTE: unpickling can execute arbitrary code.  Only load a
    # result.pickle that was produced locally, never one from an
    # untrusted source.
    with open(u'result.pickle', 'rb') as f:
        return pickle.load(f)


def _open_csv(path, mode):
    """Open *path* the way the running interpreter's csv module expects.

    *mode* is 'r' or 'w'.  Python 2's csv module works on binary files,
    Python 3's on text files opened with newline=''.
    """
    if str is bytes:  # Python 2
        return open(path, mode + 'b')
    return open(path, mode, newline='')


def _flat_row(r):
    """Flatten one result dict for tabular output.

    Returns [count, date as 'YYYY-MM-DD', number1..number6, bonus_number].
    Prize and sales figures, present only in some results, are left out.
    """
    row = [r[u'count'], str(r[u'date'])]
    row.extend(r[u'numbers'])
    row.append(r[u'bonus_number'])
    return row


def csv_dump():
    """Write result.pickle as result.csv, one draw per row, sorted by draw."""
    import csv

    result = _load_results()
    with _open_csv(u'result.csv', 'w') as f:
        c = csv.writer(f)
        c.writerow([
            u'count', u'date',
            u'number1', u'number2', u'number3',
            u'number4', u'number5', u'number6',
            u'bonus_number'])
        c.writerows(_flat_row(result[key]) for key in sorted(result))


def json_dump():
    """Write result.pickle as result.json (sorted keys, 4-space indent).

    Dates are written as their str() form ('YYYY-MM-DD' for a date).
    Raises TypeError if a result holds a value JSON cannot represent.
    """
    import json
    import datetime

    class DateEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, datetime.date):
                return str(obj)
            # Defer to the base class so unsupported types raise TypeError
            # instead of being silently written as null.
            return super(DateEncoder, self).default(obj)

    result = _load_results()
    # Text mode: json.dump() writes str, and JSON is a text format.
    with open(u'result.json', 'w') as f:
        json.dump(
            result, f,
            sort_keys=True, indent=4, cls=DateEncoder)


def sqlite3_dump():
    """Write result.pickle into the ``loto6`` table of result.sqlite3.

    An existing ``loto6`` table is dropped and rebuilt, so the function can
    be re-run and always reflects the current pickle -- matching how the
    csv/json dumpers overwrite their output files.
    """
    import contextlib
    import sqlite3

    result = _load_results()
    con = sqlite3.connect(
        u'result.sqlite3',
        detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
    with contextlib.closing(con):
        cur = con.cursor()
        cur.execute("""DROP TABLE IF EXISTS loto6""")
        cur.execute("""CREATE TABLE loto6
            (count INTEGER,
             date TEXT,
             number1 INTEGER,
             number2 INTEGER,
             number3 INTEGER,
             number4 INTEGER,
             number5 INTEGER,
             number6 INTEGER,
             bonus_number INTEGER
            )""")
        cur.executemany(
            """INSERT INTO loto6 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (_flat_row(result[key]) for key in sorted(result)))
        con.commit()
書き出したものを Python で再度読み出すときはこう。
def _open_csv(path, mode):
    """Open *path* the way the running interpreter's csv module expects.

    *mode* is 'r' or 'w'.  Python 2's csv module works on binary files,
    Python 3's on text files opened with newline=''.
    """
    if str is bytes:  # Python 2
        return open(path, mode + 'b')
    return open(path, mode, newline='')


def _parse_iso_date(text):
    """Return the datetime.date for a 'YYYY-MM-DD' string."""
    import datetime
    import time

    return datetime.date(*time.strptime(text, u'%Y-%m-%d')[0:3])


def csv_load():
    """Read result.csv back into a {draw number: result dict} mapping.

    Each result dict has the keys count, date (datetime.date), numbers
    (tuple of six ints) and bonus_number.  The mapping is printed, as
    before, and also returned.
    """
    import csv

    result = {}
    with _open_csv(u'result.csv', 'r') as f:
        for row in csv.DictReader(f):
            count = int(row[u'count'])
            result[count] = {
                u'count': count,
                u'date': _parse_iso_date(row[u'date']),
                u'numbers': tuple(
                    int(row[u'number%d' % i]) for i in range(1, 7)),
                # Read from its own column (this used to read `count`).
                u'bonus_number': int(row[u'bonus_number']),
            }
    print(result)
    return result


def json_load():
    """Read result.json back into a {draw number: result dict} mapping.

    JSON object keys are always strings, so the draw numbers are converted
    back to int to match csv_load() and sqlite3_load().  ``date`` becomes a
    datetime.date and ``numbers`` a tuple; any other values stay as
    decoded by json.  The mapping is printed, as before, and also returned.
    """
    import json

    with open(u'result.json', 'r') as f:
        raw = json.load(f)

    result = {}
    for key, d in raw.items():
        d[u'date'] = _parse_iso_date(d[u'date'])
        d[u'numbers'] = tuple(d[u'numbers'])
        result[int(key)] = d
    print(result)
    return result


def sqlite3_load():
    """Read the loto6 table of result.sqlite3 into a mapping.

    The mapping is {draw number: result dict}.  The date column is
    selected as ``date [DATE]`` so that, with PARSE_COLNAMES enabled,
    sqlite3 applies its DATE converter to the stored text.  The mapping is
    printed, as before, and also returned.
    """
    import contextlib
    import sqlite3

    con = sqlite3.connect(
        u'result.sqlite3',
        detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
    result = {}
    with contextlib.closing(con):
        cur = con.cursor()
        cur.execute("""SELECT
            count,
            date as "date [DATE]",
            number1, number2, number3, number4, number5, number6,
            bonus_number FROM loto6""")
        for row in cur:
            result[row[0]] = {
                u'count': row[0],
                u'date': row[1],
                u'numbers': row[2:8],
                u'bonus_number': row[8],
            }
    print(result)
    return result
loto6latest.py
最新の結果を取得＆表示する機能がなくなっているので、再作成。
# coding: utf-8
u"""Print the most recent Loto6 draw."""

import contextlib
import urllib
import urlparse

from loto6 import iter_loto6_result


def loto6_latest_result():
    u"""Fetch the latest-results page and print the newest draw.

    Prints the draw number (u'第N回') on one line, then the winning
    numbers tuple and the bonus number on the next.  The newest draw is the
    one with the highest ``count`` among those on the page; max() raises
    ValueError if the page yields no results at all.
    """
    base_url = u'http://www.takarakuji.mizuhobank.co.jp/miniloto/'
    latest_url = urlparse.urljoin(base_url, u'lt6-new.html')

    # closing() releases the HTTP response even if read/decode fails.
    with contextlib.closing(urllib.urlopen(latest_url)) as response:
        html = response.read().decode('shift_jis')

    result = max(
        iter_loto6_result(html),
        key=lambda d: d[u'count'])

    # Single-argument print(...) prints the same text as the print
    # statement did.
    print(u'第%d回' % result[u'count'])
    print(u'%s %s' % (result[u'numbers'], result[u'bonus_number']))


if __name__ == '__main__':
    loto6_latest_result()
出力結果。
第469回
(10, 12, 20, 35, 40, 43) 28