This repository has been archived by the owner on Apr 16, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy path: steam_discounts
executable file
·208 lines (170 loc) · 5.92 KB
/
steam_discounts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Author: Ricardo Garcia
# Code released to the public domain.
#
import HTMLParser
import htmlentitydefs
import itertools
import re
class Entry(object):
    """One discounted Steam store item scraped from the search results.

    All fields are unicode strings that the parser fills in
    incrementally: title, original price, discount percentage,
    discounted price and the canonical store URL.
    """

    def __init__(self):
        # Every field starts out empty; text is appended during parsing.
        self.title = u''
        self.orig_price = self.discount = self.price = u''
        self.url = u''

    def __cmp__(self, other):
        # Entries sort case-insensitively by title (Python 2 comparison).
        return cmp(self.title.lower(), other.title.lower())

    def __hash__(self):
        # The store URL uniquely identifies an entry, so hash on it.
        return hash(self.url)
class DiscountsParser(HTMLParser.HTMLParser):
    """SAX-style parser for one batch of Steam "Discounts" search results.

    Drives a small state machine off the tags the store markup uses:
    an anchor to an app/sub page starts a new Entry, <h4> wraps the
    title, <div class="tab_discount discount_pct"> the percentage,
    <strike> the original price, and remaining text inside
    <div class="tab_price"> the discounted price.  After feed() the
    instance behaves as a read-only sequence of completed Entry objects.

    NOTE(review): the field-to-tag mapping above reflects what the
    handlers visibly do; it assumes Steam's 2010-era markup — verify
    against a live page before reuse.
    """

    def __init__(self):
        # Anchors pointing at an individual app or package page mark the
        # start of a new entry; group 1 captures the canonical URL.
        self.url_re_ = re.compile(
            r'^(http://store\.steampowered\.com/(?:app|sub)/\d+/)')
        HTMLParser.HTMLParser.__init__(self)

    @staticmethod
    def entity2uni(name):
        # Named entity (e.g. 'amp') -> corresponding unicode character.
        return unichr(htmlentitydefs.name2codepoint[name])

    @staticmethod
    def ref2uni(ref):
        # Decimal character reference -> corresponding unicode character.
        return unichr(int(ref))

    def reset(self):
        # Also invoked by HTMLParser.__init__; (re)initializes all state.
        HTMLParser.HTMLParser.reset(self)
        self.current_entry_ = None  # Entry currently being filled, if any.
        self.entries_ = []          # Completed entries, in page order.
        # Flags recording which element the parser is currently inside.
        self.in_h4_ = False
        self.in_discount_pct_ = False
        self.in_tab_price_ = False
        self.in_strike_ = False

    def handle_starttag(self, tag, attrs):
        attrs_map = dict(attrs)
        if tag == 'a':
            match = self.url_re_.search(attrs_map.get('href', ''))
            if match is not None:
                # First field to extract, hence new entry.
                self.current_entry_ = Entry()
                self.current_entry_.url += match.group(1)
        if tag == 'h4':
            self.in_h4_ = True
        elif tag == 'div':
            # The class attribute distinguishes the percentage cell from
            # the price cell.
            if attrs_map.get('class', '') == 'tab_discount discount_pct':
                self.in_discount_pct_ = True
            elif attrs_map.get('class', '') == 'tab_price':
                self.in_tab_price_ = True
        elif tag == 'strike':
            self.in_strike_ = True

    def handle_endtag(self, tag):
        if tag == 'h4':
            self.in_h4_ = False
        elif tag == 'div':
            # Closing </div> may end either tracked div; the percentage
            # div is checked first because it closes before the price div.
            if self.in_discount_pct_:
                self.in_discount_pct_ = False
            elif self.in_tab_price_:
                self.in_tab_price_ = False
                # This was the last field to extract.
                self.entries_.append(self.current_entry_)
        elif tag == 'strike':
            self.in_strike_ = False

    def append_text(self, text):
        # Route character data to whichever field we are inside of;
        # order matters: <strike> nests inside the tab_price div.
        if self.in_h4_:
            self.current_entry_.title += text
        elif self.in_discount_pct_:
            self.current_entry_.discount += text
        elif self.in_strike_:
            self.current_entry_.orig_price += text
        elif self.in_tab_price_:
            # Note we only enter here if not in <strike>.
            self.current_entry_.price += text

    def handle_data(self, data):
        self.append_text(data.strip())

    def handle_entityref(self, name):
        self.append_text(self.entity2uni(name))

    def handle_charref(self, ref):
        self.append_text(self.ref2uni(ref))

    # Behave like a sequence of Entries.
    def __len__(self):
        return self.entries_.__len__()

    def __getitem__(self, key):
        return self.entries_.__getitem__(key)

    def __iter__(self):
        return self.entries_.__iter__()

    def __reversed__(self):
        return self.entries_.__reversed__()

    def __contains__(self, item):
        return self.entries_.__contains__(item)
# Program entry point: fetch all current Steam discounts, de-duplicate,
# sort by title and print one formatted line per discounted item.
if __name__ == '__main__':
    # Extra imports ('re' is already imported at module level).
    import os
    import subprocess
    import sys
    import urllib

    # Constants.
    TITLE_WIDTH = 40     # Title characters shown before the padding dots.
    MAX_BATCH_SIZE = 10  # Results requested per "tab" page.

    # Get the total number of discounts from the search landing page.
    conn = urllib.urlopen('http://store.steampowered.com/search/?specials=1')
    page = conn.read()
    conn.close()
    mo = re.search(r'showing.*of +(\d+)', page)
    if mo is not None:
        max_discounts = int(mo.group(1))
    else:
        sys.exit('ERROR: unable to obtain the number of discounts')

    # Retrieve the discounts in batches of at most MAX_BATCH_SIZE.
    obtained = 0
    remaining = max_discounts
    batches = []
    while remaining > 0:
        # Get batch page.
        batch_size = min(remaining, MAX_BATCH_SIZE)
        discounts_url = \
            'http://store.steampowered.com/search/tab?bHoverEnabled=true&' + \
            'style=&navcontext=1_4_4_&tab=Discounts&start=%d&count=%d' % \
            (obtained, batch_size)
        conn = urllib.urlopen(discounts_url)
        page = conn.read()
        # Fall back to UTF-8 when the server omits the charset parameter;
        # str.decode(None) would raise TypeError otherwise.
        encoding = conn.headers.getparam('charset') or 'utf-8'
        conn.close()
        page = page.decode(encoding)

        # Parse this batch of discounts.
        discounts_parser = DiscountsParser()
        discounts_parser.feed(page)
        batches.append(discounts_parser)

        # Update progress counters.
        received_size = len(discounts_parser)
        obtained += received_size
        remaining -= received_size
        # The server sometimes "lies" about the exact number of discounts;
        # a short (or empty) batch also prevents an infinite loop here.
        if received_size < batch_size:
            break

    # Compose output: de-duplicate by URL (Entry.__hash__), sort by title.
    entries = sorted(set(itertools.chain(*batches)))
    output = 'Listing %d discounts.\n' % (len(entries), )
    for entry in entries:
        output += (u'%s ...%s %7s [%12s] -- %s\n' %
            (
                entry.title[:TITLE_WIDTH],
                '.' * (TITLE_WIDTH - len(entry.title)),
                entry.price,
                u'%7s %s' % (entry.orig_price, entry.discount),
                entry.url,
            )
        ).encode('utf-8')

    # Print output, paging it when attached to a terminal.
    if sys.stdout.isatty():
        cmd = os.getenv('PAGER', default='less')
        try:
            pager = subprocess.Popen(cmd.split(), stdin=subprocess.PIPE)
            pager.stdin.write(output)
            pager.stdin.close()
            pager.wait()
        except OSError:
            # Pager missing or failed to launch; emit the plain output
            # rather than losing it.
            sys.stdout.write(output)
    else:
        sys.stdout.write(output)