-
Notifications
You must be signed in to change notification settings - Fork 255
/
Copy pathsource.py
executable file
·85 lines (67 loc) · 1.38 KB
/
source.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python
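"""
Util to count which Twitter clients (the tweet "source" field) are most used.

Reads one JSON-encoded tweet per line from the files named on the command
line (or from standard input when none are given) and prints an HTML page
with a table of sources ordered by tweet count, most used first.

Example usage:

    utils/source.py tweets.jsonl > sources.html
"""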
import json
import fileinput
from collections import defaultdict
summary = defaultdict(int)
for line in fileinput.input():
tweet = json.loads(line)
source = tweet["source"]
summary[source] += 1
sumsort = sorted(summary, key=summary.get, reverse=True)
print(
"""<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>Twitter client sources</title>
<style>
body {
font-family: Arial, Helvetica, sans-serif;
font-size: 12pt;
margin-left: auto;
margin-right: auto;
width: 95%;
}
footer#page {
margin-top: 15px;
clear: both;
width: 100%;
text-align: center;
font-size: 20pt;
font-weight: heavy;
}
header {
text-align: center;
margin-bottom: 20px;
}
</style>
</head>
<body>
<header>
<h1>Twitter client sources</h1>
<em>created on the command line with <a href="https://github.com/DocNow/twarc">twarc</a></em>
</header>
<table>
"""
)
for source in sumsort:
print("<tr><td>{}</td><td>{}</td></tr>".format(source, summary[source]))
print(
"""
</table>
<footer id="page">
<hr>
<br>
created on the command line with <a href="https://github.com/DocNow/twarc">twarc</a>.
<br>
<br>
</footer>
</body>
</html>"""
)
# End of file