|
2 | 2 |
|
3 | 3 | """ |
4 | 4 | This is a utility for getting the friend-of-a-friend network for a |
5 | | -given twitter user. The network is expressed as tuples of user identifiers for |
6 | | -the user and their friend (who they follow). |
| 5 | +given twitter user. It writes a sqlite database ({user-id}.sqlite3) as it |
| 6 | +collects the data, and once complete it exports that data to two csv files: |
7 | 7 |
|
8 | | -User identifiers are used rather than the handles or screen_name, since the |
9 | | -handles can change, and Twitter's API allows you to get friends as ids much |
10 | | -faster. |
| 8 | +* {user-id}.csv - the friendship links as user_id, friend_user_id pairs |
| 9 | +* {user-id}-users.csv - metadata about the users keyed off their user id |
11 | 10 |
|
12 | | -You can of course turn the IDs back into usernames later if you want using |
13 | | -twarc. |
14 | 11 | """ |
15 | 12 |
|
16 | 13 | import re |
|
30 | 27 | format="%(asctime)s %(levelname)s %(message)s" |
31 | 28 | ) |
32 | 29 |
|
33 | | -parser = argparse.ArgumentParser("tweet.py") |
34 | | -parser.add_argument("user", action="store", help="user_id") |
35 | | -parser.add_argument("--level", type=int, action="store", default=2, |
36 | | - help="how far out into the social graph to follow") |
37 | | - |
38 | | -args = parser.parse_args() |
39 | | - |
40 | | -# create twarc instance for querying Twitter |
41 | | -t = twarc.Twarc() |
42 | | - |
43 | | -# setup sqlite db for storing information as it is collected |
44 | | -db = sqlite3.connect('foaf.sqlite3') |
45 | | - |
46 | | -db.execute( |
47 | | - ''' |
48 | | - CREATE TABLE IF NOT EXISTS friends ( |
49 | | - user_id INT, |
50 | | - friend_id INT, |
51 | | - PRIMARY KEY (user_id, friend_id) |
52 | | - ) |
53 | | - ''' |
54 | | -) |
55 | | - |
56 | | -db.execute( |
57 | | - ''' |
58 | | - CREATE TABLE IF NOT EXISTS users ( |
59 | | - user_id INT, |
60 | | - screen_name TEXT, |
61 | | - name TEXT, |
62 | | - description TEXT, |
63 | | - location TEXT, |
64 | | - created TEXT, |
65 | | - statuses INT, |
66 | | - verified TEXT, |
67 | | - PRIMARY KEY (user_id) |
68 | | - ) |
69 | | - ''' |
70 | | -) |
71 | | - |
72 | 30 |
|
73 | 31 | def friendships(user_id, level=2): |
74 | 32 | """ |
@@ -164,33 +122,65 @@ def add_user(u): |
164 | 122 | ) |
165 | 123 | db.commit() |
166 | 124 |
|
| 125 | +# get command line arguments |
| 126 | +parser = argparse.ArgumentParser("tweet.py") |
| 127 | +parser.add_argument("user", action="store", help="user_id") |
| 128 | +parser.add_argument("--level", type=int, action="store", default=2, |
| 129 | + help="how far out into the social graph to follow") |
| 130 | +args = parser.parse_args() |
167 | 131 |
|
168 | | -# lookup friendship data |
| 132 | +# create twarc instance for querying Twitter |
| 133 | +t = twarc.Twarc() |
| 134 | + |
| 135 | +# get the seed user_id, potentially from their screen name |
169 | 136 | if re.match("^\d+$", args.user): |
170 | 137 | seed_user_id = args.user |
171 | 138 | else: |
172 | 139 | seed_user_id = next(t.user_lookup([args.user]))['id_str'] |
173 | 140 |
|
174 | | -""" |
| 141 | +# setup sqlite db for storing information as it is collected |
| 142 | +db = sqlite3.connect(f'{seed_user_id}.sqlite3') |
| 143 | +db.execute( |
| 144 | + ''' |
| 145 | + CREATE TABLE IF NOT EXISTS friends ( |
| 146 | + user_id INT, |
| 147 | + friend_id INT, |
| 148 | + PRIMARY KEY (user_id, friend_id) |
| 149 | + ) |
| 150 | + ''' |
| 151 | +) |
| 152 | +db.execute( |
| 153 | + ''' |
| 154 | + CREATE TABLE IF NOT EXISTS users ( |
| 155 | + user_id INT, |
| 156 | + screen_name TEXT, |
| 157 | + name TEXT, |
| 158 | + description TEXT, |
| 159 | + location TEXT, |
| 160 | + created TEXT, |
| 161 | + statuses INT, |
| 162 | + verified TEXT, |
| 163 | + PRIMARY KEY (user_id) |
| 164 | + ) |
| 165 | + ''' |
| 166 | +) |
| 167 | + |
| 168 | +# lookup friendship data |
175 | 169 | for friendship in friendships(seed_user_id, args.level): |
176 | 170 | print("%s,%s" % friendship) |
177 | | -""" |
178 | 171 |
|
179 | 172 | # lookup user metadata |
180 | 173 | for user in t.user_lookup(user_ids()): |
181 | 174 | add_user(user) |
182 | 175 |
|
183 | 176 | # write out friendships |
184 | | - |
185 | 177 | with open('{}.csv'.format(seed_user_id), 'w') as fh: |
186 | 178 | w = csv.writer(fh) |
187 | 179 | w.writerow(['user_id', 'friend_user_id']) |
188 | 180 | for row in db.execute('SELECT * FROM friends'): |
189 | 181 | w.writerow(row) |
190 | 182 |
|
191 | | - |
192 | 183 | # write out user data as csv |
193 | | - |
194 | 184 | with open('{}-users.csv'.format(seed_user_id), 'w') as fh: |
195 | 185 | w = csv.writer(fh) |
196 | 186 | w.writerow([ |
|
0 commit comments