Skip to content

Commit 708c551

Browse files
committed
foaf.py fixes
The code to actually walk the network was commented out for some reason, perhaps for debugging at some point. That has been fixed, and the internal documentation has been updated to better reflect what is going on. refs #392
1 parent 596f429 commit 708c551

File tree

1 file changed

+42
-52
lines changed

1 file changed

+42
-52
lines changed

utils/foaf.py

Lines changed: 42 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,12 @@
22

33
"""
44
This is a utility for getting the friend-of-a-friend network for a
5-
given twitter user. The network is expressed as tuples of user identifiers for
6-
the user and their friend (who they follow).
5+
given twitter user. As it collects the data it writes it to a sqlite database,
6+
{user-id}.sqlite3, and once complete it exports that data to two csv files:
77
8-
User identifiers are used rather than the handles or screen_name, since the
9-
handles can change, and Twitter's API allows you to get friends as ids much
10-
faster.
8+
* {user-id}.csv - the user id links
9+
* {user-id}-users.csv - metadata about the users keyed off their user id
1110
12-
You can of course turn the IDs back into usernames later if you want using
13-
twarc.
1411
"""
1512

1613
import re
@@ -30,45 +27,6 @@
3027
format="%(asctime)s %(levelname)s %(message)s"
3128
)
3229

33-
parser = argparse.ArgumentParser("tweet.py")
34-
parser.add_argument("user", action="store", help="user_id")
35-
parser.add_argument("--level", type=int, action="store", default=2,
36-
help="how far out into the social graph to follow")
37-
38-
args = parser.parse_args()
39-
40-
# create twarc instance for querying Twitter
41-
t = twarc.Twarc()
42-
43-
# setup sqlite db for storing information as it is collected
44-
db = sqlite3.connect('foaf.sqlite3')
45-
46-
db.execute(
47-
'''
48-
CREATE TABLE IF NOT EXISTS friends (
49-
user_id INT,
50-
friend_id INT,
51-
PRIMARY KEY (user_id, friend_id)
52-
)
53-
'''
54-
)
55-
56-
db.execute(
57-
'''
58-
CREATE TABLE IF NOT EXISTS users (
59-
user_id INT,
60-
screen_name TEXT,
61-
name TEXT,
62-
description TEXT,
63-
location TEXT,
64-
created TEXT,
65-
statuses INT,
66-
verified TEXT,
67-
PRIMARY KEY (user_id)
68-
)
69-
'''
70-
)
71-
7230

7331
def friendships(user_id, level=2):
7432
"""
@@ -164,33 +122,65 @@ def add_user(u):
164122
)
165123
db.commit()
166124

125+
# get command line arguments
126+
parser = argparse.ArgumentParser("tweet.py")
127+
parser.add_argument("user", action="store", help="user_id")
128+
parser.add_argument("--level", type=int, action="store", default=2,
129+
help="how far out into the social graph to follow")
130+
args = parser.parse_args()
167131

168-
# lookup friendship data
132+
# create twarc instance for querying Twitter
133+
t = twarc.Twarc()
134+
135+
# get the seed user_id, potentially from their screen name
169136
if re.match("^\d+$", args.user):
170137
seed_user_id = args.user
171138
else:
172139
seed_user_id = next(t.user_lookup([args.user]))['id_str']
173140

174-
"""
141+
# setup sqlite db for storing information as it is collected
142+
db = sqlite3.connect(f'{seed_user_id}.sqlite3')
143+
db.execute(
144+
'''
145+
CREATE TABLE IF NOT EXISTS friends (
146+
user_id INT,
147+
friend_id INT,
148+
PRIMARY KEY (user_id, friend_id)
149+
)
150+
'''
151+
)
152+
db.execute(
153+
'''
154+
CREATE TABLE IF NOT EXISTS users (
155+
user_id INT,
156+
screen_name TEXT,
157+
name TEXT,
158+
description TEXT,
159+
location TEXT,
160+
created TEXT,
161+
statuses INT,
162+
verified TEXT,
163+
PRIMARY KEY (user_id)
164+
)
165+
'''
166+
)
167+
168+
# lookup friendship data
175169
for friendship in friendships(seed_user_id, args.level):
176170
print("%s,%s" % friendship)
177-
"""
178171

179172
# lookup user metadata
180173
for user in t.user_lookup(user_ids()):
181174
add_user(user)
182175

183176
# write out friendships
184-
185177
with open('{}.csv'.format(seed_user_id), 'w') as fh:
186178
w = csv.writer(fh)
187179
w.writerow(['user_id', 'friend_user_id'])
188180
for row in db.execute('SELECT * FROM friends'):
189181
w.writerow(row)
190182

191-
192183
# write out user data as csv
193-
194184
with open('{}-users.csv'.format(seed_user_id), 'w') as fh:
195185
w = csv.writer(fh)
196186
w.writerow([

0 commit comments

Comments
 (0)