forked from sdiehl/kaylee
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example.py
45 lines (35 loc) · 925 Bytes
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import time
import numpy
import mmap
from itertools import count
from kaylee import Server
# The input text is memory-mapped read-only rather than read, so the
# whole file is never loaded into memory at once.
f = open('mobydick.txt')
mm = mmap.mmap(
    f.fileno(),               # map the file that was just opened
    0,                        # length 0: map the entire file
    access=mmap.ACCESS_READ,
)
# Enumerates all lines of the memory-mapped file.  According to the
# original author, going through the map gets data from disk into ZeroMQ
# much faster than plain reads/writes.
def datafn():
    """Yield ``(line_number, line)`` pairs for each line left in ``mm``.

    Lines are read from the module-level memory map ``mm`` starting at its
    current position.  They are numbered from 0 and yielded exactly as
    ``mm.readline()`` returns them (trailing newline included).

    The map is closed as soon as the generator finishes, whether it was
    exhausted, closed early by the consumer, or interrupted by an
    exception.  The generator is therefore single-use.
    """
    try:
        # readline() returns an empty string only at the end of the mapped
        # area, so that value is the stop signal.  Comparing tell() with
        # size() is not reliable: size() is the length of the underlying
        # file, which can be larger than the mapping, and the loop would
        # then never terminate.
        for numbered_line in enumerate(iter(mm.readline, b"")):
            yield numbered_line
    finally:
        # Release the mapping even when iteration stops early.
        mm.close()
# map :: (k1, v1) -> [ (k2, v2) ]
def mapfn(k1, v):
    """Emit a ``(word, 1)`` pair for every word in one input record.

    ``k1`` is the record key and is not used.  ``v`` must expose the raw
    record as ``v.bytes`` (presumably a ZeroMQ frame -- confirm against
    the caller); it is split on whitespace and each token is yielded
    with a count of 1.
    """
    words = v.bytes.split()
    for word in words:
        yield word, 1
# reduce :: (k2, [v2]) -> v3
def reducefn(k2, v):
    """Collapse all values collected for one key into their sum.

    ``k2`` is the key being reduced and is not used.  ``v`` is an
    iterable of numbers; the bare total is returned (not a list of
    pairs).
    """
    total = sum(v)
    return total
# Server: connect, then register the map, reduce and data callbacks.
s = Server()
s.connect()
s.mapfn = mapfn
s.reducefn = reducefn
s.datafn = datafn

# Report the wall-clock seconds spent inside s.start().
start = time.time()
s.start()
stop = time.time()
print(stop - start)

# To dump every result instead of the ranking below: print(s.results())
# Rank the (key, value) result pairs by value, largest first, and print
# the entries at positions 1 through 24 (the slice skips position 0).
ranked = list(s.results().iteritems())
ranked.sort(key=lambda pair: pair[1], reverse=True)
print(ranked[1:25])