Slope One Predictors in Python
Slope One Predictors for Online Rating-Based Collaborative Filteringを読んだメモ1 (http://d.hatena.ne.jp/ytakano/20081002/1222970856)の続きで，D. Lemire and A. Maclachlan, "Slope One Predictors for Online Rating-Based Collaborative Filtering", In SIAM Data Mining (SDM'05), Newport Beach, California, April 21-23, 2005.のメモです．
slope oneは英語版のWikipediaにも多少載っていますが，英語が読めるなら，元の論文を参照した方がよいでしょう．(http://en.wikipedia.org/wiki/Slope_One)
slope oneでは，ユーザがつけたアイテム間の評価の平均偏差から，あるアイテムの評価値を予測します．いま，下記のような評価があったとします．
| | A | B |
| user1 | 3 | 4 |
| user2 | 4 | |
ここでは，user2は，まだ，アイテムBに評価を与えていません．しかし，user1がアイテムAに3，アイテムBに4の評価をつけており，その差は1であるので，user2はアイテムBに評価5をつけるであろうと予測できます．slope oneではこのようにして，ユーザがつけたアイテム間の評価を利用して，予測を行います．
ここで，あるアイテムjとiの平均偏差を以下のように定義します．
$$\mathrm{dev}_{j,i} = \sum_{u \in S_{j,i}(\chi)} \frac{u_j - u_i}{\mathrm{card}(S_{j,i}(\chi))}$$
ただし，$S_{j,i}(\chi)$をアイテムjとi両方に評価を与えたユーザの集合，
$u_i$をユーザuのアイテムiに対する評価とします．
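そうすると，あるユーザuのアイテムjに対する評価は以下のようにして求まります．
$$P(u)_j = \frac{1}{\mathrm{card}(R_j)} \sum_{i \in R_j} (\mathrm{dev}_{j,i} + u_i), \quad R_j = \{ i \mid i \in S(u),\ i \neq j,\ \mathrm{card}(S_{j,i}(\chi)) > 0 \}$$
ちなみに，上記の式はユーザuがアイテムjに評価を与えていない場合，以下の式と等価になります．
$$P^{S1}(u)_j = \bar{u} + \frac{1}{\mathrm{card}(R_j)} \sum_{i \in R_j} \mathrm{dev}_{j,i}$$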
以上が，なんの修飾も付けないslope one方式の説明ですが，しかしながら，実際問題として，1ユーザにしか評価が与えられていないアイテムと，1000ユーザに評価が与えられたアイテムでは，その重みは違ってくると考えられます．そこで，weighted slope oneでは，普通のslope oneに，評価したユーザの数を重みとして掛け合わせます．weighted slope oneの予測関数は以下のようになります．
$$P^{wS1}(u)_j = \frac{\sum_{i \in S(u) - \{j\}} (\mathrm{dev}_{j,i} + u_i)\, c_{j,i}}{\sum_{i \in S(u) - \{j\}} c_{j,i}}, \quad c_{j,i} = \mathrm{card}(S_{j,i}(\chi))$$
ここで少し，評価というモノを考えてみます．例えば，user1がアイテムAを悪い，アイテムBを良いと評価したとします．これまで説明したslope oneのアルゴリズムでは，これらを区別することなく予測していました．しかしながら，良い評価が与えられたアイテムBから推測して，悪い評価が与えられたアイテムAを推薦してしまうことも考えられます．これをさけるために，bi-polar slope oneでは，良い評価と悪い評価のアイテムを別々に区別して，予測を行います．
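まず，bi-polar slope oneでは，良い評価と悪い評価に分けた平均偏差を求めます．良い評価，すなわち評価が平均得点より上であるアイテムの平均偏差の式は以下となります．
$$\mathrm{dev}^{like}_{j,i} = \sum_{u \in S^{like}_{j,i}(\chi)} \frac{u_j - u_i}{\mathrm{card}(S^{like}_{j,i}(\chi))}$$
ただし，$S^{like}_{j,i}(\chi) = \{ u \in \chi \mid i, j \in S^{like}(u) \}$，$S^{like}(u) = \{ i \in S(u) \mid u_i > \bar{u} \}$となります．
悪い評価の平均偏差である$\mathrm{dev}^{dislike}_{j,i}$も定義されますが，likeの場合と不等号が逆なだけなので詳細は省きます．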
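bi-polar slope oneでは上記の，良い評価と悪い評価に分けて求めた平均偏差を用いて予測を行います．この予測関数は以下のようになります．
$$P^{bpS1}(u)_j = \frac{\sum_{i \in S^{like}(u) - \{j\}} (\mathrm{dev}^{like}_{j,i} + u_i)\, c^{like}_{j,i} + \sum_{i \in S^{dislike}(u) - \{j\}} (\mathrm{dev}^{dislike}_{j,i} + u_i)\, c^{dislike}_{j,i}}{\sum_{i \in S^{like}(u) - \{j\}} c^{like}_{j,i} + \sum_{i \in S^{dislike}(u) - \{j\}} c^{dislike}_{j,i}}$$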
正確さは，bi-polar slope one, weighted slope one, slope oneの順によくなり，全てのslope oneはitem basedよりも良くなったようです．
これをpythonで実装してみると，以下のようになります．
#!/usr/bin/env python
"""Slope one rating predictors: plain, weighted and bi-polar.

Implements the schemes described in

D. Lemire and A. Maclachlan, "Slope One Predictors for Online Rating-Based
Collaborative Filtering", In SIAM Data Mining (SDM'05), Newport Beach,
California, April 21-23, 2005.

A user is a dict mapping an item to a numeric rating.  Only constructs that
behave the same on Python 2.6+ and Python 3 are used.
"""

import itertools


def _cmp(x, y):
    """Three-way comparison: 1 if x > y, -1 if x < y, otherwise 0."""
    return (x > y) - (x < y)


# slope one scheme
class slopeone(object):
    """Plain slope one predictor.

    Args:
        users: sequence of rating dicts (item -> rating), one per known user.
        items: sequence of every item that can be recommended.
    """

    def __init__(self, users, items):
        self._users = users
        self._items = items

    def _ave(self):
        """Store each known user's mean rating in ``self._user_ave``.

        The table is deliberately not named ``_ave``: assigning an instance
        attribute with the method's own name hides the method, which made
        every ``recommends`` call after the first one fail.
        """
        self._user_ave = {}
        for index, user in enumerate(self._users):
            if user:
                mean = sum(user.values()) / float(len(user))
            else:
                # A user without ratings has no mean; the value is never
                # consulted because such a user co-rates no item pair.
                mean = 0.0
            self._user_ave[index] = mean

    def _avedev(self):
        """Build ``self._dev``: (item1, item2) -> (mean deviation, count).

        The deviation stored under (item1, item2) is the mean of
        ``rating[item2] - rating[item1]`` over the users who rated both;
        count is the number of such users (the deviation is 0.0 when it
        is 0).
        """
        self._dev = {}
        for item1, item2 in itertools.combinations(self._items, 2):
            diffs = [user[item2] - user[item1]
                     for user in self._users
                     if item1 in user and item2 in user]
            n = len(diffs)
            r = sum(diffs) / float(n) if n else 0.0
            self._dev[(item1, item2)] = (r, n)
            self._dev[(item2, item1)] = (-r, n)

    def _predict(self, user, item):
        """Predict ``user``'s rating of ``item``.

        Returns the existing rating when the user already rated the item,
        and 0 when no rated item shares a co-rater with ``item``.
        """
        if item in user:
            return user[item]

        total = 0.0
        count = 0
        for key, rating in user.items():
            # Unknown pairs (e.g. a rated item missing from the item list)
            # are treated like pairs without co-raters.
            dev, n = self._dev.get((key, item), (0.0, 0))
            if n > 0:
                # Only pairs backed by at least one co-rater carry any
                # information; averaging in the others would bias the
                # prediction towards the user's raw ratings.
                total += dev + rating
                count += 1

        if count == 0:
            return 0
        return total / count

    def _doFirst(self, user):
        """Hook run at the start of ``recommends``; no-op by default."""
        pass

    def recommends(self, user):
        """Return (item, predicted rating) pairs for the unrated items.

        Args:
            user: rating dict (item -> rating) of the target user.

        Returns:
            A list of tuples sorted by predicted rating, highest first;
            ties keep the order of the item list.
        """
        self._doFirst(user)
        self._ave()
        self._avedev()

        result = [(item, self._predict(user, item))
                  for item in self._items if item not in user]
        # list.sort is stable, also with reverse=True.
        result.sort(key=lambda pair: pair[1], reverse=True)
        return result


# weighted slope one scheme
class wslopeone(slopeone):
    """Slope one where each deviation is weighted by its co-rater count."""

    def _predict(self, user, item):
        """Predict ``user``'s rating of ``item`` (0 if nothing is known)."""
        if item in user:
            return user[item]

        weighted = 0.0
        weight = 0
        for key, rating in user.items():
            dev, n = self._dev.get((key, item), (0.0, 0))
            weighted += (dev + rating) * n
            weight += n

        if weight == 0:
            return 0
        return weighted / weight


# bi-polar slope one scheme
class bpslopeone(slopeone):
    """Weighted slope one with separate "like" and "dislike" deviations."""

    def _avedev2(self, item1, item2, cmpf):
        """Mean deviation over users whose two ratings both pass ``cmpf``.

        Args:
            item1, item2: the item pair.
            cmpf: called as ``cmpf(rating, user_average)``; a rating
                qualifies when the result is greater than 0.

        Returns:
            (mean of ``rating[item2] - rating[item1]``, number of users);
            the mean is 0.0 when no user qualifies.
        """
        diffs = []
        for index, user in enumerate(self._users):
            if item1 not in user or item2 not in user:
                continue
            mean = self._user_ave[index]
            if cmpf(user[item1], mean) > 0 and cmpf(user[item2], mean) > 0:
                diffs.append(user[item2] - user[item1])

        n = len(diffs)
        r = sum(diffs) / float(n) if n else 0.0
        return r, n

    def _avedev(self):
        """Build ``self._devLike`` and ``self._devDislike``.

        "Like" counts users who rated both items above their own mean,
        "dislike" users who rated both below it.
        """
        self._devLike = {}
        self._devDislike = {}
        for item1, item2 in itertools.combinations(self._items, 2):
            r1, n1 = self._avedev2(item1, item2, _cmp)
            r2, n2 = self._avedev2(item1, item2, lambda x, y: _cmp(y, x))

            self._devLike[(item1, item2)] = (r1, n1)
            self._devLike[(item2, item1)] = (-r1, n1)
            self._devDislike[(item1, item2)] = (r2, n2)
            self._devDislike[(item2, item1)] = (-r2, n2)

    def _predict(self, user, item):
        """Predict ``user``'s rating of ``item`` (0 if nothing is known).

        Ratings above the target user's mean use the "like" deviations;
        all other ratings (including those equal to the mean) use the
        "dislike" deviations.
        """
        if item in user:
            return user[item]

        r1 = 0.0
        n1 = 0
        r2 = 0.0
        n2 = 0
        for key, rating in user.items():
            if rating > self._uave:
                dev, n = self._devLike.get((key, item), (0.0, 0))
                r1 += (dev + rating) * n
                n1 += n
            else:
                dev, n = self._devDislike.get((key, item), (0.0, 0))
                r2 += (dev + rating) * n
                n2 += n

        if n1 + n2 == 0:
            return 0
        return (r1 + r2) / (n1 + n2)

    def _doFirst(self, user):
        """Remember the target user's mean rating (0.0 without ratings)."""
        if user:
            self._uave = sum(user.values()) / float(len(user))
        else:
            self._uave = 0.0


# sample data
users = [{'A': 4, 'B': 5, 'C': 2, 'D': 4,         'F': 5},  # user 0
         {'A': 2,         'C': 3, 'D': 4, 'E': 3},          # user 1
         {'A': 1, 'B': 4,         'D': 5, 'E': 3, 'F': 4},  # user 2
         {        'B': 5,                 'E': 2, 'F': 4},  # user 3
         {        'B': 3, 'C': 1, 'D': 3,         'F': 3}]  # user 4

items = ['A', 'B', 'C', 'D', 'E', 'F']


def main():
    """Print the recommendations of every scheme for sample user 3."""
    for scheme in (slopeone, wslopeone, bpslopeone):
        print(scheme(users, items).recommends(users[3]))


if __name__ == "__main__":
    main()

# output is
# [('D', 4.166666666666667), ('C', 2.0), ('A', 1.8333333333333333)]
# [('D', 4.25), ('C', 2.0), ('A', 1.8333333333333333)]
# [('D', 5.0), ('A', 0.0), ('C', 0)]
データが少ない状況だと，bi-polar slope oneはイマイチのようです．