Item based collaborative filtering
D. Lemire and A. Maclachlan, "Slope One Predictors for Online Rating-Based Collaborative Filtering", In SIAM Data Mining (SDM'05), Newport Beach, California, April 21-23, 2005.を読んだメモです．
この論文では，いわゆるrecommendationアルゴリズムとして"slope one"という方式を提案していますが，この方式の説明は次回行うことにして，今回はrelated workで挙げられている，B. Sarwar, G. Karypis, J. Konstan and J. Riedl, "Item-based collaborative filtering recommendation algorithms," in Proceedings of the Tenth International Conference on the World Wide Web (WWW 10), pp. 285-295, 2001.の手法について説明します．(http://www10.org/cdrom/papers/519/)
item-basedのrecommendationは，オライリーから出ている"Programming Collective Intelligence"にも載っていますが，こちらの方が多少厳密です．ちなみにこの本は最近，日本語版が発売されたようです．
item-basedの方式では，アイテム間の相関係数を計算し，その相関係数を元にrecommendを行います．例えば，あるアイテムAとBの取得したポイントは以下のようになったとします．
 | A | B |
user1 | 4 | 3 |
user2 | 5 | 3 |
user3 | 3 | 2 |
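このとき，相関係数は
$$sim(A, B) = \frac{\sum_{u \in U} (R_{u,A} - \bar{R}_u)(R_{u,B} - \bar{R}_u)}{\sqrt{\sum_{u \in U} (R_{u,A} - \bar{R}_u)^2}\,\sqrt{\sum_{u \in U} (R_{u,B} - \bar{R}_u)^2}}$$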
となります．ここで$U$はユーザの集合で，$R_{u,i}$はユーザuがアイテムiに与えた評価です．この例の場合，相関係数は-1.0となります．ピアソンの相関係数と似ていますが，平均値が，アイテムの平均ではなく，ユーザの平均であるという点において違いがあります．
さらに，Aを独立変数，Bを従属変数として，回帰直線をもとめます．回帰直線とは，
$$y = ax + b$$
で表されるような一次式であり，近似直線でもあります．ここで，回帰係数a, bは最小二乗法により求めることが出来て，
$$a = \frac{\sum_{k} (x_k - \bar{x})(y_k - \bar{y})}{\sum_{k} (x_k - \bar{x})^2}, \qquad b = \bar{y} - a\bar{x}$$
となります．上の例の場合，回帰係数はa = 0.5, b = 0.66となります．
今，別のuser4がAに対して，評価3をつけたとします．このとき，user4がBに対して，どのような評価を与えるか，item-basedの方法で予測してみると，
$$0.5 \times 3 + 0.666 = 2.166$$
となり，user4はBに対して，評価2.166をつけるであろうと予測できます．
item-basedでは，このようにして，相関係数と回帰分析を用いて，ユーザの評価を予測します．ユーザuのアイテムiに対する評価の予測値は以下の式で求められます．
$$P_{u,i} = \frac{\sum_{j} |s_{i,j}| \, (a_{i,j} R_{u,j} + b_{i,j})}{\sum_{j} |s_{i,j}|}$$
ここで，iは予測するアイテム，$s_{i,j}$は上記で説明したとおり，アイテムiとjの相関係数を表します．また，$a_{i,j}$と$b_{i,j}$は，アイテムjの評価を独立変数，iの評価を従属変数としたときの回帰係数です．
あとは，ユーザuが評価していないアイテムに関して評価値の予測を行い，予測値の高いモノをrecommendすれば，recommendationができます．
これをPythonで書いてみたら，以下のようになります．
#!/usr/bin/env python
# This program implements an item-based recommender system.
#
# B. Sarwar, G. Karypis, J. Konstan and J. Riedl,
# "Item-based collaborative filtering recommendation algorithms," in
# Proceedings of the Tenth International Conference on the World Wide Web
# (WWW 10), pp. 285-295, 2001.

import math


def regress(data):
    """Fit the least-squares line ``y = a * x + b`` in a single pass.

    Args:
        data: iterable of ``(x, y)`` numeric pairs.

    Returns:
        Tuple ``(a, b)`` of slope and intercept.  When the slope is
        undefined (no samples, or all ``x`` values identical) the slope
        falls back to 1.0, so the result is ``(1.0, mean_y - mean_x)``.
    """
    mean_x = 0.0
    mean_y = 0.0
    sum_xx = 0.0  # running sum of squared deviations of x
    sum_xy = 0.0  # running sum of co-deviations of x and y
    for count, (x, y) in enumerate(data, 1):
        # Deviations are taken against the means of the previous samples,
        # then the means are updated incrementally.
        dx = x - mean_x
        dy = y - mean_y
        mean_x += dx / count
        mean_y += dy / count
        sum_xx += (count - 1) * dx * dx / count
        sum_xy += (count - 1) * dx * dy / count
    try:
        slope = sum_xy / sum_xx
    except ZeroDivisionError:
        slope = 1.0
    return slope, mean_y - slope * mean_x


class recommender:
    """Item-based collaborative-filtering recommender.

    ``users`` is a sequence of dicts mapping item -> rating, and ``items``
    is the sequence of all items that may be recommended.
    """

    def __init__(self, users, items):
        self._users = users
        self._items = items
        self._averages = []      # mean rating per user, parallel to users
        self._correlations = {}  # (item1, item2) with item1 <= item2 -> similarity
        self._regressions = {}   # (x_item, y_item) -> (slope, intercept)

    def _ave(self):
        """Compute the mean rating of every training user.

        The result is kept in ``self._averages`` rather than in an attribute
        named ``_ave`` so that this method is not replaced by its own result
        and ``recommends`` can be called more than once.
        """
        self._averages = [
            sum(user.values()) / float(len(user)) if user else 0.0
            for user in self._users
        ]

    def _corr(self):
        """Compute the similarity of every unordered item pair.

        Each rating is centred on the rating user's own mean before the
        cosine-style ratio is taken; only users who rated both items
        contribute.  A pair whose ratio is undefined gets 0.0.
        """
        self._correlations = {}
        for i, first in enumerate(self._items):
            for second in self._items[i + 1:]:
                item1, item2 = first, second
                if item1 > item2:
                    # Keys are stored with the smaller item first.
                    item1, item2 = item2, item1
                r1 = 0.0
                r2 = 0.0
                r3 = 0.0
                for user, mean in zip(self._users, self._averages):
                    if item1 in user and item2 in user:
                        d1 = user[item1] - mean
                        d2 = user[item2] - mean
                        r1 += d1 * d2
                        r2 += d1 * d1
                        r3 += d2 * d2
                try:
                    p = r1 / (math.sqrt(r2) * math.sqrt(r3))
                except ZeroDivisionError:
                    p = 0.0
                self._correlations[(item1, item2)] = p

    def _reg(self):
        """Fit a regression line for every ordered pair of distinct items.

        ``self._regressions[(x_item, y_item)]`` holds the ``(slope,
        intercept)`` returned by ``regress`` for the users who rated both.
        """
        self._regressions = {}
        for i, item1 in enumerate(self._items):
            for j, item2 in enumerate(self._items):
                if i == j:
                    continue
                data = [(u[item1], u[item2]) for u in self._users
                        if item1 in u and item2 in u]
                self._regressions[(item1, item2)] = regress(data)

    def _predict(self, user, item):
        """Predict ``user``'s rating for ``item``.

        Returns the stored rating if the user already rated the item.
        Otherwise each item the user rated is mapped through its regression
        line onto ``item`` and the results are averaged, weighted by the
        absolute similarity.  Returns 0.0 when the total weight is zero.
        """
        if item in user:
            return user[item]
        numerator = 0.0
        denominator = 0.0
        for (first, second), similarity in self._correlations.items():
            if first == item and second in user:
                rated = second
            elif second == item and first in user:
                rated = first
            else:
                continue
            weight = abs(similarity)
            # Regression with the rated item as x and the target item as y.
            slope, intercept = self._regressions[(rated, item)]
            numerator += (user[rated] * slope + intercept) * weight
            denominator += weight
        try:
            return numerator / denominator
        except ZeroDivisionError:
            return 0.0

    def recommends(self, user):
        """Return ``(item, predicted_rating)`` for items ``user`` has not rated.

        The model is rebuilt from the training users on every call.  The
        list is sorted by predicted rating, highest first; ties keep the
        order of ``items``.
        """
        self._ave()
        self._corr()
        self._reg()
        result = [(item, self._predict(user, item))
                  for item in self._items if item not in user]
        result.sort(key=lambda pair: pair[1], reverse=True)
        return result


def main():
    """Run the recommender on a small sample data set and print the result."""
    users = [{'A': 4, 'B': 5, 'C': 2, 'D': 4, 'F': 5},  # user 0
             {'A': 2, 'C': 3, 'D': 4, 'E': 3},          # user 1
             {'A': 1, 'B': 4, 'D': 5, 'E': 3, 'F': 4},  # user 2
             {'B': 5, 'E': 2, 'F': 4},                  # user 3
             {'B': 3, 'C': 1, 'D': 3, 'F': 3}]          # user 4
    items = ['A', 'B', 'C', 'D', 'E', 'F']
    r = recommender(users, items)
    # Prints (item, predicted rating) pairs for the items user 3 has not
    # rated (A, C and D), best prediction first.
    print(r.recommends(users[3]))


if __name__ == "__main__":
    main()
ちなみに，回帰係数は1パスで求めることができるようで，回帰係数を求める部分は下記URLから拝借しました．
Algorithms with Python番外編：統計学の基礎知識 [3] (http://www.geocities.jp/m_hiroi/light/pystat03.html)