WEB+DBプレスの「[速習]レコメンドエンジン」のサンプルプログラムを訂正してみた
プリファードインフラストラクチャーのid:tkngさんと岡野原さんがWEB+DBプレスvol.49に「[速習]レコメンドエンジン」という記事を書かれています。
レコメンドエンジンには前々から興味を持っていたので、早速サンプルコードを自分でも書いてみよう、と思い誌面のソースをXCodeに打ち込んで動かしていたのですが、コンパイルが通らない。
なにぶんC++なんてほとんど書いたことがないので、自分のやり方が悪いのかと疑ったのですが、代入する変数が間違っていたり、ミススペルがあったり、など、おそらくこれは誌面のソースが間違っているのだろうという結論に至りました。
例えば・・・
p130 ãªã¹ãï¼ãã
#include <vector>
typedef vector<pair<int, int> > SparseDoc;Svec; // ←コンパイルとおりません。
typedef SparseVec::iterator Svec::iterator sit; // ←コンパイルとおりません。
p130 リスト3より
float calcInp(sparceVec& Svec& il1, sparceVec& SVec& il2) { // ←コンパイルとおりません。さらにtypedefではSparseVecとなっているはずなのにsparceVecです。
  ...
}
などなど。
僕のように、自分のやり方が悪いんじゃないかと疑って苦労している人がいるのではないかと思い、修正したソースコードをあげておきます。
ちなみに、内容としては以下のような縦軸にユーザ番号、横軸にアイテム番号を取り、値としてユーザのアイテムに対する評価点を持った行列が与えられたときに、アイテム間の類似度を計算するというものです。
ユーザ番号\アイテム番号 | 0 | 1 | 2 | 3 | 4 |
---|---|---|---|---|---|
0 | | 1 | | | 3 |
1 | | 2 | | | 3 |
2 | 3 | | 5 | | |
3 | | 2 | 2 | | |
4 | 3 | | | 4 | |
以下、修正したソースコード。WEB+DBプレスvol.49の130ページ、131ページあたりが修正の対象で、アイテム番号0に対する各アイテムの類似度を計算して出力する箇所をmain()関数として追加しています。
[main.cpp]
#include <cmath>
#include <iostream>
#include <utility>
#include <vector>

using namespace std;

// Sparse vector of (id, rating) pairs. calcInp() merges two of these and
// therefore requires the pairs to be sorted by ascending id.
typedef vector< pair<int,int> > SparseVec;
typedef SparseVec::iterator sit;

// userLog[u]: the (item number, rating) pairs reviewed by user u,
// sorted by ascending item number.
vector<SparseVec> userLog;

// itemLog[i]: the (user number, rating) pairs recorded for item i,
// sorted by ascending user number.
vector<SparseVec> itemLog;

// Computes the inner product of il1 and il2.
//
// Both vectors are walked in parallel (merge style); only ids present in
// both contribute. The inputs are never modified, so they are taken by
// const reference (existing callers passing non-const lvalues still bind).
// The sum is accumulated as int and converted to float on return.
float calcInp (const SparseVec& il1, const SparseVec& il2) {
    SparseVec::const_iterator it1 = il1.begin();
    SparseVec::const_iterator it2 = il2.begin();
    int inp = 0;
    while (it1 != il1.end() && it2 != il2.end()) {
        const int id1 = it1->first;
        const int id2 = it2->first;
        if (id1 < id2) {
            ++it1;
        } else if (id1 > id2) {
            ++it2;
        } else {
            inp += it1->second * it2->second;
            ++it1;
            ++it2;
        }
    }
    return static_cast<float>(inp);
}

// norms[i]: Euclidean length of itemLog[i], precomputed by calcNorms().
vector<float> norms;

// Recomputes norms from the current itemLog.
//
// norms is rebuilt from scratch so that calling this more than once
// (e.g. after itemLog changed) keeps norms[i] aligned with itemLog[i]
// instead of appending a second set of values.
void calcNorms () {
    norms.clear();
    norms.reserve(itemLog.size());
    for (size_t i = 0; i < itemLog.size(); ++i) {
        norms.push_back(sqrt(calcInp(itemLog[i], itemLog[i])));
    }
}

// Returns the cosine similarity between item i and item j.
//
// Requires calcNorms() to have been called. If either item has a zero-length
// vector (no ratings), the similarity is undefined; 0 is returned instead of
// the NaN that 0/0 would produce.
float calcCosSim (int i, int j) {
    if (norms[i] == 0.0f || norms[j] == 0.0f) {
        return 0.0f;
    }
    const float inp = calcInp(itemLog[i], itemLog[j]);
    return inp / norms[i] / norms[j];
}

// Computes the cosine similarity between item i and every item in one pass
// and stores it in ret (ret[j] ends up equal to calcCosSim(i, j)).
//
// Requires calcNorms() to have been called, and userLog / itemLog to describe
// the same ratings (every user number in itemLog[i] must index userLog, and
// every item number in userLog must be below itemLog.size()).
void calcMatchNum (int i, vector<float>& ret) {
    // assign(), not resize(): a reused output vector must start from zero,
    // otherwise the accumulation below adds onto stale values.
    ret.assign(itemLog.size(), 0.0f);

    const SparseVec& sv = itemLog[i];
    for (SparseVec::const_iterator it = sv.begin(); it != sv.end(); ++it) {
        // Enumerate everything rated by user it->first, who also rated item i.
        // Bound by reference to avoid copying the user's row each iteration.
        const SparseVec& sv2 = userLog[it->first];
        const int val = it->second;
        for (SparseVec::const_iterator it2 = sv2.begin(); it2 != sv2.end(); ++it2) {
            ret[it2->first] += val * it2->second;
        }
    }

    // Normalize the accumulated inner products.
    for (size_t j = 0; j < itemLog.size(); ++j) {
        const float denom = norms[i] * norms[j];
        // Zero-length vectors have no defined similarity; report 0, not NaN.
        ret[j] = (denom == 0.0f) ? 0.0f : ret[j] / denom;
    }
}

// Builds the sample rating matrix and prints the similarity of item 0 to
// every item, first pairwise via calcCosSim, then in bulk via calcMatchNum.
int main (int argc, char * const argv[]) {
    // Initialize itemLog.
    itemLog.resize(5);
    itemLog[0].push_back(make_pair(2,3));
    itemLog[0].push_back(make_pair(4,3));
    itemLog[1].push_back(make_pair(0,1));
    itemLog[1].push_back(make_pair(1,2));
    itemLog[1].push_back(make_pair(3,2));
    itemLog[2].push_back(make_pair(2,5));
    itemLog[2].push_back(make_pair(3,2));
    itemLog[3].push_back(make_pair(4,4));
    itemLog[4].push_back(make_pair(0,3));
    itemLog[4].push_back(make_pair(1,3));

    // Initialize userLog.
    userLog.resize(5);
    userLog[0].push_back(make_pair(1,1));
    userLog[0].push_back(make_pair(4,3));
    userLog[1].push_back(make_pair(1,2));
    userLog[1].push_back(make_pair(4,3));
    userLog[2].push_back(make_pair(0,3));
    userLog[2].push_back(make_pair(2,5));
    userLog[3].push_back(make_pair(1,2));
    userLog[3].push_back(make_pair(2,2));
    userLog[4].push_back(make_pair(0,3));
    userLog[4].push_back(make_pair(3,4));

    // Precompute the length of every item vector.
    calcNorms();

    // Similarity between one item and each other item, computed one by one.
    cout << "calcCosSim" << '\n';
    for (size_t i = 0; i < itemLog.size(); ++i) {
        cout << "itemNo:" << i << " similarity:" << calcCosSim(0, i) << '\n';
    }

    // Similarity between one item and all other items, computed in one pass.
    cout << "calcMatchNum" << '\n';
    vector<float> cosSimVec;
    calcMatchNum(0, cosSimVec);
    for (size_t i = 0; i < itemLog.size(); ++i) {
        cout << "itemNo:" << i << " similarity:" << cosSimVec[i] << '\n';
    }

    return 0;
}
ちなみに実行した結果は以下の通り。
calcCosSim
itemNo:0 similarity:1
itemNo:1 similarity:0
itemNo:2 similarity:0.656532
itemNo:3 similarity:0.707107
itemNo:4 similarity:0
calcMatchNum
itemNo:0 similarity:1
itemNo:1 similarity:0
itemNo:2 similarity:0.656532
itemNo:3 similarity:0.707107
itemNo:4 similarity:0
となり、calcCosSimでもcalcMatchNumでも同様の結果が得られていることが分かります。
またアイテム0と類似度が一番高いのはアイテム3だということが分かります。
なんだか揚げ足取りみたいになってしまいましたが、この記事自体はレコメンドのアルゴリズムを実際のソースコードや数式などを交えながら説明した数少ない一般紙での記事だと思いますし、LSHやSVD、RBMといった一歩踏み込んだレコメンドの手法も紹介されていて非常に面白いです。とってもすばらしい記事ですので、上のコードとかも参考にし、多くの人が理解を深めてくれるといいなぁと思います。修正したら図書券とかもらえたりするかなぁ。
id:tkngさん、岡野原さん、すばらしい記事をどうも有り難うございます！