[ããã°ã©ãã³ã°] ããã並åã¢ã«ã´ãªãºã ã使ã£ãç·¨éè·é¢
ãµã¨ãããã並åã¢ã«ã´ãªãºã ã使ã£ãç·¨éè·é¢ãè¨ç®ããã¢ã«ã´ãªãºã ãæ¸ããããªã£ãã®ã§æ¸ãã¦ã¿ãã
ã¾ããé常ã®ç·¨éè·é¢ã§ããLevenshtein Distanceãæ±ããã¢ã«ã´ãªãºã ã¯ä»¥ä¸ã®ããã«æ¸ãã
int levenshteinDistance(String A, String B) { int m = A.length(); int n = B.length(); int dp[] = new int[n + 1]; int next[] = new int[n + 1]; for (int i = 0; i <= n; i++) dp[i] = i; for (int i = 1; i <= m; i++) { next[0] = i; for (int j = 1; j <= n; j++) { if (A.charAt(i - 1) == B.charAt(j - 1)) { next[j] = dp[j - 1]; } else { int r = dp[j - 1]; r = min(r, dp[j]); r = min(r, next[j - 1]); next[j] = r + 1; } } int tmp[] = dp; dp = next; next = tmp; } return dp[n]; }
ãã®ã¢ã«ã´ãªãºã ã®æéè¨ç®éã¯O(nm)ã空éè¨ç®éã¯O(n)ã¨ãªã£ã¦ããã(ããã§n,mã¯ããããæååã®é·ã)
ãã®åé¡ã«å¯¾ãã¦ã1999å¹´ã«Myersãããã並åææ³ãç¨ãã¦O(floor(m/w) n)ã®æ¹æ³ãææ¡ãã[1]ãã¾ã2003å¹´ã«HyyröãO(floor(m/w) d)(dã¯2ã¤ã®æååã®ç·¨éè·é¢)ã®æ¹æ³ãææ¡ãã¦ãã[2]ã
ããã並åææ³ã¯é常ã®CPUã32ãããã64ãããã®æ°å¤ã1å½ä»¤ã§æ±ãããã¨ãå©ç¨ãã¦è¤æ°ã®è¨ç®ãä¸åº¦ã«å®è¡ããææ³ã§ãããç·¨éè·é¢ã®åé¡ã«ããã¦ã¯DPã®åå¤ãåããã³è¡ã1ã¤ããããã¦ããªãã¨ãã«é«ã 1ããããã¦ããªãã¨ãããã¨ãå©ç¨ãã¦ãããã¨ãã§ãããããã«é¢ãã¦ã¯æç®[3]ãªã©ã«è©³ãããã¾ã[4]ã®Hyyröã®è«æãªã¹ãã«ã¯ããã並åææ³ãå©ç¨ããæååç §åã¢ã«ã´ãªãºã ã«é¢ãã¦å¤ãã®è«æãããã
以ä¸ã«æç®[2]ã®Figure 3ãå®è£ ããã³ã¼ãã示ããä¸ã®ã³ã¼ãã§ã¯m <= 64ãä»®å®ãã¦ãããããã«é¢ãã¦ã¯ä¾ãã°ã¹ãã«ãã§ãã¯ãªã©ã®å¿ç¨ã«ããã¦ã¯64æå以ä¸ã§ååãªãã¨ãå¤ããå ´åã«ãã£ã¦ã¯ä»¥ä¸ã®ã³ã¼ãã§ãååã§ããã
void createPM(String query, long PM[]) { for (int i = 0; i < query.length(); i++) { char c = query.charAt(i); PM[c] |= 1l << i; } } int levenshteinDistance(int m, long PMs[], String B) { int D = m; long D0, HP, HN, VP, VN; long top = 1l << (m - 1); VP = ~0; VN = 0; for (char c : B.toCharArray()) { long PM = PMs[c]; D0 = (((PM & VP) + VP) ^ VP) | PM | VN; HP = VN | ~(D0 | VP); HN = D0 & VP; if ((HP & top) != 0) { ++D; } else if ((HN & top) != 0) { --D; } VP = (HN << 1) | ~(D0 | ((HP << 1) | 1)); VN = D0 & ((HP << 1) | 1); } return D; }
å®é¨çµæ
ä»åå®é¨ã¨ãã¦ã©ã³ãã ãªé·ã50ã®æåå1000åã«å¯¾ãã¦ãã©ã³ãã ãªåºå®é·ã®ã¯ã¨ãªã100åçæããç·¨éè·é¢ã®è¿ããã®ãè¦ã¤ããã®ã«ãããæéã測å®ãããã¯ã¨ãªã®é·ãã¯4ãã64ã¾ã§4å»ã¿ã§å¤åãããã
çµæã¯ä»¥ä¸ã®å³ã®ããã«ãªããã¯ã¨ãªé·ãããç¨åº¦é·ãã¨ãã«ã¯ããã並ååã®å¹æãé«ããªããã¨ãåããã
ãã®ä»
ã¾ããlongã2åå©ç¨ãã¦128ãããã¾ã§å¯¾å¿ãããã®ã以ä¸ã«ç¤ºããã¾ããå¤åé·æ¼ç®ãå®è£ ããã¯ã©ã¹ãä½æããä»»æã®é·ãã®æååãæ±ããããã°ã©ã ãä½æãããããªããã£ãåãå©ç¨ãããã®ã«æ¯ã¹10åç¨åº¦é ããç¾å¨é度ãæ¹åã§ããªããã¨èãã¦ããã¨ããã§ããã
void createPM(String query, long PMl[] , long PMh[]) { for (int i = 0; i < query.length(); i++) { char c = query.charAt(i); if(i < 64){ PMl[c] |= 1l << i; }else{ PMh[c] |= 1l << (i - 64); } } } int levenshteinDistance128(int m, long PMl[] , long PMh[], String B) { if(m > 128)return -1; int D = m; long D0, HP, HN, VP, VN; long D0_h, HP_h, HN_h, VP_h, VN_h; long top = 1l << (m - 1); if(m > 64){ top = 1l << (m - 65); } VP = ~0; VN = 0; VP_h = ~0; VN_h = 0; for (char c : B.toCharArray()) { long PM = PMl[c]; long PM_h = PMh[c]; long x = (PM & VP); //check x + VP is overflow //(cf. Hacker's Delight 2-12) long carry = ((x & VP) | ((x | VP) & ~(x + VP))) >>> 63; D0 = ((x + VP) ^ VP) | PM | VN; D0_h = (((PM_h & VP_h) + VP_h + carry) ^ VP_h) | PM_h | VN_h; HP = VN | ~(D0 | VP); HP_h = VN_h | ~(D0_h | VP_h); HN = D0 & VP; HN_h = D0_h & VP_h; if(m <= 64){ if ((HP & top) != 0)++D; else if ((HN & top) != 0)--D; }else{ if ((HP_h & top) != 0)++D; else if ((HN_h & top) != 0)--D; } long H_h = (HP_h << 1) | (HP >>> 63); long H2_h = (HN_h << 1) | (HN >>> 63); VP = (HN << 1) | ~(D0 | ((HP << 1) | 1)); VP_h = H2_h | ~(D0_h | H_h); VN = D0 & ((HP << 1) | 1); VN_h = D0_h & H_h; } return D; }
åèæç®
[1] G. Myers: A fast bit-vector algorithm for approximate string matching based on dynamic progamming. Journal of the ACM, 46(3): 395-415, 1999.
[2] Heikki Hyyrö: A Bit-Vector Algorithm for Computing Levenshtein and Damerau Edit Distances, In the proceedings of the Prague Stringology Conference 2002 (PSC 2002)
[3] Heikki Hyyrö: Extending and Explaining the Bit-Parallel Approximate String Matching Algorithm of Myers, Technical report A2001-10 of the Department of Computer and Information Sciences, University of Tampere.
[4] http://www.cs.uta.fi/~helmu/pubs/pubs.html