Pythonの内部処理で、与えられたUTF-8の文字列がASCII文字のみを含むかをテストするコードを書いています。
size_t型が8バイトの時、ポインタを char*
から size_t*
にキャストして、0x8080808080808080ullと論理積を取れば8バイトを一度に処理できます。
単純化すると次のようなコードになります。
#define ASCII_MASK 0x8080808080808080ull

/*
 * Return the offset of the first non-ASCII byte (high bit set) in
 * [start, end), or end - start when every byte is ASCII.
 *
 * The fast path loads 8 bytes at a time through a size_t pointer, so it
 * assumes size_t is 8 bytes wide.
 * NOTE(review): the word load also assumes p is suitably aligned for
 * size_t - confirm against the callers.
 */
size_t find_first_nonascii(const char *start, const char *end)
{
    const char *p = start;

    // Process 8 bytes at a time. Compare the remaining length rather than
    // forming `end - 8`, which would point before the buffer (undefined
    // behavior) whenever fewer than 8 bytes are available.
    while (end - p >= 8) {
        if (*(const size_t *)p & ASCII_MASK) {
            break;
        }
        p += 8;
    }

    // Process 1 byte at a time: handles the tail, and pinpoints the exact
    // byte after the word loop stopped on a hit.
    while (p < end) {
        if (*p & 0x80) {
            return (size_t)(p - start);
        }
        p++;
    }
    return (size_t)(end - start);
}
1バイトずつ処理する部分が気になります。これは最下位ビットから1になっているビットを探して位置を返してくれるGCC/clangのビルトイン関数 __builtin_ctzll
を使えば、前半部分をこのように高速化できます。
// Scan 8 bytes at a time and return the offset of the first non-ASCII byte
// as soon as a word contains one. The remaining length is compared instead
// of forming `end - 8`, which would point before the buffer (undefined
// behavior) when fewer than 8 bytes are available.
while (end - p >= 8) {
    size_t x = *(const size_t *)p & ASCII_MASK;
    if (x) {
        // The lowest set bit of x identifies the first offending byte
        // (the examples assume p[0] is the least significant byte):
        //   p[0] is non-ASCII: ctzll(0x80)   == 7,  (7 - 7) / 8  == 0
        //   p[1] is non-ASCII: ctzll(0x8000) == 15, (15 - 7) / 8 == 1
        return p - start + (__builtin_ctzll(x) - 7) / 8;
    }
    p += 8;
}
しかし問題は後半部分です。 8バイト未満の端数部分をsize_tに格納できれば同じ高速化ができます。たとえば次のようになります。
// (end-p)ã3ã®ã¨ãã 3*8=24, 1ull << 24 = 0x1000000, 0x1000000-1 = 0xffffff. // ããã¨maskãã¦ããã°ã4ãã¤ã以éã¯ç¡è¦ã§ããã size_t x = *(size_t*)p & ((1ull << (end - p) * 8) - 1) & ASCII_MASK; if (x) { return p - start + (__builtin_ctzll(x) - 7) / 8; } return end - start;
これでバイト数*2回だった分岐が1回に減らせて、めでたしめでたし。。。なのですが、残念ながら入力の文字列の範囲外を読んでからマスクしているだけなので、範囲外アクセスが発生しています。 試しに clang の AddressSanitizer を使ってみるとエラーが出てしまいました。
アライメントさえ取っていれば、8バイトのreadがセグメンテーション違反を起こすことは無いと思いますが、範囲外アクセスはやはり避けたいです。 そこで次のように switch 文を書いてみました。
size_t u = (size_t)(p[0]); switch (end - p) { default: u |= (size_t)(p[7]) << 56ull; // fall through case 7: u |= (size_t)(p[6]) << 48ull; // fall through case 6: u |= (size_t)(p[5]) << 40ull; // fall through case 5: u |= (size_t)(p[4]) << 32ull; // fall through case 4: u |= (size_t)(p[3]) << 24; // fall through case 3: u |= (size_t)(p[2]) << 16; // fall through case 2: u |= (size_t)(p[1]) << 8; break; case 1: break; } if (u & ASCII_CHAR_MASK) { return p - start + (__builtin_ctzll(u & ASCII_CHAR_MASK) - 7) / 8; }
これで範囲外アクセスはなくなりました。 clang が生成したアセンブリ(intel形式)は次のような感じになります。
...
    u |= (size_t)(p[7]) << 56ull;
 147:   44 0f b6 59 07          movzx  r11d,BYTE PTR [rcx+0x7]
 14c:   49 c1 e3 38             shl    r11,0x38
 150:   4d 09 da                or     r10,r11
    u |= (size_t)(p[6]) << 48ull;
 153:   44 0f b6 59 06          movzx  r11d,BYTE PTR [rcx+0x6]
 158:   49 c1 e3 30             shl    r11,0x30
 15c:   4d 09 da                or     r10,r11
    u |= (size_t)(p[5]) << 40ull;
 15f:   44 0f b6 59 05          movzx  r11d,BYTE PTR [rcx+0x5]
 164:   49 c1 e3 28             shl    r11,0x28
 168:   4d 09 da                or     r10,r11
    u |= (size_t)(p[4]) << 32ull;
...
1バイトずつリードして、左シフトして、ORしてますね。switchのジャンプテーブルは使われているので分岐を減らす目的は達成できました。
しかし、アライメントを保証することで、範囲外リードを安全に行えるような方法があればそっちのほうがいいので、その方法を現在探しています。