青空文庫の本を買った。
で、Hyper Estraier で付録の DVD-ROM に収録されている作品群のインデックスを作ってみることに。
普通に estcmd gather でディレクトリを指定するだけでは芸が無いのでパスから著者と題名を抽出して属性として登録するための（単に TSV を出力するだけの単純な）スクリプトを書いてみた。
まずは PHP で。
<?php
/**
 * Scans an archive directory laid out as <author directory>/<work file> and
 * writes one "path<TAB>author<TAB>title" line per work to standard output,
 * in the form expected by `estcmd gather ... -px @author -px @title`.
 * A usage hint is written to standard error so it never pollutes the pipe.
 *
 * Written to stay compatible with PHP 5 (no type declarations, array()).
 */

/**
 * Settings
 */
//define('AOZR_DIR', '/Volumes/青空文庫/作家別テキストファイル');
define('AOZR_DIR', dirname(__FILE__) . '/archives');
// Removed from author directory names: a leading run of hiragana followed by
// a middle dot, and a trailing full-width parenthesised number.
define('AOZR_RE1', '^[\\x{3040}-\\x{309F}]+?・|（\\d+）$');
// Removed from work file names: a trailing ".txt".
define('AOZR_RE2', '\\.txt$');
// Kana followed by COMBINING VOICED (U+3099) or SEMI-VOICED (U+309A) SOUND MARK.
define('NFDKANA_RE', '/([うか-こさ-そた-とは-ほウカ-コサ-ソタ-トハ-ホゝヽ])\\x{3099}|([は-ほハ-ホ])\\x{309A}/u');
define('EST_DB', 'casket');
define('EST_OPT', '-ft -ic Shift_JIS -il ja -pc UTF-8 -sd -cm');

// mb_ereg_replace() interprets pattern and subject in the mbregex encoding;
// set it explicitly instead of relying on php.ini.
mb_regex_encoding('UTF-8');

/**
 * Scan the directory and write path, author and work title as TSV to stdout.
 *
 * DirectoryIterator is used for both levels: its current element is the
 * iterator itself, so isDot() is available. (On PHP >= 5.3 the elements
 * yielded by RecursiveDirectoryIterator are plain SplFileInfo objects, which
 * have neither isDot() nor hasChildren().)
 */
if (!is_dir(AOZR_DIR)) {
    fwrite(STDERR, sprintf("error: directory not found: %s\n", AOZR_DIR));
} else {
    foreach (new DirectoryIterator(AOZR_DIR) as $author) {
        if ($author->isDot() || !$author->isDir()) {
            continue;
        }
        $name = mb_ereg_replace(AOZR_RE1, '', NFD2NFC($author->getFilename()));

        foreach (new DirectoryIterator($author->getPathname()) as $work) {
            $file = $work->getFilename();
            if ($work->isDot() || !$work->isFile() || $file === '.DS_Store') {
                continue;
            }
            $title = mb_ereg_replace(AOZR_RE2, '', NFD2NFC($file));
            // The path is emitted untouched (it must still resolve on disk);
            // only the author and title attributes are re-normalised.
            fwrite(STDOUT, sprintf("%s\t%s\t%s\n", $work->getPathname(), $name, $title));
        }
    }
}

/**
 * Write the usage hint to standard error.
 */
fwrite(STDERR, "\n");
fwrite(STDERR, "usage:\n");
fwrite(STDERR, sprintf("php %s 2>/dev/null | estcmd gather %s -px @author -px @title %s -\n", __FILE__, EST_OPT, EST_DB));
fwrite(STDERR, sprintf("estcmd extkeys -kn 32 -um %s\n", EST_DB));
fwrite(STDERR, sprintf("estcmd optimize %s\n", EST_DB));

/**
 * Re-normalises hiragana/katakana that HFS+ stores in decomposed (NFD) form
 * into their precomposed (NFC) form.
 * Latin characters with diacritics are not handled.
 *
 * @param string $str UTF-8 text
 * @return string the text with kana + combining sound mark pairs composed;
 *                returned unchanged if the regex engine reports an error
 *                (e.g. malformed UTF-8)
 */
function NFD2NFC($str)
{
    $composed = preg_replace_callback(NFDKANA_RE, 'NFD2NFC_cb', $str);
    return $composed === null ? $str : $composed;
}

/**
 * Callback invoked by NFD2NFC() for every "kana + combining mark" match.
 *
 * Uses an explicit lookup table rather than arithmetic on the last UTF-8
 * byte: adding 1 to the third byte overflows for U+30BF (0xBF -> 0xC0, which
 * is not a valid continuation byte), and the voiced forms of U+3046/U+30A6
 * are not adjacent to their base characters. Matches whose base is not a real
 * base character (the regex ranges also span already-voiced kana) are
 * returned unchanged.
 *
 * @param array $m regex match; $m[0] is the base character plus the mark
 * @return string the precomposed character, or $m[0] if no composition exists
 */
function NFD2NFC_cb($m)
{
    static $table = null;

    if ($table === null) {
        $table = array();
        $rows = array(
            // combining mark (UTF-8 bytes), base characters, precomposed characters
            array(
                "\xE3\x82\x99", // U+3099
                'うかきくけこさしすせそたちつてとはひふへほゝウカキクケコサシスセソタチツテトハヒフヘホヽ',
                'ゔがぎぐげござじずぜぞだぢづでどばびぶべぼゞヴガギグゲゴザジズゼゾダヂヅデドバビブベボヾ',
            ),
            array(
                "\xE3\x82\x9A", // U+309A
                'はひふへほハヒフヘホ',
                'ぱぴぷぺぽパピプペポ',
            ),
        );
        foreach ($rows as $row) {
            $bases = preg_split('//u', $row[1], -1, PREG_SPLIT_NO_EMPTY);
            $marked = preg_split('//u', $row[2], -1, PREG_SPLIT_NO_EMPTY);
            foreach ($bases as $i => $base) {
                $table[$base . $row[0]] = $marked[$i];
            }
        }
    }

    return isset($table[$m[0]]) ? $table[$m[0]] : $m[0];
}
?>
このように SPL の (Recursive)DirectoryIterator を使うとディレクトリの走査・ファイルの判別がスマートに書けていい感じ。
あと少し変わった点といえば、HFS+ (MacOS のファイルシステム) のパスは Unicode の NFD という方式で正規化されている。そのままだと著者や題名で検索するときちょっと困るので、NFC という方式で正規化し直しているところ。
残念ながら PHP は標準で Unicode 正規化ができず、ライブラリも見つけることができなかったので*1、正規化の対象をひらがな・カタカナに絞って正規表現とコールバック関数を使って処理。（というか、この部分のコードは別のところから流用）
欧文のダイアクリティカルマーク付きの文字が絡むとひどく面倒だけど、今回は不要なのでスルーするが、ICU を使う PHP6(CVS) にも Unicode 正規化関数が存在しないってのはどうなのか。
さらに同じものを学習中の Python で書き直してみた。
"""Scan ./archives (<author directory>/<work file>) and print one
"path<TAB>author<TAB>title" line per work to standard output, in the form
expected by `estcmd gather ... -px @author -px @title`.

A usage hint is written to standard error so it never pollutes the pipe.
Runs on Python 2 and Python 3.
"""
import os
import re
import sys
from unicodedata import normalize

join = os.path.join
echo2 = sys.stderr.write

# os.getcwdu() (Python 2) returns text directly; unicode(os.getcwd()) would
# decode with the ASCII codec and fail on a non-ASCII working directory.
basepath = join(getattr(os, 'getcwdu', os.getcwd)(), u'archives')
est_db = 'casket'
est_opt = '-ft -ic Shift_JIS -il ja -pc UTF-8 -sd -cm'

# Removed from author directory names: a leading run of hiragana followed by
# KATAKANA MIDDLE DOT (U+30FB), and a trailing number wrapped in full-width
# parentheses (U+FF08 / U+FF09). Written with \u escapes so the source needs
# no encoding declaration.
aozr_re1 = re.compile(u'^[\u3040-\u309F]+?\u30FB|\uFF08\\d+\uFF09$')
# Removed from work file names: a trailing ".txt".
aozr_re2 = re.compile(u'\\.txt$')


def echo1(text):
    """Write `text` to standard output as UTF-8 bytes.

    When stdout is a pipe, Python 2 has no output encoding and falls back to
    ASCII, so writing non-ASCII text directly raises UnicodeEncodeError.
    """
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    out.write(text.encode('utf-8'))


def author_name(dirname):
    """Return the NFC-normalised author name for an author directory name."""
    return aozr_re1.sub(u'', normalize('NFC', dirname))


def work_title(filename):
    """Return the NFC-normalised work title for a work file name."""
    return aozr_re2.sub(u'', normalize('NFC', filename))


def iter_records(root):
    """Yield (path, author, title) for every work file below `root`.

    The path is yielded as found on disk (it must still resolve); only the
    author and title are re-normalised.
    """
    if isinstance(root, bytes):
        # A text path makes os.listdir() return text, which normalize() needs.
        root = root.decode(sys.getfilesystemencoding() or 'utf-8')

    for author in os.listdir(root):
        workspath = join(root, author)
        # Skip stray files such as a top-level .DS_Store.
        if not os.path.isdir(workspath):
            continue
        name = author_name(author)
        for work in os.listdir(workspath):
            # Do not filter on the .txt extension: a few works lack it.
            if work == u'.DS_Store':
                continue
            workpath = join(workspath, work)
            if not os.path.isfile(workpath):
                continue
            yield workpath, name, work_title(work)


def main():
    """Print the TSV records to stdout and the usage hint to stderr."""
    if os.path.isdir(basepath):
        for workpath, name, title in iter_records(basepath):
            echo1(u'%(path)s\t%(author)s\t%(title)s\n' %
                  {'path': workpath, 'author': name, 'title': title})
    else:
        echo2('error: directory not found: %r\n' % (basepath,))

    echo2('\n')
    echo2('usage:\n')
    echo2('python %(self)s 2>/dev/null | estcmd gather %(opt)s -px @author -px @title %(db)s -\n' %
          {'self': sys.argv[0], 'opt': est_opt, 'db': est_db})
    echo2('estcmd extkeys -kn 32 -um %(db)s\n' % {'db': est_db})
    echo2('estcmd optimize %(db)s\n' % {'db': est_db})


if __name__ == '__main__':
    main()
ちょっと理解が深まった気がする。移植というのは目的がはっきりしているだけに新しい言語を覚える際のトレーニングに向いているんじゃないかと思った。
ところで Python には PHP でいうところの定数に相当するものが無いっぽい。（ には上書き不可のメンバ変数として定数を実装する方法が載っていた）
他にもマジック定数 __FILE__ や __LINE__ に相当するものがあるのかないのかも分からず、手探り状態。