PHP から CaboCha を利用できるように、拡張モジュールを作成しています。
github.com
一般的な PHP 拡張モジュールと同様に、次の手順でインストールできます。ただし、現時点では PHP 7 専用です。
$ git clone https://github.com/y-uti/php-cabocha.git
$ cd php-cabocha
$ phpize
$ ./configure
$ make
$ sudo make install
以下のようにして入力文を解析できます。
<?php
// Obtain a CaboCha handle and parse one sample sentence with it.
// The sentence literal must be valid UTF-8 Japanese text; it is the input
// whose chunks and tokens the rest of the article inspects through $tree.
$cabocha = cabocha_new();
$tree = cabocha_parse($cabocha, '吾輩はここで始めて人間というものを見た。');
cabocha_parse 関数を実行して得られる $tree は以下のキーを持つ配列です*1。主要な情報は $tree['chunk'] と $tree['token'] に格納されます。
キー | 値の型 | 値の意味 |
---|---|---|
sentence | string | 入力文 |
chunk | array | 文節 |
token | array | 形態素 |
charset | int | バイナリ辞書の文字コード |
posset | int | 形態素解析器の品詞体系 |
output_layer | int | 出力のレイヤ |
$tree['chunk'] の各要素には、文節の情報が格納されます*2。以下のような情報が含まれます。
<?php
// Each statement sits on its own line so that a trailing "//" comment
// cannot swallow the statements that follow it.
$chunk0 = $tree['chunk'][0];  // take the first chunk (bunsetsu)
echo $chunk0['token_pos'];    // index of the first token belonging to this chunk (0)
echo $chunk0['token_size'];   // number of tokens belonging to this chunk (2)
echo $chunk0['link'];         // index of the chunk this chunk depends on (5)
一方、$tree['token'] の各要素には、形態素の情報が格納されます*3。以下のような情報が含まれます*4。
<?php
// Each statement sits on its own line so that a trailing "//" comment
// cannot swallow the statements that follow it.
$token0 = $tree['token'][0];  // take the first token (morpheme)
echo $token0['surface'];      // surface form ("吾輩")
echo $token0['feature'];      // feature string ("名詞,代名詞,一般,*,*,*,吾輩,ワガハイ,ワガハイ")
echo $token0['ne'];           // named-entity analysis result ("O")
たとえば次のようなコードで、文節間の係り受けの関係を取り出せます。
<?php
/**
 * Builds the text of one chunk by joining the surface forms of its tokens.
 *
 * @param array $tree  Parse result; its 'token' entry is read.
 * @param array $chunk Chunk entry; 'token_pos' and 'token_size' select the token range.
 * @return string The 'surface' values of the selected tokens, concatenated in order.
 */
function surface($tree, $chunk)
{
    $members = array_slice($tree['token'], $chunk['token_pos'], $chunk['token_size']);

    $text = '';
    foreach ($members as $member) {
        $text .= $member['surface'];
    }

    return $text;
}

// Print every chunk, together with the chunk it links to when it has one.
foreach ($tree['chunk'] as $i => $chunk) {
    $src = surface($tree, $chunk);
    $link = $chunk['link'];

    // A link of -1 is treated as "no target chunk": print the source text only.
    if ($link === -1) {
        echo "$i: $src\n";
        continue;
    }

    $dst = surface($tree, $tree['chunk'][$link]);
    echo "$i: $src -> $dst ($link)\n";
}
実行結果は以下のとおりです。
0: 吾輩は -> 見た。 (5)
1: ここで -> 始めて (2)
2: 始めて -> 人間という (3)
3: 人間という -> ものを (4)
4: ものを -> 見た。 (5)
5: 見た。
*1:$tree は CaboCha の cabocha_sparse_totree 関数から得られる cabocha_tree_t 構造体を PHP の配列に詰め替えたものです。データ構造の詳細は CaboCha のソースコードを参照してください。
*2:cabocha_chunk_t 構造体に対応します。
*3:cabocha_token_t 構造体に対応します。
*4:固有表現解析の結果は IOB2 形式で格納されます。CaboCha のウェブサイトに説明がありますので、そちらを参照してください。