能動学習システムDUALISTを日本語テキストに適用する
機械学習手法に基づくテキスト分類は十分な学習データがあれば高い精度が期待できますが、分類ラベルを人手でつける作業に時間がかかります。そこで、効率的に分類器を学習させる手法として、効果的な分類対象を優先的にラベル付けさせる能動学習(active learning)というアプローチがあります。
DUALISTは、アノテータに対象のラベル付けと同時に、素性であるキーワードが適切かどうかの判定を委ねる能動学習システムで、7月に開催されたEMNLP 2011に採択された論文で提案されており、実装も公開されています。
Google Code Archive - Long-term storage for Google Code Project Hosting.
DUALISTのインストールと実行は簡単です。システムはJavaで実装されていて、機械学習パッケージのMALLETが同梱されています。他に、WebフレームワークのPlay!をインストールする必要があります。その上で、DUALISTを展開したディレクトリ内でplay runを実行すると、ブラウザでhttp://localhost:9000/にアクセスしてシステムを試すことができます。
しかし残念ながら、そのままでは日本語のテキストを扱うことができません。そこで、MeCabのJavaバインディングを利用してDUALISTを日本語テキストに適用することにします。
まず、MeCabを使って名詞と未知語を抽出するMALLETのPipeを定義します。
package dualist.ja; import cc.mallet.pipe.Pipe; import cc.mallet.extract.StringSpan; import cc.mallet.extract.StringTokenization; import cc.mallet.types.Instance; import cc.mallet.types.TokenSequence; import org.chasen.mecab.Tagger; import org.chasen.mecab.Node; public class SimpleMecabPipe extends Pipe { static { try { System.loadLibrary("mecab-java"); } catch (UnsatisfiedLinkError e) { System.err.println("ERROR: Failed to load mecab-java native code."); System.err.println(e); System.exit(1); } } public Instance pipe (Instance carrier) { CharSequence input = (CharSequence) carrier.getData(); String string = input.toString(); Tagger tagger = new Tagger(); Node node = tagger.parseToNode(string); int cursor = 0; TokenSequence ts = new StringTokenization(input); while (node != null) { node = node.getNext(); if (node == null) break; String[] f = node.getFeature().split(","); if (f[0].equals("åè©") && !f[1].equals("æ°") && !f[1].equals("ãµå¤æ¥ç¶") && !f[1].equals("æ¥å°¾") || f[0].equals("æªç¥èª")) { String surface = node.getSurface(); cursor = string.indexOf(surface, cursor); ts.add (new StringSpan(input, cursor, cursor + surface.length())); } } carrier.setData(ts); return carrier; } }
次に、dualist/app/guts/pipes/DocumentPipe.javaでSerialPipesを生成する引数のうち、CharSequence2TokenSequenceのところを、先ほどのSimpleMecabPipeに置き換えます。なお、Play!フレームワークはJavaベースでありながら、スクリプト言語よろしくコンパイルせずに実行できる優れものですが、JNIに必要なネイティブライブラリの読み込みができないようです。そこで、SimpleMecabPipeクラスはあらかじめコンパイルしておき、dualist/lib/dualist-ja.jarに格納して参照することにします。また、MeCabのJNI用jarファイルも同じディレクトリに置きます。
さて、動作確認ですが、JNI用ネイティブライブラリのパスを指定してplayコマンドを実行します。ブラウザでhttp://localhost:9000/にアクセスして、日本語のデータサンプルを読み込めば、画面右側の素性リストに日本語の単語が表示されるはずです。
以上の内容をMacPortsの形にまとめて現在登録申請中です。このportを使うと、sudo port install dualist +mecabでインストールができて、dualist-mecabを実行することで、日本語テキストを対象としたシステムを利用できます。