OpenNLP Toolsによる英語の解析: 構文解析編
以前のエントリで、Javaの自然言語処理ライブラリOpenNLP Toolsを使った英語の品詞解析（POS tagging）について書いたんですが、構文解析についても方法を知りたがっている方が多いようなので、手順を簡単に説明します。
1. OpenNLP Toolsのダウンロード
以下のOpenNLPのサイトからOpenNLP Toolsをダウンロードします。
2. モデルファイルのダウンロード
http://opennlp.sourceforge.net/models/english/parser/
上記ページ内の全てのファイルをダウンロードして、1つのディレクトリに保存します。
3. opennlp.tools.lang.english.TreebankParserの使用
以下が、TreebankParserを使って"I am a student."を構文解析するサンプルです。1でダウンロードしたOpenNLP Tools中のjarファイルとsrcディレクトリ以下をクラスパスに追加する必要があります。また、2のファイルをC:\opennlpに保存してあるものとします。
import opennlp.tools.lang.english.TreebankParser;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.ParserME;
import opennlp.tools.util.Span;

/**
 * Minimal sample: parses one English sentence with the OpenNLP Treebank
 * parser and prints the resulting tree in bracketed form to standard output.
 */
public class Test {

    /**
     * Builds a parser from the model directory {@code C:\opennlp}, parses the
     * fixed sentence {@code "I am a student."} and prints every returned parse.
     *
     * @param args unused
     * @throws Exception propagated unchanged from parser construction or parsing
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): the two boolean flags are presumably the tag-dictionary
        // options of TreebankParser.getParser -- confirm against the OpenNLP API.
        ParserME treebankParser = TreebankParser.getParser(
                "C:\\opennlp",
                true,
                false,
                ParserME.defaultBeamSize,
                ParserME.defaultAdvancePercentage);

        String sentence = "I am a student.";

        // NOTE(review): the trailing 1 presumably limits the number of parses
        // requested -- confirm against TreebankParser.parseLine.
        Parse[] results = TreebankParser.parseLine(sentence, treebankParser, 1);
        for (int i = 0; i < results.length; i++) {
            output(results[i]);
        }
    }

    /**
     * Recursively prints a parse node without any trailing newline.
     *
     * <p>A node with no children is printed as the slice of the parse text
     * covered by its span. Any other node is printed as
     * {@code (TYPE child child ...)}, with the children emitted back to back.
     *
     * @param node the parse node to print
     */
    private static void output(Parse node) {
        if (node.getChildCount() == 0) {
            // Leaf: emit the surface text addressed by the node's span.
            Span covered = node.getSpan();
            String surface = node.getText().substring(
                    covered.getStart(), covered.getEnd());
            System.out.print(surface);
            return;
        }

        String opening = "(" + node.getType() + " ";
        System.out.print(opening);
        for (Parse childNode : node.getChildren()) {
            output(childNode);
        }
        System.out.print(")");
    }
}
プログラムを実行すると、以下の出力が得られます。
(TOP (S (NP (PRP I))(VP (VBP am)(NP (DT a)(NN student.)))))
これが、"I am a student."を解析した結果の構文木になります。NP, PRP, VPといったラベルの意味は品詞解析のときと同じで、Penn Treebank準拠です。