/// <summary>
/// Demonstrates two ways of handing text to the parser: an array of words that
/// is already tokenized, and a raw string run through an explicit PTB tokenizer.
/// The first parse is printed in Penn format; for the second, the CC-processed
/// typed dependencies are written to the console and the tree is printed with a
/// "penn,typedDependenciesCollapsed" <c>TreePrint</c>.
/// </summary>
/// <param name="lp">Parser applied to both example sentences.</param>
public static void DemoAPI(LexicalizedParser lp)
{
    // Variant 1: parse a list of correctly tokenized words.
    string[] tokens = { "This", "is", "an", "easy", "sentence", "." };
    java.util.List labelledTokens = Sentence.toCoreLabelList(tokens);
    Tree tree = lp.apply(labelledTokens);
    tree.pennPrint();

    // Variant 2: load and use an explicit tokenizer on raw text.
    const string RawText = "This is another sentence.";
    TokenizerFactory ptbFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    java.util.List tokenized = ptbFactory.getTokenizer(new StringReader(RawText)).tokenize();
    tree = lp.apply(tokenized);

    // Derive the typed dependencies of the second parse and show them.
    GrammaticalStructureFactory structureFactory =
        new PennTreebankLanguagePack().grammaticalStructureFactory();
    java.util.List dependencies =
        structureFactory.newGrammaticalStructure(tree).typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", dependencies);

    new TreePrint("penn,typedDependenciesCollapsed").printTree(tree);
}
/// <summary>
/// Tokenizes and parses <paramref name="input"/>, renders the parse through a
/// <c>TreePrint</c> configured for XML output, and passes the resulting text to
/// <c>ParsedStatementFactory.CreateParsedStatement</c>.
/// </summary>
/// <param name="input">Text to tokenize and parse.</param>
/// <returns>The result of <c>ParsedStatementFactory.CreateParsedStatement</c> for the XML text.</returns>
public ParsedStatementFactory.ParseResult ParseStatement(string input)
{
    var tokens = _tokenizerFactory.getTokenizer(new StringReader(input)).tokenize();
    var parse = _lp.apply(tokens);

    // The previous version also built a GrammaticalStructure and its CC-processed
    // dependency list at this point, but neither value was ever read, so that
    // work has been dropped.

    var xmlTreePrint = new TreePrint("xmlTree, dependencies", "xml, collapsedDependencies", _tlp);
    var stream = new ByteArrayOutputStream();
    xmlTreePrint.printTree(parse, new PrintWriter(stream));

    // NOTE(review): the closing "</s>" is appended by hand. Presumably the
    // PrintWriter is never flushed/closed, so the printer's own closing tag does
    // not reach the stream - confirm before flushing the writer or removing this
    // suffix, otherwise the tag may be missing or duplicated.
    string xmlOutput = stream.toString() + "</s>";

    return ParsedStatementFactory.CreateParsedStatement(xmlOutput);
}
/// <summary>
/// Tokenizes and parses <paramref name="input"/>, writes its CC-processed typed
/// dependencies to the console, prints the tree with a
/// "penn,typedDependenciesCollapsed" <c>TreePrint</c>, and wraps the parse tree
/// in a <c>ParsedStatement</c>.
/// </summary>
/// <param name="input">Text to tokenize and parse.</param>
/// <returns>A new <c>ParsedStatement</c> built from the parse tree.</returns>
public ParsedStatement ParseSentence(string input)
{
    var tokens = _tokenizerFactory.getTokenizer(new StringReader(input)).tokenize();
    var tree = _lp.apply(tokens);

    // Diagnostic output: dependency list first, then the tree itself.
    var dependencies = _structureFactory.newGrammaticalStructure(tree).typedDependenciesCCprocessed();
    System.Console.WriteLine("\n{0}\n", dependencies);
    new TreePrint("penn,typedDependenciesCollapsed").printTree(tree);

    return new ParsedStatement(tree);
}
/// <summary>
/// Demonstrates loading a file with <c>DocumentPreprocessor</c>, which yields one
/// sentence at a time. Each sentence is parsed and printed in Penn format, and its
/// CC-processed typed dependencies are written to the console.
/// </summary>
/// <param name="lp">Parser applied to every sentence.</param>
/// <param name="fileName">File name handed to <c>DocumentPreprocessor</c>.</param>
public static void DemoDP(LexicalizedParser lp, string fileName)
{
    GrammaticalStructureFactory structureFactory =
        new PennTreebankLanguagePack().grammaticalStructureFactory();

    // A tokenizer could also be created here and passed to DocumentPreprocessor.
    var document = new DocumentPreprocessor(fileName);
    foreach (List words in document)
    {
        Tree tree = lp.apply(words);
        tree.pennPrint();

        java.util.List dependencies =
            structureFactory.newGrammaticalStructure(tree).typedDependenciesCCprocessed(true);
        Console.WriteLine("\n{0}\n", dependencies);
    }
}
/// <summary>
/// Parses <paramref name="sentence"/> and returns one <c>TargetCandidate</c> for
/// every subtree of the parse whose label value is "NP".
/// </summary>
/// <param name="sentence">Words of the sentence to parse.</param>
/// <param name="target">Passed unchanged to <c>NounPhrase.SetSentence</c> for each candidate.</param>
/// <returns>The candidates, in the order their subtrees appear in <c>subTreeList()</c>.</returns>
public List<TargetCandidate> GetAllNounPhrases(string[] sentence, string[] target)
{
    var parseTree = lexParser.apply(SentenceUtils.toCoreLabelList(sentence));
    var structure = grammaticalStructureFactory.newGrammaticalStructure(parseTree);
    var typedDeps = structure.typedDependenciesCCprocessed();

    var candidates = new List<TargetCandidate>();
    var nodes = parseTree.subTreeList();
    for (int index = 0; index < nodes.size(); index++)
    {
        var node = (Tree)nodes.get(index);
        if (node.label().value() != "NP")
        {
            continue;
        }

        // A fresh NounPhrase is requested per match because SetPhrase is then
        // called on it with this subtree's text.
        NounPhrase nounPhrase = NounPhrase.SetSentence(sentence, parseTree, typedDeps, target);
        nounPhrase.SetPhrase(SentenceUtils.listToString(node.yield()));
        candidates.Add(new TargetCandidate(nounPhrase, caching));
    }

    return candidates;
}