/// <summary>
/// Runs the parser over two sample sentences: one supplied as an already
/// tokenized word array, and one tokenized from raw text with a PTB tokenizer.
/// The first parse is printed with <c>pennPrint</c>; for the second, the
/// CC-processed typed dependencies are written to the console and the tree is
/// printed through a <c>TreePrint</c> configured with
/// "penn,typedDependenciesCollapsed".
/// </summary>
/// <param name="lp">Parser applied to both sample sentences.</param>
public static void DemoAPI(LexicalizedParser lp)
{
    // This option shows parsing a list of correctly tokenized words
    var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
    java.util.List rawWords = Sentence.toCoreLabelList(sent);
    Tree parse = lp.apply(rawWords);
    parse.pennPrint();

    // This option shows loading and using an explicit tokenizer
    const string Sent2 = "This is another sentence.";
    TokenizerFactory tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new StringReader(Sent2);
    java.util.List rawWords2;
    try
    {
        rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    }
    finally
    {
        // Release the reader whether or not tokenization succeeded.
        sent2Reader.close();
    }
    parse = lp.apply(rawWords2);

    var tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    java.util.List tdl = gs.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", tdl);

    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(parse);
}
/// <summary>
/// Parses one pre-tokenized sentence and one raw sentence with the shared
/// parser, asserting that each parse is non-null, then writes the CC-processed
/// typed dependencies of the second parse to the test output and prints the
/// tree through a <c>TreePrint</c>.
/// </summary>
public void ParseEasySentence()
{
    // Case 1: the words are already tokenized correctly.
    var easyWords = new[] { "This", "is", "an", "easy", "sentence", "." };
    var parse = _lp.apply(SentenceUtils.toCoreLabelList(easyWords));
    Assert.NotNull(parse);
    parse.pennPrint();

    // Case 2: raw text run through an explicitly constructed tokenizer.
    var rawText = "This is another sentence.";
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    using var textReader = new StringReader(rawText);
    var tokenizer = factory.getTokenizer(textReader);
    parse = _lp.apply(tokenizer.tokenize());
    Assert.NotNull(parse);

    var languagePack = new PennTreebankLanguagePack();
    var structure = languagePack.grammaticalStructureFactory().newGrammaticalStructure(parse);
    var tdl = structure.typedDependenciesCCprocessed();
    TestContext.Out.WriteLine($"\n{tdl}\n");

    var printer = new TreePrint("penn,typedDependenciesCollapsed");
    Assert.NotNull(printer);
    printer.printTree(parse);
}
/// <summary>
/// Tokenizes and parses <paramref name="input"/>, renders the parse tree with
/// an XML-configured <c>TreePrint</c>, appends a closing "&lt;/s&gt;" to that
/// text, and hands the result to
/// <c>ParsedStatementFactory.CreateParsedStatement</c>.
/// </summary>
/// <param name="input">Raw text to tokenize and parse.</param>
/// <returns>Whatever <c>ParsedStatementFactory.CreateParsedStatement</c> returns for the XML text.</returns>
public ParsedStatementFactory.ParseResult ParseStatement(string input)
{
    var sent2Reader = new StringReader(input);
    java.util.List rawWords2;
    try
    {
        rawWords2 = _tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    }
    finally
    {
        // Release the reader whether or not tokenization succeeded.
        sent2Reader.close();
    }

    var parse = _lp.apply(rawWords2);

    // NOTE(review): gs and tdl are only referenced by the disabled debug output
    // below. They are kept because it is not visible here whether building them
    // has side effects - confirm and remove if not.
    var gs = _structureFactory.newGrammaticalStructure(parse);
    var tdl = gs.typedDependenciesCCprocessed();
    //System.Console.WriteLine("newGrammaticalStructure:\n{0}\n", gs);
    //System.Console.WriteLine("typedDependenciesCCprocessed:\n{0}\n", tdl);
    //var tp = new TreePrint("penn,typedDependenciesCollapsed");
    //tp.printTree(parse);
    //return new ParsedStatement(parse);

    var xmlTreePrint = new TreePrint("xmlTree, dependencies", "xml, collapsedDependencies", _tlp);
    var stream = new ByteArrayOutputStream();
    var writer = new PrintWriter(stream);
    xmlTreePrint.printTree(parse, writer);
    // A PrintWriter built from an OutputStream does not auto-flush, so push any
    // buffered characters into the byte stream before reading it back.
    writer.flush();

    string xmlOutput = stream.toString() + "</s>";
    //System.Console.WriteLine("xml:\n{0}\n", xmlOutput);
    return ParsedStatementFactory.CreateParsedStatement(xmlOutput);
    //System.Console.WriteLine("TreePrint: \n{0}\n", parse);
}
/// <summary>
/// Runs the parser over two sample sentences: one supplied as an already
/// tokenized word array, and one tokenized from raw text with a PTB tokenizer.
/// The first parse is printed with <c>pennPrint</c>; for the second, the
/// CC-processed typed dependencies are written to the console and the tree is
/// printed through a <c>TreePrint</c> configured with
/// "penn,typedDependenciesCollapsed".
/// </summary>
/// <param name="lp">Parser applied to both sample sentences.</param>
public static void DemoAPI(LexicalizedParser lp)
{
    // This option shows parsing a list of correctly tokenized words
    var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
    var rawWords = Sentence.toCoreLabelList(sent);
    var parse = lp.apply(rawWords);
    parse.pennPrint();

    // This option shows loading and using an explicit tokenizer
    const string Sent2 = "This is another sentence.";
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new StringReader(Sent2);
    try
    {
        var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
        parse = lp.apply(rawWords2);
    }
    finally
    {
        // Release the reader whether or not tokenizing/parsing succeeded.
        sent2Reader.close();
    }

    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();
    var gs = gsf.newGrammaticalStructure(parse);
    var tdl = gs.typedDependenciesCCprocessed();
    System.Console.WriteLine("\n{0}\n", tdl);

    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(parse);
}
/// <summary>
/// Tokenizes and parses <paramref name="input"/>, writes the CC-processed typed
/// dependencies to the console, prints the tree through a <c>TreePrint</c>
/// configured with "penn,typedDependenciesCollapsed", and wraps the parse tree
/// in a new <c>ParsedStatement</c>.
/// </summary>
/// <param name="input">Raw text to tokenize and parse.</param>
/// <returns>A new <c>ParsedStatement</c> built from the parse tree.</returns>
public ParsedStatement ParseSentence(string input)
{
    var sent2Reader = new StringReader(input);
    java.util.List rawWords2;
    try
    {
        rawWords2 = _tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    }
    finally
    {
        // Release the reader whether or not tokenization succeeded.
        sent2Reader.close();
    }

    var parse = _lp.apply(rawWords2);

    var gs = _structureFactory.newGrammaticalStructure(parse);
    var tdl = gs.typedDependenciesCCprocessed();
    System.Console.WriteLine("\n{0}\n", tdl);

    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(parse);

    return new ParsedStatement(parse);
    //System.Console.WriteLine("TreePrint: \n{0}\n", parse);
}
/// <summary>
/// Iterates over the sentences that a <c>DocumentPreprocessor</c> yields for
/// <paramref name="fileName"/>, parses each one, prints the tree with
/// <c>pennPrint</c>, and writes its CC-processed typed dependencies to the console.
/// </summary>
/// <param name="lp">Parser applied to every sentence.</param>
/// <param name="fileName">Path handed to <c>DocumentPreprocessor</c>.</param>
public static void DemoDP(LexicalizedParser lp, string fileName)
{
    // The grammatical-structure factory is built once and shared by all sentences.
    var languagePack = new PennTreebankLanguagePack();
    var structureFactory = languagePack.grammaticalStructureFactory();

    // A tokenizer could also be created here and passed to DocumentPreprocessor.
    var document = new DocumentPreprocessor(fileName);
    foreach (List words in document)
    {
        var tree = lp.apply(words);
        tree.pennPrint();

        var dependencies = structureFactory
            .newGrammaticalStructure(tree)
            .typedDependenciesCCprocessed(true);
        System.Console.WriteLine("\n{0}\n", dependencies);
    }
}
/// <summary>
/// Iterates over the sentences that a <c>DocumentPreprocessor</c> yields for
/// <paramref name="fileName"/>, parses each one, prints the tree with
/// <c>pennPrint</c>, and writes its CC-processed typed dependencies to the console.
/// </summary>
/// <param name="lp">Parser applied to every sentence.</param>
/// <param name="fileName">Path handed to <c>DocumentPreprocessor</c>.</param>
public static void DemoDP(LexicalizedParser lp, string fileName)
{
    // The grammatical-structure factory is built once and shared by all sentences.
    var languagePack = new PennTreebankLanguagePack();
    GrammaticalStructureFactory structureFactory = languagePack.grammaticalStructureFactory();

    // A tokenizer could also be created here and passed to DocumentPreprocessor.
    var document = new DocumentPreprocessor(fileName);
    foreach (List words in document)
    {
        Tree tree = lp.apply(words);
        tree.pennPrint();

        GrammaticalStructure structure = structureFactory.newGrammaticalStructure(tree);
        java.util.List dependencies = structure.typedDependenciesCCprocessed(true);
        Console.WriteLine("\n{0}\n", dependencies);
    }
}
// Guards the one-time load of the English PCFG model.
private static readonly object _englishPcfgParserLock = new object();

// Parser instance reused by every call once it has been loaded successfully.
private static LexicalizedParser _cachedEnglishPcfgParser;

// Locates the models directory relative to the executing assembly and loads the English PCFG parser from it.
private static LexicalizedParser LoadEnglishPcfgParser()
{
    string assemblyPath = Assembly.GetExecutingAssembly().GetName().CodeBase;
    string projectPath = Directory.GetParent(new Uri(Path.GetDirectoryName(Path.GetDirectoryName(Path.GetDirectoryName(assemblyPath)))).LocalPath).FullName;
    string modelsDirectory = Path.GetFullPath(projectPath + @"\Parser\CoreNLP-3.9.1-Models\edu\stanford\nlp\models");

    // Loading english PCFG parser from file
    return LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");
}

// Returns the shared parser, loading it on first use; a failed load is not cached, so the next call retries.
private static LexicalizedParser GetEnglishPcfgParser()
{
    lock (_englishPcfgParserLock)
    {
        if (_cachedEnglishPcfgParser == null)
        {
            _cachedEnglishPcfgParser = LoadEnglishPcfgParser();
        }
        return _cachedEnglishPcfgParser;
    }
}

/// <summary>
/// Tokenizes and parses <paramref name="sentence"/> and returns the label text
/// of every leaf whose parent node's label value is contained in
/// <c>nounLabels</c>.
/// </summary>
/// <param name="sentence">Raw sentence text to tokenize and parse.</param>
/// <returns>Leaf label strings, in the order the tree's <c>toArray()</c> yields them.</returns>
public static List<string> ExtractNounsFromSemantics(string sentence)
{
    // Loading the model from disk is expensive, so one parser is shared across calls.
    LexicalizedParser lp = GetEnglishPcfgParser();

    // This shows loading and using an explicit tokenizer
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new java.io.StringReader(sentence);
    java.util.List rawWords;
    try
    {
        rawWords = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    }
    finally
    {
        // Release the reader whether or not tokenization succeeded.
        sent2Reader.close();
    }

    var tree = lp.apply(rawWords);

    return tree.toArray()
        .Cast<LabeledScoredTreeNode>()
        .Where(n => n.isLeaf() && nounLabels.Contains(n.parent(tree).label().value()))
        .Select(n => n.label().ToString())
        .ToList();
}
/// <summary>
/// Parses the tokenized <paramref name="sentence"/> and builds one
/// <c>TargetCandidate</c> for every subtree whose label value is "NP".
/// </summary>
/// <param name="sentence">Sentence tokens, converted to core labels before parsing.</param>
/// <param name="target">Passed through unchanged to <c>NounPhrase.SetSentence</c>.</param>
/// <returns>The candidates, in the order the subtrees appear in <c>subTreeList()</c>.</returns>
public List<TargetCandidate> GetAllNounPhrases(string[] sentence, string[] target)
{
    var parseTree = lexParser.apply(SentenceUtils.toCoreLabelList(sentence));
    var structure = grammaticalStructureFactory.newGrammaticalStructure(parseTree);
    var dependencies = structure.typedDependenciesCCprocessed();

    var candidates = new List<TargetCandidate>();
    var nodes = parseTree.subTreeList();
    for (int index = 0; index < nodes.size(); index++)
    {
        var node = (Tree)nodes.get(index);
        if (node.label().value() != "NP")
        {
            // Only noun-phrase subtrees produce candidates.
            continue;
        }

        NounPhrase phrase = NounPhrase.SetSentence(sentence, parseTree, dependencies, target);
        var phraseText = SentenceUtils.listToString(node.yield());
        phrase.SetPhrase(phraseText);
        candidates.Add(new TargetCandidate(phrase, caching));
    }

    return candidates;
}
/// <summary>
/// Uses Stanford.NLP.Net to tokenize and parse <paramref name="sent"/>, prints
/// the resulting tree through a <c>TreePrint</c> configured with "penn", and
/// returns the tree.
/// </summary>
/// <param name="sent">Raw sentence text to tokenize and parse.</param>
/// <returns>The tree produced by <c>lp.apply</c> for the tokenized sentence.</returns>
Tree Parse(string sent)
{
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sentReader = new java.io.StringReader(sent);
    java.util.List rawWords;
    try
    {
        rawWords = tokenizerFactory.getTokenizer(sentReader).tokenize();
    }
    finally
    {
        // Release the reader whether or not tokenization succeeded.
        sentReader.close();
    }

    var tree = lp.apply(rawWords);

    // Extract dependencies from lexical tree
    // NOTE(review): tdl is never read afterwards. It is kept because it is not
    // visible here whether building it has side effects - confirm and remove if not.
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();
    var gs = gsf.newGrammaticalStructure(tree);
    var tdl = gs.typedDependenciesCCprocessed();

    // Print the tree with the "penn" format; the variant that also requests
    // collapsed typed dependencies is left disabled.
    //var tp = new TreePrint("penn,typedDependenciesCollapsed");
    var tp = new TreePrint("penn");
    tp.printTree(tree);

    return tree;
}