// Use Stanford.NLP.Net to parse a raw sentence into a constituency tree.
// NOTE(review): the PCFG model is re-loaded from disk on every call, which is
// expensive — consider caching the LexicalizedParser in a static field.
static Tree Parse(string sent)
{
    // Loading english PCFG parser from file
    // (modelsDirectory is declared elsewhere in this file).
    var lp = LexicalizedParser.loadModel(modelsDirectory + "\\lexparser\\englishPCFG.ser.gz");

    // Tokenize the raw sentence text with the standard PTB tokenizer.
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sentReader = new java.io.StringReader(sent);
    var rawWords = tokenizerFactory.getTokenizer(sentReader).tokenize();
    sentReader.close();

    var tree = lp.apply(rawWords);

    // Print the tree in Penn Treebank format.
    // (The original also computed CC-processed typed dependencies here, but the
    // result was never used, so that dead computation has been removed.)
    //var tp = new TreePrint("penn,typedDependenciesCollapsed");
    var tp = new TreePrint("penn");
    tp.printTree(tree);

    return tree;
}
static void Main()
{
    // Path to models extracted from `stanford-parser-3.6.0-models.jar`
    var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-parser-full-2016-10-31\models\";
    // BUG FIX: jarRoot already ends with '\', so do not prepend another one —
    // the original produced a double separator: "...models\\edu\stanford\...".
    var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

    // Loading english PCFG parser from file
    var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // This sample shows parsing a list of correctly tokenized words
    var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
    var rawWords = SentenceUtils.toCoreLabelList(sent);
    var tree = lp.apply(rawWords);
    tree.pennPrint();

    // This option shows loading and using an explicit tokenizer
    var sent2 = "This is another sentence.";
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new StringReader(sent2);
    var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    sent2Reader.close();
    var tree2 = lp.apply(rawWords2);

    // Extract dependencies from lexical tree
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();
    var gs = gsf.newGrammaticalStructure(tree2);
    var tdl = gs.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", tdl);

    // Extract collapsed dependencies from parsed tree
    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(tree2);
}
// Tokenize the given sentence text and run it through the shared parser.
Tree parseSentence(string sentence)
{
    var reader = new StringReader(sentence);
    var tokens = _tokenizerFactory.getTokenizer(reader).tokenize();
    reader.close();
    return _parser.parse(tokens);
}
// Tokenize the two sentence columns (tab-separated fields 3 and 4) of the MSRP
// paraphrase training file, writing one space-separated token line per sentence
// to the two output files.
static void Main(string[] args)
{
    // using-declarations guarantee the writers are flushed and closed even if
    // an exception is thrown mid-loop (the original only Close()d on success).
    using (TextReader reader = System.IO.File.OpenText("C:\\Data\\msr_paraphrase_train.txt"))
    using (TextWriter writer1 = System.IO.File.CreateText("C:\\Data\\msr_paraphrase_train_s1.token"))
    using (TextWriter writer2 = System.IO.File.CreateText("C:\\Data\\msr_paraphrase_train_s2.token"))
    {
        // One factory is enough; the original rebuilt it for every input line.
        var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

        string[] inputdata = reader.ReadToEnd().Split('\n');
        foreach (string rawLine in inputdata)
        {
            // Strip the trailing '\r' that Split('\n') leaves behind on
            // Windows (CRLF) files, so the last field tokenizes cleanly.
            string line = rawLine.TrimEnd('\r');
            string[] sp = line.Split('\t');

            // Skip blank/short lines (e.g. the empty entry after the final
            // '\n'); the original threw IndexOutOfRangeException on sp[3].
            if (sp.Length < 5)
            {
                continue;
            }

            var sent2Reader1 = new java.io.StringReader(sp[3]);
            java.util.List rawWords1 = tokenizerFactory.getTokenizer(sent2Reader1).tokenize();
            sent2Reader1.close();

            var sent2Reader2 = new java.io.StringReader(sp[4]);
            java.util.List rawWords2 = tokenizerFactory.getTokenizer(sent2Reader2).tokenize();
            sent2Reader2.close();

            for (int i = 0; i < rawWords1.size(); ++i)
            {
                writer1.Write(rawWords1.get(i) + " ");
            }
            writer1.Write('\n');

            for (int i = 0; i < rawWords2.size(); ++i)
            {
                writer2.Write(rawWords2.get(i) + " ");
            }
            writer2.Write('\n');
        }
    }

    System.Console.ReadKey();
}
// Use Stanford.NLP.Net to parse a raw sentence into a constituency tree,
// using the already-loaded parser (lp, declared elsewhere in this file).
Tree Parse(string sent)
{
    // Tokenize the raw sentence text with the standard PTB tokenizer.
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sentReader = new java.io.StringReader(sent);
    var rawWords = tokenizerFactory.getTokenizer(sentReader).tokenize();
    sentReader.close();

    var tree = lp.apply(rawWords);

    // Print the tree in Penn Treebank format.
    // (The original also computed CC-processed typed dependencies here, but the
    // result was never used, so that dead computation has been removed.)
    //var tp = new TreePrint("penn,typedDependenciesCollapsed");
    var tp = new TreePrint("penn");
    tp.printTree(tree);

    return tree;
}
// Parse a sentence, scan its typed dependencies for a known shape keyword, and
// populate the shape/propsUsed/dependency fields accordingly.
public void SentenceParser(string sent2)
{
    var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

    // Loading english PCFG parser from file
    var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // BUG FIX: the original called sent2.ToLower() and discarded the result
    // (strings are immutable), so the sentence was never actually lower-cased.
    sent2 = sent2.ToLower();

    // This option shows loading and using an explicit tokenizer
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new java.io.StringReader(sent2);
    var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    sent2Reader.close();
    var tree2 = lp.apply(rawWords2);

    // Extract dependencies from lexical tree
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();
    var gs = gsf.newGrammaticalStructure(tree2);
    var tdl = gs.typedDependenciesCCprocessed();
    //Console.WriteLine("\n{0}\n", tdl);

    // Extract collapsed dependencies from parsed tree
    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(tree2);

    // BUG FIX: guard the 'as' cast — if the collapsed dependencies are not an
    // ArrayList, the original foreach threw a NullReferenceException.
    ArrayList dep = gs.typedDependenciesCollapsed() as ArrayList;
    if (dep != null)
    {
        // Find the first dependency whose dependent word matches a known keyword.
        foreach (TypedDependency td in dep)
        {
            for (int i = 0; i < keyword.Length; i++)
            {
                if (td.dep().originalText().Equals(keyword[i]))
                {
                    keyFlag = true;
                    key = keyword[i];
                    break;
                }
            }
            if (keyFlag)
            {
                break;
            }
        }
    }
    // Reset the flag for the next call (key retains the matched keyword).
    keyFlag = false;

    // Map the detected keyword onto its shape and associate its properties.
    switch (key)
    {
        case "circle":
            Circle circle = new Circle();
            shape = circle.GetProps();
            propsUsed = Associator(shape, dep);
            break;
        case "rectangle":
            Rectangle rect = new Rectangle();
            shape = rect.GetProps();
            propsUsed = Associator(shape, dep);
            break;
        case "triangle":
            Triangle tri = new Triangle();
            shape = tri.GetProps();
            propsUsed = Associator(shape, dep);
            break;
        case "square":
            Square square = new Square();
            shape = square.GetProps();
            propsUsed = Associator(shape, dep);
            break;
        default:
            break;
    } //End of Switch

    dependency = tdl.ToString();
} //End of SentenceParser
// Parse the input sentence and extract the action/subject/target fields from
// its CC-processed typed dependencies; returns the dependency list as a string.
public string Tags(string input)
{
    // Path to models extracted from `stanford-parser-3.6.0-models.jar`
    var jarRoot = @"";
    var modelsDirectory = jarRoot;
    var parser = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // Tokenize the sentence with an explicit PTB tokenizer.
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new java.io.StringReader(input);
    var tokens = factory.getTokenizer(reader).tokenize();
    reader.close();

    var parseTree = parser.apply(tokens);

    // Derive CC-processed typed dependencies from the lexical tree.
    var languagePack = new PennTreebankLanguagePack();
    var structureFactory = languagePack.grammaticalStructureFactory();
    var structure = structureFactory.newGrammaticalStructure(parseTree);
    var dependencies = structure.typedDependenciesCCprocessed();

    var printer = new TreePrint("penn,typedDependenciesCollapsed");
    UnityEngine.Debug.Log(dependencies);
    //printer.printTree(parseTree);

    for (int idx = 0; idx < dependencies.size(); idx++)
    {
        var node = (TypedDependency)dependencies.get(idx);
        string relationName = node.reln().getShortName();

        if (relationName.Contains("nsubj"))
        {
            // Governor of nsubj is treated as the action, dependent as the subject.
            action = node.gov().value();
            UnityEngine.Debug.Log("This is the action " + action);
            subj = node.dep().value();
            UnityEngine.Debug.Log("This is the subject " + subj);
        }

        if (relationName.Contains("dobj"))
        {
            // Governor of dobj is the action, dependent is the primary target.
            action = node.gov().value();
            UnityEngine.Debug.Log("This is the action " + action);
            target = node.dep().value();
            UnityEngine.Debug.Log("This is the target " + target);
        }

        if (relationName.Contains("nmod"))
        {
            // The nmod dependent acts as a secondary target.
            second_target = node.dep().value();
            UnityEngine.Debug.Log("This is the target second " + second_target);
        }
    }

    return dependencies.ToString();
}