static void Main() {
    // Path to models extracted from `stanford-parser-3.6.0-models.jar`
    var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-parser-full-2016-10-31\models\";
    // FIX: jarRoot already ends with '\'; the original prepended another '\',
    // producing a doubled separator ("models\\edu") that only worked because
    // the underlying file APIs happen to normalize it.
    var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

    // Loading english PCFG parser from file
    var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // This sample shows parsing a list of correctly tokenized words
    var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
    var rawWords = SentenceUtils.toCoreLabelList(sent);
    var tree = lp.apply(rawWords);
    tree.pennPrint();

    // This option shows loading and using an explicit tokenizer
    var sent2 = "This is another sentence.";
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new StringReader(sent2);
    var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    sent2Reader.close();
    var tree2 = lp.apply(rawWords2);

    // Extract dependencies from lexical tree
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();
    var gs = gsf.newGrammaticalStructure(tree2);
    var tdl = gs.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", tdl);

    // Extract collapsed dependencies from parsed tree
    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(tree2);
}
public static void DemoAPI(LexicalizedParser lp) {
    // Demonstrates the two ways of feeding the parser.

    // Option 1: parse a list of correctly tokenized words.
    var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
    java.util.List rawWords = Sentence.toCoreLabelList(sent);
    Tree parse = lp.apply(rawWords);
    parse.pennPrint();

    // Option 2: load and use an explicit tokenizer on raw text.
    const string Sent2 = "This is another sentence.";
    TokenizerFactory tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new StringReader(Sent2);
    java.util.List rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    sent2Reader.close(); // FIX: the reader was never closed, leaking the underlying Java resource
    parse = lp.apply(rawWords2);

    // Extract CC-processed typed dependencies from the parse tree.
    var tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    java.util.List tdl = gs.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", tdl);

    // Print the tree plus its collapsed dependencies.
    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(parse);
}
public void ParseEasySentence() {
    // Case 1: a sentence that is already tokenized.
    var tokens = new[] { "This", "is", "an", "easy", "sentence", "." };
    var coreLabels = SentenceUtils.toCoreLabelList(tokens);
    var parse = _lp.apply(coreLabels);
    Assert.NotNull(parse);
    parse.pennPrint();

    // Case 2: raw text run through an explicit PTB tokenizer.
    var rawSentence = "This is another sentence.";
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    using (var reader = new StringReader(rawSentence)) {
        var tokenized = factory.getTokenizer(reader).tokenize();
        parse = _lp.apply(tokenized);
    }
    Assert.NotNull(parse);

    // Derive CC-processed typed dependencies from the second parse.
    var languagePack = new PennTreebankLanguagePack();
    var structureFactory = languagePack.grammaticalStructureFactory();
    var structure = structureFactory.newGrammaticalStructure(parse);
    var tdl = structure.typedDependenciesCCprocessed();
    TestContext.Out.WriteLine($"\n{tdl}\n");

    // Print the tree plus its collapsed dependencies.
    var printer = new TreePrint("penn,typedDependenciesCollapsed");
    Assert.NotNull(printer);
    printer.printTree(parse);
}
public void ProcessText(string inputText) {
    // Location of the extracted stanford-parser-3.7.0 models on disk.
    var jarRoot = "C:\\stanford-parser-full-2016-10-31\\stanford-parser-3.7.0-models";
    var modelsDirectory = jarRoot + "\\edu\\stanford\\nlp\\models";

    // Load the English PCFG parser from its serialized model file.
    var parser = LexicalizedParser.loadModel(modelsDirectory + "\\lexparser\\englishPCFG.ser.gz");

    // Tokenize the raw text with an explicit PTB tokenizer.
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new StringReader(inputText);
    var tokens = factory.getTokenizer(reader).tokenize();
    reader.close();

    var tree = parser.apply(tokens);

    // Derive CC-processed typed dependencies from the parse tree.
    var languagePack = new PennTreebankLanguagePack();
    var structureFactory = languagePack.grammaticalStructureFactory();
    var structure = structureFactory.newGrammaticalStructure(tree);
    var dependencies = structure.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", dependencies);

    // Print the tree together with its collapsed dependencies.
    var printer = new TreePrint("penn,typedDependenciesCollapsed");
    printer.printTree(tree);
}
public static void ParseString(string sentence) {
    // Path to models extracted from the stanford-corenlp-3.9.1 models jar.
    var modelsDirectory = @"../../../data/paket-files/stanford-corenlp-3.9.1-models/edu/stanford/nlp/models";
    var model = @"/lexparser/englishPCFG.ser.gz";
    //var model = @"/parser/nndep/english_SD.gz";

    // Loading english PCFG parser from file
    var lp = LexicalizedParser.loadModel(modelsDirectory + model);

    // This option shows loading and using an explicit tokenizer
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new StringReader(sentence);
    var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    sent2Reader.close(); // FIX: the close() call was commented out, leaking a reader on every invocation
    var tree2 = lp.apply(rawWords2);

    // Dependency-extraction sample retained from the original for easy re-enabling:
    //var tlp = new PennTreebankLanguagePack();
    //var gsf = tlp.grammaticalStructureFactory();
    //var gs = gsf.newGrammaticalStructure(tree2);
    //var tdl = gs.typedDependenciesCCprocessed();
    //Console.WriteLine("\n{0}\n", tdl);

    // Extract collapsed dependencies from parsed tree
    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(tree2);
}
public ParsedStatementFactory.ParseResult ParseStatement(string input) {
    // Tokenize and parse the raw input sentence.
    var sent2Reader = new StringReader(input);
    var rawWords2 = _tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    sent2Reader.close(); // FIX: the reader was never closed, leaking a Java resource per call
    var parse = _lp.apply(rawWords2);

    // Dependencies are computed as in the original, even though only the XML
    // rendering below feeds the returned result.
    var gs = _structureFactory.newGrammaticalStructure(parse);
    var tdl = gs.typedDependenciesCCprocessed();
    //System.Console.WriteLine("newGrammaticalStructure:\n{0}\n", gs);
    //System.Console.WriteLine("typedDependenciesCCprocessed:\n{0}\n", tdl);
    //var tp = new TreePrint("penn,typedDependenciesCollapsed");
    //tp.printTree(parse);
    //return new ParsedStatement(parse);

    // Render the tree as XML with collapsed dependencies.
    var xmlTreePrint = new TreePrint("xmlTree, dependencies", "xml, collapsedDependencies", _tlp);
    var stream = new ByteArrayOutputStream();
    var writer = new PrintWriter(stream);
    xmlTreePrint.printTree(parse, writer);
    writer.flush(); // FIX: flush the buffered writer before reading the stream, or output may be truncated
    // NOTE(review): "</s>" is appended manually — presumably the printer leaves
    // the <s> element unterminated; confirm against the TreePrint output format.
    string xmlOutput = stream.toString() + "</s>";
    //System.Console.WriteLine("xml:\n{0}\n", xmlOutput);
    return (ParsedStatementFactory.CreateParsedStatement(xmlOutput));
    //System.Console.WriteLine("TreePrint: \n{0}\n", parse);
}
// Use Stanford.NLP.Net to parse a raw sentence into a Penn Treebank tree.
static Tree Parse(string sent) {
    // Load the English PCFG model from the shared models directory.
    var parser = LexicalizedParser.loadModel(modelsDirectory + "\\lexparser\\englishPCFG.ser.gz");

    // Tokenize the raw sentence with an explicit PTB tokenizer.
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new java.io.StringReader(sent);
    var tokens = factory.getTokenizer(reader).tokenize();
    reader.close();

    var parseTree = parser.apply(tokens);

    // Typed dependencies are derived exactly as before; the list itself is
    // not consumed here, matching the original behavior.
    var languagePack = new PennTreebankLanguagePack();
    var structureFactory = languagePack.grammaticalStructureFactory();
    var structure = structureFactory.newGrammaticalStructure(parseTree);
    var dependencies = structure.typedDependenciesCCprocessed();

    // Print in plain Penn Treebank format.
    //var printer = new TreePrint("penn,typedDependenciesCollapsed");
    var printer = new TreePrint("penn");
    printer.printTree(parseTree);
    return (parseTree);
}
public ParsedStatement ParseSentence(string input) {
    // Tokenize the raw input sentence.
    var sent2Reader = new StringReader(input);
    var rawWords2 = _tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    sent2Reader.close(); // FIX: the reader was never closed, leaking a Java resource per call

    // Parse and derive CC-processed typed dependencies.
    var parse = _lp.apply(rawWords2);
    var gs = _structureFactory.newGrammaticalStructure(parse);
    var tdl = gs.typedDependenciesCCprocessed();
    System.Console.WriteLine("\n{0}\n", tdl);

    // Print tree plus collapsed dependencies, then wrap the parse for the caller.
    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(parse);
    return (new ParsedStatement(parse));
    //System.Console.WriteLine("TreePrint: \n{0}\n", parse);
}
static void Main() {
    // Path to models extracted from `stanford-parser-3.6.0-models.jar`
    var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-parser-full-2015-12-09\models\";
    // FIX: jarRoot already ends with '\'; the original prepended another '\',
    // producing a doubled separator ("models\\edu") that only worked because
    // the underlying file APIs happen to normalize it.
    var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

    // Loading english PCFG parser from file
    var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // This sample shows parsing a list of correctly tokenized words
    var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
    var rawWords = Sentence.toCoreLabelList(sent);
    var tree = lp.apply(rawWords);
    tree.pennPrint();

    // This option shows loading and using an explicit tokenizer
    var sent2 = "This is another sentence.";
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new StringReader(sent2);
    var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    sent2Reader.close();
    var tree2 = lp.apply(rawWords2);

    // Extract dependencies from lexical tree
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();
    var gs = gsf.newGrammaticalStructure(tree2);
    var tdl = gs.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", tdl);

    // Extract collapsed dependencies from parsed tree
    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(tree2);
}
// Use Stanford.NLP.Net to parse a raw sentence into a Penn Treebank tree.
Tree Parse(string sent) {
    // Tokenize the raw sentence with an explicit PTB tokenizer.
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new java.io.StringReader(sent);
    var tokens = factory.getTokenizer(reader).tokenize();
    reader.close();

    // Parse using the shared LexicalizedParser instance.
    var tree = lp.apply(tokens);

    // Typed dependencies are derived exactly as before; the list itself is
    // not consumed here, matching the original behavior.
    var languagePack = new PennTreebankLanguagePack();
    var structureFactory = languagePack.grammaticalStructureFactory();
    var structure = structureFactory.newGrammaticalStructure(tree);
    var dependencies = structure.typedDependenciesCCprocessed();

    // Print in plain Penn Treebank format.
    //var printer = new TreePrint("penn,typedDependenciesCollapsed");
    var printer = new TreePrint("penn");
    printer.printTree(tree);
    return (tree);
}
public static void DemoAPI(LexicalizedParser lp) {
    // Demonstrates the two ways of feeding the parser.

    // Option 1: parse a list of correctly tokenized words.
    var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
    var rawWords = Sentence.toCoreLabelList(sent);
    var parse = lp.apply(rawWords);
    parse.pennPrint();

    // Option 2: load and use an explicit tokenizer on raw text.
    const string Sent2 = "This is another sentence.";
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new StringReader(Sent2);
    var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    sent2Reader.close(); // FIX: the reader was never closed, leaking the underlying Java resource
    parse = lp.apply(rawWords2);

    // Extract CC-processed typed dependencies from the parse tree.
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();
    var gs = gsf.newGrammaticalStructure(parse);
    var tdl = gs.typedDependenciesCCprocessed();
    System.Console.WriteLine("\n{0}\n", tdl);

    // Print the tree plus its collapsed dependencies.
    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(parse);
}
public void SentenceParser(string sent2) {
    var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

    // Loading english PCFG parser from file
    var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // FIX: the original called sent2.ToLower() and discarded the result
    // (C# strings are immutable), so the intended lower-casing never applied
    // and capitalized shape words could not match the lowercase keywords.
    sent2 = sent2.ToLower();

    // This option shows loading and using an explicit tokenizer
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new java.io.StringReader(sent2);
    var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    sent2Reader.close();
    var tree2 = lp.apply(rawWords2);

    // Extract dependencies from lexical tree
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();
    var gs = gsf.newGrammaticalStructure(tree2);
    var tdl = gs.typedDependenciesCCprocessed();
    //Console.WriteLine("\n{0}\n", tdl);

    // Extract collapsed dependencies from parsed tree
    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(tree2);

    // Scan the collapsed dependencies for the first recognized shape keyword.
    ArrayList dep = gs.typedDependenciesCollapsed() as ArrayList;
    if (dep != null) { // FIX: guard the `as` cast — a non-ArrayList result would have crashed the foreach with an NRE
        foreach (TypedDependency td in dep) {
            for (int i = 0; i < keyword.Length; i++) {
                if (td.dep().originalText().Equals(keyword[i])) {
                    keyFlag = true;
                    key = keyword[i];
                    break;
                }
            }
            if (keyFlag) {
                break;
            }
        }
    }
    keyFlag = false;

    // Map the detected keyword to its shape and associate the used properties.
    switch (key) {
        case "circle":
            Circle circle = new Circle();
            shape = circle.GetProps();
            propsUsed = Associator(shape, dep);
            break;
        case "rectangle":
            Rectangle rect = new Rectangle();
            shape = rect.GetProps();
            propsUsed = Associator(shape, dep);
            break;
        case "triangle":
            Triangle tri = new Triangle();
            shape = tri.GetProps();
            propsUsed = Associator(shape, dep);
            break;
        case "square":
            Square square = new Square();
            shape = square.GetProps();
            propsUsed = Associator(shape, dep);
            break;
        default:
            break;
    } //End of Switch

    dependency = tdl.ToString();
} //End of SentenceParser
// Entry point: parses a hand-written penn-style tree string, loads an RST tree
// and an AMR RDF graph from disk, weights each EDU sentence with its RST
// (O'Donnell) score, and replaces the contents of the Neo4j-backed repository.
// NOTE(review): pipeline description inferred from type/method names — confirm.
static void Main(string[] args) {
    // Parse the sample tree string via the custom label factory, normalizer,
    // and tokenizer adapter, then echo it in Penn format.
    var tf = new edu.stanford.nlp.trees.LabeledScoredTreeFactory(new CustomStringLabelFactory());
    var str = "(x2 / score :null_edge(x1 / null_tag) :null_edge(x3 / null_tag) :time(xap0 / before :quant(x5 / temporal - quantity :unit(y / year) :null_edge(x4 / null_tag))))";
    var input = new java.io.StringReader(str);
    // NOTE(review): `input` is handed both to PennTreeReader and to
    // CustomTokenizerAdapter — verify they are meant to share one reader.
    var treeReader = new edu.stanford.nlp.trees.PennTreeReader(input, tf, new CustomTreeNormalizer(), new CustomTokenizerAdapter(input));
    var t = treeReader.readTree();
    TreePrint p = new TreePrint("penn");
    p.printTree(t);

    //READ RST INFORMATION
    RSTTree tree = new RSTTree("lincon");
    tree.Load(Path.Combine(Root, "rst.xml"));
    tree.EvaluateODonell();
    var sum = tree.Summarize();

    //READ AMR INFORMATION FOR EACH EDU AND ASSOCIATE THE ODONELL SCORE
    IGraph g = new Graph();
    var parser = new VDS.RDF.Parsing.RdfXmlParser();
    // NTriplesParser ntparser = new NTriplesParser();
    parser.Load(g, Path.Combine(Root, "output.xml"));
    var document = new AMRDocument();
    document.Load(g);
    // Attach each EDU's RST weight. First() throws if an EDU id is absent from
    // the summary — presumably ids always align; TODO confirm.
    foreach (var item in document.EDUSentences)
    {
        item.ApplyRSTWeight(sum.Where(c => c.edu == item.Id).Select(c => c.Weight).First());
    }

    //var rstdocument = new RSTDocumentRepository();
    //rstdocument.DeleteAllNodes();
    //rstdocument.Save(tree);

    // Clear the repository, then persist the freshly scored document.
    AMRNEORepository repo = new AMRNEORepository();
    repo.DeleteAllNodes();
    repo.SaveDocument(document);

    // ---- Retained exploratory/debug code below (commented out) ----
    //var ids = Helper.ReadIds(g);
    //foreach (var item in ids)
    //{
    //    item.sentence = Helper.GetSentence(g, item);
    //    item.AddNodes(g);
    //    if (item.id == 22)
    //    {
    //        Console.WriteLine(item.urlid);
    //        Console.WriteLine(item.sentence);
    //        Console.WriteLine(item.Root.uriid);
    //        Console.WriteLine(item.Root.Term.uriid);
    //        Console.WriteLine(item.Root.Term.type);
    //    }
    //}
    //SparqlQueryParser qparser = new SparqlQueryParser();
    ////Then we can parse a SPARQL string into a query
    //StringBuilder querystr = new StringBuilder();
    //querystr.AppendLine("PREFIX amr-core: <http://amr.isi.edu/rdf/core-amr#>");
    //querystr.AppendLine("PREFIX amr-data: <http://amr.isi.edu/amr_data#>");
    //querystr.AppendLine("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>");
    //querystr.AppendLine("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>");
    //querystr.AppendLine("PREFIX amr-terms: <http://amr.isi.edu/rdf/amr-terms#>");
    ////querystr.AppendLine("SELECT ?p WHERE { ?s rdf:type ?p }");
    ////querystr.Append("SELECT ?s ?sentence ?id ?root ?rtype ?amrtype");
    //querystr.Append("SELECT ?root ?rtype ?amrtypelbl ");
    //querystr.Append("WHERE {");
    //querystr.Append("?s amr-core:has-sentence ?sentence.");
    //querystr.Append("?s amr-core:has-id ?id.");
    //querystr.Append("?s amr-core:root ?root. ");
    //querystr.Append("?root rdf:type ?rtype. ");
    //querystr.Append("?rtype rdf:type ?amrtype. ");
    //querystr.Append("?amrtype rdfs:label ?amrtypelbl. ");
    //querystr.Append("}");
    //SparqlQuery q = qparser.ParseFromString(querystr.ToString());
    ////http://amr.isi.edu/rdf/core-amr#has-id
    //var rset = (SparqlResultSet)g.ExecuteQuery(q);
    //var SB = new StringBuilder();
    //if (rset.Result && rset.Results.Count > 0)
    //{
    //    foreach (var result in rset.Results)
    //    {
    //        foreach (var r in result)
    //        {
    //            Console.WriteLine(r.Key + " " + r.Value);
    //        }
    //        //Do what you want with each result
    //    }
    //}
    //File.WriteAllText("dic.txt", SB.ToString());
    //http://amr.isi.edu/amr_data/22#root01
    //foreach (var item in g.Triples)
    //{
    //    Console.WriteLine(item.Subject);
    //}
    //foreach (var node in g.Nodes)
    //{
    //    Console.WriteLine(node.ToString());
    //}
    //g.SaveToFile("output.rdf");
}