Example #1
0
        /// <summary>
        /// Demonstrates the Stanford parser: parses a pre-tokenized sentence,
        /// then a raw string via an explicit PTB tokenizer, and prints the
        /// typed dependencies plus a collapsed-dependency tree to the console.
        /// </summary>
        static void Main()
        {
            // Path to models extracted from `stanford-parser-3.6.0-models.jar`.
            // Fix: jarRoot already ends with '\', so do not prepend another one
            // (the original produced "models\\edu\..." with a doubled separator).
            var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-parser-full-2016-10-31\models\";
            var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

            // Loading english PCFG parser from file
            var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

            // This sample shows parsing a list of correctly tokenized words
            var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
            var rawWords = SentenceUtils.toCoreLabelList(sent);
            var tree = lp.apply(rawWords);
            tree.pennPrint();

            // This option shows loading and using an explicit tokenizer
            var sent2 = "This is another sentence.";
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader = new StringReader(sent2);
            var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
            sent2Reader.close();
            var tree2 = lp.apply(rawWords2);

            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs = gsf.newGrammaticalStructure(tree2);
            var tdl = gs.typedDependenciesCCprocessed();
            Console.WriteLine("\n{0}\n", tdl);

            // Extract collapsed dependencies from parsed tree
            var tp = new TreePrint("penn,typedDependenciesCollapsed");
            tp.printTree(tree2);
        }
Example #2
0
        /// <summary>
        /// Demonstrates two ways to drive the parser: applying it to an
        /// already-tokenized word list, and tokenizing a raw string first with
        /// an explicit PTB tokenizer. Prints both parse trees and the
        /// CC-processed typed dependencies to the console.
        /// </summary>
        /// <param name="lp">A loaded lexicalized parser instance.</param>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // Option 1: parse a list of correctly tokenized words.
            var tokens = new[] { "This", "is", "an", "easy", "sentence", "." };
            var labels = Sentence.toCoreLabelList(tokens);
            var parseTree = lp.apply(labels);

            parseTree.pennPrint();

            // Option 2: tokenize a raw sentence with an explicit tokenizer.
            const string Sent2 = "This is another sentence.";
            var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var reader = new StringReader(Sent2);
            var tokenized = factory.getTokenizer(reader).tokenize();

            parseTree = lp.apply(tokenized);

            // Derive the grammatical structure and its CC-processed
            // typed dependencies for the second parse.
            var languagePack = new PennTreebankLanguagePack();
            var structureFactory = languagePack.grammaticalStructureFactory();
            var structure = structureFactory.newGrammaticalStructure(parseTree);
            var dependencies = structure.typedDependenciesCCprocessed();

            Console.WriteLine("\n{0}\n", dependencies);

            // Print the tree again together with collapsed dependencies.
            var printer = new TreePrint("penn,typedDependenciesCollapsed");
            printer.printTree(parseTree);
        }
Example #3
0
        /// <summary>
        /// Parses a hand-tokenized sentence and then a raw sentence (via an
        /// explicit PTB tokenizer), asserting each parse succeeds, and writes
        /// the typed dependencies plus a collapsed-dependency tree to the
        /// test output.
        /// </summary>
        public void ParseEasySentence()
        {
            // Parse a list of correctly tokenized words.
            var words  = new[] { "This", "is", "an", "easy", "sentence", "." };
            var labels = SentenceUtils.toCoreLabelList(words);
            var result = _lp.apply(labels);

            Assert.NotNull(result);
            result.pennPrint();

            // Parse a raw sentence through an explicit tokenizer.
            var rawSentence = "This is another sentence.";
            var factory     = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

            using var reader = new StringReader(rawSentence);
            var tokenized = factory.getTokenizer(reader).tokenize();

            result = _lp.apply(tokenized);
            Assert.NotNull(result);

            // Extract CC-processed typed dependencies from the parse.
            var languagePack     = new PennTreebankLanguagePack();
            var structureFactory = languagePack.grammaticalStructureFactory();
            var structure        = structureFactory.newGrammaticalStructure(result);
            var dependencies     = structure.typedDependenciesCCprocessed();

            TestContext.Out.WriteLine($"\n{dependencies}\n");

            // Print the tree with collapsed dependencies.
            var printer = new TreePrint("penn,typedDependenciesCollapsed");

            Assert.NotNull(printer);
            printer.printTree(result);
        }
        /// <summary>
        /// Tokenizes and parses <paramref name="inputText"/> with the English
        /// PCFG model, printing the CC-processed typed dependencies and a
        /// collapsed-dependency tree to the console.
        /// </summary>
        /// <param name="inputText">Raw text to tokenize and parse.</param>
        /// <param name="modelsRoot">
        /// Optional root of the extracted Stanford models jar. Defaults to the
        /// original hard-coded local path, so existing callers are unaffected.
        /// </param>
        public void ProcessText(string inputText, string modelsRoot = null)
        {
            // Generalized: the model location is overridable instead of being
            // usable only on one specific machine.
            var jarRoot         = modelsRoot ?? "C:\\stanford-parser-full-2016-10-31\\stanford-parser-3.7.0-models";
            var modelsDirectory = jarRoot + "\\edu\\stanford\\nlp\\models";

            // Loading english PCFG parser from file
            var lp = LexicalizedParser.loadModel(modelsDirectory + "\\lexparser\\englishPCFG.ser.gz");

            // This option shows loading and using an explicit tokenizer
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sentReader       = new StringReader(inputText);
            var rawWords         = tokenizerFactory.getTokenizer(sentReader).tokenize();

            // Release the reader as soon as tokenization is complete.
            sentReader.close();
            var tree = lp.apply(rawWords);

            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs  = gsf.newGrammaticalStructure(tree);
            var tdl = gs.typedDependenciesCCprocessed();

            Console.WriteLine("\n{0}\n", tdl);

            // Extract collapsed dependencies from parsed tree
            var tp = new TreePrint("penn,typedDependenciesCollapsed");

            tp.printTree(tree);
        }
Example #5
0
        /// <summary>
        /// Tokenizes and parses <paramref name="sentence"/> with the English
        /// PCFG model and prints the Penn tree plus collapsed typed
        /// dependencies to the console.
        /// </summary>
        /// <param name="sentence">Raw sentence text to parse.</param>
        public static void ParseString(string sentence)
        {
            // Path to models extracted from `stanford-parser-3.6.0-models.jar`
            var modelsDirectory = @"../../../data/paket-files/stanford-corenlp-3.9.1-models/edu/stanford/nlp/models";
            var model           = @"/lexparser/englishPCFG.ser.gz";

            // Loading english PCFG parser from file
            var lp = LexicalizedParser.loadModel(modelsDirectory + model);

            // Tokenize with an explicit PTB tokenizer.
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader      = new StringReader(sentence);
            var rawWords2        = tokenizerFactory.getTokenizer(sent2Reader).tokenize();

            // Fix: the reader was previously left open (the close() call was
            // commented out); release it as soon as tokenization is done.
            sent2Reader.close();
            var tree2 = lp.apply(rawWords2);

            // Print the tree together with collapsed typed dependencies.
            var tp = new TreePrint("penn,typedDependenciesCollapsed");

            tp.printTree(tree2);
        }
Example #6
0
File: NLPService.cs  Project: henkin/Nala
        /// <summary>
        /// Parses <paramref name="input"/> and renders the parse as an XML
        /// tree with collapsed dependencies, which is then handed to
        /// <see cref="ParsedStatementFactory"/> to build the result.
        /// </summary>
        /// <param name="input">Raw statement text to parse.</param>
        public ParsedStatementFactory.ParseResult ParseStatement(string input)
        {
            var sent2Reader = new StringReader(input);
            var rawWords2   = _tokenizerFactory.getTokenizer(sent2Reader).tokenize();
            var parse       = _lp.apply(rawWords2);

            // The previous version also built a GrammaticalStructure and
            // typed dependencies here, but only ever used them in
            // commented-out debug prints; that dead work has been removed.

            var xmlTreePrint = new TreePrint("xmlTree, dependencies", "xml, collapsedDependencies", _tlp);
            var stream       = new ByteArrayOutputStream();
            var writer       = new PrintWriter(stream);

            xmlTreePrint.printTree(parse, writer);

            // Fix: java.io.PrintWriter buffers its output; without flushing,
            // the byte stream could be read before everything was written.
            writer.flush();

            // NOTE(review): the appended "</s>" appears to close a sentence
            // element left open by the XML tree printer — confirm against the
            // TreePrint "xml" output format.
            string xmlOutput = stream.toString() + "</s>";

            return(ParsedStatementFactory.CreateParsedStatement(xmlOutput));
        }
Example #7
0
        /// <summary>
        /// Uses Stanford.NLP.Net to parse <paramref name="sent"/>: loads the
        /// English PCFG model, tokenizes the sentence, prints the Penn tree,
        /// and returns the parse tree.
        /// </summary>
        /// <param name="sent">Raw sentence text to parse.</param>
        /// <returns>The parse tree produced by the lexicalized parser.</returns>
        static Tree Parse(string sent)
        {
            // Loading english PCFG parser from file
            var lp = LexicalizedParser.loadModel(modelsDirectory + "\\lexparser\\englishPCFG.ser.gz");

            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sentReader       = new java.io.StringReader(sent);
            var rawWords         = tokenizerFactory.getTokenizer(sentReader).tokenize();

            sentReader.close();
            var tree = lp.apply(rawWords);

            // The previous version also built a GrammaticalStructure and
            // typed dependencies here but never used them; that dead work
            // has been removed.

            var tp = new TreePrint("penn");

            tp.printTree(tree);

            return(tree);
        }
Example #8
0
        /// <summary>
        /// Parses <paramref name="input"/>, prints its CC-processed typed
        /// dependencies followed by a collapsed-dependency tree, and wraps
        /// the parse in a <see cref="ParsedStatement"/>.
        /// </summary>
        /// <param name="input">Raw sentence text to parse.</param>
        public ParsedStatement ParseSentence(string input)
        {
            var reader    = new StringReader(input);
            var tokenized = _tokenizerFactory.getTokenizer(reader).tokenize();
            var parseTree = _lp.apply(tokenized);

            var structure    = _structureFactory.newGrammaticalStructure(parseTree);
            var dependencies = structure.typedDependenciesCCprocessed();

            System.Console.WriteLine("\n{0}\n", dependencies);

            var printer = new TreePrint("penn,typedDependenciesCollapsed");
            printer.printTree(parseTree);

            return new ParsedStatement(parseTree);
        }
Example #9
0
        /// <summary>
        /// Demonstrates the Stanford parser: parses a pre-tokenized sentence,
        /// then a raw string via an explicit PTB tokenizer, and prints the
        /// typed dependencies plus a collapsed-dependency tree.
        /// </summary>
        static void Main()
        {
            // Path to models extracted from `stanford-parser-3.6.0-models.jar`.
            // Fix: jarRoot already ends with '\', so do not prepend another one
            // (the original produced "models\\edu\..." with a doubled separator).
            var jarRoot         = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-parser-full-2015-12-09\models\";
            var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

            // Loading english PCFG parser from file
            var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

            // This sample shows parsing a list of correctly tokenized words
            var sent     = new[] { "This", "is", "an", "easy", "sentence", "." };
            var rawWords = Sentence.toCoreLabelList(sent);
            var tree     = lp.apply(rawWords);

            tree.pennPrint();

            // This option shows loading and using an explicit tokenizer
            var sent2            = "This is another sentence.";
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader      = new StringReader(sent2);
            var rawWords2        = tokenizerFactory.getTokenizer(sent2Reader).tokenize();

            sent2Reader.close();
            var tree2 = lp.apply(rawWords2);

            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs  = gsf.newGrammaticalStructure(tree2);
            var tdl = gs.typedDependenciesCCprocessed();

            Console.WriteLine("\n{0}\n", tdl);

            // Extract collapsed dependencies from parsed tree
            var tp = new TreePrint("penn,typedDependenciesCollapsed");

            tp.printTree(tree2);
        }
Example #10
0
        /// <summary>
        /// Uses Stanford.NLP.Net to parse <paramref name="sent"/>: tokenizes
        /// it with a PTB tokenizer, applies the parser held in the <c>lp</c>
        /// field, prints the Penn tree, and returns it.
        /// </summary>
        /// <param name="sent">Raw sentence text to parse.</param>
        /// <returns>The parse tree produced by the lexicalized parser.</returns>
        Tree Parse(string sent)
        {
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sentReader       = new java.io.StringReader(sent);
            var rawWords         = tokenizerFactory.getTokenizer(sentReader).tokenize();

            sentReader.close();
            var tree = lp.apply(rawWords);

            // The dependency extraction previously done here (language pack,
            // grammatical structure, typed dependencies) was never consumed,
            // so that dead work has been removed.

            var tp = new TreePrint("penn");

            tp.printTree(tree);

            return(tree);
        }
Example #11
0
        /// <summary>
        /// Shows two usage patterns for the parser: a pre-tokenized word
        /// list, and a raw string run through an explicit PTB tokenizer.
        /// Prints both parses and the CC-processed typed dependencies.
        /// </summary>
        /// <param name="lp">A loaded lexicalized parser.</param>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // Pattern 1: the caller supplies correctly tokenized words.
            var easyTokens = new[] { "This", "is", "an", "easy", "sentence", "." };
            var easyLabels = Sentence.toCoreLabelList(easyTokens);
            var tree       = lp.apply(easyLabels);

            tree.pennPrint();

            // Pattern 2: tokenize a raw sentence explicitly, then parse.
            const string Sent2 = "This is another sentence.";
            var factory  = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var reader   = new StringReader(Sent2);
            var wordList = factory.getTokenizer(reader).tokenize();

            tree = lp.apply(wordList);

            // Typed dependencies (CC-processed) for the second parse.
            var pack             = new PennTreebankLanguagePack();
            var structureFactory = pack.grammaticalStructureFactory();
            var structure        = structureFactory.newGrammaticalStructure(tree);
            var deps             = structure.typedDependenciesCCprocessed();

            System.Console.WriteLine("\n{0}\n", deps);

            // Penn tree plus collapsed dependencies.
            var printer = new TreePrint("penn,typedDependenciesCollapsed");
            printer.printTree(tree);
        }
Example #12
0
        /// <summary>
        /// Parses <paramref name="sent2"/>, scans its collapsed typed
        /// dependencies for a known shape keyword (circle, rectangle,
        /// triangle, square), and associates that shape's properties with the
        /// dependencies via <c>Associator</c>. The CC-processed dependency
        /// list is stored in the <c>dependency</c> field.
        /// </summary>
        /// <param name="sent2">Raw sentence text to parse.</param>
        public void SentenceParser(string sent2)
        {
            var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

            // Loading english PCFG parser from file
            var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

            // Fix: the original called sent2.ToLower() and discarded the
            // result (strings are immutable), so the input was never actually
            // lower-cased. Assign it back.
            sent2 = sent2.ToLower();

            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader      = new java.io.StringReader(sent2);
            var rawWords2        = tokenizerFactory.getTokenizer(sent2Reader).tokenize();

            sent2Reader.close();
            var tree2 = lp.apply(rawWords2);

            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs  = gsf.newGrammaticalStructure(tree2);
            var tdl = gs.typedDependenciesCCprocessed();

            // Extract collapsed dependencies from parsed tree
            var tp = new TreePrint("penn,typedDependenciesCollapsed");

            tp.printTree(tree2);

            // Fix: guard the cast — previously a failed `as ArrayList` cast
            // would have surfaced as a NullReferenceException in the foreach.
            var dep = gs.typedDependenciesCollapsed() as ArrayList;
            if (dep == null)
            {
                throw new InvalidOperationException("typedDependenciesCollapsed() did not return an ArrayList.");
            }

            // Find the first dependency whose dependent text matches one of
            // the configured keywords; record it in the key/keyFlag fields.
            foreach (TypedDependency td in dep)
            {
                for (int i = 0; i < keyword.Length; i++)
                {
                    if (td.dep().originalText().Equals(keyword[i]))
                    {
                        keyFlag = true;
                        key     = keyword[i];
                        break;
                    }
                }
                if (keyFlag)
                {
                    break;
                }
            }

            // Reset the flag for the next invocation.
            keyFlag = false;

            // Dispatch on the detected shape keyword and associate its
            // properties with the parsed dependencies.
            switch (key)
            {
            case "circle":

                Circle circle = new Circle();
                shape     = circle.GetProps();
                propsUsed = Associator(shape, dep);

                break;

            case "rectangle":

                Rectangle rect = new Rectangle();
                shape     = rect.GetProps();
                propsUsed = Associator(shape, dep);

                break;

            case "triangle":

                Triangle tri = new Triangle();
                shape     = tri.GetProps();
                propsUsed = Associator(shape, dep);

                break;

            case "square":

                Square square = new Square();
                shape     = square.GetProps();
                propsUsed = Associator(shape, dep);

                break;

            default:

                break;
            } //End of Switch

            dependency = tdl.ToString();
        } //End of SentenceParser
Example #13
0
        /// <summary>
        /// Pipeline driver: reads an AMR-style tree from a string, prints it,
        /// loads an RST tree and summarization weights from "rst.xml", loads
        /// AMR sentence data from an RDF/XML graph ("output.xml"), applies the
        /// per-EDU weights to the document, and persists the result to a Neo4j
        /// repository (AMRNEORepository). The large commented-out sections are
        /// earlier experiments (SPARQL queries, triple dumps) left for reference.
        /// </summary>
        static void Main(string[] args)
        {
            // Tree factory with a custom label factory; used by the Penn-style
            // tree reader below.
            var tf = new edu.stanford.nlp.trees.LabeledScoredTreeFactory(new CustomStringLabelFactory());

            // Sample AMR-like bracketed tree as raw text.
            var str   = "(x2 / score :null_edge(x1 / null_tag) :null_edge(x3 / null_tag)	:time(xap0 / before	:quant(x5 / temporal - quantity	:unit(y / year) :null_edge(x4 / null_tag))))";
            var input = new java.io.StringReader(str);

            // Parse the bracketed string into a Tree with custom normalizer
            // and tokenizer adapters.
            var treeReader = new edu.stanford.nlp.trees.PennTreeReader(input, tf, new CustomTreeNormalizer(), new CustomTokenizerAdapter(input));

            var t = treeReader.readTree();


            // Print the parsed tree in Penn format for inspection.
            TreePrint p = new TreePrint("penn");

            p.printTree(t);



            //READ RST INFORMATION
            RSTTree tree = new RSTTree("lincon");

            tree.Load(Path.Combine(Root, "rst.xml"));
            // NOTE(review): presumably O'Donnell-style RST weighting for
            // summarization — confirm against RSTTree's implementation.
            tree.EvaluateODonell();

            // Per-EDU summarization weights derived from the RST tree.
            var sum = tree.Summarize();

            //READ AMR INFORMATION FOR EACH EDU AND ASSOCIATTE THE ODONELL SCORE
            // Load the AMR RDF/XML graph from disk into an in-memory graph.
            IGraph g      = new Graph();
            var    parser = new VDS.RDF.Parsing.RdfXmlParser();

            //   NTriplesParser ntparser = new NTriplesParser();
            parser.Load(g, Path.Combine(Root, "output.xml"));
            var document = new AMRDocument();

            document.Load(g);

            // Attach each sentence's RST weight by matching EDU ids.
            // NOTE(review): First() throws if an EDU id has no matching
            // summary entry — assumed to always exist here.
            foreach (var item in document.EDUSentences)
            {
                item.ApplyRSTWeight(sum.Where(c => c.edu == item.Id).Select(c => c.Weight).First());
            }

            //var rstdocument = new RSTDocumentRepository();
            //rstdocument.DeleteAllNodes();
            //rstdocument.Save(tree);

            // Persist the weighted AMR document, replacing all existing nodes.
            AMRNEORepository repo = new AMRNEORepository();

            repo.DeleteAllNodes();
            repo.SaveDocument(document);



            //var ids = Helper.ReadIds(g);
            //foreach (var item in ids)
            //{

            //    item.sentence = Helper.GetSentence(g, item);
            //    item.AddNodes(g);


            //    if (item.id == 22)
            //    {
            //        Console.WriteLine(item.urlid);
            //        Console.WriteLine(item.sentence);
            //        Console.WriteLine(item.Root.uriid);
            //        Console.WriteLine(item.Root.Term.uriid);
            //        Console.WriteLine(item.Root.Term.type);
            //    }

            //}

            //SparqlQueryParser qparser = new SparqlQueryParser();
            ////Then we can parse a SPARQL string into a query

            //StringBuilder querystr = new StringBuilder();
            //querystr.AppendLine("PREFIX amr-core: <http://amr.isi.edu/rdf/core-amr#>");
            //querystr.AppendLine("PREFIX amr-data: <http://amr.isi.edu/amr_data#>");
            //querystr.AppendLine("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>");
            //querystr.AppendLine("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>");
            //querystr.AppendLine("PREFIX amr-terms: <http://amr.isi.edu/rdf/amr-terms#>");
            ////querystr.AppendLine("SELECT  ?p WHERE { ?s rdf:type ?p }");
            ////querystr.Append("SELECT ?s ?sentence ?id ?root ?rtype ?amrtype");
            //querystr.Append("SELECT ?root ?rtype  ?amrtypelbl ");
            //querystr.Append("WHERE {");
            //querystr.Append("?s amr-core:has-sentence ?sentence.");
            //querystr.Append("?s amr-core:has-id ?id.");
            //querystr.Append("?s amr-core:root ?root. ");
            //querystr.Append("?root rdf:type ?rtype. ");
            //querystr.Append("?rtype rdf:type ?amrtype. ");
            //querystr.Append("?amrtype rdfs:label ?amrtypelbl. ");
            //querystr.Append("}");

            //SparqlQuery q = qparser.ParseFromString(querystr.ToString());

            ////http://amr.isi.edu/rdf/core-amr#has-id
            //var rset = (SparqlResultSet)g.ExecuteQuery(q);

            //var SB = new StringBuilder();
            //if (rset.Result && rset.Results.Count > 0)
            //{
            //    foreach (var result in rset.Results)
            //    {
            //        foreach (var r in result)
            //        {
            //            Console.WriteLine(r.Key + " " + r.Value);
            //        }

            //        //Do what you want with each result
            //    }
            //}
            //File.WriteAllText("dic.txt", SB.ToString());
            //http://amr.isi.edu/amr_data/22#root01

            //foreach (var item in g.Triples)
            //{


            //    Console.WriteLine(item.Subject);

            //}



            //foreach (var node in g.Nodes)
            //{
            //    Console.WriteLine(node.ToString());
            //}

            //g.SaveToFile("output.rdf");
        }