Exemple #1
0
        /// <summary>
        /// Demonstrates two ways of feeding the parser: an array of already tokenized words,
        /// and raw text split by an explicit PTB tokenizer. The first parse is printed with
        /// <c>pennPrint()</c>; for the second, the result of <c>typedDependenciesCCprocessed()</c>
        /// is written to the console and the tree is printed through a
        /// <c>"penn,typedDependenciesCollapsed"</c> <c>TreePrint</c>.
        /// </summary>
        /// <param name="lp">Parser applied to both sentences.</param>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // This option shows parsing a list of correctly tokenized words
            var sent = new[] { "This", "is", "an", "easy", "sentence", "." };

            java.util.List rawWords = Sentence.toCoreLabelList(sent);
            Tree           parse    = lp.apply(rawWords);

            parse.pennPrint();

            // This option shows loading and using an explicit tokenizer
            const string     Sent2            = "This is another sentence.";
            TokenizerFactory tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader = new StringReader(Sent2);

            java.util.List rawWords2;
            try
            {
                rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
            }
            finally
            {
                // Release the reader even when tokenization throws.
                sent2Reader.close();
            }

            parse = lp.apply(rawWords2);

            var tlp = new PennTreebankLanguagePack();
            GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
            GrammaticalStructure        gs  = gsf.newGrammaticalStructure(parse);

            java.util.List tdl = gs.typedDependenciesCCprocessed();
            Console.WriteLine("\n{0}\n", tdl);

            var tp = new TreePrint("penn,typedDependenciesCollapsed");

            tp.printTree(parse);
        }
        /// <summary>
        /// Parses one pre-tokenized sentence and one raw sentence split by an explicit tokenizer,
        /// asserting after each step that a non-null object came back and writing the trees and
        /// dependencies to the output along the way.
        /// </summary>
        public void ParseEasySentence()
        {
            // First pass: the words are already tokenized, so they are wrapped as core labels directly.
            var tokens = new[] { "This", "is", "an", "easy", "sentence", "." };
            var tree   = _lp.apply(SentenceUtils.toCoreLabelList(tokens));

            Assert.NotNull(tree);
            tree.pennPrint();

            // Second pass: raw text goes through an explicitly created tokenizer first.
            const string RawText = "This is another sentence.";
            var ptbFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

            using var reader = new StringReader(RawText);

            tree = _lp.apply(ptbFactory.getTokenizer(reader).tokenize());
            Assert.NotNull(tree);

            // Build the grammatical structure for the second tree and dump its dependencies.
            var dependencies = new PennTreebankLanguagePack()
                .grammaticalStructureFactory()
                .newGrammaticalStructure(tree)
                .typedDependenciesCCprocessed();

            TestContext.Out.WriteLine($"\n{dependencies}\n");

            var printer = new TreePrint("penn,typedDependenciesCollapsed");

            Assert.NotNull(printer);
            printer.printTree(tree);
        }
Exemple #3
0
        /// <summary>
        /// Tokenizes and parses <paramref name="input"/>, renders the parse through an XML
        /// <c>TreePrint</c> into an in-memory stream, and passes the resulting text to
        /// <c>ParsedStatementFactory.CreateParsedStatement</c>.
        /// </summary>
        /// <param name="input">Raw text to tokenize and parse.</param>
        /// <returns>Whatever <c>ParsedStatementFactory.CreateParsedStatement</c> returns for the XML text.</returns>
        public ParsedStatementFactory.ParseResult ParseStatement(string input)
        {
            var sent2Reader = new StringReader(input);

            java.util.List rawWords2;
            try
            {
                rawWords2 = _tokenizerFactory.getTokenizer(sent2Reader).tokenize();
            }
            finally
            {
                // Release the reader even when tokenization throws.
                sent2Reader.close();
            }

            var parse = _lp.apply(rawWords2);

            // NOTE(review): the dependency list is not used below; the call is kept so that any
            // failure while building it still surfaces here - confirm whether it can be dropped.
            var gs = _structureFactory.newGrammaticalStructure(parse);
            gs.typedDependenciesCCprocessed();

            var xmlTreePrint = new TreePrint("xmlTree, dependencies", "xml, collapsedDependencies", _tlp);
            var stream       = new ByteArrayOutputStream();

            xmlTreePrint.printTree(parse, new PrintWriter(stream));

            // NOTE(review): the PrintWriter is never flushed explicitly and the closing "</s>" is
            // appended by hand; presumably the two are related - verify before adding a flush,
            // otherwise the closing tag may end up duplicated.
            string xmlOutput = stream.toString() + "</s>";

            return ParsedStatementFactory.CreateParsedStatement(xmlOutput);
        }
        /// <summary>
        /// Demonstrates parsing an array of already tokenized words and parsing raw text that is
        /// first split by an explicit PTB tokenizer. The first tree is printed with
        /// <c>pennPrint()</c>; for the second, the result of <c>typedDependenciesCCprocessed()</c>
        /// is written to the console and the tree is printed through a
        /// <c>"penn,typedDependenciesCollapsed"</c> <c>TreePrint</c>.
        /// </summary>
        /// <param name="lp">Parser applied to both sentences.</param>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // This option shows parsing a list of correctly tokenized words
            var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
            var rawWords = Sentence.toCoreLabelList(sent);
            var parse = lp.apply(rawWords);
            parse.pennPrint();

            // This option shows loading and using an explicit tokenizer
            const string Sent2 = "This is another sentence.";
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader = new StringReader(Sent2);

            java.util.List rawWords2;
            try
            {
                rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
            }
            finally
            {
                // Release the reader even when tokenization throws.
                sent2Reader.close();
            }

            parse = lp.apply(rawWords2);

            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs = gsf.newGrammaticalStructure(parse);
            var tdl = gs.typedDependenciesCCprocessed();
            System.Console.WriteLine("\n{0}\n", tdl);

            var tp = new TreePrint("penn,typedDependenciesCollapsed");
            tp.printTree(parse);
        }
Exemple #5
0
        /// <summary>
        /// Tokenizes and parses <paramref name="input"/>, writes the result of
        /// <c>typedDependenciesCCprocessed()</c> to the console, prints the tree through a
        /// <c>"penn,typedDependenciesCollapsed"</c> <c>TreePrint</c>, and wraps the parse tree
        /// in a <c>ParsedStatement</c>.
        /// </summary>
        /// <param name="input">Raw text to tokenize and parse.</param>
        /// <returns>A new <c>ParsedStatement</c> built from the parse tree.</returns>
        public ParsedStatement ParseSentence(string input)
        {
            var sent2Reader = new StringReader(input);

            java.util.List rawWords2;
            try
            {
                rawWords2 = _tokenizerFactory.getTokenizer(sent2Reader).tokenize();
            }
            finally
            {
                // Release the reader even when tokenization throws.
                sent2Reader.close();
            }

            var parse = _lp.apply(rawWords2);

            var gs  = _structureFactory.newGrammaticalStructure(parse);
            var tdl = gs.typedDependenciesCCprocessed();

            System.Console.WriteLine("\n{0}\n", tdl);

            var tp = new TreePrint("penn,typedDependenciesCollapsed");

            tp.printTree(parse);
            return new ParsedStatement(parse);
        }
        /// <summary>
        /// Iterates over the sentences a <c>DocumentPreprocessor</c> yields for
        /// <paramref name="fileName"/>, parses each one, prints the tree with <c>pennPrint()</c>
        /// and writes the result of <c>typedDependenciesCCprocessed(true)</c> to the console.
        /// </summary>
        /// <param name="lp">Parser applied to every sentence.</param>
        /// <param name="fileName">File handed to <c>DocumentPreprocessor</c>.</param>
        public static void DemoDP(LexicalizedParser lp, string fileName)
        {
            var structureFactory = new PennTreebankLanguagePack().grammaticalStructureFactory();

            // A tokenizer could also be created here and handed to DocumentPreprocessor.
            foreach (List words in new DocumentPreprocessor(fileName))
            {
                var tree = lp.apply(words);
                tree.pennPrint();

                var dependencies = structureFactory.newGrammaticalStructure(tree)
                                                   .typedDependenciesCCprocessed(true);
                System.Console.WriteLine("\n{0}\n", dependencies);
            }
        }
Exemple #7
0
        /// <summary>
        /// Parses every sentence that a <c>DocumentPreprocessor</c> produces for
        /// <paramref name="fileName"/>; each tree is printed with <c>pennPrint()</c>, followed by
        /// the result of <c>typedDependenciesCCprocessed(true)</c> on the console.
        /// </summary>
        /// <param name="lp">Parser applied to every sentence.</param>
        /// <param name="fileName">File handed to <c>DocumentPreprocessor</c>.</param>
        public static void DemoDP(LexicalizedParser lp, string fileName)
        {
            var languagePack = new PennTreebankLanguagePack();
            var factory      = languagePack.grammaticalStructureFactory();

            // DocumentPreprocessor does the loading, sentence splitting and tokenizing;
            // an explicitly created tokenizer could be passed to it instead.
            var document = new DocumentPreprocessor(fileName);

            foreach (List tokens in document)
            {
                var tree = lp.apply(tokens);
                tree.pennPrint();

                var structure    = factory.newGrammaticalStructure(tree);
                var dependencies = structure.typedDependenciesCCprocessed(true);
                Console.WriteLine("\n{0}\n", dependencies);
            }
        }
Exemple #8
0
        /// <summary>
        /// Loads the English PCFG parser model from a directory resolved relative to the
        /// executing assembly, tokenizes and parses <paramref name="sentence"/>, and returns the
        /// labels of all leaf nodes whose parent label value is contained in <c>nounLabels</c>.
        /// </summary>
        /// <param name="sentence">Raw text to tokenize and parse.</param>
        /// <returns>The string form of the label of every matching leaf.</returns>
        public static List <string> ExtractNounsFromSemantics(string sentence)
        {
            // Resolve the models directory: strip three path segments from the assembly code base,
            // convert the remainder to a local path and step up one more directory.
            string assemblyPath    = Assembly.GetExecutingAssembly().GetName().CodeBase;
            string strippedPath    = Path.GetDirectoryName(Path.GetDirectoryName(Path.GetDirectoryName(assemblyPath)));
            string projectPath     = Directory.GetParent(new Uri(strippedPath).LocalPath).FullName;
            // Plain concatenation is intentional: the suffix starts with a separator, so
            // Path.Combine would treat it as rooted and discard projectPath.
            string modelsDirectory = Path.GetFullPath(projectPath + @"\Parser\CoreNLP-3.9.1-Models\edu\stanford\nlp\models");

            // Loading english PCFG parser from file
            // NOTE(review): the model is loaded on every call; presumably expensive - consider caching at the call site.
            LexicalizedParser lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

            // This shows loading and using an explicit tokenizer
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader      = new java.io.StringReader(sentence);

            java.util.List rawWords;
            try
            {
                rawWords = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
            }
            finally
            {
                // Close the reader on the failure path as well, not only after a successful tokenize.
                sent2Reader.close();
            }

            var tree = lp.apply(rawWords);

            return tree.toArray()
                       .Cast <LabeledScoredTreeNode>()
                       .Where(n => n.isLeaf() && nounLabels.Contains(n.parent(tree).label().value()))
                       .Select(n => n.label().ToString())
                       .ToList();
        }
Exemple #9
0
        /// <summary>
        /// Parses the tokenized <paramref name="sentence"/> and creates one
        /// <c>TargetCandidate</c> for every subtree whose label value is "NP".
        /// </summary>
        /// <param name="sentence">Tokens of the sentence to parse.</param>
        /// <param name="target">Passed through unchanged to <c>NounPhrase.SetSentence</c>.</param>
        /// <returns>The candidates, in the order their subtrees appear in <c>subTreeList()</c>.</returns>
        public List <TargetCandidate> GetAllNounPhrases(string[] sentence, string[] target)
        {
            var parseTree = lexParser.apply(SentenceUtils.toCoreLabelList(sentence));
            var typedDeps = grammaticalStructureFactory.newGrammaticalStructure(parseTree).typedDependenciesCCprocessed();

            var candidates = new List <TargetCandidate>();
            var nodes      = parseTree.subTreeList();

            for (int index = 0; index < nodes.size(); index++)
            {
                var node = (Tree)nodes.get(index);

                // Only noun-phrase subtrees produce a candidate.
                if (node.label().value() != "NP")
                {
                    continue;
                }

                NounPhrase phrase = NounPhrase.SetSentence(sentence, parseTree, typedDeps, target);
                phrase.SetPhrase(SentenceUtils.listToString(node.yield()));
                candidates.Add(new TargetCandidate(phrase, caching));
            }

            return candidates;
        }
Exemple #10
0
        /// <summary>
        /// Uses Stanford.NLP.Net to tokenize and parse <paramref name="sent"/>, prints the
        /// resulting tree through a <c>"penn"</c> <c>TreePrint</c> and returns it.
        /// </summary>
        /// <param name="sent">Raw sentence text.</param>
        /// <returns>The tree produced by <c>lp.apply</c>.</returns>
        Tree Parse(string sent)
        {
            var ptbFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var reader     = new java.io.StringReader(sent);
            var tokens     = ptbFactory.getTokenizer(reader).tokenize();

            reader.close();

            var parsed = lp.apply(tokens);

            // Dependencies are still extracted from the tree, although the result is not used here.
            new PennTreebankLanguagePack()
                .grammaticalStructureFactory()
                .newGrammaticalStructure(parsed)
                .typedDependenciesCCprocessed();

            // Print the tree itself only ("penn" format, no collapsed dependencies).
            new TreePrint("penn").printTree(parsed);

            return parsed;
        }