public void ParseEasySentence()
        {
            // First: parse a list of already-tokenized words.
            var tokens = new[] { "This", "is", "an", "easy", "sentence", "." };
            var labels = SentenceUtils.toCoreLabelList(tokens);
            var parse  = _lp.apply(labels);

            Assert.NotNull(parse);
            parse.pennPrint();

            // Second: parse raw text through an explicit PTB tokenizer.
            var rawText = "This is another sentence.";
            var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

            using var reader = new StringReader(rawText);
            var tokenized = factory.getTokenizer(reader).tokenize();

            parse = _lp.apply(tokenized);
            Assert.NotNull(parse);

            // Derive CC-processed typed dependencies from the parse tree.
            var languagePack     = new PennTreebankLanguagePack();
            var structureFactory = languagePack.grammaticalStructureFactory();
            var structure        = structureFactory.newGrammaticalStructure(parse);
            var dependencies     = structure.typedDependenciesCCprocessed();

            TestContext.Out.WriteLine($"\n{dependencies}\n");

            // Print the tree in Penn Treebank form plus collapsed dependencies.
            var printer = new TreePrint("penn,typedDependenciesCollapsed");

            Assert.NotNull(printer);
            printer.printTree(parse);
        }
// Example #2
        public List <string> FindTag(string text, List <string> tag)
        {
            // Collect every token whose POS tag appears in the requested tag list.
            var matches = new List<string>();

            var sentences = MaxentTagger.tokenizeText(new java.io.StringReader(text)).toArray();
            foreach (ArrayList sentence in sentences)
            {
                // Tag one sentence, then walk its tokens as CoreLabels.
                List tagged = this.tagger.tagSentence(sentence);
                foreach (CoreLabel token in SentenceUtils.toCoreLabelList(tagged).toArray())
                {
                    if (tag.Contains(token.tag()))
                    {
                        matches.Add(token.word());
                    }
                }
            }
            return matches;
        }
// Example #3
        public List <string> FindNouns(string text)
        {
            // Gather singular ("NN") and plural ("NNS") noun tokens from the text.
            var result = new List<string>();

            foreach (ArrayList sentence in MaxentTagger.tokenizeText(new java.io.StringReader(text)).toArray())
            {
                // Tag one sentence, then inspect each token's POS tag.
                List tagged = this.tagger.tagSentence(sentence);
                foreach (CoreLabel token in SentenceUtils.toCoreLabelList(tagged).toArray())
                {
                    var pos = token.tag();
                    if (pos == "NN" || pos == "NNS")
                    {
                        result.Add(token.word());
                    }
                }
            }
            return result;
        }
// Example #4
        public List <TargetCandidate> GetAllNounPhrases(string[] sentence, string[] target)
        {
            // Parse the sentence and compute its CC-processed typed dependencies once up front.
            var parseTree    = lexParser.apply(SentenceUtils.toCoreLabelList(sentence));
            var dependencies = grammaticalStructureFactory.newGrammaticalStructure(parseTree).typedDependenciesCCprocessed();

            var candidates = new List<TargetCandidate>();

            // Walk every subtree, keeping only the ones labelled as noun phrases ("NP").
            var subTrees = parseTree.subTreeList();

            for (int index = 0; index < subTrees.size(); index++)
            {
                var node = (Tree)subTrees.get(index);
                if (node.label().value() != "NP")
                {
                    continue;
                }

                var phrase = NounPhrase.SetSentence(sentence, parseTree, dependencies, target);
                phrase.SetPhrase(SentenceUtils.listToString(node.yield()));
                candidates.Add(new TargetCandidate(phrase, caching));
            }

            return candidates;
        }
// Example #5
        static void Main()
        {
            // Path to models extracted from `stanford-parser-3.6.0-models.jar`.
            var jarRoot = @"..\..\..\..\data\paket-files\nlp.stanford.edu\stanford-parser-full-2018-10-17\models\";

            // jarRoot already ends with '\', so append without a leading separator
            // (the original concatenation produced a doubled "\\" in the path).
            var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

            // Load the English PCFG parser from file.
            var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

            // This sample shows parsing a list of correctly tokenized words.
            var sent     = new[] { "This", "is", "an", "easy", "sentence", "." };
            var rawWords = SentenceUtils.toCoreLabelList(sent);
            var tree     = lp.apply(rawWords);

            tree.pennPrint();

            // This option shows loading and using an explicit tokenizer.
            // The reader is released via a using declaration rather than a
            // Java-style manual close(), so it is also released on exceptions.
            var sent2            = "This is another sentence.";
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

            using var sent2Reader = new StringReader(sent2);
            var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();

            var tree2 = lp.apply(rawWords2);

            // Extract dependencies from the lexical tree.
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs  = gsf.newGrammaticalStructure(tree2);
            var tdl = gs.typedDependenciesCCprocessed();

            Console.WriteLine("\n{0}\n", tdl);

            // Print the tree plus collapsed typed dependencies.
            var tp = new TreePrint("penn,typedDependenciesCollapsed");

            tp.printTree(tree2);
        }