Beispiel #1
0
        /// <summary>
        /// Demonstrates two ways of handing text to the parser: an already tokenized
        /// word array, and a raw string that is first run through an explicit tokenizer.
        /// Prints the Penn tree of the first sentence, then the typed dependencies and
        /// the tree output of the second one.
        /// </summary>
        /// <param name="lp">Parser applied to both example sentences.</param>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // Case 1: the words are already correctly tokenized.
            string[] tokens = { "This", "is", "an", "easy", "sentence", "." };
            java.util.List labeledTokens = Sentence.toCoreLabelList(tokens);
            Tree tree = lp.apply(labeledTokens);
            tree.pennPrint();

            // Case 2: load an explicit tokenizer and let it split the raw text.
            const string RawText = "This is another sentence.";
            TokenizerFactory factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var tokenizer = factory.getTokenizer(new StringReader(RawText));
            java.util.List tokenized = tokenizer.tokenize();
            tree = lp.apply(tokenized);

            // Derive the typed dependencies of the second parse and print them.
            GrammaticalStructureFactory structureFactory =
                new PennTreebankLanguagePack().grammaticalStructureFactory();
            GrammaticalStructure structure = structureFactory.newGrammaticalStructure(tree);
            java.util.List dependencies = structure.typedDependenciesCCprocessed();
            Console.WriteLine("\n{0}\n", dependencies);

            // Finally print the second parse in the "penn" and "typedDependenciesCollapsed" formats.
            new TreePrint("penn,typedDependenciesCollapsed").printTree(tree);
        }
Beispiel #2
0
        /// <summary>
        /// Tokenizes and parses <paramref name="input"/>, renders the parse through an
        /// XML <c>TreePrint</c> and passes the resulting text to
        /// <c>ParsedStatementFactory.CreateParsedStatement</c>.
        /// </summary>
        /// <param name="input">Raw text to tokenize and parse.</param>
        /// <returns>Whatever <c>ParsedStatementFactory.CreateParsedStatement</c> produces for the XML output.</returns>
        public ParsedStatementFactory.ParseResult ParseStatement(string input)
        {
            var reader = new StringReader(input);
            var tokens = _tokenizerFactory.getTokenizer(reader).tokenize();
            var parse  = _lp.apply(tokens);

            var xmlTreePrint = new TreePrint("xmlTree, dependencies", "xml, collapsedDependencies", _tlp);
            var buffer       = new ByteArrayOutputStream();
            var writer       = new PrintWriter(buffer);

            xmlTreePrint.printTree(parse, writer);

            // PrintWriter buffers its output; flush explicitly so the byte buffer is
            // complete before it is read, instead of relying on printTree to flush.
            writer.flush();

            // NOTE(review): the closing "</s>" is appended by hand, presumably because the
            // printed XML leaves the sentence element open -- confirm against the TreePrint version in use.
            string xmlOutput = buffer.toString() + "</s>";

            return ParsedStatementFactory.CreateParsedStatement(xmlOutput);
        }
Beispiel #3
0
        /// <summary>
        /// Tokenizes and parses <paramref name="input"/>, writes the typed dependencies
        /// and the tree to the console, and wraps the parse tree in a <c>ParsedStatement</c>.
        /// </summary>
        /// <param name="input">Raw text to tokenize and parse.</param>
        /// <returns>A new <c>ParsedStatement</c> built from the parse tree.</returns>
        public ParsedStatement ParseSentence(string input)
        {
            var tokenizer = _tokenizerFactory.getTokenizer(new StringReader(input));
            var tree      = _lp.apply(tokenizer.tokenize());

            // Print the typed dependencies of the parse, surrounded by blank lines.
            var dependencies = _structureFactory.newGrammaticalStructure(tree).typedDependenciesCCprocessed();
            System.Console.WriteLine("\n{0}\n", dependencies);

            // Print the tree in the "penn" and "typedDependenciesCollapsed" formats.
            new TreePrint("penn,typedDependenciesCollapsed").printTree(tree);

            return new ParsedStatement(tree);
        }
Beispiel #4
0
        /// <summary>
        /// Reads <paramref name="fileName"/> through a <c>DocumentPreprocessor</c> and, for
        /// every sentence it yields, prints the Penn tree followed by the typed dependencies.
        /// </summary>
        /// <param name="lp">Parser applied to each sentence.</param>
        /// <param name="fileName">File handed to the <c>DocumentPreprocessor</c>.</param>
        public static void DemoDP(LexicalizedParser lp, string fileName)
        {
            GrammaticalStructureFactory dependencyFactory =
                new PennTreebankLanguagePack().grammaticalStructureFactory();

            // DocumentPreprocessor handles sentence segmentation and tokenization here;
            // a custom tokenizer could be created and passed to it instead.
            var preprocessor = new DocumentPreprocessor(fileName);
            foreach (List words in preprocessor)
            {
                Tree tree = lp.apply(words);
                tree.pennPrint();

                GrammaticalStructure structure    = dependencyFactory.newGrammaticalStructure(tree);
                java.util.List       dependencies = structure.typedDependenciesCCprocessed(true);
                Console.WriteLine("\n{0}\n", dependencies);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Parses <paramref name="sentence"/> and returns one <c>TargetCandidate</c> for
        /// every subtree of the parse whose label value is "NP".
        /// </summary>
        /// <param name="sentence">Tokens of the sentence to parse.</param>
        /// <param name="target">Passed through unchanged to <c>NounPhrase.SetSentence</c>.</param>
        /// <returns>The candidates in the order their subtrees appear in <c>subTreeList()</c>; empty if none is labelled "NP".</returns>
        public List <TargetCandidate> GetAllNounPhrases(string[] sentence, string[] target)
        {
            var parseTree = lexParser.apply(SentenceUtils.toCoreLabelList(sentence));
            var structure = grammaticalStructureFactory.newGrammaticalStructure(parseTree);
            var typedDeps = structure.typedDependenciesCCprocessed();

            var candidates = new List <TargetCandidate>();
            var nodes      = parseTree.subTreeList();

            for (int index = 0; index < nodes.size(); index++)
            {
                var node = (Tree)nodes.get(index);
                if (node.label().value() != "NP")
                {
                    continue;
                }

                // A fresh NounPhrase per match: SetPhrase mutates it with this subtree's words.
                NounPhrase nounPhrase = NounPhrase.SetSentence(sentence, parseTree, typedDeps, target);
                nounPhrase.SetPhrase(SentenceUtils.listToString(node.yield()));
                candidates.Add(new TargetCandidate(nounPhrase, caching));
            }

            return candidates;
        }