示例#1
0
        /// <summary>
        /// Shows two ways of feeding the parser: a sentence that is already
        /// split into words, and raw text that is run through an explicit
        /// tokenizer as a single sentence.
        /// </summary>
        /// <remarks>
        /// The first parse is printed with <c>PennPrint</c>.  For the second
        /// parse the CC-processed typed dependencies are written to the
        /// console, and the tree is then printed once more through a
        /// <c>TreePrint</c> created with the option string
        /// <c>"penn,typedDependenciesCollapsed"</c>; the options given to
        /// <c>TreePrint</c> decide what it prints.  The sample sentences are
        /// English.
        /// </remarks>
        /// <param name="lp">Parser applied to both sample sentences.</param>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // Case 1: the words are supplied already correctly tokenized.
            string[]         easyWords  = new string[] { "This", "is", "an", "easy", "sentence", "." };
            IList<CoreLabel> easyLabels = SentenceUtils.ToCoreLabelList(easyWords);
            Tree             easyTree   = lp.Apply(easyLabels);

            easyTree.PennPrint();
            System.Console.Out.WriteLine();

            // Case 2: raw text goes through an explicitly constructed tokenizer.
            string rawText = "This is another sentence.";
            ITokenizerFactory<CoreLabel> labelTokenizers = PTBTokenizer.Factory(new CoreLabelTokenFactory(), string.Empty);
            IList<CoreLabel>             rawLabels       = labelTokenizers.GetTokenizer(new StringReader(rawText)).Tokenize();
            Tree                         rawTree         = lp.Apply(rawLabels);

            // The language pack is taken from the parser itself
            // (a PennTreebankLanguagePack for English).
            ITreebankLanguagePack  languagePack = lp.TreebankLanguagePack();
            IList<TypedDependency> dependencies = languagePack.GrammaticalStructureFactory()
                                                              .NewGrammaticalStructure(rawTree)
                                                              .TypedDependenciesCCprocessed();

            System.Console.Out.WriteLine(dependencies);
            System.Console.Out.WriteLine();

            // A TreePrint can print the tree and its dependencies in one go.
            new TreePrint("penn,typedDependenciesCollapsed").PrintTree(rawTree);
        }
        /// <summary>
        /// Builds a dependency graph from a hard-coded bracketed tree and logs
        /// every match of a Semgrex pattern against that graph.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        public static void Main(string[] args)
        {
            // A tree normally comes from a parser or from reading a treebank;
            // a literal is used here purely as an example.
            Tree tree = Tree.ValueOf("(ROOT  (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))");

            // One-shot creation of English uncollapsed dependencies as a
            // SemanticGraph.  When many graphs are needed, go through a
            // GrammaticalStructureFactory and its intermediate
            // GrammaticalStructure instead, as shown right below.
            SemanticGraph graph = SemanticGraphFactory.GenerateUncollapsedDependencies(tree);

            // Other language parameters (for instance the Chinese ones) could be
            // substituted here; the original note names English and Chinese as
            // the supported languages as of 2014.
            ITreebankLangParserParams    parserParams     = new EnglishTreebankParserParams();
            IGrammaticalStructureFactory structureFactory = parserParams.TreebankLanguagePack().GrammaticalStructureFactory(
                parserParams.TreebankLanguagePack().PunctuationWordRejectFilter(),
                parserParams.TypedDependencyHeadFinder());
            GrammaticalStructure         structure        = structureFactory.NewGrammaticalStructure(tree);

            log.Info(graph);

            SemgrexMatcher matcher = SemgrexPattern.Compile("{}=A <<nsubj {}=B").Matcher(graph);

            // Expected to yield two results on the tree above: "likes" is an
            // ancestor of both "dog" and "my" via the nsubj relation.
            for (bool found = matcher.Find(); found; found = matcher.Find())
            {
                string line = matcher.GetNode("A") + " <<nsubj " + matcher.GetNode("B");
                log.Info(line);
            }
        }
示例#3
0
        /// <summary>
        /// Reads a file, splits it into tokenized sentences and prints a parse
        /// tree for every sentence.
        /// </summary>
        /// <remarks>
        /// Each sentence produced by <c>DocumentPreprocessor</c> is parsed and
        /// printed with <c>PennPrint</c>.  When the parser's language pack
        /// reports support for grammatical structures, the CC-processed typed
        /// dependencies of the sentence are printed as well; otherwise only
        /// the trees are printed.  The language pack is taken from the parser,
        /// so nothing here is tied to one particular language.
        /// </remarks>
        /// <param name="lp">Parser applied to every sentence of the file.</param>
        /// <param name="filename">File handed to <c>DocumentPreprocessor</c>.</param>
        public static void DemoDP(LexicalizedParser lp, string filename)
        {
            // The language pack of the loaded model
            // (a PennTreebankLanguagePack for English).
            ITreebankLanguagePack tlp = lp.TreebankLanguagePack();

            // Stays null when the language pack has no grammatical structures;
            // the dependency output below is skipped in that case.
            IGrammaticalStructureFactory gsf = null;

            if (tlp.SupportsGrammaticalStructures())
            {
                gsf = tlp.GrammaticalStructureFactory();
            }
            // DocumentPreprocessor loads, sentence-segments and tokenizes the
            // file.  A custom tokenizer could also be created and passed to it.
            foreach (IList<IHasWord> sentence in new DocumentPreprocessor(filename))
            {
                Tree parse = lp.Apply(sentence);
                parse.PennPrint();
                System.Console.Out.WriteLine();
                if (gsf != null)
                {
                    GrammaticalStructure gs = gsf.NewGrammaticalStructure(parse);
                    // Typed generically, matching the other callers of
                    // TypedDependenciesCCprocessed(); a generic list does not
                    // convert to the non-generic ICollection used before.
                    IList<TypedDependency> tdl = gs.TypedDependenciesCCprocessed();
                    System.Console.Out.WriteLine(tdl);
                    System.Console.Out.WriteLine();
                }
            }
        }
        // TODO: when this method throws an exception (for whatever reason)
        // a waiting client might hang.  There should be some graceful
        // handling of that.
        /// <summary>
        /// Parses <paramref name="arg"/> and writes the requested typed
        /// dependencies to <paramref name="outStream"/>, one per line, encoded
        /// as UTF-8.
        /// </summary>
        /// <remarks>
        /// Nothing is written when <c>Parse</c> returns <c>null</c>.
        /// </remarks>
        /// <param name="arg">Text handed to <c>Parse</c>.</param>
        /// <param name="outStream">Destination stream; it is flushed but not closed here.</param>
        /// <param name="commandArgs">
        /// Name of the dependency type, matched without regard to case.  Only
        /// <c>COLLAPSED_TREE</c> is implemented.
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.NotSupportedException">
        /// <paramref name="commandArgs"/> names a dependency type that is not implemented.
        /// </exception>
        public virtual void HandleDependencies(string arg, OutputStream outStream, string commandArgs)
        {
            Tree tree = Parse(arg, false);

            if (tree == null)
            {
                return;
            }
            // TODO: this might throw an exception if the parser doesn't support dependencies.  Handle that cleaner?
            GrammaticalStructure gs = parser.GetTLPParams().GetGrammaticalStructure(
                tree,
                parser.TreebankLanguagePack().PunctuationWordRejectFilter(),
                parser.GetTLPParams().TypedDependencyHeadFinder());
            ICollection<TypedDependency> deps;

            // The command keyword is a protocol token, not user-facing text, so
            // it is upper-cased with the invariant culture rather than the
            // current thread culture.
            switch (commandArgs.ToUpperInvariant())
            {
            case "COLLAPSED_TREE":
            {
                deps = gs.TypedDependenciesCollapsedTree();
                break;
            }

            default:
            {
                throw new NotSupportedException("Dependencies type not implemented: " + commandArgs);
            }
            }
            // The writer wraps a stream owned by the caller, so it is only
            // flushed here and deliberately left open.
            OutputStreamWriter osw = new OutputStreamWriter(outStream, "utf-8");

            foreach (TypedDependency dep in deps)
            {
                osw.Write(dep.ToString());
                osw.Write("\n");
            }
            osw.Flush();
        }
示例#5
0
        /// <summary>
        /// Parses two sample sentences, one given as pre-tokenized words and
        /// one tokenized on the fly, and prints the resulting trees and typed
        /// dependencies.
        /// </summary>
        /// <param name="lp">Parser applied to both sample sentences.</param>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // First input: words that are already correctly tokenized.
            var easyWords = new[] { "This", "is", "an", "easy", "sentence", "." };

            java.util.List easyLabels = Sentence.toCoreLabelList(easyWords);
            Tree           easyTree   = lp.apply(easyLabels);

            easyTree.pennPrint();

            // Second input: raw text split by an explicitly created tokenizer.
            const string     RawText         = "This is another sentence.";
            TokenizerFactory labelTokenizers = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

            java.util.List rawLabels = labelTokenizers.getTokenizer(new StringReader(RawText)).tokenize();
            Tree           rawTree   = lp.apply(rawLabels);

            // CC-processed typed dependencies of the second parse.
            GrammaticalStructureFactory structureFactory = new PennTreebankLanguagePack().grammaticalStructureFactory();

            java.util.List dependencies = structureFactory.newGrammaticalStructure(rawTree).typedDependenciesCCprocessed();
            Console.WriteLine("\n{0}\n", dependencies);

            // Finally print the same tree through a TreePrint.
            new TreePrint("penn,typedDependenciesCollapsed").printTree(rawTree);
        }
示例#6
0
 /// <summary>
 /// Stores <paramref name="tg"/> as the <code>GrammaticalStructure</code>
 /// this node belongs to, then does the same for every child node,
 /// recursively, so that all descendants end up pointing at it.
 /// </summary>
 /// <param name="tg">Structure to record on this node and on its descendants.</param>
 protected void SetTreeGraph(GrammaticalStructure tg)
 {
     // This node first, then the subtree below it.
     this.tg = tg;

     foreach (TreeGraphNode descendant in _children)
     {
         descendant.SetTreeGraph(tg);
     }
 }
        /// <summary>Reads typed dependencies stored in CoNLL-X format.</summary>
        /// <param name="filename">File handed to <c>GrammaticalStructure.ReadCoNLLXGrammaticalStructureCollection</c>.</param>
        /// <returns>
        /// One collection of typed dependencies per grammatical structure read
        /// from the file, in the order the structures were returned.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        protected internal static IList<ICollection<TypedDependency>> ReadDepsCoNLLX(string filename)
        {
            IList<GrammaticalStructure> structures = GrammaticalStructure.ReadCoNLLXGrammaticalStructureCollection(
                filename,
                new FakeShortNameToGRel(),
                new GraphLessGrammaticalStructureFactory());

            // Pre-sized to the number of structures read.
            IList<ICollection<TypedDependency>> result = new List<ICollection<TypedDependency>>(structures.Count);

            foreach (GrammaticalStructure structure in structures)
            {
                result.Add(structure.TypedDependencies());
            }

            return result;
        }
        /// <summary>
        /// Runs the dependency parser on a sentence and returns the enhanced++
        /// dependency graph built from the predicted structure.
        /// </summary>
        /// <param name="sentence">Sentence handed to <c>parser.Predict</c>.</param>
        /// <returns>
        /// The result of
        /// <c>SemanticGraphFactory.GenerateEnhancedPlusPlusDependencies</c> for
        /// the predicted <c>GrammaticalStructure</c>.
        /// </returns>
        private static SemanticGraph GetParse(ICoreMap sentence)
        {
            GrammaticalStructure gs = parser.Predict(sentence);

            // The former local holding GrammaticalStructure.Extras.Maximal was
            // only referenced by commented-out code, so both were removed.
            return SemanticGraphFactory.GenerateEnhancedPlusPlusDependencies(gs);
        }
示例#9
0
        /// <summary>
        /// Parses one sentence and stores five dependency graphs on it.
        /// </summary>
        /// <remarks>
        /// The structure predicted by <c>parser</c> is converted with
        /// <c>SemanticGraphFactory.MakeFromTree</c> once per mode (Collapsed,
        /// Basic, Ccprocessed, Enhanced, EnhancedPlusPlus), each time with
        /// <c>extraDependencies</c> and a <c>null</c> filter.  All five graphs
        /// are built before any of them is attached to the sentence.
        /// </remarks>
        /// <param name="annotation">Not read by this method.</param>
        /// <param name="sentence">Sentence to parse; it receives the five annotations.</param>
        protected internal override void DoOneSentence(Annotation annotation, ICoreMap sentence)
        {
            GrammaticalStructure predicted = parser.Predict(sentence);

            // Phase 1: build every graph flavour from the same prediction.
            SemanticGraph collapsedGraph = SemanticGraphFactory.MakeFromTree(
                predicted, SemanticGraphFactory.Mode.Collapsed, extraDependencies, null);
            SemanticGraph basicGraph = SemanticGraphFactory.MakeFromTree(
                predicted, SemanticGraphFactory.Mode.Basic, extraDependencies, null);
            SemanticGraph ccProcessedGraph = SemanticGraphFactory.MakeFromTree(
                predicted, SemanticGraphFactory.Mode.Ccprocessed, extraDependencies, null);
            SemanticGraph enhancedGraph = SemanticGraphFactory.MakeFromTree(
                predicted, SemanticGraphFactory.Mode.Enhanced, extraDependencies, null);
            SemanticGraph enhancedPlusPlusGraph = SemanticGraphFactory.MakeFromTree(
                predicted, SemanticGraphFactory.Mode.EnhancedPlusPlus, extraDependencies, null);

            // Phase 2: attach each graph under its annotation key.
            sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation), collapsedGraph);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation), basicGraph);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation), ccProcessedGraph);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation), enhancedGraph);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation), enhancedPlusPlusGraph);
        }
示例#10
0
        /// <summary>
        /// Parses every sentence of a file and prints, for each one, its tree
        /// followed by its CC-processed typed dependencies.
        /// </summary>
        /// <param name="lp">Parser applied to every sentence.</param>
        /// <param name="fileName">File handed to <c>DocumentPreprocessor</c>.</param>
        public static void DemoDP(LexicalizedParser lp, string fileName)
        {
            // The dependency factory comes from a Penn Treebank language pack.
            GrammaticalStructureFactory structureFactory = new PennTreebankLanguagePack().grammaticalStructureFactory();

            // DocumentPreprocessor loads, sentence-segments and tokenizes the
            // file.  A tokenizer could also be created here and passed to it.
            foreach (List sentence in new DocumentPreprocessor(fileName))
            {
                Tree sentenceTree = lp.apply(sentence);
                sentenceTree.pennPrint();

                java.util.List dependencies = structureFactory.newGrammaticalStructure(sentenceTree)
                                                              .typedDependenciesCCprocessed(true);
                Console.WriteLine("\n{0}\n", dependencies);
            }
        }
        /// <summary>
        /// Tags and dependency-parses a fixed sample sentence and logs the
        /// resulting grammatical structure.
        /// </summary>
        /// <remarks>
        /// Recognised options are <c>-tagger &lt;path&gt;</c> and
        /// <c>-model &lt;path&gt;</c>.  Without them the tagger path defaults to
        /// the english-left3words-distsim model and the parser model to
        /// <c>DependencyParser.DefaultModel</c>.
        /// </remarks>
        /// <param name="args">Command-line arguments, read as option/value pairs.</param>
        /// <exception cref="System.ArgumentException">
        /// An option is unknown, or a known option is not followed by a value.
        /// </exception>
        public static void Main(string[] args)
        {
            string modelPath  = DependencyParser.DefaultModel;
            string taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";

            // Every recognised option consumes two entries: the flag and its value.
            for (int argIndex = 0; argIndex < args.Length; argIndex += 2)
            {
                string option = args[argIndex];

                switch (option)
                {
                case "-tagger":
                case "-model":
                {
                    break;
                }

                default:
                {
                    throw new ArgumentException("Unknown argument " + option);
                }
                }
                // A trailing flag without a value used to index past the end
                // of the array; report it as a usage error instead.
                if (argIndex + 1 >= args.Length)
                {
                    throw new ArgumentException("Missing value for argument " + option);
                }
                if (option == "-tagger")
                {
                    taggerPath = args[argIndex + 1];
                }
                else
                {
                    modelPath = args[argIndex + 1];
                }
            }
            string               text      = "I can almost always tell when movies use fake dinosaurs.";
            MaxentTagger         tagger    = new MaxentTagger(taggerPath);
            DependencyParser     parser    = DependencyParser.LoadFromModelFile(modelPath);
            DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));

            foreach (IList<IHasWord> sentence in tokenizer)
            {
                IList<TaggedWord>    tagged = tagger.TagSentence(sentence);
                GrammaticalStructure gs     = parser.Predict(tagged);
                // Log the typed dependencies of this sentence.
                log.Info(gs);
            }
        }
        /// <summary>Converts a basic UD tree into an enhanced++ UD graph.</summary>
        /// <param name="sg">Graph that is first turned into a <c>GrammaticalStructure</c>.</param>
        /// <returns>
        /// The graph produced by
        /// <c>SemanticGraphFactory.GenerateEnhancedPlusPlusDependencies</c> for
        /// that structure.
        /// </returns>
        private static SemanticGraph ConvertBasicToEnhancedPlusPlus(SemanticGraph sg)
        {
            // Two steps: graph -> grammatical structure -> enhanced++ graph.
            GrammaticalStructure structure = SemanticGraphToGrammaticalStructure(sg);
            SemanticGraph        enhanced  = SemanticGraphFactory.GenerateEnhancedPlusPlusDependencies(structure);

            return enhanced;
        }
示例#13
0
        /// <summary>This example shows a few more ways of providing input to a parser.</summary>
        /// <remarks>
        /// Usage: ParserDemo2 [grammar [textFile]]
        /// <para>
        /// Without a grammar argument the model
        /// <c>edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz</c> is loaded.
        /// With a text file argument the sentences are read from that file.
        /// Otherwise three built-in sentences are used: a pre-tokenized word
        /// list, a string tokenized with the language pack's default tokenizer,
        /// and a word list with part-of-speech tags supplied.  For every
        /// sentence the tree, the CC-processed typed dependencies, the labels
        /// of the yield and the tagged yield are printed.  Finally a plain
        /// string is parsed directly.
        /// </para>
        /// </remarks>
        /// <param name="args">Optional grammar path followed by an optional text file path.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            string grammar = args.Length > 0 ? args[0] : "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";

            string[]                      options = new string[] { "-maxLength", "80", "-retainTmpSubcategories" };
            LexicalizedParser             lp      = ((LexicalizedParser)LexicalizedParser.LoadModel(grammar, options));
            ITreebankLanguagePack         tlp     = lp.GetOp().Langpack();
            IGrammaticalStructureFactory  gsf     = tlp.GrammaticalStructureFactory();
            IEnumerable<IList<IHasWord>>  sentences;

            if (args.Length > 1)
            {
                // Sentences come from the file named by the second argument.
                DocumentPreprocessor    dp  = new DocumentPreprocessor(args[1]);
                IList<IList<IHasWord>>  tmp = new List<IList<IHasWord>>();
                foreach (IList<IHasWord> sentence in dp)
                {
                    tmp.Add(sentence);
                }
                sentences = tmp;
            }
            else
            {
                // Showing tokenization and parsing in code a couple of different ways.
                string[]        sent     = new string[] { "This", "is", "an", "easy", "sentence", "." };
                IList<IHasWord> sentence = new List<IHasWord>();
                foreach (string word in sent)
                {
                    sentence.Add(new Word(word));
                }
                string sent2 = ("This is a slightly longer and more complex " + "sentence requiring tokenization.");
                // Use the default tokenizer for this TreebankLanguagePack
                ITokenizer<IHasWord> toke      = tlp.GetTokenizerFactory().GetTokenizer(new StringReader(sent2));
                IList<IHasWord>      sentence2 = toke.Tokenize();
                string[] sent3 = new string[] { "It", "can", "can", "it", "." };
                string[] tag3  = new string[] { "PRP", "MD", "VB", "PRP", "." };
                // Parser gets second "can" wrong without help.
                // The list is typed IList<IHasWord> (not IList<TaggedWord>)
                // because IList<T> is invariant: a list of TaggedWord cannot be
                // added to the IList<IList<IHasWord>> below.  The elements are
                // still TaggedWord instances, so the supplied tags travel with
                // them.  NOTE(review): relies on TaggedWord implementing
                // IHasWord, as the original code already presumed.
                IList<IHasWord> sentence3 = new List<IHasWord>();
                for (int i = 0; i < sent3.Length; i++)
                {
                    sentence3.Add(new TaggedWord(sent3[i], tag3[i]));
                }
                Tree parse = lp.Parse(sentence3);
                parse.PennPrint();
                IList<IList<IHasWord>> tmp = new List<IList<IHasWord>>();
                tmp.Add(sentence);
                tmp.Add(sentence2);
                tmp.Add(sentence3);
                sentences = tmp;
            }
            foreach (IList<IHasWord> sentence_1 in sentences)
            {
                Tree parse = lp.Parse(sentence_1);
                parse.PennPrint();
                System.Console.Out.WriteLine();
                GrammaticalStructure   gs  = gsf.NewGrammaticalStructure(parse);
                IList<TypedDependency> tdl = gs.TypedDependenciesCCprocessed();
                System.Console.Out.WriteLine(tdl);
                System.Console.Out.WriteLine();
                System.Console.Out.WriteLine("The words of the sentence:");
                foreach (ILabel lab in parse.Yield())
                {
                    // CoreLabels get the value-map output format; any other
                    // label is printed as is.
                    if (lab is CoreLabel)
                    {
                        System.Console.Out.WriteLine(((CoreLabel)lab).ToString(CoreLabel.OutputFormat.ValueMap));
                    }
                    else
                    {
                        System.Console.Out.WriteLine(lab);
                    }
                }
                System.Console.Out.WriteLine();
                System.Console.Out.WriteLine(parse.TaggedYield());
                System.Console.Out.WriteLine();
            }
            // This method turns the String into a single sentence using the
            // default tokenizer for the TreebankLanguagePack.
            string sent3_1 = "This is one last test!";

            lp.Parse(sent3_1).PennPrint();
        }
 /// <summary>Builds the enhanced++ dependencies SemanticGraph of a structure.</summary>
 /// <param name="gs">Grammatical structure to convert.</param>
 /// <returns>
 /// The graph returned by <c>MakeFromTree</c> for mode
 /// <c>EnhancedPlusPlus</c>, with <c>Extras.None</c> and no filter.
 /// </returns>
 public static SemanticGraph GenerateEnhancedPlusPlusDependencies(GrammaticalStructure gs)
 {
     SemanticGraph enhancedPlusPlus = MakeFromTree(
         gs,
         SemanticGraphFactory.Mode.EnhancedPlusPlus,
         GrammaticalStructure.Extras.None,
         null);

     return enhancedPlusPlus;
 }
 /// <summary>Builds the CC-processed dependencies SemanticGraph of a structure.</summary>
 /// <param name="gs">Grammatical structure to convert.</param>
 /// <returns>
 /// The graph returned by <c>MakeFromTree</c> for mode <c>Ccprocessed</c>,
 /// with <c>Extras.None</c> and no filter.
 /// </returns>
 public static SemanticGraph GenerateCCProcessedDependencies(GrammaticalStructure gs)
 {
     SemanticGraph ccProcessed = MakeFromTree(
         gs,
         SemanticGraphFactory.Mode.Ccprocessed,
         GrammaticalStructure.Extras.None,
         null);

     return ccProcessed;
 }
 /// <summary>Builds the uncollapsed (basic) SemanticGraph of a structure.</summary>
 /// <param name="gs">Grammatical structure to convert.</param>
 /// <returns>
 /// The graph returned by <c>MakeFromTree</c> for mode <c>Basic</c>, with
 /// <c>Extras.None</c> and no filter.
 /// </returns>
 public static SemanticGraph GenerateUncollapsedDependencies(GrammaticalStructure gs)
 {
     SemanticGraph basic = MakeFromTree(
         gs,
         SemanticGraphFactory.Mode.Basic,
         GrammaticalStructure.Extras.None,
         null);

     return basic;
 }
 /// <summary>
 /// Converts a grammatical structure to a SemanticGraph using the defaults:
 /// mode <c>Basic</c>, <c>Extras.None</c> and no filter.
 /// </summary>
 /// <param name="structure">Grammatical structure to convert.</param>
 /// <returns>The graph returned by the four-argument <c>MakeFromTree</c>.</returns>
 public static SemanticGraph MakeFromTree(GrammaticalStructure structure)
 {
     SemanticGraph graph = MakeFromTree(
         structure,
         SemanticGraphFactory.Mode.Basic,
         GrammaticalStructure.Extras.None,
         null);

     return graph;
 }
 /// <summary>
 /// Converts a grammatical structure to a SemanticGraph, with the extras
 /// setting given as a flag.
 /// </summary>
 /// <param name="tree">Grammatical structure to convert.</param>
 /// <param name="mode">Kind of dependencies to generate.</param>
 /// <param name="includeExtras">
 /// <c>true</c> is forwarded as <c>Extras.Maximal</c>, <c>false</c> as
 /// <c>Extras.None</c>.
 /// </param>
 /// <param name="filter">Passed through unchanged; may be <c>null</c>.</param>
 /// <returns>The graph returned by the <c>Extras</c>-based overload.</returns>
 public static SemanticGraph MakeFromTree(GrammaticalStructure tree, SemanticGraphFactory.Mode mode, bool includeExtras, IPredicate<TypedDependency> filter)
 {
     GrammaticalStructure.Extras extras;

     if (includeExtras)
     {
         extras = GrammaticalStructure.Extras.Maximal;
     }
     else
     {
         extras = GrammaticalStructure.Extras.None;
     }

     return MakeFromTree(tree, mode, extras, filter);
 }
        // TODO (carried over): an earlier note asked for boolean arguments to
        // become enums similar to Mode.  In this overload includeExtras is
        // already passed as GrammaticalStructure.Extras.
        /// <summary>
        /// Builds a SemanticGraph from the typed dependencies of a grammatical
        /// structure.
        /// </summary>
        /// <remarks>
        /// <paramref name="includeExtras"/> is forwarded only for the modes
        /// <c>Collapsed</c>, <c>Ccprocessed</c> and <c>Basic</c>; the modes
        /// <c>Enhanced</c>, <c>EnhancedPlusPlus</c> and <c>CollapsedTree</c>
        /// call accessors that take no extras argument.  An empty dependency
        /// collection is not special-cased: it is handed to the
        /// <c>SemanticGraph</c> constructor like any other.
        /// </remarks>
        /// <param name="gs">Structure whose typed dependencies are read.</param>
        /// <param name="mode">Selects which typed-dependency accessor of <paramref name="gs"/> is called.</param>
        /// <param name="includeExtras">Extras setting, used by the modes listed in the remarks.</param>
        /// <param name="filter">
        /// When not <c>null</c>, only dependencies for which <c>Test</c>
        /// returns <c>true</c> are kept.
        /// </param>
        /// <returns>A new <c>SemanticGraph</c> over the selected, optionally filtered, dependencies.</returns>
        /// <exception cref="System.ArgumentException"><paramref name="mode"/> is not one of the handled values.</exception>
        public static SemanticGraph MakeFromTree(GrammaticalStructure gs, SemanticGraphFactory.Mode mode, GrammaticalStructure.Extras includeExtras, IPredicate<TypedDependency> filter)
        {
            ICollection<TypedDependency> deps;

            switch (mode)
            {
            case SemanticGraphFactory.Mode.Enhanced:
            {
                deps = gs.TypedDependenciesEnhanced();
                break;
            }

            case SemanticGraphFactory.Mode.EnhancedPlusPlus:
            {
                deps = gs.TypedDependenciesEnhancedPlusPlus();
                break;
            }

            case SemanticGraphFactory.Mode.CollapsedTree:
            {
                deps = gs.TypedDependenciesCollapsedTree();
                break;
            }

            case SemanticGraphFactory.Mode.Collapsed:
            {
                deps = gs.TypedDependenciesCollapsed(includeExtras);
                break;
            }

            case SemanticGraphFactory.Mode.Ccprocessed:
            {
                deps = gs.TypedDependenciesCCprocessed(includeExtras);
                break;
            }

            case SemanticGraphFactory.Mode.Basic:
            {
                deps = gs.TypedDependencies(includeExtras);
                break;
            }

            default:
            {
                throw new ArgumentException("Unknown mode " + mode);
            }
            }
            if (filter != null)
            {
                // A plain List<T> is created directly: a generic factory call
                // without type arguments cannot have its element type inferred
                // from the assignment target.
                IList<TypedDependency> depsFiltered = new List<TypedDependency>();
                foreach (TypedDependency td in deps)
                {
                    if (filter.Test(td))
                    {
                        depsFiltered.Add(td);
                    }
                }
                deps = depsFiltered;
            }
            // there used to be an if clause that filtered out the case of empty
            // dependencies. However, I could not understand (or replicate) the error
            // it alluded to, and it led to empty dependency graphs for very short fragments,
            // which meant they were ignored by the RTE system. Changed. (pado)
            // See also the SemanticGraph constructor.
            return new SemanticGraph(deps);
        }
 /// <summary>Builds the collapsed dependencies SemanticGraph of a structure.</summary>
 /// <param name="gs">Grammatical structure to convert.</param>
 /// <param name="extras">Extras setting forwarded to <c>MakeFromTree</c>.</param>
 /// <returns>
 /// The graph returned by <c>MakeFromTree</c> for mode <c>Collapsed</c>
 /// with no filter.
 /// </returns>
 public static SemanticGraph GenerateCollapsedDependencies(GrammaticalStructure gs, GrammaticalStructure.Extras extras)
 {
     SemanticGraph collapsed = MakeFromTree(
         gs,
         SemanticGraphFactory.Mode.Collapsed,
         extras,
         null);

     return collapsed;
 }
示例#21
0
 /// <summary>
 /// Creates a <code>TreeGraphNode</code> that copies an existing tree (same
 /// tree structure and label values, but no shared storage) and attaches
 /// the copy to a graph.
 /// </summary>
 /// <remarks>
 /// Construction is delegated to the <c>(Tree, TreeGraphNode)</c>
 /// constructor with a <c>null</c> second argument; afterwards
 /// <c>SetTreeGraph</c> is called with <paramref name="graph"/>.
 /// </remarks>
 /// <param name="t">the tree to copy</param>
 /// <param name="graph">the graph of which this node is a part</param>
 public TreeGraphNode(Tree t, GrammaticalStructure graph)
     : this(t, (TreeGraphNode)null)
 {
     SetTreeGraph(graph);
 }
示例#22
0
 /// <summary>
 /// Creates a <code>TreeGraphNode</code> that copies an existing tree (same
 /// tree structure and label values, but no shared storage) and attaches
 /// the copy to a graph.
 /// </summary>
 /// <remarks>
 /// Construction is delegated to the <c>(Tree, TreeGraphNode)</c>
 /// constructor with a <c>null</c> second argument; afterwards
 /// <c>SetTreeGraph</c> is called with <paramref name="graph"/>.
 /// </remarks>
 /// <param name="t">the tree to copy</param>
 /// <param name="graph">the graph of which this node is a part</param>
 public TreeGraphNode(Tree t, GrammaticalStructure graph)
     : this(t, (TreeGraphNode)null)
 {
     SetTreeGraph(graph);
 }
示例#23
0
 /// <summary>
 /// Stores <paramref name="tg"/> as the <code>GrammaticalStructure</code>
 /// this node belongs to, then does the same for every child node,
 /// recursively, so that all descendants end up pointing at it.
 /// </summary>
 /// <param name="tg">Structure to record on this node and on its descendants.</param>
 protected void SetTreeGraph(GrammaticalStructure tg)
 {
     // This node first, then the subtree below it.
     this.tg = tg;

     foreach (TreeGraphNode descendant in _children)
     {
         descendant.SetTreeGraph(tg);
     }
 }