Ejemplo n.º 1
0
        /// <summary>
        /// Parses one sentence and stores its dependency graph on the sentence in five
        /// representations: collapsed, basic, CC-processed, enhanced and enhanced++.
        /// </summary>
        /// <param name="annotation">The enclosing annotation; not read by this method.</param>
        /// <param name="sentence">Sentence handed to the parser; receives the five dependency annotations.</param>
        protected internal override void DoOneSentence(Annotation annotation, ICoreMap sentence)
        {
            GrammaticalStructure structure = parser.Predict(sentence);

            // All graphs are built first; the sentence is only annotated afterwards.
            SemanticGraph collapsedGraph = SemanticGraphFactory.MakeFromTree(structure, SemanticGraphFactory.Mode.Collapsed, extraDependencies, null);
            SemanticGraph basicGraph = SemanticGraphFactory.MakeFromTree(structure, SemanticGraphFactory.Mode.Basic, extraDependencies, null);
            SemanticGraph ccProcessedGraph = SemanticGraphFactory.MakeFromTree(structure, SemanticGraphFactory.Mode.Ccprocessed, extraDependencies, null);
            SemanticGraph enhancedGraph = SemanticGraphFactory.MakeFromTree(structure, SemanticGraphFactory.Mode.Enhanced, extraDependencies, null);
            SemanticGraph enhancedPlusPlusGraph = SemanticGraphFactory.MakeFromTree(structure, SemanticGraphFactory.Mode.EnhancedPlusPlus, extraDependencies, null);

            sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation), collapsedGraph);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation), basicGraph);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation), ccProcessedGraph);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation), enhancedGraph);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation), enhancedPlusPlusGraph);
        }
Ejemplo n.º 2
0
        /// <summary>Applies the edit script to the matches of the Semgrex pattern in the given graph.</summary>
        /// <remarks>
        /// For every match that passes the optional predicate test, the input graph is duplicated
        /// (keeping the same nodes, since the matcher operates on them), each edit of the script is
        /// evaluated against that copy, and the copy is added to the result.
        /// Matching stops at the first match in which two named nodes are bound to the same graph node.
        /// TODO: create variant that returns set of expansions while matcher.find() returns true
        /// </remarks>
        /// <param name="sg">SemanticGraph to match against; edits are applied to duplicates, not to this graph object.</param>
        /// <returns>One edited graph per processed match; empty when no match was processed.</returns>
        /// <exception cref="System.Exception"/>
        public virtual ICollection <SemanticGraph> Execute(SemanticGraph sg)
        {
            ICollection <SemanticGraph> generated = new List <SemanticGraph>();
            SemgrexMatcher matcher = semgrexPattern.Matcher(sg);

            while (matcher.Find())
            {
                // Semgrex can bind two named nodes to the same graph node; detect that here.
                ICollection <IndexedWord> seen = Generics.NewHashSet();
                bool collision = false;
                foreach (string name in matcher.GetNodeNames())
                {
                    IndexedWord curr = matcher.GetNode(name);
                    if (seen.Contains(curr))
                    {
                        collision = true;
                        break;
                    }
                    seen.Add(curr);
                }
                if (collision)
                {
                    // NOTE(review): this ends the whole search rather than skipping only this match,
                    // which is what the code has always done; confirm that skipping was not intended.
                    break;
                }
                // When a predicate test is configured, only matches that pass it are edited.
                if (predicateTest != null && !predicateTest.Test(matcher))
                {
                    continue;
                }
                // Generate a new graph, since we don't want to mutilate the original graph.
                // We use the same nodes, since the matcher operates off of those.
                SemanticGraph tgt = SemanticGraphFactory.DuplicateKeepNodes(sg);
                nodeMap = Generics.NewHashMap();
                foreach (SsurgeonEdit edit in editScript)
                {
                    edit.Evaluate(tgt, matcher);
                }
                generated.Add(tgt);
            }
            return generated;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Verifies that Semgrex <c>lemma</c> constraints match once every node's lemma has been
        /// set to its word, both on a hand-written graph and on a graph generated from a parse tree.
        /// </summary>
        public virtual void TestLemma()
        {
            // Case 1: graph written directly in bracket notation.
            SemanticGraph handWritten = SemanticGraph.ValueOf("[ate subj>Bill dobj>[muffins compound>blueberry]]");
            foreach (IndexedWord node in handWritten.VertexSet())
            {
                node.SetLemma(node.Word());
            }
            RunTest("{lemma:ate}", handWritten, "ate");

            // Case 2: graph derived from a constituency tree.
            Tree parse = Tree.ValueOf("(ROOT (S (NP (PRP I)) (VP (VBP love) (NP (DT the) (NN display))) (. .)))");
            SemanticGraph fromTree = SemanticGraphFactory.GenerateCCProcessedDependencies(parse);
            foreach (IndexedWord node in fromTree.VertexSet())
            {
                node.SetLemma(node.Word());
            }

            // These three checks also cover a bizarre error a user found where
            // multiple copies of the same IndexedWord were created.
            RunTest("{}=Obj <dobj {lemma:love}=Pred", fromTree, "display/NN");
            RunTest("{}=Obj <dobj {}=Pred", fromTree, "display/NN");
            RunTest("{lemma:love}=Pred >dobj {}=Obj ", fromTree, "love/VBP");
        }
        /// <summary>Reads the next CoNLL document and arranges it into a coreference <c>Document</c>.</summary>
        /// <remarks>
        /// Per sentence, the tree annotation is either removed (when neither gold parses nor CoNLL
        /// replication are requested) or lemmatized and used to attach basic and enhanced dependency
        /// graphs. Tokens then receive speaker and utterance annotations, the pipeline is run, and gold
        /// and predicted mentions are extracted before the document is arranged.
        /// </remarks>
        /// <returns>The arranged document, or null when the reader has no further document.</returns>
        /// <exception cref="System.Exception">
        /// Thrown when <c>RecallErrors</c> fails with an IOException; the IOException is the inner exception.
        /// </exception>
        public override Document NextDoc()
        {
            CoNLL2011DocumentReader.Document conllDoc = reader.GetNextDocument();
            if (conllDoc == null)
            {
                return null;
            }
            Annotation       anno      = conllDoc.GetAnnotation();
            IList <ICoreMap> sentences = anno.Get(typeof(CoreAnnotations.SentencesAnnotation));

            foreach (ICoreMap sentence in sentences)
            {
                if (!Constants.UseGoldParses && !replicateCoNLL)
                {
                    // Drop the tree from the annotation.
                    // NOTE(review): presumably the pipeline run below supplies a fresh parse; confirm.
                    sentence.Remove(typeof(TreeCoreAnnotations.TreeAnnotation));
                }
                else
                {
                    Tree tree = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
                    treeLemmatizer.TransformTree(tree);
                    // Generate the dependency graphs; a failure here is logged and the sentence is
                    // left without dependency annotations.
                    try
                    {
                        SemanticGraph deps      = SemanticGraphFactory.MakeFromTree(tree, SemanticGraphFactory.Mode.Enhanced, GrammaticalStructure.Extras.None);
                        SemanticGraph basicDeps = SemanticGraphFactory.MakeFromTree(tree, SemanticGraphFactory.Mode.Basic, GrammaticalStructure.Extras.None);
                        sentence.Set(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation), basicDeps);
                        sentence.Set(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation), deps);
                    }
                    catch (Exception e)
                    {
                        logger.Log(Level.Warning, "Exception caught during extraction of Stanford dependencies. Will ignore and continue...", e);
                    }
                }
            }

            // Number utterances: the counter advances every time the speaker changes between
            // consecutive tokens. Tokens without a speaker get the empty string as speaker.
            string preSpeaker = null;
            int    utterance  = -1;

            foreach (CoreLabel token in anno.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                if (!token.ContainsKey(typeof(CoreAnnotations.SpeakerAnnotation)))
                {
                    token.Set(typeof(CoreAnnotations.SpeakerAnnotation), string.Empty);
                }
                string curSpeaker = token.Get(typeof(CoreAnnotations.SpeakerAnnotation));
                if (!curSpeaker.Equals(preSpeaker))
                {
                    utterance++;
                    preSpeaker = curSpeaker;
                }
                token.Set(typeof(CoreAnnotations.UtteranceAnnotation), utterance);
            }

            // Run pipeline
            stanfordProcessor.Annotate(anno);

            // Collect the tokens and tree of every sentence after the pipeline has run.
            IList <IList <CoreLabel> > allWords = new List <IList <CoreLabel> >();
            IList <Tree> allTrees = new List <Tree>();
            foreach (ICoreMap annotatedSentence in anno.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                allWords.Add(annotatedSentence.Get(typeof(CoreAnnotations.TokensAnnotation)));
                allTrees.Add(annotatedSentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation)));
            }

            // Initialize gold mentions and extract predicted mentions
            IList <IList <Mention> > allGoldMentions      = ExtractGoldMentions(conllDoc);
            IList <IList <Mention> > allPredictedMentions = mentionFinder.ExtractPredictedMentions(anno, maxID, dictionaries);
            try
            {
                RecallErrors(allGoldMentions, allPredictedMentions, anno);
            }
            catch (IOException e)
            {
                // System.Exception has no constructor taking only an exception, so pass the
                // message and keep the IOException as the inner exception.
                throw new Exception(e.Message, e);
            }
            Document doc = Arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);

            doc.conllDoc = conllDoc;
            return doc;
        }
Ejemplo n.º 5
0
 /// <summary>Runs coreference resolution over the annotation and stores the resulting coref chains on it.</summary>
 /// <remarks>
 /// The named entity tag granularity is switched to "coarse" for the duration of the call and is
 /// always switched back to "fine" before returning, including when an exception propagates.
 /// If the annotation has no SentencesAnnotation, an error is logged and nothing else is done.
 /// </remarks>
 /// <param name="annotation">Annotation to process; modified in place.</param>
 public virtual void Annotate(Annotation annotation)
 {
     // temporarily set the primary named entity tag to the coarse tag
     SetNamedEntityTagGranularity(annotation, "coarse");
     if (performMentionDetection)
     {
         mentionAnnotator.Annotate(annotation);
     }
     // Exceptions propagate to the caller unchanged; the finally block only restores the tag granularity.
     try
     {
         // we are only supporting the new annotation standard for this Annotator!
         if (!annotation.ContainsKey(typeof(CoreAnnotations.SentencesAnnotation)))
         {
             log.Error("this coreference resolution system requires SentencesAnnotation!");
             return;
         }
         // extract trees and sentence words
         IList <Tree> trees = new List <Tree>();
         IList <IList <CoreLabel> > sentences = new List <IList <CoreLabel> >();
         bool hasSpeakerAnnotations = false;
         foreach (ICoreMap sentence in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
         {
             IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
             sentences.Add(tokens);
             Tree tree = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
             trees.Add(tree);
             // locking here is crucial for correct threading!
             SemanticGraph dependencies = SemanticGraphFactory.MakeFromTree(tree, SemanticGraphFactory.Mode.Collapsed, GrammaticalStructure.Extras.None, null, true);
             sentence.Set(typeof(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation), dependencies);
             if (!hasSpeakerAnnotations)
             {
                 // check for speaker annotations; one annotated token is enough
                 foreach (CoreLabel t in tokens)
                 {
                     if (t.Get(typeof(CoreAnnotations.SpeakerAnnotation)) != null)
                     {
                         hasSpeakerAnnotations = true;
                         break;
                     }
                 }
             }
             MentionExtractor.MergeLabels(tree, tokens);
             MentionExtractor.InitializeUtterance(tokens);
         }
         if (hasSpeakerAnnotations)
         {
             annotation.Set(typeof(CoreAnnotations.UseMarkedDiscourseAnnotation), true);
         }
         // extract all possible mentions
         // this is created for each new annotation because it is not threadsafe
         RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(allowReparsing);
         IList <IList <Mention> >    allUnprocessedMentions = finder.ExtractPredictedMentions(annotation, 0, corefSystem.Dictionaries());
         // add the relevant info to mentions and order them for coref
         Document document = mentionExtractor.Arrange(annotation, sentences, trees, allUnprocessedMentions);
         IList <IList <Mention> >      orderedMentions = document.GetOrderedMentions();
         IDictionary <int, CorefChain> result          = corefSystem.CorefReturnHybridOutput(document);
         annotation.Set(typeof(CorefCoreAnnotations.CorefChainAnnotation), result);
         if (OldFormat)
         {
             IDictionary <int, CorefChain> oldResult = corefSystem.Coref(document);
             AddObsoleteCoreferenceAnnotations(annotation, orderedMentions, oldResult);
         }
     }
     finally
     {
         // restore to the fine-grained
         SetNamedEntityTagGranularity(annotation, "fine");
     }
 }
        /// <summary>
        /// Builds the enhanced++ dependency graph for the given graph by first converting it
        /// to a grammatical structure.
        /// </summary>
        /// <param name="sg">Graph to convert.</param>
        /// <returns>The graph produced by the enhanced++ generator.</returns>
        private static SemanticGraph ConvertBasicToEnhancedPlusPlus(SemanticGraph sg)
        {
            return SemanticGraphFactory.GenerateEnhancedPlusPlusDependencies(SemanticGraphToGrammaticalStructure(sg));
        }
Ejemplo n.º 7
0
        /// <summary>Prints out all matches of a semgrex pattern on a file of dependencies.</summary>
        /// <remarks>
        /// <para>
        /// Usage:<br />
        /// java edu.stanford.nlp.semgraph.semgrex.SemgrexPattern [args]
        /// <br />
        /// See the Help() function for a list of possible arguments to provide.
        /// </para>
        /// <para>
        /// Graphs are loaded from the tree files and CoNLL-U files named on the command line. Each graph
        /// with at least one match is reported in the selected output format.
        /// </para>
        /// </remarks>
        /// <param name="args">Command-line arguments; without a pattern the help text is shown and the process exits with code 2.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            // Every supported flag takes exactly one argument.
            IDictionary <string, int> flagMap = Generics.NewHashMap();

            flagMap[Pattern]            = 1;
            flagMap[TreeFile]           = 1;
            flagMap[Mode]               = 1;
            flagMap[Extras]             = 1;
            flagMap[ConlluFile]         = 1;
            flagMap[OutputFormatOption] = 1;
            IDictionary <string, string[]> argsMap = StringUtils.ArgsToMap(args, flagMap);

            // TODO: allow patterns to be extracted from a file
            if (!(argsMap.Contains(Pattern)) || argsMap[Pattern].Length == 0)
            {
                Help();
                System.Environment.Exit(2);
            }
            Edu.Stanford.Nlp.Semgraph.Semgrex.SemgrexPattern semgrex = Edu.Stanford.Nlp.Semgraph.Semgrex.SemgrexPattern.Compile(argsMap[Pattern][0]);
            string modeString = DefaultMode;

            if (argsMap.Contains(Mode) && argsMap[Mode].Length > 0)
            {
                // Invariant upper-casing: the value is looked up as an enum name, so the
                // current culture must not influence it.
                modeString = argsMap[Mode][0].ToUpperInvariant();
            }
            SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.ValueOf(modeString);
            string outputFormatString      = DefaultOutputFormat;

            if (argsMap.Contains(OutputFormatOption) && argsMap[OutputFormatOption].Length > 0)
            {
                outputFormatString = argsMap[OutputFormatOption][0].ToUpperInvariant();
            }
            SemgrexPattern.OutputFormat outputFormat = SemgrexPattern.OutputFormat.ValueOf(outputFormatString);
            bool useExtras = true;

            if (argsMap.Contains(Extras) && argsMap[Extras].Length > 0)
            {
                useExtras = bool.ValueOf(argsMap[Extras][0]);
            }
            IList <SemanticGraph> graphs = Generics.NewArrayList();

            // TODO: allow other sources of graphs, such as dependency files
            if (argsMap.Contains(TreeFile) && argsMap[TreeFile].Length > 0)
            {
                foreach (string treeFile in argsMap[TreeFile])
                {
                    log.Info("Loading file " + treeFile);
                    MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer());
                    treebank.LoadPath(treeFile);
                    foreach (Tree tree in treebank)
                    {
                        // TODO: allow other languages... this defaults to English
                        SemanticGraph graph = SemanticGraphFactory.MakeFromTree(tree, mode, useExtras ? GrammaticalStructure.Extras.Maximal : GrammaticalStructure.Extras.None);
                        graphs.Add(graph);
                    }
                }
            }
            if (argsMap.Contains(ConlluFile) && argsMap[ConlluFile].Length > 0)
            {
                CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
                foreach (string conlluFile in argsMap[ConlluFile])
                {
                    log.Info("Loading file " + conlluFile);
                    IEnumerator <SemanticGraph> it = reader.GetIterator(IOUtils.ReaderFromString(conlluFile));
                    while (it.MoveNext())
                    {
                        SemanticGraph graph = it.Current;
                        graphs.Add(graph);
                    }
                }
            }
            foreach (SemanticGraph graph_1 in graphs)
            {
                SemgrexMatcher matcher = semgrex.Matcher(graph_1);
                if (!matcher.Find())
                {
                    // Graphs without any match produce no output.
                    continue;
                }
                if (outputFormat == SemgrexPattern.OutputFormat.List)
                {
                    log.Info("Matched graph:" + Runtime.LineSeparator() + graph_1.ToString(SemanticGraph.OutputFormat.List));
                    // The first match was already found above, so report before advancing.
                    int matchNumber = 1;
                    do
                    {
                        log.Info("Match " + matchNumber + " at: " + matcher.GetMatch().ToString(CoreLabel.OutputFormat.ValueIndex));
                        IList <string> nodeNames = Generics.NewArrayList();
                        Sharpen.Collections.AddAll(nodeNames, matcher.GetNodeNames());
                        nodeNames.Sort();
                        foreach (string name in nodeNames)
                        {
                            log.Info("  " + name + ": " + matcher.GetNode(name).ToString(CoreLabel.OutputFormat.ValueIndex));
                        }
                        log.Info(" ");
                        // Number the matches 1, 2, 3, ... instead of labelling each one "Match 1".
                        matchNumber++;
                    }
                    while (matcher.Find());
                }
                else if (outputFormat == SemgrexPattern.OutputFormat.Offset)
                {
                    if (graph_1.VertexListSorted().IsEmpty())
                    {
                        continue;
                    }
                    // NOTE(review): this always prints the first CoNLL-U file name and assumes the
                    // CoNLL-U flag was given; confirm the behaviour for graphs loaded from tree files.
                    System.Console.Out.Printf("+%d %s%n", graph_1.VertexListSorted()[0].Get(typeof(CoreAnnotations.LineNumberAnnotation)), argsMap[ConlluFile][0]);
                }
            }
        }
        /// <summary>
        /// Exercises SemgrexMatcher.Find on several patterns: a pattern over a graph built from a tree,
        /// a disjunctive pattern over a hand-written graph, an aligned two-graph pattern, and a
        /// case-insensitive match whose bound nodes are asserted.
        /// </summary>
        /// <remarks>Only the final, case-insensitive match is asserted; the earlier matches are printed.</remarks>
        public virtual void TestFind()
        {
            SemanticGraph h = SemanticGraph.ValueOf("[married/VBN nsubjpass>Hughes/NNP auxpass>was/VBD prep_to>Gracia/NNP]");
            SemanticGraph t = SemanticGraph.ValueOf("[loved/VBD\nnsubj>Hughes/NNP\ndobj>[wife/NN poss>his/PRP$ appos>Gracia/NNP]\nconj_and>[obsessed/JJ\ncop>was/VBD\nadvmod>absolutely/RB\nprep_with>[Elicia/NN poss>his/PRP$ amod>little/JJ compound>daughter/NN]]]"
                                                    );
            string         s    = "(ROOT\n(S\n(NP (DT The) (NN chimney) (NNS sweeps))\n(VP (VBP do) (RB not)\n(VP (VB like)\n(S\n(VP (VBG working)\n(PP (IN on)\n(NP (DT an) (JJ empty) (NN stomach)))))))\n(. .)))";
            Tree           tree = Tree.ValueOf(s);
            SemanticGraph  sg   = SemanticGraphFactory.MakeFromTree(tree, SemanticGraphFactory.Mode.Collapsed, GrammaticalStructure.Extras.Maximal, null);
            SemgrexPattern pat  = SemgrexPattern.Compile("{}=gov ![>det {}] & > {word:/^(?!not).*$/}=dep");

            sg.PrettyPrint();
            pat.PrettyPrint();
            SemgrexMatcher mat = pat.Matcher(sg);

            // Print every (dep, gov) pair the first pattern binds.
            while (mat.Find())
            {
                string gov = mat.GetNode("gov").Word();
                string dep = mat.GetNode("dep").Word();
                System.Console.Out.WriteLine(dep + ' ' + gov);
            }
            SemgrexPattern pat2 = SemgrexPattern.Compile("{} [[>/nn|appos/ ({lemma:/wife|husband|partner/} >/poss/ {}=txtPartner)] | [<poss ({}=txtPartner >/nn|appos/ {lemma:/wife|husband|partner/})]" + "| [<nsubj ({$} >> ({word:/wife|husband|partner/} >poss {word:/his|her/} >/nn|appos/ {}))]]"
                                                         );
            SemgrexMatcher mat2 = pat2.Matcher(t);

            // Print the matched node of every match of the second pattern.
            while (mat2.Find())
            {
                string match = mat2.GetMatch().Word();
                System.Console.Out.WriteLine(match);
            }

            // Align the "Hughes" and "Gracia" nodes of h with the same-named nodes of t.
            Dictionary <IndexedWord, IndexedWord> map = new Dictionary <IndexedWord, IndexedWord>();

            map[h.GetNodeByWordPattern("Hughes")] = t.GetNodeByWordPattern("Hughes");
            map[h.GetNodeByWordPattern("Gracia")] = t.GetNodeByWordPattern("Gracia");
            Alignment      alignment = new Alignment(map, 0, string.Empty);
            SemgrexPattern fullPat   = SemgrexPattern.Compile("({}=partnerOne [[<prep_to ({word:married} >nsubjpass {}=partnerTwo)] | [<nsubjpass ({word:married} >prep_to {}=partnerTwo)]]) @ ({} [[>/nn|appos/ ({lemma:/wife|husband|partner/} >/poss/ {}=txtPartner)] | [<poss ({}=txtPartner >/nn|appos/ {lemma:/wife|husband|partner/})]"
                                                              + "| [<nsubj ({$} >> ({word:/wife|husband|partner/} >poss {word:/his|her/} >/nn|appos/ {}=txtPartner))]])");

            fullPat.PrettyPrint();
            SemgrexMatcher fullMat = fullPat.Matcher(h, alignment, t);

            if (fullMat.Find())
            {
                System.Console.Out.WriteLine("woo: " + fullMat.GetMatch().Word());
                System.Console.Out.WriteLine(fullMat.GetNode("txtPartner"));
                System.Console.Out.WriteLine(fullMat.GetNode("partnerOne"));
                System.Console.Out.WriteLine(fullMat.GetNode("partnerTwo"));
            }
            else
            {
                System.Console.Out.WriteLine("boo");
            }
            SemgrexPattern pat3 = SemgrexPattern.Compile("({word:LIKE}=parent >>/aux.*/ {word:/do/}=node)");

            System.Console.Out.WriteLine("pattern is ");
            pat3.PrettyPrint();
            System.Console.Out.WriteLine("tree is ");
            sg.PrettyPrint();
            // Case-insensitive matching: the upper-case "LIKE" in the pattern must match "like" in the graph.
            SemgrexMatcher mat3 = pat3.Matcher(sg, true);

            if (mat3.Find())
            {
                string parent = mat3.GetNode("parent").Word();
                string node   = mat3.GetNode("node").Word();
                System.Console.Out.WriteLine("Result: parent is " + parent + " and node is " + node);
                // NUnit takes the expected value first, then the actual value.
                NUnit.Framework.Assert.AreEqual("like", parent);
                NUnit.Framework.Assert.AreEqual("do", node);
            }
            else
            {
                NUnit.Framework.Assert.Fail("Expected the case-insensitive pattern to match the graph");
            }
        }