// just static main
        /// <summary>
        /// Small demo: builds a dependency graph from a hard-coded parse tree, logs it, and then
        /// logs every match of a Semgrex pattern that looks for nsubj descendants.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        public static void Main(string[] args)
        {
            // A hand-written bracketed parse stands in for a tree that would normally
            // come from a parser or from reading a treebank.
            string bracketedParse = "(ROOT  (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))";
            Tree tree = Tree.ValueOf(bracketedParse);

            // Direct route: English uncollapsed dependencies as a SemanticGraph.
            // When many SemanticGraphs are needed, prefer building a
            // GrammaticalStructureFactory once and generating the intermediate
            // GrammaticalStructure from it (shown below).
            SemanticGraph graph = SemanticGraphFactory.GenerateUncollapsedDependencies(tree);

            // Factory route.  The English params are used here; the original note says the
            // Chinese params (or any other supported language) could be used instead.
            ITreebankLangParserParams langParams = new EnglishTreebankParserParams();
            IGrammaticalStructureFactory structureFactory = langParams.TreebankLanguagePack().GrammaticalStructureFactory(langParams.TreebankLanguagePack().PunctuationWordRejectFilter(), langParams.TypedDependencyHeadFinder());
            GrammaticalStructure structure = structureFactory.NewGrammaticalStructure(tree);

            log.Info(graph);

            SemgrexPattern nsubjAncestor = SemgrexPattern.Compile("{}=A <<nsubj {}=B");
            SemgrexMatcher hits = nsubjAncestor.Matcher(graph);

            // Per the original author: two results are expected on this tree, because "likes"
            // is an ancestor of both "dog" and "my" via the nsubj relation.
            while (hits.Find())
            {
                IndexedWord nodeA = hits.GetNode("A");
                IndexedWord nodeB = hits.GetNode("B");
                log.Info(nodeA + " <<nsubj " + nodeB);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Builds a node from <c>nodeString</c>, adds it to the graph as a vertex, and registers
        /// it under <c>nodeName</c> via <c>AddNamedNode</c>.
        /// </summary>
        /// <param name="sg">Graph that receives the new vertex.</param>
        /// <param name="sm">Current match; not read by this edit.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            IndexedWord created = AddDep.FromCheapString(nodeString);
            sg.AddVertex(created);

            // Register the node by name after it has been added to the graph.
            AddNamedNode(created, nodeName);
        }
Beispiel #3
0
        /// <summary>
        /// Runs this pattern's edit script using the supplied Semgrex pattern in place of the
        /// one attached to this object.
        /// </summary>
        /// <remarks>
        /// One edited copy of the input graph is produced per accepted match.
        /// NOTE: Predicate tests are still active here, and any named nodes required for
        /// evaluation must be present.
        /// </remarks>
        /// <param name="sg">Graph to match against; each match is edited on a fresh copy.</param>
        /// <param name="overridePattern">Pattern used to find matches instead of the attached one.</param>
        /// <returns>The edited graph copies, one per match that passed the predicate test.</returns>
        /// <exception cref="System.Exception"/>
        public virtual ICollection <SemanticGraph> Execute(SemanticGraph sg, SemgrexPattern overridePattern)
        {
            SemgrexMatcher m = overridePattern.Matcher(sg);
            ICollection <SemanticGraph> results = new List <SemanticGraph>();

            while (m.Find())
            {
                // Skip matches rejected by the optional predicate.
                bool rejected = predicateTest != null && !predicateTest.Test(m);
                if (rejected)
                {
                    continue;
                }

                // The named-node map is reset for every match, since each match's edits
                // live in their own graph.
                nodeMap = Generics.NewHashMap();

                SemanticGraph edited = new SemanticGraph(sg);
                foreach (SsurgeonEdit step in editScript)
                {
                    step.Evaluate(edited, m);
                }
                results.Add(edited);
            }
            return results;
        }
        /// <summary>
        /// Removes every edge that points into the node registered under <c>nodeName</c>.
        /// </summary>
        /// <param name="sg">Graph whose edges are removed in place.</param>
        /// <param name="sm">Current match, used to resolve the named node.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            IndexedWord tgtNode = GetNamedNode(nodeName, sm);

            // Copy the incoming edges before removing anything: deleting edges from the graph
            // while still enumerating the graph's own edge iterable modifies the collection
            // under the enumerator.
            System.Collections.Generic.List <SemanticGraphEdge> incoming = new System.Collections.Generic.List <SemanticGraphEdge>();
            foreach (SemanticGraphEdge edge in sg.IncomingEdgeIterable(tgtNode))
            {
                incoming.Add(edge);
            }

            foreach (SemanticGraphEdge edge in incoming)
            {
                sg.RemoveEdge(edge);
            }
        }
        /// <summary>
        /// Resolves each name in <c>newRootNames</c> to a node and installs the resulting list
        /// as the graph's roots.
        /// </summary>
        /// <param name="sg">Graph whose roots are replaced.</param>
        /// <param name="sm">Current match, used to resolve the named nodes.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            IList <IndexedWord> resolvedRoots = new List <IndexedWord>();

            foreach (string candidateName in newRootNames)
            {
                // Whatever GetNamedNode returns is added as-is; no filtering happens here.
                IndexedWord resolved = GetNamedNode(candidateName, sm);
                resolvedRoots.Add(resolved);
            }

            sg.SetRoots(resolvedRoots);
        }
        /// <summary>Looks up a node by name for the current match.</summary>
        /// <remarks>
        /// The matcher is consulted first.  If it has no node under that name and an owning
        /// pattern is available, the owning pattern is asked instead, since the node may have
        /// been created by an earlier edit rather than matched.
        /// </remarks>
        /// <param name="nodeName">Name of the node to look up.</param>
        /// <param name="sm">Matcher for the current match.</param>
        /// <returns>
        /// The matcher's node if it has one; otherwise the owning pattern's node when an owning
        /// pattern exists; otherwise <c>null</c>.
        /// </returns>
        public virtual IndexedWord GetNamedNode(string nodeName, SemgrexMatcher sm)
        {
            IndexedWord fromMatcher = sm.GetNode(nodeName);

            // Either the matcher answered, or there is nobody else to ask.
            if (fromMatcher != null || GetOwningPattern() == null)
            {
                return fromMatcher;
            }
            return GetOwningPattern().GetNamedNode(nodeName);
        }
 /// <summary>
 /// Evaluates the contained predicates against the match, succeeding as soon as one of
 /// them succeeds.
 /// </summary>
 /// <param name="matcher">Matcher handed to each contained predicate.</param>
 /// <returns>
 /// <c>true</c> if at least one contained predicate returns true; <c>false</c> if none do
 /// (including when there are no predicates).
 /// </returns>
 /// <exception cref="System.Exception"/>
 public virtual bool Test(SemgrexMatcher matcher)
 {
     bool anySatisfied = false;

     foreach (ISsurgPred candidate in this)
     {
         if (candidate.Test(matcher))
         {
             // The first success decides the outcome; later predicates are not evaluated.
             anySatisfied = true;
             break;
         }
     }
     return anySatisfied;
 }
Beispiel #8
0
        /// <summary>
        /// Removes the edge between the named governor and dependent whose relation is the one
        /// the matcher recorded under <c>edgeName</c>.  Does nothing if no such edge is found.
        /// </summary>
        /// <param name="sg">Graph the edge is removed from.</param>
        /// <param name="sm">Current match, supplying the relation string and the named nodes.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            string      relnText  = sm.GetRelnString(edgeName);
            IndexedWord governor  = GetNamedNode(govName, sm);
            IndexedWord dependent = GetNamedNode(depName, sm);

            SemanticGraphEdge target = sg.GetEdge(governor, dependent, GrammaticalRelation.ValueOf(relnText));
            if (target == null)
            {
                return;
            }
            sg.RemoveEdge(target);
        }
Beispiel #9
0
        /// <summary>
        /// Copies the prototype node, adds the copy to the graph, and links it as a dependent
        /// of the matched governor node.
        /// </summary>
        /// <remarks>
        /// The new node takes its document id and sentence index from the governor, and its
        /// index from the node returned by <c>SemanticGraphUtils.LeftMostChildVertice</c>.
        /// TODO: figure out how to specify where in the sentence this node goes.
        /// TODO: determine if we should be copying an IndexedWord, or working just with a FeatureLabel.
        /// TODO: bombproof if this gov, dep, and reln already exist.
        /// </remarks>
        /// <param name="sg">Graph that receives the new vertex and edge.</param>
        /// <param name="sm">Current match; the governor is read from it directly by name.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            IndexedWord governor = sm.GetNode(govNodeName);
            IndexedWord inserted = new IndexedWord(newNodePrototype);

            // cheap En-specific hack for placing copula (beginning of governing phrase)
            int insertionIndex = SemanticGraphUtils.LeftMostChildVertice(governor, sg).Index();

            inserted.SetDocID(governor.DocID());
            inserted.SetIndex(insertionIndex);
            inserted.SetSentIndex(governor.SentIndex());

            sg.AddVertex(inserted);
            sg.AddEdge(governor, inserted, relation, weight, false);
        }
Beispiel #10
0
        /// <summary>
        /// Removes every vertex for which the graph reports no path to a root
        /// (<c>GetPathToRoot</c> returns <c>null</c>).
        /// </summary>
        /// <param name="sg">Graph that is pruned in place.</param>
        /// <param name="sm">Current match; not read by this edit.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            // Iterate over a copy of the vertex set so vertices can be removed inside the loop.
            IList <IndexedWord> snapshot = new List <IndexedWord>(sg.VertexSet());

            foreach (IndexedWord candidate in snapshot)
            {
                if (sg.GetPathToRoot(candidate) != null)
                {
                    continue;
                }
                sg.RemoveVertex(candidate);
            }
        }
Beispiel #11
0
        /// <summary>Returns whether any of the given patterns match this graph.</summary>
        /// <remarks>
        /// Before matching, tokens inside the subject and object spans whose NER tag is "O" are
        /// overwritten with the subject/object type name.  A match counts only when its "entity"
        /// node falls inside the subject span and its "slot" node falls inside the object span.
        /// Any exception raised while matching ends the search with <c>false</c>.
        /// </remarks>
        /// <param name="sentence">Sentence whose tokens are read (and possibly re-tagged).</param>
        /// <param name="rulesForRel">Patterns to try, in iteration order.</param>
        /// <param name="input">Supplies the subject/object spans and types.</param>
        /// <param name="graph">Dependency graph to match against; null or empty yields false.</param>
        /// <returns><c>true</c> if some pattern has a match aligned with both spans.</returns>
        private bool Matches(ICoreMap sentence, ICollection <SemgrexPattern> rulesForRel, KBPRelationExtractor.KBPInput input, SemanticGraph graph)
        {
            bool nothingToSearch = graph == null || graph.IsEmpty();
            if (nothingToSearch)
            {
                return false;
            }

            IList <CoreLabel> sentenceTokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

            // Give untagged ("O") subject tokens the subject's type.
            foreach (int subjectPos in input.subjectSpan)
            {
                if (!"O".Equals(sentenceTokens[subjectPos].Ner()))
                {
                    continue;
                }
                sentenceTokens[subjectPos].SetNER(input.subjectType.name);
            }

            // Same for the object span.
            foreach (int objectPos in input.objectSpan)
            {
                if (!"O".Equals(sentenceTokens[objectPos].Ner()))
                {
                    continue;
                }
                sentenceTokens[objectPos].SetNER(input.objectType.name);
            }

            foreach (SemgrexPattern rule in rulesForRel)
            {
                try
                {
                    SemgrexMatcher hit = rule.Matcher(graph);
                    while (hit.Find())
                    {
                        IndexedWord entity = hit.GetNode("entity");
                        IndexedWord slot   = hit.GetNode("slot");

                        // Node indices are compared against Start() + 1 .. End() of each span.
                        bool entityInSubject = entity.Index() >= input.subjectSpan.Start() + 1 && entity.Index() <= input.subjectSpan.End();
                        bool slotInObject    = slot.Index() >= input.objectSpan.Start() + 1 && slot.Index() <= input.objectSpan.End();
                        if (entityInSubject && slotInObject)
                        {
                            return true;
                        }
                    }
                }
                catch (Exception)
                {
                    //Happens when graph has no roots
                    return false;
                }
            }
            return false;
        }
        /// <summary>
        /// Removes the node registered under <c>destroyNodeName</c> together with every node
        /// that <c>Crawl</c> collects from it, then resets the graph's roots.
        /// </summary>
        /// <remarks>Nothing happens when the seed node is not a vertex of the graph.</remarks>
        /// <param name="sg">Graph that is modified in place.</param>
        /// <param name="sm">Current match, used to resolve the seed node.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            IndexedWord seed = GetNamedNode(destroyNodeName, sm);

            // TODO: do not execute if seedNode if not in graph (or just error?)
            if (!sg.ContainsVertex(seed))
            {
                return;
            }

            ICollection <IndexedWord> doomed = Crawl(seed, sg);
            foreach (IndexedWord victim in doomed)
            {
                sg.RemoveVertex(victim);
            }

            // A root may have been among the removed nodes, so the roots are recomputed.
            sg.ResetRoots();
        }
Beispiel #13
0
        /// <summary>Executes the edit script against copies of the given SemanticGraph.</summary>
        /// <remarks>
        /// For each match of the attached Semgrex pattern that passes the optional predicate
        /// test, the input graph is duplicated (keeping the same node objects, since the matcher
        /// operates on those), the edit script is run on the duplicate, and the duplicate is
        /// collected.
        /// If a match binds two names to the same node, matching stops at that point and the
        /// graphs collected so far are returned.
        /// NOTE(review): the original inline comment says such a match is merely "thrown out",
        /// but the code stops processing all remaining matches - confirm which is intended.
        /// </remarks>
        /// <param name="sg">SemanticGraph to operate over (NOT destroyed/modified).</param>
        /// <returns>The edited graph copies, one per accepted match; empty if there were none.</returns>
        /// <exception cref="System.Exception"/>
        public virtual ICollection <SemanticGraph> Execute(SemanticGraph sg)
        {
            ICollection <SemanticGraph> results = new List <SemanticGraph>();
            SemgrexMatcher m = semgrexPattern.Matcher(sg);

            while (m.Find())
            {
                // Semgrex can bind two named nodes to the same node; detect such collisions.
                ICollection <string>      boundNames   = m.GetNodeNames();
                ICollection <IndexedWord> alreadyBound = Generics.NewHashSet();
                foreach (string boundName in boundNames)
                {
                    IndexedWord bound = m.GetNode(boundName);
                    if (alreadyBound.Contains(bound))
                    {
                        // Collision: stop matching altogether and hand back what we have.
                        return results;
                    }
                    alreadyBound.Add(bound);
                }

                // Optional predicate gate: a failed test skips only this match.
                bool rejected = predicateTest != null && !predicateTest.Test(m);
                if (rejected)
                {
                    continue;
                }

                // Work on a duplicate so the original graph is not mutilated.  The duplicate
                // shares the original's nodes because the matcher refers to those.
                SemanticGraph edited = SemanticGraphFactory.DuplicateKeepNodes(sg);
                nodeMap = Generics.NewHashMap();
                foreach (SsurgeonEdit step in editScript)
                {
                    step.Evaluate(edited, m);
                }
                results.Add(edited);
            }
            return results;
        }
Beispiel #14
0
        /// <summary>
        /// Collapses the subgraph under the node registered as <c>rootName</c> into one new node
        /// whose value, word and lemma are the space-joined values, words and lemmas of the
        /// subgraph's nodes in sorted order.
        /// </summary>
        /// <remarks>
        /// The new node gets the root node's document id, sentence index and index; every
        /// incoming edge of the root node is re-created pointing at the new node; then all
        /// subgraph nodes are removed from the graph.
        /// </remarks>
        /// <param name="sg">Graph that is modified in place.</param>
        /// <param name="sm">Current match, used to resolve the subtree root.</param>
        /// <exception cref="System.ArgumentException">
        /// The subgraph is not a DAG and one of the root's children can reach the root again.
        /// </exception>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            IndexedWord rootNode = this.GetNamedNode(rootName, sm);
            ICollection <IndexedWord> subgraphNodeSet = sg.GetSubgraphVertices(rootNode);

            if (!sg.IsDag(rootNode))
            {
                /* Check if there is a cycle going back to the root. */
                foreach (IndexedWord child in sg.GetChildren(rootNode))
                {
                    ICollection <IndexedWord> reachableSet = sg.GetSubgraphVertices(child);
                    if (reachableSet.Contains(rootNode))
                    {
                        throw new ArgumentException("Subtree cannot contain cycle leading back to root node!");
                    }
                }
            }
            IList <IndexedWord> sortedSubgraphNodes = Generics.NewArrayList(subgraphNodeSet);

            sortedSubgraphNodes.Sort();
            IndexedWord newNode = new IndexedWord(rootNode.DocID(), rootNode.SentIndex(), rootNode.Index());

            /* Copy all attributes from rootNode. */
            // NOTE(review): the keys enumerated are those of newNode's backing label, not
            // rootNode's - confirm this copies everything that is intended.
            foreach (Type key in newNode.BackingLabel().KeySet())
            {
                newNode.Set(key, rootNode.Get(key));
            }

            // Project each node to its value / word / lemma explicitly.  The previous code
            // passed a null mapping function to Stream().Map(...), so no projection took place.
            IList <string> values = new List <string>();
            IList <string> words  = new List <string>();
            IList <string> lemmas = new List <string>();
            foreach (IndexedWord member in sortedSubgraphNodes)
            {
                values.Add(member.Value());
                words.Add(member.Word());
                lemmas.Add(member.Lemma());
            }
            newNode.SetValue(StringUtils.Join(values, " "));
            newNode.SetWord(StringUtils.Join(words, " "));
            newNode.SetLemma(StringUtils.Join(lemmas, " "));

            // NOTE(review): this removes rootNode from the roots and then adds rootNode (not
            // newNode) back, although rootNode is removed from the graph below - confirm.
            if (sg.GetRoots().Contains(rootNode))
            {
                sg.GetRoots().Remove(rootNode);
                sg.AddRoot(rootNode);
            }
            // Re-attach every governor of the old root to the collapsed node.
            foreach (SemanticGraphEdge edge in sg.IncomingEdgeIterable(rootNode))
            {
                sg.AddEdge(edge.GetGovernor(), newNode, edge.GetRelation(), edge.GetWeight(), edge.IsExtra());
            }
            foreach (IndexedWord node in sortedSubgraphNodes)
            {
                sg.RemoveVertex(node);
            }
        }
Beispiel #15
0
        /// <summary>
        /// Removes edges carrying <c>relation</c> between the named governor and dependent,
        /// where either name may be the wildcard.
        /// </summary>
        /// <remarks>
        /// Both nodes resolved: the single gov-to-dep edge with the relation is removed if it exists.
        /// Only the dependent resolved and the governor name is the wildcard: every matching
        /// incoming edge of the dependent is removed.
        /// Only the governor resolved and the dependent name is the wildcard: every matching
        /// outgoing edge of the governor is removed.
        /// Any other combination leaves the graph untouched.
        /// </remarks>
        /// <param name="sg">Graph the edges are removed from.</param>
        /// <param name="sm">Current match, used to resolve the named nodes.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            bool        govWild = govName.Equals(WildcardNode);
            bool        depWild = depName.Equals(WildcardNode);
            IndexedWord govNode = GetNamedNode(govName, sm);
            IndexedWord depNode = GetNamedNode(depName, sm);

            if (govNode != null && depNode != null)
            {
                SemanticGraphEdge edge = sg.GetEdge(govNode, depNode, relation);
                if (edge != null)
                {
                    sg.RemoveEdge(edge);
                }
            }
            else if (depNode != null && govWild)
            {
                // dep known, wildcard gov.  Collect the matching edges first so the graph is
                // not modified while its own edge iterable is being enumerated.
                System.Collections.Generic.List <SemanticGraphEdge> doomed = new System.Collections.Generic.List <SemanticGraphEdge>();
                foreach (SemanticGraphEdge edge in sg.IncomingEdgeIterable(depNode))
                {
                    if (edge.GetRelation().Equals(relation))
                    {
                        doomed.Add(edge);
                    }
                }
                foreach (SemanticGraphEdge edge in doomed)
                {
                    // Re-check membership at removal time, as the original loop did.
                    if (sg.ContainsEdge(edge))
                    {
                        sg.RemoveEdge(edge);
                    }
                }
            }
            else if (govNode != null && depWild)
            {
                // gov known, wildcard dep.  Same snapshot-then-remove approach.
                System.Collections.Generic.List <SemanticGraphEdge> doomed = new System.Collections.Generic.List <SemanticGraphEdge>();
                foreach (SemanticGraphEdge edge in sg.OutgoingEdgeIterable(govNode))
                {
                    if (edge.GetRelation().Equals(relation))
                    {
                        doomed.Add(edge);
                    }
                }
                foreach (SemanticGraphEdge edge in doomed)
                {
                    if (sg.ContainsEdge(edge))
                    {
                        sg.RemoveEdge(edge);
                    }
                }
            }
        }
        // System.out.println("the string is " + StringUtils.join(focuss, ";"));

        /*
         * Runs the SemgrexPattern over the SemanticGraph g and walks every match.
         *
         * For each match whose "node"/"parent" pair passes the max-depth check:
         *  - when the debug level is above 3, the (joined tokens, graph) pair is appended
         *    to matchedGraphsForPattern under this pattern;
         *  - the "parent" node is added to the returned collection;
         *  - PrintSubGraph is invoked on the "node", receiving outputNodes, outputIndices
         *    and extractedPhrases among its arguments.
         *
         * The original author noted that the role of tokens still needs clarifying.
         */
        public virtual ICollection <IndexedWord> GetSemGrexPatternNodes(SemanticGraph g, IList <string> tokens, ICollection <string> outputNodes, ICollection <IntPair> outputIndices, SemgrexPattern pattern, bool findSubTrees, ICollection <ExtractedPhrase> extractedPhrases, bool lowercase, IPredicate <CoreLabel> acceptWord)
        {
            ICollection <IndexedWord> parentsOfMatches = new HashSet <IndexedWord>();
            SemgrexMatcher            hits = pattern.Matcher(g, lowercase);

            while (hits.Find())
            {
                IndexedWord matchedNode   = hits.GetNode("node");
                IndexedWord matchedParent = hits.GetNode("parent");

                // Discard matches that fail the depth check.
                if (!CheckIfSatisfiedMaxDepth(g, matchedParent, matchedNode, new IntPair(maxDepth, 0)))
                {
                    continue;
                }

                if (Debug > 3)
                {
                    // Record which sentence/graph this pattern fired on.
                    IList <Pair <string, SemanticGraph> > recorded = matchedGraphsForPattern[pattern];
                    if (recorded == null)
                    {
                        recorded = new List <Pair <string, SemanticGraph> >();
                    }
                    recorded.Add(new Pair <string, SemanticGraph>(StringUtils.Join(tokens, " "), g));
                    matchedGraphsForPattern[pattern] = recorded;
                }

                parentsOfMatches.Add(matchedParent);

                // Fresh, empty bookkeeping for each match; no cutoff relations are configured.
                List <IndexedWord> seenNodes       = new List <IndexedWord>();
                IList <string>     cutoffrelations = new List <string>();
                PrintSubGraph(g, matchedNode, cutoffrelations, tokens, outputNodes, outputIndices, seenNodes, new List <IndexedWord>(), findSubTrees, extractedPhrases, pattern, acceptWord);
            }
            return parentsOfMatches;
        }
Beispiel #17
0
        /// <summary>
        /// Adds an edge with <c>relation</c> and <c>weight</c> from the named governor to the
        /// named dependent, unless the graph already has such an edge.
        /// </summary>
        /// <remarks>Either endpoint is added to the graph first if it is not yet a vertex.</remarks>
        /// <param name="sg">Graph that receives the edge.</param>
        /// <param name="sm">Current match, used to resolve the named nodes.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            IndexedWord governor  = GetNamedNode(govName, sm);
            IndexedWord dependent = GetNamedNode(depName, sm);

            // An identical edge is already present: nothing to do.
            if (sg.GetEdge(governor, dependent, relation) != null)
            {
                return;
            }

            // Make sure both endpoints are vertices before linking them.
            if (!sg.ContainsVertex(governor))
            {
                sg.AddVertex(governor);
            }
            if (!sg.ContainsVertex(dependent))
            {
                sg.AddVertex(dependent);
            }
            sg.AddEdge(governor, dependent, relation, weight, false);
        }
        /// <summary>
        /// Find the operators in this sentence, annotating the head word (only!) of each operator with the
        /// <see cref="OperatorAnnotation"/>.
        /// </summary>
        /// <remarks>
        /// Works in two passes: first every pattern in <c>Patterns</c> is matched against the sentence's
        /// dependency graph and the quantifier token of each usable match is annotated; then overlapping
        /// quantifier annotations are pruned so that only head tokens keep their annotation.
        /// </remarks>
        /// <param name="sentence">
        /// As in
        /// <see cref="DoOneSentence(Edu.Stanford.Nlp.Pipeline.Annotation, Edu.Stanford.Nlp.Util.ICoreMap)"/>
        /// </param>
        private void AnnotateOperators(ICoreMap sentence)
        {
            SemanticGraph     tree   = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

            // Fall back to the enhanced dependencies when the basic ones are absent.
            // NOTE(review): if neither annotation is set, tree is still null when it is handed to
            // pattern.Matcher below - confirm callers guarantee one of them.
            if (tree == null)
            {
                tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
            }
            foreach (SemgrexPattern pattern in Patterns)
            {
                SemgrexMatcher matcher = pattern.Matcher(tree);
                while (matcher.Find())
                {
                    // Get terms
                    // A match on the capitalised "Subject" name makes that node serve as both the
                    // quantifier and the subject, and marks the match as a named-entity quantifier.
                    IndexedWord properSubject = matcher.GetNode("Subject");
                    IndexedWord quantifier;
                    IndexedWord subject;
                    bool        namedEntityQuantifier = false;
                    if (properSubject != null)
                    {
                        quantifier            = subject = properSubject;
                        namedEntityQuantifier = true;
                    }
                    else
                    {
                        quantifier = matcher.GetNode("quantifier");
                        subject    = matcher.GetNode("subject");
                    }
                    IndexedWord @object = matcher.GetNode("object");
                    // Validate the quantifier; quantifierInfo is assigned on both branches below.
                    Optional <Triple <Operator, int, int> > quantifierInfo;
                    if (namedEntityQuantifier)
                    {
                        // named entities have the "all" semantics by default.
                        // Skipped entirely when named-entity quantifiers are switched off.
                        if (!neQuantifiers)
                        {
                            continue;
                        }
                        quantifierInfo = Optional.Of(Triple.MakeTriple(Operator.ImplicitNamedEntity, quantifier.Index(), quantifier.Index()));
                    }
                    else
                    {
                        // note: empty quantifier span given
                        // find the quantifier, and return some info about it.
                        // The last argument is true when either the object or the subject is missing.
                        quantifierInfo = ValidateQuantifierByHead(sentence, quantifier, @object == null || subject == null);
                    }
                    // Awful hacks to regularize the subject of things like "one of" and "there are"
                    // (fix up 'there are')
                    // When the subject's lemma is "be" and it has an expl dependent, the subject is
                    // replaced by its nsubj dependent (which may be null if no nsubj edge was seen).
                    if ("be".Equals(subject == null ? null : subject.Lemma()))
                    {
                        bool        hasExpl    = false;
                        IndexedWord newSubject = null;
                        foreach (SemanticGraphEdge outgoingEdge in tree.OutgoingEdgeIterable(subject))
                        {
                            if ("nsubj".Equals(outgoingEdge.GetRelation().ToString()))
                            {
                                newSubject = outgoingEdge.GetDependent();
                            }
                            else
                            {
                                if ("expl".Equals(outgoingEdge.GetRelation().ToString()))
                                {
                                    hasExpl = true;
                                }
                            }
                        }
                        if (hasExpl)
                        {
                            subject = newSubject;
                        }
                    }
                    // (fix up '$n$ of')
                    // When the subject is tagged CD, it is replaced by the dependent of an outgoing
                    // edge whose relation name starts with "nmod".  The edge iterable is obtained from
                    // the original subject before the loop starts; the last such edge visited wins.
                    if ("CD".Equals(subject == null ? null : subject.Tag()))
                    {
                        foreach (SemanticGraphEdge outgoingEdge in tree.OutgoingEdgeIterable(subject))
                        {
                            string rel = outgoingEdge.GetRelation().ToString();
                            if (rel.StartsWith("nmod"))
                            {
                                subject = outgoingEdge.GetDependent();
                            }
                        }
                    }
                    // Set tokens
                    if (quantifierInfo.IsPresent())
                    {
                        // Compute span
                        // The "pivot" node is used when matched; otherwise the object stands in for it.
                        IndexedWord pivot = matcher.GetNode("pivot");
                        if (pivot == null)
                        {
                            pivot = @object;
                        }
                        OperatorSpec scope = ComputeScope(tree, quantifierInfo.Get().first, pivot, Pair.MakePair(quantifierInfo.Get().second, quantifierInfo.Get().third), subject, namedEntityQuantifier, @object, tokens.Count);
                        // Set annotation
                        // NOTE(review): Index() - 1 is used as the token-list position, which
                        // presumes node indices are 1-based - confirm.
                        CoreLabel    token    = sentence.Get(typeof(CoreAnnotations.TokensAnnotation))[quantifier.Index() - 1];
                        OperatorSpec oldScope = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation));
                        // Overwrite when there is no previous annotation, the previous quantifier is
                        // shorter, or it is a different operator instance; otherwise merge the two.
                        if (oldScope == null || oldScope.QuantifierLength() < scope.QuantifierLength() || oldScope.instance != scope.instance)
                        {
                            token.Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), scope);
                        }
                        else
                        {
                            token.Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), OperatorSpec.Merge(oldScope, scope));
                        }
                    }
                }
            }
            // Ensure we didn't select overlapping quantifiers. For example, "a" and "a few" can often overlap.
            // In these cases, take the longer quantifier match.
            IList <OperatorSpec> quantifiers = new List <OperatorSpec>();

            // Collect every annotated operator, dropping a sentence-initial "no" followed by a pronoun.
            for (int i = 0; i < tokens.Count; ++i)
            {
                CoreLabel    token = tokens[i];
                OperatorSpec @operator;
                if ((@operator = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation))) != null)
                {
                    if (i == 0 && @operator.instance == Operator.No && tokens.Count > 2 && "PRP".Equals(tokens[1].Get(typeof(CoreAnnotations.PartOfSpeechAnnotation))))
                    {
                        // This is pragmatically not a negation -- ignore it
                        // For example, "no I don't like candy" or "no you like cats"
                        token.Remove(typeof(NaturalLogicAnnotations.OperatorAnnotation));
                    }
                    else
                    {
                        quantifiers.Add(@operator);
                    }
                }
            }
            // NOTE(review): the comparator argument is null here.  The comment above says the longer
            // quantifier should win, which suggests a length-based ordering was intended - confirm
            // what ordering Sort(null) actually produces for OperatorSpec.
            quantifiers.Sort(null);
            // For each quantifier, strip the annotation from every token in
            // [quantifierBegin, quantifierEnd) except the quantifier's head token.
            foreach (OperatorSpec quantifier_1 in quantifiers)
            {
                for (int i_1 = quantifier_1.quantifierBegin; i_1 < quantifier_1.quantifierEnd; ++i_1)
                {
                    if (i_1 != quantifier_1.quantifierHead)
                    {
                        tokens[i_1].Remove(typeof(NaturalLogicAnnotations.OperatorAnnotation));
                    }
                }
            }
        }
 /// <summary>
 /// Given a matching instance (via the SemgrexMatcher), performs an in-place
 /// modification on the given SemanticGraph.
 /// </summary>
 /// <param name="sg">Graph that the edit modifies in place.</param>
 /// <param name="sm">Matcher positioned on the match that the edit applies to.</param>
 public abstract void Evaluate(SemanticGraph sg, SemgrexMatcher sm);
Beispiel #20
0
        //using quote-removed depparses
        public virtual void DependencyParses(Annotation doc)
        {
            IList <ICoreMap>  quotes    = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));
            IList <CoreLabel> tokens    = doc.Get(typeof(CoreAnnotations.TokensAnnotation));
            IList <ICoreMap>  sentences = doc.Get(typeof(CoreAnnotations.SentencesAnnotation));

            foreach (ICoreMap quote in quotes)
            {
                if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null)
                {
                    continue;
                }
                Pair <int, int> range = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
                if (range == null)
                {
                    continue;
                }
                //search for mentions in the first run
                Pair <List <string>, List <Pair <int, int> > > namesAndNameIndices = ScanForNames(range);
                List <string>           names       = namesAndNameIndices.first;
                List <Pair <int, int> > nameIndices = namesAndNameIndices.second;
                SemanticGraph           graph       = quote.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
                SemgrexMatcher          matcher     = subjVerbPattern.Matcher(graph);
                IList <Pair <IndexedWord, IndexedWord> > subjVerbPairs = new List <Pair <IndexedWord, IndexedWord> >();
                //TODO: check and see if this is necessary
                while (matcher.Find())
                {
                    IndexedWord subj = matcher.GetNode("SUBJ");
                    IndexedWord verb = matcher.GetNode("VERB");
                    subjVerbPairs.Add(new Pair <IndexedWord, IndexedWord>(subj, verb));
                }
                IList <IndexedWord> vbs = graph.GetAllNodesByPartOfSpeechPattern("VB.*");
                foreach (IndexedWord iw in vbs)
                {
                    // does it have an nsubj child?
                    ICollection <IndexedWord> children = graph.GetChildren(iw);
                    IList <IndexedWord>       deps     = Generics.NewArrayList();
                    IndexedWord nsubj = null;
                    foreach (IndexedWord child in children)
                    {
                        SemanticGraphEdge sge = graph.GetEdge(iw, child);
                        if (sge.GetRelation().GetShortName().Equals("dep") && child.Tag().StartsWith("VB"))
                        {
                            deps.Add(child);
                        }
                        else
                        {
                            if (sge.GetRelation().GetShortName().Equals("nsubj"))
                            {
                                nsubj = child;
                            }
                        }
                    }
                    if (nsubj != null)
                    {
                        foreach (IndexedWord dep in deps)
                        {
                            subjVerbPairs.Add(new Pair(nsubj, dep));
                        }
                    }
                }
                //look for a speech verb
                foreach (Pair <IndexedWord, IndexedWord> SVPair in subjVerbPairs)
                {
                    IndexedWord verb = SVPair.second;
                    IndexedWord subj = SVPair.first;
                    //check if subj and verb outside of quote
                    int verbTokPos = TokenToLocation(verb.BackingLabel());
                    int subjTokPos = TokenToLocation(verb.BackingLabel());
                    if (InRange(range, verbTokPos) && InRange(range, subjTokPos) && commonSpeechWords.Contains(verb.Lemma()))
                    {
                        if (subj.Tag().Equals("NNP"))
                        {
                            int startChar = subj.BeginPosition();
                            for (int i = 0; i < names.Count; i++)
                            {
                                Pair <int, int> nameIndex = nameIndices[i];
                                //avoid names that don't actually exist in
                                if (RangeContainsCharIndex(nameIndex, startChar))
                                {
                                    FillInMention(quote, TokenRangeToString(nameIndex), nameIndex.first, nameIndex.second, sieveName, Name);
                                    break;
                                }
                            }
                        }
                        else
                        {
                            if (subj.Tag().Equals("PRP"))
                            {
                                int loc = TokenToLocation(subj.BackingLabel());
                                FillInMention(quote, subj.Word(), loc, loc, sieveName, Pronoun);
                                break;
                            }
                            else
                            {
                                if (subj.Tag().Equals("NN") && animacySet.Contains(subj.Word()))
                                {
                                    int loc = TokenToLocation(subj.BackingLabel());
                                    FillInMention(quote, subj.Word(), loc, loc, sieveName, AnimateNoun);
                                    break;
                                }
                            }
                        }
                    }
                }
            }
        }
Beispiel #21
0
 /// <summary>Returns all of the entailed shortened clauses (as per natural logic) from the given clause.</summary>
 /// <remarks>
 /// This runs the forward entailment component of the OpenIE system only.
 /// It is usually chained together with the clause splitting component:
 /// <see cref="ClausesInSentence(Edu.Stanford.Nlp.Util.ICoreMap)"/>.
 /// The result is built in this order: the forward entailments (only searched for when
 /// <c>entailmentsPerSentence</c> is positive), the clause itself, and then one fragment per
 /// accepted match of <c>adjectivePattern</c> on the clause's parse tree.
 /// </remarks>
 /// <param name="clause">The premise clause, as a sentence fragment in itself.</param>
 /// <returns>
 /// A list of entailed clauses, or the empty list when the clause's parse tree is empty.
 /// </returns>
 public virtual IList <SentenceFragment> EntailmentsFromClause(SentenceFragment clause)
 {
     if (clause.parseTree.IsEmpty())
     {
         return(Java.Util.Collections.EmptyList());
     }

     // Get the forward entailments
     IList <SentenceFragment> entailments = new List <SentenceFragment>();
     if (entailmentsPerSentence > 0)
     {
         // NOTE(review): Map receives null here -- presumably the mapping lambda was dropped by the
         // Java-to-C# conversion; confirm against the original before relying on this branch.
         Sharpen.Collections.AddAll(entailments, forwardEntailer.Apply(clause.parseTree, true).Search().Stream().Map(null).Collect(Collectors.ToList()));
     }
     entailments.Add(clause);

     // A special case for adjective entailments
     SemgrexMatcher matcher = adjectivePattern.Matcher(clause.parseTree);
     while (matcher.Find())
     {
         // (get nodes)
         IndexedWord subj = matcher.GetNode("subj");
         IndexedWord be   = matcher.GetNode("be");
         IndexedWord adj  = matcher.GetNode("adj");
         IndexedWord obj  = matcher.GetNode("obj");
         IndexedWord pobj = matcher.GetNode("pobj");
         string      prep = matcher.GetRelnString("prep");

         // (if the adjective, or any earlier adjective, is privative, then all bets are off)
         bool hasPrivativeAdjective = false;
         foreach (SemanticGraphEdge edge in clause.parseTree.OutgoingEdgeIterable(obj))
         {
             // Lower-case with the invariant culture: the lookup set holds fixed words, so the
             // result must not depend on the current thread culture (e.g. Turkish dotless i).
             if ("amod".Equals(edge.GetRelation().ToString()) && edge.GetDependent().Index() <= adj.Index() && Edu.Stanford.Nlp.Naturalli.Util.PrivativeAdjectives.Contains(edge.GetDependent().Word().ToLowerInvariant()))
             {
                 hasPrivativeAdjective = true;
                 break;
             }
         }
         if (hasPrivativeAdjective)
         {
             continue;
         }

         // (create the core tree)
         SemanticGraph tree = new SemanticGraph();
         tree.AddRoot(adj);
         tree.AddVertex(subj);
         tree.AddVertex(be);
         tree.AddEdge(adj, be, GrammaticalRelation.ValueOf(Language.English, "cop"), double.NegativeInfinity, false);
         tree.AddEdge(adj, subj, GrammaticalRelation.ValueOf(Language.English, "nsubj"), double.NegativeInfinity, false);

         // (add pp attachment, if it existed)
         if (pobj != null)
         {
             System.Diagnostics.Debug.Assert(prep != null);
             tree.AddEdge(adj, pobj, GrammaticalRelation.ValueOf(Language.English, prep), double.NegativeInfinity, false);
         }

         // (check for monotonicity) -- only keep the fragment when both the adjective and the
         // copula carry an upward polarity annotation
         if (adj.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation)).IsUpwards() && be.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation)).IsUpwards())
         {
             // (add tree)
             entailments.Add(new SentenceFragment(tree, clause.assumedTruth, false));
         }
     }
     return(entailments);
 }
        /// <summary>
        /// Annotate any unary quantifiers that weren't found in the main
        /// <see cref="AnnotateOperators(Edu.Stanford.Nlp.Util.ICoreMap)"/>
        /// method.
        /// </summary>
        /// <remarks>
        /// Two passes are made: one over the matches of <c>UnaryPattern</c> on the dependency graph,
        /// and one over the matches of <c>DoubtPattern</c> on the token list. Each pass stores its
        /// result on the relevant token under <c>NaturalLogicAnnotations.OperatorAnnotation</c>.
        /// </remarks>
        /// <param name="sentence">The sentence to annotate.</param>
        private static void AnnotateUnaries(ICoreMap sentence)
        {
            // Dependency graph: basic dependencies when present, enhanced dependencies otherwise
            SemanticGraph tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            if (tree == null)
            {
                tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
            }
            IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

            // Mark every token position that lies inside the quantifier span of an existing operator
            bool[] coveredByOperator = new bool[tokens.Count];
            for (int tokenIdx = 0; tokenIdx < coveredByOperator.Length; ++tokenIdx)
            {
                OperatorSpec existing = tokens[tokenIdx].Get(typeof(NaturalLogicAnnotations.OperatorAnnotation));
                if (existing == null)
                {
                    continue;
                }
                for (int pos = existing.quantifierBegin; pos < existing.quantifierEnd; ++pos)
                {
                    coveredByOperator[pos] = true;
                }
            }

            // Pass 1: Semgrex matches on the dependency graph
            SemgrexMatcher unaryMatcher = UnaryPattern.Matcher(tree);
            while (unaryMatcher.Find())
            {
                IndexedWord quantifier = unaryMatcher.GetNode("quantifier");
                string      lowered    = quantifier.Word().ToLower();

                // Articles and cardinal numbers are absurdly common, and uninformative, and we're just
                // going to shoot ourselves in the foot from parsing errors and idiomatic expressions.
                bool isArticle = lowered == "a" || lowered == "an" || lowered == "the";
                if (isArticle || "CD".Equals(quantifier.Tag()))
                {
                    continue;
                }

                IndexedWord subject = unaryMatcher.GetNode("subject");

                // Skip positions that already carry an operator (node indices are 1-based here,
                // hence the -1 when addressing the token array)
                if (coveredByOperator[quantifier.Index() - 1])
                {
                    continue;
                }

                Optional <Triple <Operator, int, int> > quantifierInfo = ValidateQuantifierByHead(sentence, quantifier, true);
                if (!quantifierInfo.IsPresent())
                {
                    // No quantifier span was found
                    continue;
                }

                // Then add the unary operator!
                Triple <Operator, int, int> found = quantifierInfo.Get();
                OperatorSpec scope = ComputeScope(tree, found.first, subject, Pair.MakePair(found.second, found.third), null, false, null, tokens.Count);
                tokens[quantifier.Index() - 1].Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), scope);
            }

            // Pass 2: TokensRegex matches on the token list
            TokenSequenceMatcher doubtMatcher = DoubtPattern.Matcher(tokens);
            while (doubtMatcher.Find())
            {
                IList <CoreLabel> doubtTokens  = (IList <CoreLabel>)doubtMatcher.GroupNodes("$doubt");
                IList <CoreLabel> targetTokens = (IList <CoreLabel>)doubtMatcher.GroupNodes("$target");
                foreach (CoreLabel doubtToken in doubtTokens)
                {
                    // The operator covers the single doubt token; the span passed after it runs
                    // from the first to the last token of the $target group.
                    CoreLabel    firstTarget = targetTokens[0];
                    CoreLabel    lastTarget  = targetTokens[targetTokens.Count - 1];
                    OperatorSpec negation    = new OperatorSpec(Operator.GeneralNegPolarity, doubtToken.Index() - 1, doubtToken.Index(), firstTarget.Index() - 1, lastTarget.Index(), 0, 0, tokens.Count);
                    doubtToken.Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), negation);
                }
            }
        }
Beispiel #23
0
 /// <summary>
 /// Looks up the node the matcher has bound to <c>matchName</c> and returns the result of
 /// <c>Evaluate</c> on it.
 /// </summary>
 /// <param name="matcher">The matcher to read the named node from.</param>
 /// <returns>Whatever <c>Evaluate</c> returns for that node.</returns>
 /// <exception cref="System.Exception"/>
 public virtual bool Test(SemgrexMatcher matcher)
 {
     IndexedWord matchedNode = matcher.GetNode(matchName);
     return Evaluate(matchedNode);
 }
        /// <summary>
        /// Create a dataset of subject/object pairs, such that a sequence of splits that segments this
        /// subject and object is a correct sequence.
        /// </summary>
        /// <remarks>
        /// Two passes are made over the dependency parse. The first uses <c>segmenter.VpPatterns</c>:
        /// for a verb/object match with no subject arc on either node, the verb chunk's span is widened
        /// by one on each side and checked against the trace sources; if a trace is found and its
        /// target span is known, that target is paired with the object span. The second pass pairs the
        /// subject and object chunks of every match of <c>segmenter.VerbPatterns</c>.
        /// </remarks>
        /// <param name="depparse">The dependency parse of the sentence.</param>
        /// <param name="tokens">The tokens of the sentence. Not read by this method.</param>
        /// <param name="traceTargets">The set of spans corresponding to targets of traces.</param>
        /// <param name="traceSources">The set of indices in a sentence corresponding to the sources of traces.</param>
        /// <returns>A dataset of subject/object spans.</returns>
        private static ICollection <Pair <Span, Span> > SubjectObjectPairs(SemanticGraph depparse, IList <CoreLabel> tokens, IDictionary <int, Span> traceTargets, IDictionary <int, int> traceSources)
        {
            IList <Pair <Span, Span> > data = new List <Pair <Span, Span> >();

            // Pass 1: subject-less verb phrases whose subject is recovered through a trace
            foreach (SemgrexPattern vpPattern in segmenter.VpPatterns)
            {
                SemgrexMatcher matcher = vpPattern.Matcher(depparse);
                while (matcher.Find())
                {
                    // Get the verb and object
                    IndexedWord verb    = matcher.GetNode("verb");
                    IndexedWord @object = matcher.GetNode("object");
                    if (verb == null || @object == null)
                    {
                        continue;
                    }

                    // See if there is already a subject attached
                    if (HasOutgoingSubjectArc(depparse, verb) || HasOutgoingSubjectArc(depparse, @object))
                    {
                        continue;
                    }

                    // Get the spans for the verb and object
                    Optional <IList <IndexedWord> > verbChunk   = segmenter.GetValidChunk(depparse, verb, segmenter.ValidAdverbArcs, Optional.Empty(), true);
                    Optional <IList <IndexedWord> > objectChunk = segmenter.GetValidChunk(depparse, @object, segmenter.ValidObjectArcs, Optional.Empty(), true);
                    if (!verbChunk.IsPresent() || !objectChunk.IsPresent())
                    {
                        continue;
                    }

                    // NOTE(review): ComparingInt receives null -- presumably the key-extractor lambda was
                    // dropped by the Java-to-C# conversion; confirm against the original.
                    verbChunk.Get().Sort(IComparer.ComparingInt(null));
                    objectChunk.Get().Sort(IComparer.ComparingInt(null));

                    // Find a trace whose source lies within one position of the verb chunk.
                    // No early exit: when several sources qualify, the last one enumerated wins.
                    int  traceId   = -1;
                    Span verbSpan  = ToSpan(verbChunk.Get());
                    Span traceSpan = Span.FromValues(verbSpan.Start() - 1, verbSpan.End() + 1);
                    foreach (KeyValuePair <int, int> entry in traceSources)
                    {
                        if (traceSpan.Contains(entry.Value))
                        {
                            traceId = entry.Key;
                        }
                    }
                    if (traceId < 0)
                    {
                        // No trace near this verb phrase; nothing to record
                        continue;
                    }

                    // Add the obj chunk. TryGetValue is used instead of the indexer because the
                    // indexer throws KeyNotFoundException for a trace id with no registered target,
                    // whereas such a trace should simply be skipped.
                    Span subjectSpan;
                    if (traceTargets.TryGetValue(traceId, out subjectSpan) && subjectSpan != null)
                    {
                        Span objectSpan = ToSpan(objectChunk.Get());
                        data.Add(Pair.MakePair(subjectSpan, objectSpan));
                    }
                }
            }

            // Pass 2: run vanilla pattern splits
            foreach (SemgrexPattern verbPattern in segmenter.VerbPatterns)
            {
                SemgrexMatcher matcher = verbPattern.Matcher(depparse);
                while (matcher.Find())
                {
                    // Get the subject and object
                    IndexedWord subject = matcher.GetNode("subject");
                    IndexedWord @object = matcher.GetNode("object");
                    if (subject == null || @object == null)
                    {
                        continue;
                    }

                    Optional <IList <IndexedWord> > subjectChunk = segmenter.GetValidChunk(depparse, subject, segmenter.ValidSubjectArcs, Optional.Empty(), true);
                    Optional <IList <IndexedWord> > objectChunk  = segmenter.GetValidChunk(depparse, @object, segmenter.ValidObjectArcs, Optional.Empty(), true);
                    if (subjectChunk.IsPresent() && objectChunk.IsPresent())
                    {
                        Span subjectSpan = ToSpan(subjectChunk.Get());
                        Span objectSpan  = ToSpan(objectChunk.Get());
                        data.Add(Pair.MakePair(subjectSpan, objectSpan));
                    }
                }
            }
            return(data);
        }

        /// <summary>
        /// Tells whether any outgoing edge of <paramref name="node"/> has a relation whose string form contains "subj".
        /// </summary>
        private static bool HasOutgoingSubjectArc(SemanticGraph graph, IndexedWord node)
        {
            foreach (SemanticGraphEdge edge in graph.OutgoingEdgeIterable(node))
            {
                if (edge.GetRelation().ToString().Contains("subj"))
                {
                    return true;
                }
            }
            return false;
        }