/// <summary>
/// Scores how acceptable it is to delete a dependency edge, dispatching on the
/// textual name of the edge's relation.
/// </summary>
/// <param name="edge">The edge being considered for deletion.</param>
/// <param name="neighbors">Edges handed through unchanged to the relation-specific scorers.</param>
/// <returns>
/// The value of the prep/obj/subj-specific scorer when the relation name contains
/// that fragment; 0.0 or 1.0 for an "amod" edge depending on whether the dependent
/// is a privative adjective; otherwise the relation-name-only score.
/// </returns>
public virtual double DeletionProbability(SemanticGraphEdge edge, IEnumerable <SemanticGraphEdge> neighbors)
        {
            string relationName = edge.GetRelation().ToString();

            // The checks are ordered: a relation name is tested for "prep" first,
            // then "obj", then "subj", and the first match wins.
            if (relationName.Contains("prep"))
            {
                return PpDeletionProbability(edge, neighbors);
            }
            if (relationName.Contains("obj"))
            {
                return ObjDeletionProbability(edge, neighbors);
            }
            if (relationName.Contains("subj"))
            {
                return SubjDeletionProbability(edge, neighbors);
            }
            if (!relationName.Equals("amod"))
            {
                // No special handling: fall back to the score keyed on the relation name.
                return DeletionProbability(relationName);
            }

            // Adjectival modifier: look the dependent up by lemma, or by surface word
            // when no lemma is available, lower-cased in either case.
            var dependent = edge.GetDependent();
            string lemma = dependent.Lemma();
            string key = lemma != null ? lemma : dependent.Word();
            string word = key.ToLower();

            bool isPrivative = Edu.Stanford.Nlp.Naturalli.Util.PrivativeAdjectives.Contains(word);
            return isPrivative ? 0.0 : 1.0;
        }
        /// <summary>
        /// Scores the deletion of an object edge, using the neighboring edges to pick
        /// the most specific affinity score available.
        /// </summary>
        /// <param name="edge">The object edge being considered for deletion.</param>
        /// <param name="neighbors">
        /// Edges scanned for subject, prepositional and further object relations;
        /// <paramref name="edge"/> itself is skipped when encountered.
        /// </param>
        /// <returns>
        /// 1.0 as soon as another neighbor carries an "obj" relation. Otherwise
        /// <c>1 - min(1, score / upperProbabilityCap)</c> for the affinity score that
        /// was found, or the relation-name-only score when no affinity score exists.
        /// </returns>
        public virtual double ObjDeletionProbability(SemanticGraphEdge edge, IEnumerable <SemanticGraphEdge> neighbors)
        {
            // Get information about the neighbors
            // (in a totally not-creepy-stalker sort of way)
            Optional <string> subj = Optional.Empty();
            Optional <string> pp   = Optional.Empty();

            foreach (SemanticGraphEdge neighbor in neighbors)
            {
                if (neighbor == edge)
                {
                    continue;
                }
                string neighborRel = neighbor.GetRelation().ToString();
                if (neighborRel.Contains("subj"))
                {
                    subj = Optional.Of(neighbor.GetDependent().OriginalText().ToLower());
                }
                if (neighborRel.Contains("prep"))
                {
                    pp = Optional.Of(neighborRel);
                }
                if (neighborRel.Contains("obj"))
                {
                    // allow deleting second object
                    return(1.0);
                }
            }
            string obj  = edge.GetDependent().OriginalText().ToLower();
            string verb = edge.GetGovernor().OriginalText().ToLower();
            // Compute the most informative drop probability we can.
            // The score is nullable so that "no score found" can be represented;
            // a plain double cannot be assigned or compared to null.
            double? rawScore = null;

            // NOTE(review): the two lookups below are expected to yield null for a
            // missing key. If the backing fields are plain .NET dictionaries the
            // indexer throws instead -- confirm against the field declarations.
            if (subj.IsPresent() && pp.IsPresent())
            {
                // Case: subj+pp+obj
                rawScore = verbSubjPPObjAffinity[Quadruple.MakeQuadruple(verb, subj.Get(), pp.Get(), obj)];
            }
            if (rawScore == null)
            {
                // Case: verb only
                rawScore = verbObjAffinity[verb];
            }
            if (rawScore == null)
            {
                // Nothing known about this verb: use the generic score for the relation.
                return(DeletionProbability(edge.GetRelation().ToString()));
            }
            return(1.0 - Math.Min(1.0, rawScore.Value / upperProbabilityCap));
        }
Esempio n. 3
0
        /// <summary>The search algorithm, starting with a full sentence and iteratively shortening it to its entailed sentences.</summary>
        /// <remarks>
        /// Works on a private copy of the parse tree. After pre-processing (cutting "conj:and" edges on
        /// vertices with several incoming edges, cleaning the tree, marking vertices that sit below a
        /// subject edge), the vertices are visited in topological order using a LIFO stack as the fringe.
        /// For every popped state two successors are considered: keeping the current word, and deleting it
        /// when the projected natural-logic relation keeps the premise's truth value true.
        /// The loop stops once <c>maxTicks</c> states were popped, <c>maxResults</c> results were collected,
        /// or the fringe is empty.
        /// NOTE(review): several calls in this method receive <c>null</c> where a delegate is expected
        /// (<c>Filter(null)</c>, <c>ForEach(null)</c>, <c>Lazy.Of(null)</c>); these look like callbacks lost
        /// in an automated conversion and should be confirmed against the upstream source.
        /// </remarks>
        /// <returns>A list of search results, corresponding to shortenings of the sentence.</returns>
        private IList <ForwardEntailerSearchProblem.SearchResult> SearchImplementation()
        {
            // Pre-process the tree
            // (work on a copy so the instance's parse tree is left untouched)
            SemanticGraph parseTree = new SemanticGraph(this.parseTree);

            System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(parseTree));
            // (remove common determiners)
            IList <string> determinerRemovals = new List <string>();

            // NOTE(review): both the filter and the action are null here, and nothing else in this
            // method adds to determinerRemovals directly -- presumably the predicate and the
            // collecting action were dropped; confirm.
            parseTree.GetLeafVertices().Stream().Filter(null).ForEach(null);
            // (cut conj_and nodes)
            ICollection <SemanticGraphEdge> andsToAdd = new HashSet <SemanticGraphEdge>();

            foreach (IndexedWord vertex in parseTree.VertexSet())
            {
                // Only vertices with more than one incoming edge are candidates.
                if (parseTree.InDegree(vertex) > 1)
                {
                    // If several incoming edges are "conj:and", the last one enumerated wins.
                    SemanticGraphEdge conjAnd = null;
                    foreach (SemanticGraphEdge edge in parseTree.IncomingEdgeIterable(vertex))
                    {
                        if ("conj:and".Equals(edge.GetRelation().ToString()))
                        {
                            conjAnd = edge;
                        }
                    }
                    if (conjAnd != null)
                    {
                        // Detach the edge now and remember it so it can be re-attached to results later.
                        parseTree.RemoveEdge(conjAnd);
                        System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(parseTree));
                        andsToAdd.Add(conjAnd);
                    }
                }
            }
            // Clean the tree
            Edu.Stanford.Nlp.Naturalli.Util.CleanTree(parseTree);
            System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(parseTree));
            // Find the subject / object split
            // This takes max O(n^2) time, expected O(n*log(n)) time.
            // Optimal is O(n), but I'm too lazy to implement it.
            // Bit (Index() - 1) is set for every vertex that has an edge ending in "subj"
            // somewhere on its chain of first incoming edges.
            BitSet isSubject = new BitSet(256);

            foreach (IndexedWord vertex_1 in parseTree.VertexSet())
            {
                // Search up the tree for a subj node; if found, mark that vertex as a subject.
                IEnumerator <SemanticGraphEdge> incomingEdges = parseTree.IncomingEdgeIterator(vertex_1);
                SemanticGraphEdge edge = null;
                if (incomingEdges.MoveNext())
                {
                    edge = incomingEdges.Current;
                }
                int numIters = 0;
                while (edge != null)
                {
                    if (edge.GetRelation().ToString().EndsWith("subj"))
                    {
                        System.Diagnostics.Debug.Assert(vertex_1.Index() > 0);
                        isSubject.Set(vertex_1.Index() - 1);
                        break;
                    }
                    // Step to the governor and continue with its first incoming edge (if any).
                    incomingEdges = parseTree.IncomingEdgeIterator(edge.GetGovernor());
                    if (incomingEdges.MoveNext())
                    {
                        edge = incomingEdges.Current;
                    }
                    else
                    {
                        edge = null;
                    }
                    numIters += 1;
                    // Safety valve: give up on the whole search after more than 100 upward steps.
                    if (numIters > 100)
                    {
                        //          log.error("tree has apparent depth > 100");
                        // NOTE(review): EmptyList is referenced without an argument list -- confirm it is
                        // a property (not a method) of the compatibility class.
                        return(Java.Util.Collections.EmptyList);
                    }
                }
            }
            // Outputs
            IList <ForwardEntailerSearchProblem.SearchResult> results = new List <ForwardEntailerSearchProblem.SearchResult>();

            // Seed result: the sentence with only the determiners removed, scored as
            // DeletionProbability("det") raised to the number of removals.
            if (!determinerRemovals.IsEmpty())
            {
                if (andsToAdd.IsEmpty())
                {
                    double score = Math.Pow(weights.DeletionProbability("det"), (double)determinerRemovals.Count);
                    System.Diagnostics.Debug.Assert(!double.IsNaN(score));
                    System.Diagnostics.Debug.Assert(!double.IsInfinite(score));
                    results.Add(new ForwardEntailerSearchProblem.SearchResult(parseTree, determinerRemovals, score));
                }
                else
                {
                    // Re-attach the detached "conj:and" edges on a copy before reporting it.
                    SemanticGraph treeWithAnds = new SemanticGraph(parseTree);
                    System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(treeWithAnds));
                    foreach (SemanticGraphEdge and in andsToAdd)
                    {
                        treeWithAnds.AddEdge(and.GetGovernor(), and.GetDependent(), and.GetRelation(), double.NegativeInfinity, false);
                    }
                    System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(treeWithAnds));
                    results.Add(new ForwardEntailerSearchProblem.SearchResult(treeWithAnds, determinerRemovals, Math.Pow(weights.DeletionProbability("det"), (double)determinerRemovals.Count)));
                }
            }
            // Initialize the search
            System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(parseTree));
            IList <IndexedWord> topologicalVertices;

            try
            {
                topologicalVertices = parseTree.TopologicalSort();
            }
            catch (InvalidOperationException)
            {
                //      log.info("Could not topologically sort the vertices! Using left-to-right traversal.");
                topologicalVertices = parseTree.VertexListSorted();
            }
            if (topologicalVertices.IsEmpty())
            {
                return(results);
            }
            Stack <ForwardEntailerSearchProblem.SearchState> fringe = new Stack <ForwardEntailerSearchProblem.SearchState>();

            // Initial state: empty deletion mask, first vertex, full tree, score 1.0.
            fringe.Push(new ForwardEntailerSearchProblem.SearchState(new BitSet(256), 0, parseTree, null, null, 1.0));
            // Start the search
            int numTicks = 0;

            while (!fringe.IsEmpty())
            {
                // Overhead with popping a node.
                if (numTicks >= maxTicks)
                {
                    return(results);
                }
                numTicks += 1;
                if (results.Count >= maxResults)
                {
                    return(results);
                }
                ForwardEntailerSearchProblem.SearchState state = fringe.Pop();
                System.Diagnostics.Debug.Assert(state.score > 0.0);
                IndexedWord currentWord = topologicalVertices[state.currentIndex];
                // Push the case where we don't delete
                // (advance to the next vertex whose bit is not set in the deletion mask)
                int nextIndex = state.currentIndex + 1;
                int numIters  = 0;
                while (nextIndex < topologicalVertices.Count)
                {
                    IndexedWord nextWord = topologicalVertices[nextIndex];
                    System.Diagnostics.Debug.Assert(nextWord.Index() > 0);
                    if (!state.deletionMask.Get(nextWord.Index() - 1))
                    {
                        fringe.Push(new ForwardEntailerSearchProblem.SearchState(state.deletionMask, nextIndex, state.tree, null, state, state.score));
                        break;
                    }
                    else
                    {
                        nextIndex += 1;
                    }
                    numIters += 1;
                    if (numIters > 10000)
                    {
                        //          log.error("logic error (apparent infinite loop); returning");
                        return(results);
                    }
                }
                // Check if we can delete this subtree
                // (the first root is never deleted)
                bool canDelete = !state.tree.GetFirstRoot().Equals(currentWord);
                foreach (SemanticGraphEdge edge in state.tree.IncomingEdgeIterable(currentWord))
                {
                    // Dependents of a token tagged "CD" are never deleted.
                    if ("CD".Equals(edge.GetGovernor().Tag()))
                    {
                        canDelete = false;
                    }
                    else
                    {
                        // Get token information
                        CoreLabel            token = edge.GetDependent().BackingLabel();
                        OperatorSpec         @operator;
                        NaturalLogicRelation lexicalRelation;
                        Polarity             tokenPolarity = token.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation));
                        if (tokenPolarity == null)
                        {
                            tokenPolarity = Polarity.Default;
                        }
                        // Get the relation for this deletion
                        // (an operator annotation on the token takes precedence over the dependency relation)
                        if ((@operator = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation))) != null)
                        {
                            lexicalRelation = @operator.instance.deleteRelation;
                        }
                        else
                        {
                            System.Diagnostics.Debug.Assert(edge.GetDependent().Index() > 0);
                            lexicalRelation = NaturalLogicRelation.ForDependencyDeletion(edge.GetRelation().ToString(), isSubject.Get(edge.GetDependent().Index() - 1));
                        }
                        NaturalLogicRelation projectedRelation = tokenPolarity.ProjectLexicalRelation(lexicalRelation);
                        // Make sure this is a valid entailment
                        if (!projectedRelation.ApplyToTruthValue(truthOfPremise).IsTrue())
                        {
                            canDelete = false;
                        }
                    }
                }
                if (canDelete)
                {
                    // Register the deletion
                    // NOTE(review): the lazy value is created from null, yet Get().first / Get().second are
                    // dereferenced below -- presumably the supplier that builds the pruned tree and the
                    // updated mask was dropped; confirm.
                    Lazy <Pair <SemanticGraph, BitSet> > treeWithDeletionsAndNewMask = Lazy.Of(null);
                    // Compute the score of the sentence
                    // (parent score times the deletion probability of every incoming edge of the word)
                    double newScore = state.score;
                    foreach (SemanticGraphEdge edge_1 in state.tree.IncomingEdgeIterable(currentWord))
                    {
                        double multiplier = weights.DeletionProbability(edge_1, state.tree.OutgoingEdgeIterable(edge_1.GetGovernor()));
                        System.Diagnostics.Debug.Assert(!double.IsNaN(multiplier));
                        System.Diagnostics.Debug.Assert(!double.IsInfinite(multiplier));
                        newScore *= multiplier;
                    }
                    // Register the result
                    // (deletions that drive the score to zero are neither reported nor expanded)
                    if (newScore > 0.0)
                    {
                        SemanticGraph resultTree = new SemanticGraph(treeWithDeletionsAndNewMask.Get().first);
                        // NOTE(review): filter and action are null; resultTree is not modified by this
                        // statement as written -- presumably it re-attached edges from andsToAdd; confirm.
                        andsToAdd.Stream().Filter(null).ForEach(null);
                        results.Add(new ForwardEntailerSearchProblem.SearchResult(resultTree, AggregateDeletedEdges(state, state.tree.IncomingEdgeIterable(currentWord), determinerRemovals), newScore));
                        // Push the state with this subtree deleted
                        nextIndex = state.currentIndex + 1;
                        numIters  = 0;
                        while (nextIndex < topologicalVertices.Count)
                        {
                            IndexedWord   nextWord          = topologicalVertices[nextIndex];
                            BitSet        newMask           = treeWithDeletionsAndNewMask.Get().second;
                            SemanticGraph treeWithDeletions = treeWithDeletionsAndNewMask.Get().first;
                            if (!newMask.Get(nextWord.Index() - 1))
                            {
                                System.Diagnostics.Debug.Assert(treeWithDeletions.ContainsVertex(topologicalVertices[nextIndex]));
                                fringe.Push(new ForwardEntailerSearchProblem.SearchState(newMask, nextIndex, treeWithDeletions, null, state, newScore));
                                break;
                            }
                            else
                            {
                                nextIndex += 1;
                            }
                            numIters += 1;
                            if (numIters > 10000)
                            {
                                //              log.error("logic error (apparent infinite loop); returning");
                                return(results);
                            }
                        }
                    }
                }
            }
            // Return
            return(results);
        }
Esempio n. 4
0
        /// <summary>Fix some bizarre peculiarities with certain trees.</summary>
        /// <remarks>
        /// The clean-up is applied in place, in this order:
        /// <ul>
        /// <li>Vertices whose tag starts with a punctuation character and that have no outgoing edge are removed.</li>
        /// <li>Edges whose two endpoints share an index are removed (sometimes there's an edge from a word to
        /// itself; this seems wrong). When one endpoint is a copy of the other, the copy's outgoing edges
        /// are re-attached to the other endpoint and the copy is removed.</li>
        /// <li>"punct" edges whose dependent has no outgoing edge are removed.</li>
        /// <li>For every vertex reached by an extra edge a single incoming edge is kept; the others are
        /// removed from the tree and reported in the returned list.</li>
        /// <li>Incoming edges of roots are removed; then dangling non-root vertices and every incoming edge
        /// after a vertex's first are removed repeatedly until nothing changes.</li>
        /// <li>If a vertex has more than one "dobj" edge and an edge to the word "that", the edge to "that"
        /// is rewritten as a "mark" edge.</li>
        /// </ul>
        /// </remarks>
        /// <param name="tree">The tree to clean (in place!).</param>
        /// <returns>A list of extra edges, which are valid but were removed.</returns>
        public static IList <SemanticGraphEdge> CleanTree(SemanticGraph tree)
        {
            //    assert !isCyclic(tree);
            // Clean nodes. Vertices are collected first and removed afterwards so that the
            // vertex set is never modified while it is being enumerated.
            IList <IndexedWord> toDelete = new List <IndexedWord>();

            foreach (IndexedWord vertex in tree.VertexSet())
            {
                // Clean punctuation
                if (vertex.Tag() == null)
                {
                    continue;
                }
                char tag = vertex.BackingLabel().Tag()[0];
                bool isPunctuationTag = tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':';
                if (isPunctuationTag && !tree.OutgoingEdgeIterator(vertex).MoveNext())
                {
                    // This should really never happen, but it does.
                    toDelete.Add(vertex);
                }
            }
            foreach (IndexedWord punctuationVertex in toDelete)
            {
                tree.RemoveVertex(punctuationVertex);
            }

            // Clean edges. IEnumerator has no Remove(), so iterate over a snapshot and remove
            // from the tree directly; removals are still visible to the checks on later edges.
            IList <SemanticGraphEdge> edgeSnapshot = new List <SemanticGraphEdge>();
            foreach (SemanticGraphEdge snapshotEdge in tree.EdgeIterable())
            {
                edgeSnapshot.Add(snapshotEdge);
            }
            IList <Triple <IndexedWord, IndexedWord, SemanticGraphEdge> > toAdd = new List <Triple <IndexedWord, IndexedWord, SemanticGraphEdge> >();

            toDelete.Clear();
            foreach (SemanticGraphEdge edge in edgeSnapshot)
            {
                if (edge.GetDependent().Index() == edge.GetGovernor().Index())
                {
                    // Clean up copy-edges: hand the copy's children to the other endpoint.
                    if (edge.GetDependent().IsCopy(edge.GetGovernor()))
                    {
                        foreach (SemanticGraphEdge toCopy in tree.OutgoingEdgeIterable(edge.GetDependent()))
                        {
                            toAdd.Add(Triple.MakeTriple(edge.GetGovernor(), toCopy.GetDependent(), toCopy));
                        }
                        toDelete.Add(edge.GetDependent());
                    }
                    if (edge.GetGovernor().IsCopy(edge.GetDependent()))
                    {
                        foreach (SemanticGraphEdge toCopy in tree.OutgoingEdgeIterable(edge.GetGovernor()))
                        {
                            toAdd.Add(Triple.MakeTriple(edge.GetDependent(), toCopy.GetDependent(), toCopy));
                        }
                        toDelete.Add(edge.GetGovernor());
                    }
                    // Clean self-edges
                    tree.RemoveEdge(edge);
                }
                else if (edge.GetRelation().ToString().Equals("punct"))
                {
                    // Clean punctuation (again)
                    if (!tree.OutgoingEdgeIterator(edge.GetDependent()).MoveNext())
                    {
                        // This should really never happen, but it does.
                        tree.RemoveEdge(edge);
                    }
                }
            }
            // (drop the copy vertices, then add edges we wanted to add)
            foreach (IndexedWord copyVertex in toDelete)
            {
                tree.RemoveVertex(copyVertex);
            }
            foreach (Triple <IndexedWord, IndexedWord, SemanticGraphEdge> pending in toAdd)
            {
                tree.AddEdge(pending.first, pending.second, pending.third.GetRelation(), pending.third.GetWeight(), pending.third.IsExtra());
            }
            // Handle extra edges.
            // Two cases:
            // (1) the extra edge is a subj/obj edge and the main edge is a conj:.*
            //     in this case, keep the extra
            // (2) otherwise, delete the extra
            // NOTE(review): string.Matches is not part of the .NET base library; it is assumed to be
            // provided by the project's Java-compatibility layer with full-match semantics -- confirm.
            IList <SemanticGraphEdge> extraEdges = new List <SemanticGraphEdge>();

            foreach (SemanticGraphEdge graphEdge in tree.EdgeIterable())
            {
                if (!graphEdge.IsExtra())
                {
                    continue;
                }
                IList <SemanticGraphEdge> incomingEdges = tree.IncomingEdgeList(graphEdge.GetDependent());
                SemanticGraphEdge         toKeep        = null;
                foreach (SemanticGraphEdge candidate in incomingEdges)
                {
                    if (toKeep == null)
                    {
                        toKeep = candidate;
                    }
                    else if (toKeep.GetRelation().ToString().StartsWith("conj") && candidate.GetRelation().ToString().Matches(".subj.*|.obj.*"))
                    {
                        toKeep = candidate;
                    }
                    else if (!candidate.IsExtra() && !(candidate.GetRelation().ToString().StartsWith("conj") && toKeep.GetRelation().ToString().Matches(".subj.*|.obj.*")))
                    {
                        toKeep = candidate;
                    }
                }
                foreach (SemanticGraphEdge rejected in incomingEdges)
                {
                    if (rejected != toKeep)
                    {
                        extraEdges.Add(rejected);
                    }
                }
            }
            // The rejected edges leave the tree but are reported back to the caller.
            foreach (SemanticGraphEdge rejectedEdge in extraEdges)
            {
                tree.RemoveEdge(rejectedEdge);
            }
            // Add apposition edges (simple coref)
            foreach (SemanticGraphEdge extraEdge in new List <SemanticGraphEdge>(extraEdges))
            {
                // note[gabor] prevent concurrent modification exception
                foreach (SemanticGraphEdge candidateAppos in tree.IncomingEdgeIterable(extraEdge.GetDependent()))
                {
                    if (candidateAppos.GetRelation().ToString().Equals("appos"))
                    {
                        extraEdges.Add(new SemanticGraphEdge(extraEdge.GetGovernor(), candidateAppos.GetGovernor(), extraEdge.GetRelation(), extraEdge.GetWeight(), extraEdge.IsExtra()));
                    }
                }
                foreach (SemanticGraphEdge candidateAppos_1 in tree.OutgoingEdgeIterable(extraEdge.GetDependent()))
                {
                    if (candidateAppos_1.GetRelation().ToString().Equals("appos"))
                    {
                        extraEdges.Add(new SemanticGraphEdge(extraEdge.GetGovernor(), candidateAppos_1.GetDependent(), extraEdge.GetRelation(), extraEdge.GetWeight(), extraEdge.IsExtra()));
                    }
                }
            }
            // Brute force ensure tree
            // Remove incoming edges from roots
            IList <SemanticGraphEdge> rootIncomingEdges = new List <SemanticGraphEdge>();

            foreach (IndexedWord root in tree.GetRoots())
            {
                foreach (SemanticGraphEdge incomingEdge in tree.IncomingEdgeIterable(root))
                {
                    rootIncomingEdges.Add(incomingEdge);
                }
            }
            foreach (SemanticGraphEdge rootEdge in rootIncomingEdges)
            {
                tree.RemoveEdge(rootEdge);
            }
            // Loop until it becomes a tree.
            bool changed = true;

            while (changed)
            {
                // I just want trees to be trees; is that so much to ask!?
                changed = false;
                IList <IndexedWord>       danglingNodes = new List <IndexedWord>();
                IList <SemanticGraphEdge> invalidEdges  = new List <SemanticGraphEdge>();
                foreach (IndexedWord candidateVertex in tree.VertexSet())
                {
                    // Collect statistics
                    IEnumerator <SemanticGraphEdge> incomingIter = tree.IncomingEdgeIterator(candidateVertex);
                    bool hasIncoming         = incomingIter.MoveNext();
                    bool hasMultipleIncoming = hasIncoming && incomingIter.MoveNext();
                    // Register actions
                    if (!hasIncoming && !tree.GetRoots().Contains(candidateVertex))
                    {
                        danglingNodes.Add(candidateVertex);
                    }
                    else if (hasMultipleIncoming)
                    {
                        // The enumerator already sits on the second incoming edge, so read Current
                        // before advancing; the first incoming edge is the one that is kept.
                        do
                        {
                            invalidEdges.Add(incomingIter.Current);
                        }
                        while (incomingIter.MoveNext());
                    }
                }
                // Perform actions
                foreach (IndexedWord danglingNode in danglingNodes)
                {
                    tree.RemoveVertex(danglingNode);
                    changed = true;
                }
                foreach (SemanticGraphEdge invalidEdge in invalidEdges)
                {
                    tree.RemoveEdge(invalidEdge);
                    changed = true;
                }
            }
            // Edge case: remove duplicate dobj to "that."
            //            This is a common parse error.
            foreach (IndexedWord governor in tree.VertexSet())
            {
                SemanticGraphEdge thatEdge = null;
                int dobjCount = 0;
                foreach (SemanticGraphEdge outgoingEdge in tree.OutgoingEdgeIterable(governor))
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase("that", outgoingEdge.GetDependent().Word()))
                    {
                        thatEdge = outgoingEdge;
                    }
                    if ("dobj".Equals(outgoingEdge.GetRelation().ToString()))
                    {
                        dobjCount += 1;
                    }
                }
                if (dobjCount > 1 && thatEdge != null)
                {
                    // Case: there are two dobj edges, one of which goes to the word "that"
                    // Action: rewrite the dobj edge to "that" to be a "mark" edge.
                    tree.RemoveEdge(thatEdge);
                    tree.AddEdge(thatEdge.GetGovernor(), thatEdge.GetDependent(), GrammaticalRelation.ValueOf(thatEdge.GetRelation().GetLanguage(), "mark"), thatEdge.GetWeight(), thatEdge.IsExtra());
                }
            }
            // Return
            System.Diagnostics.Debug.Assert(IsTree(tree));
            return(extraEdges);
        }