Exemple #1
0
        /// <summary>
        /// Strips leaf <c>case</c> edges, together with their dependent vertices, when the edge's
        /// governor is the target of an <c>nmod</c> edge.
        /// </summary>
        /// <remarks>
        /// This replicates the behavior of the old Stanford dependencies on universal dependencies.
        /// An edge is stripped only when all of the following hold: its relation prints as
        /// <c>"case"</c>, its governor has at least one incoming edge whose relation short name is
        /// <c>"nmod"</c>, and its dependent has no outgoing edges.
        /// </remarks>
        /// <param name="tree">The tree to modify in place.</param>
        public static void StripPrepCases(SemanticGraph tree)
        {
            // Pass 1: only collect the edges to strip; the graph is left untouched while it is enumerated.
            IList <SemanticGraphEdge> doomed = new List <SemanticGraphEdge>();

            foreach (SemanticGraphEdge candidate in tree.EdgeIterable())
            {
                if (!"case".Equals(candidate.GetRelation().ToString()))
                {
                    continue;
                }
                // Is the governor reached through at least one 'nmod' edge?
                bool governorIsNmodTarget = false;
                foreach (SemanticGraphEdge arriving in tree.IncomingEdgeIterable(candidate.GetGovernor()))
                {
                    if ("nmod".Equals(arriving.GetRelation().GetShortName()))
                    {
                        governorIsNmodTarget = true;
                        break;
                    }
                }
                if (!governorIsNmodTarget)
                {
                    continue;
                }
                // Only leaves are stripped: the dependent must not govern anything itself.
                bool dependentIsLeaf = !tree.OutgoingEdgeIterator(candidate.GetDependent()).MoveNext();
                if (dependentIsLeaf)
                {
                    doomed.Add(candidate);
                }
            }
            // Pass 2: drop each collected edge and the vertex it pointed to.
            for (int i = 0; i < doomed.Count; ++i)
            {
                SemanticGraphEdge stale = doomed[i];
                tree.RemoveEdge(stale);
                tree.RemoveVertex(stale.GetDependent());
                System.Diagnostics.Debug.Assert(IsTree(tree));
            }
        }
        /// <summary>
        /// Creates a new graph with the same vertices, roots and edges as <paramref name="sg"/>,
        /// reusing the existing node objects instead of copying them.
        /// </summary>
        /// <param name="sg">The graph to duplicate; it is only read.</param>
        /// <returns>
        /// A new graph holding the very same vertex objects as <paramref name="sg"/>, with its edges
        /// re-created from each source edge's governor, dependent, relation, weight and extra flag.
        /// </returns>
        public static SemanticGraph DuplicateKeepNodes(SemanticGraph sg)
        {
            SemanticGraph duplicate = new SemanticGraph();

            // Vertices are shared with the source graph, not cloned.
            foreach (IndexedWord sharedNode in sg.VertexSet())
            {
                duplicate.AddVertex(sharedNode);
            }
            duplicate.SetRoots(sg.GetRoots());
            // Edges are rebuilt through AddEdge rather than by reusing the source edge objects.
            foreach (SemanticGraphEdge sourceEdge in sg.EdgeIterable())
            {
                IndexedWord governor  = sourceEdge.GetGovernor();
                IndexedWord dependent = sourceEdge.GetDependent();
                duplicate.AddEdge(governor, dependent, sourceEdge.GetRelation(), sourceEdge.GetWeight(), sourceEdge.IsExtra());
            }
            return duplicate;
        }
        /// <summary>
        /// Merges several graphs into one new graph, deep-copying every vertex and renumbering
        /// the word indices so that they do not collide across graphs.
        /// </summary>
        /// <remarks>
        /// Each vertex is copied with <c>new IndexedWord(vertex)</c> and its index is shifted by the
        /// sum of the <paramref name="lengths"/> entries of all preceding graphs. Edges and roots are
        /// re-created between the copied vertices.
        /// </remarks>
        /// <param name="graphs">The graphs to merge; they are only read.</param>
        /// <param name="lengths">
        /// Must contain, for each graph in <paramref name="graphs"/>, the number of tokens in the
        /// corresponding sentence. This is used to reindex the tokens.
        /// </param>
        /// <returns>A new graph containing copies of all vertices, edges and roots of the inputs.</returns>
        /// <exception cref="AssertionError">
        /// Thrown when an edge refers to a governor or dependent for which no copied vertex exists.
        /// </exception>
        public static SemanticGraph DeepCopyFromGraphs(IList <SemanticGraph> graphs, IList <int> lengths)
        {
            SemanticGraph newGraph = new SemanticGraph();
            // Maps a renumbered (already offset) word index to the copied vertex carrying that index.
            IDictionary <int, IndexedWord> newWords = Generics.NewHashMap();
            IList <IndexedWord>            newRoots = new List <IndexedWord>();
            int vertexOffset = 0;

            for (int i = 0; i < graphs.Count; ++i)
            {
                SemanticGraph graph = graphs[i];
                foreach (IndexedWord vertex in graph.VertexSet())
                {
                    IndexedWord newVertex = new IndexedWord(vertex);
                    newVertex.SetIndex(vertex.Index() + vertexOffset);
                    newGraph.AddVertex(newVertex);
                    newWords[newVertex.Index()] = newVertex;
                }
                foreach (SemanticGraphEdge edge in graph.EdgeIterable())
                {
                    // TryGetValue instead of the indexer: a dictionary indexer may throw on a missing
                    // key rather than yield null, which would bypass the consistency check below.
                    IndexedWord gov;
                    IndexedWord dep;
                    bool hasGov = newWords.TryGetValue(edge.GetGovernor().Index() + vertexOffset, out gov);
                    bool hasDep = newWords.TryGetValue(edge.GetDependent().Index() + vertexOffset, out dep);
                    if (!hasGov || !hasDep || gov == null || dep == null)
                    {
                        throw new AssertionError("Counting problem (or broken edge)");
                    }
                    newGraph.AddEdge(gov, dep, edge.GetRelation(), edge.GetWeight(), edge.IsExtra());
                }
                foreach (IndexedWord root in graph.GetRoots())
                {
                    // NOTE(review): a root that is missing from the vertex set is not checked here;
                    // confirm whether such graphs can occur.
                    newRoots.Add(newWords[root.Index() + vertexOffset]);
                }
                // The next graph's indices start after all tokens of this sentence.
                vertexOffset += lengths[i];
            }
            newGraph.SetRoots(newRoots);
            return(newGraph);
        }
        /// <summary>
        /// Writes the graph to <paramref name="pw"/> on two lines: the vertices first, then the edges.
        /// </summary>
        /// <remarks>
        /// The vertex line starts with the document id (or <c>-</c> when it is null or empty) and the
        /// sentence index, both read from the first enumerated vertex. It continues with one
        /// tab-prefixed entry per vertex: its index, a <c>-copyCount</c> suffix when the copy count is
        /// positive, and for roots a marker (<c>-R</c> when the copy count is positive, <c>-0-R</c>
        /// otherwise). The edge line holds tab-separated entries of the form
        /// <c>relation source target</c>, extended by <c>isExtra sourceCopyCount targetCopyCount</c>
        /// when the edge is extra or either end has a positive copy count.
        /// </remarks>
        /// <param name="graph">The graph to write; for <c>null</c> two empty lines are written instead.</param>
        /// <param name="pw">The writer that receives the output.</param>
        private static void SaveDependencyGraph(SemanticGraph graph, PrintWriter pw)
        {
            if (graph == null)
            {
                // Keep the two-line layout even when there is nothing to write.
                pw.Println();
                pw.Println();
                return;
            }

            // First line: docid and sentence index (taken from the first vertex), then all vertices.
            bool headerPending = true;
            foreach (IndexedWord word in graph.VertexSet())
            {
                if (headerPending)
                {
                    string docId = word.Get(typeof(CoreAnnotations.DocIDAnnotation));
                    pw.Print(string.IsNullOrEmpty(docId) ? "-" : docId);
                    pw.Print("\t");
                    pw.Print(word.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)));
                    headerPending = false;
                }
                pw.Print("\t");
                pw.Print(word.Index());
                // Copy counts mark copied (or virtual) nodes; they are usually unset, so they are
                // printed only when positive.
                bool isCopy = word.CopyCount() > 0;
                if (isCopy)
                {
                    pw.Print("-");
                    pw.Print(word.CopyCount());
                }
                if (graph.GetRoots().Contains(word))
                {
                    pw.Print(isCopy ? "-R" : "-0-R");
                }
            }
            pw.Println();

            // Second line: all edges, separated by tabs.
            int edgesWritten = 0;
            foreach (SemanticGraphEdge arc in graph.EdgeIterable())
            {
                if (edgesWritten > 0)
                {
                    pw.Print("\t");
                }
                edgesWritten++;
                // No whitespace is allowed in the relation name; it may occur due to the
                // tokenization of HTML/XML/RDF tags.
                pw.Print(arc.GetRelation().ToString().ReplaceAll("\\s+", string.Empty));
                pw.Print(" ");
                pw.Print(arc.GetSource().Index());
                pw.Print(" ");
                pw.Print(arc.GetTarget().Index());
                bool needsCopyDetails = arc.IsExtra() || arc.GetSource().CopyCount() > 0 || arc.GetTarget().CopyCount() > 0;
                if (needsCopyDetails)
                {
                    pw.Print(" ");
                    pw.Print(arc.IsExtra());
                    pw.Print(" ");
                    pw.Print(arc.GetSource().CopyCount());
                    pw.Print(" ");
                    pw.Print(arc.GetTarget().CopyCount());
                }
            }
            pw.Println();
        }
Exemple #5
0
        /// <summary>Fix some bizarre peculiarities with certain trees.</summary>
        /// <remarks>
        /// The graph is modified in place, in this order:
        /// <ul>
        /// <li>Vertices whose tag starts with <c>.</c>, <c>,</c>, <c>(</c>, <c>)</c> or <c>:</c> and that
        /// have no outgoing edges are removed.</li>
        /// <li>Edges whose governor and dependent have the same index are removed. When <c>IsCopy</c>
        /// reports one end as a copy of the other, that end's outgoing edges are re-created from the
        /// other end and that end is removed.</li>
        /// <li><c>punct</c> edges whose dependent has no outgoing edges are removed.</li>
        /// <li>For every dependent of an extra edge only one incoming edge is kept; the others are
        /// removed from the tree and collected for the return value.</li>
        /// <li>Incoming edges of roots are removed; then dangling vertices and surplus incoming edges
        /// are removed repeatedly until nothing changes.</li>
        /// <li>When a vertex has more than one <c>dobj</c> edge and an edge to the word "that", the
        /// edge to "that" is rewritten as a <c>mark</c> edge.</li>
        /// </ul>
        /// </remarks>
        /// <param name="tree">The tree to clean (in place!).</param>
        /// <returns>
        /// A list of extra edges, which are valid but were removed, plus edges synthesised for
        /// <c>appos</c> partners of their dependents (these are not added to the tree).
        /// </returns>
        public static IList <SemanticGraphEdge> CleanTree(SemanticGraph tree)
        {
            //    assert !isCyclic(tree);
            // Clean nodes
            IList <IndexedWord> toDelete = new List <IndexedWord>();

            foreach (IndexedWord vertex in tree.VertexSet())
            {
                // Clean punctuation
                if (vertex.Tag() == null)
                {
                    continue;
                }
                char tag = vertex.BackingLabel().Tag()[0];
                if (tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':')
                {
                    if (!tree.OutgoingEdgeIterator(vertex).MoveNext())
                    {
                        // This should really never happen, but it does.
                        toDelete.Add(vertex);
                    }
                }
            }
            // Remove only now, after the vertex enumeration has finished.
            foreach (IndexedWord punctuationVertex in toDelete)
            {
                tree.RemoveVertex(punctuationVertex);
            }

            // Clean edges.
            // Edges are removed from the tree as soon as they are found, so later checks in this loop
            // see the updated graph; enumerating a snapshot keeps that removal safe.
            IList <SemanticGraphEdge> edgeSnapshot = new List <SemanticGraphEdge>();
            foreach (SemanticGraphEdge snapshotEdge in tree.EdgeIterable())
            {
                edgeSnapshot.Add(snapshotEdge);
            }
            IList <Triple <IndexedWord, IndexedWord, SemanticGraphEdge> > toAdd = new List <Triple <IndexedWord, IndexedWord, SemanticGraphEdge> >();

            toDelete.Clear();
            foreach (SemanticGraphEdge edge in edgeSnapshot)
            {
                if (edge.GetDependent().Index() == edge.GetGovernor().Index())
                {
                    // Clean up copy-edges
                    if (edge.GetDependent().IsCopy(edge.GetGovernor()))
                    {
                        foreach (SemanticGraphEdge toCopy in tree.OutgoingEdgeIterable(edge.GetDependent()))
                        {
                            toAdd.Add(Triple.MakeTriple(edge.GetGovernor(), toCopy.GetDependent(), toCopy));
                        }
                        toDelete.Add(edge.GetDependent());
                    }
                    if (edge.GetGovernor().IsCopy(edge.GetDependent()))
                    {
                        foreach (SemanticGraphEdge toCopy in tree.OutgoingEdgeIterable(edge.GetGovernor()))
                        {
                            toAdd.Add(Triple.MakeTriple(edge.GetDependent(), toCopy.GetDependent(), toCopy));
                        }
                        toDelete.Add(edge.GetGovernor());
                    }
                    // Clean self-edges
                    tree.RemoveEdge(edge);
                }
                else if (edge.GetRelation().ToString().Equals("punct"))
                {
                    // Clean punctuation (again)
                    if (!tree.OutgoingEdgeIterator(edge.GetDependent()).MoveNext())
                    {
                        // This should really never happen, but it does.
                        tree.RemoveEdge(edge);
                    }
                }
            }
            // Remove the copy vertices, then add the edges we wanted to add.
            foreach (IndexedWord copyVertex in toDelete)
            {
                tree.RemoveVertex(copyVertex);
            }
            foreach (Triple <IndexedWord, IndexedWord, SemanticGraphEdge> pending in toAdd)
            {
                tree.AddEdge(pending.first, pending.second, pending.third.GetRelation(), pending.third.GetWeight(), pending.third.IsExtra());
            }

            // Handle extra edges.
            // Two cases:
            // (1) the extra edge is a subj/obj edge and the main edge is a conj:.*
            //     in this case, keep the extra
            // (2) otherwise, delete the extra
            IList <SemanticGraphEdge> extraEdges = new List <SemanticGraphEdge>();

            foreach (SemanticGraphEdge possiblyExtra in tree.EdgeIterable())
            {
                if (!possiblyExtra.IsExtra())
                {
                    continue;
                }
                IList <SemanticGraphEdge> incomingEdges = tree.IncomingEdgeList(possiblyExtra.GetDependent());
                SemanticGraphEdge         toKeep        = null;
                foreach (SemanticGraphEdge candidate in incomingEdges)
                {
                    if (toKeep == null)
                    {
                        toKeep = candidate;
                    }
                    else if (toKeep.GetRelation().ToString().StartsWith("conj", System.StringComparison.Ordinal) && candidate.GetRelation().ToString().Matches(".subj.*|.obj.*"))
                    {
                        toKeep = candidate;
                    }
                    else if (!candidate.IsExtra() && !(candidate.GetRelation().ToString().StartsWith("conj", System.StringComparison.Ordinal) && toKeep.GetRelation().ToString().Matches(".subj.*|.obj.*")))
                    {
                        toKeep = candidate;
                    }
                }
                foreach (SemanticGraphEdge rejected in incomingEdges)
                {
                    if (rejected != toKeep)
                    {
                        extraEdges.Add(rejected);
                    }
                }
            }
            // Remove the rejected edges only after the edge enumeration above has finished.
            foreach (SemanticGraphEdge removedExtra in extraEdges)
            {
                tree.RemoveEdge(removedExtra);
            }

            // Add apposition edges (simple coref)
            foreach (SemanticGraphEdge extraEdge in new List <SemanticGraphEdge>(extraEdges))
            {
                // note[gabor] iterate over a copy: extraEdges is appended to inside this loop
                foreach (SemanticGraphEdge incomingAppos in tree.IncomingEdgeIterable(extraEdge.GetDependent()))
                {
                    if (incomingAppos.GetRelation().ToString().Equals("appos"))
                    {
                        extraEdges.Add(new SemanticGraphEdge(extraEdge.GetGovernor(), incomingAppos.GetGovernor(), extraEdge.GetRelation(), extraEdge.GetWeight(), extraEdge.IsExtra()));
                    }
                }
                foreach (SemanticGraphEdge outgoingAppos in tree.OutgoingEdgeIterable(extraEdge.GetDependent()))
                {
                    if (outgoingAppos.GetRelation().ToString().Equals("appos"))
                    {
                        extraEdges.Add(new SemanticGraphEdge(extraEdge.GetGovernor(), outgoingAppos.GetDependent(), extraEdge.GetRelation(), extraEdge.GetWeight(), extraEdge.IsExtra()));
                    }
                }
            }

            // Brute force ensure tree
            // Remove incoming edges from roots
            IList <SemanticGraphEdge> rootIncomingEdges = new List <SemanticGraphEdge>();

            foreach (IndexedWord root in tree.GetRoots())
            {
                foreach (SemanticGraphEdge incomingEdge in tree.IncomingEdgeIterable(root))
                {
                    rootIncomingEdges.Add(incomingEdge);
                }
            }
            foreach (SemanticGraphEdge rootIncoming in rootIncomingEdges)
            {
                tree.RemoveEdge(rootIncoming);
            }

            // Loop until it becomes a tree.
            bool changed = true;

            while (changed)
            {
                // I just want trees to be trees; is that so much to ask!?
                changed = false;
                IList <IndexedWord>       danglingNodes = new List <IndexedWord>();
                IList <SemanticGraphEdge> invalidEdges  = new List <SemanticGraphEdge>();
                foreach (IndexedWord node in tree.VertexSet())
                {
                    // Collect statistics.
                    // MoveNext() advances the enumerator: after the first successful call Current is
                    // the first incoming edge, after the second successful call it is the second one.
                    IEnumerator <SemanticGraphEdge> incomingIter = tree.IncomingEdgeIterator(node);
                    bool hasIncoming         = incomingIter.MoveNext();
                    bool hasMultipleIncoming = hasIncoming && incomingIter.MoveNext();
                    // Register actions
                    if (!hasIncoming && !tree.GetRoots().Contains(node))
                    {
                        danglingNodes.Add(node);
                    }
                    else if (hasMultipleIncoming)
                    {
                        // Keep the first incoming edge; the second and all further ones are invalid.
                        do
                        {
                            invalidEdges.Add(incomingIter.Current);
                        }
                        while (incomingIter.MoveNext());
                    }
                }
                // Perform actions
                foreach (IndexedWord dangling in danglingNodes)
                {
                    tree.RemoveVertex(dangling);
                    changed = true;
                }
                foreach (SemanticGraphEdge invalid in invalidEdges)
                {
                    tree.RemoveEdge(invalid);
                    changed = true;
                }
            }

            // Edge case: remove duplicate dobj to "that."
            //            This is a common parse error.
            // The rewrites are collected first and applied after the vertex enumeration; each rewrite
            // only touches outgoing edges of its own governor, so deferring it does not change which
            // edges are selected.
            IList <SemanticGraphEdge> thatEdgesToRewrite = new List <SemanticGraphEdge>();
            foreach (IndexedWord governor in tree.VertexSet())
            {
                SemanticGraphEdge thatEdge = null;
                int dobjCount = 0;
                foreach (SemanticGraphEdge outgoing in tree.OutgoingEdgeIterable(governor))
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase("that", outgoing.GetDependent().Word()))
                    {
                        thatEdge = outgoing;
                    }
                    if ("dobj".Equals(outgoing.GetRelation().ToString()))
                    {
                        dobjCount += 1;
                    }
                }
                if (dobjCount > 1 && thatEdge != null)
                {
                    // Case: there are two dobj edges, and there is an edge to the word "that"
                    thatEdgesToRewrite.Add(thatEdge);
                }
            }
            foreach (SemanticGraphEdge rewritten in thatEdgesToRewrite)
            {
                // Action: rewrite the edge to "that" to be a "mark" edge.
                tree.RemoveEdge(rewritten);
                tree.AddEdge(rewritten.GetGovernor(), rewritten.GetDependent(), GrammaticalRelation.ValueOf(rewritten.GetRelation().GetLanguage(), "mark"), rewritten.GetWeight(), rewritten.IsExtra());
            }

            // Return
            System.Diagnostics.Debug.Assert(IsTree(tree));
            return(extraEdges);
        }