/// <summary>Drops leaf "case" edges whose governor is itself the target of an "nmod" edge.</summary>
/// <remarks>
/// An edge is selected when its relation prints as "case", its governor has at least one
/// incoming edge whose relation short name is "nmod", and its dependent has no outgoing edges.
/// Each selected edge is removed together with its dependent vertex.
/// This replicates the behavior of the old Stanford dependencies on universal dependencies.
/// </remarks>
/// <param name="tree">The tree to modify in place.</param>
public static void StripPrepCases(SemanticGraph tree)
{
    // Pass 1: collect the edges to drop; the graph is not modified while it is being walked.
    IList<SemanticGraphEdge> doomed = new List<SemanticGraphEdge>();
    foreach (SemanticGraphEdge candidate in tree.EdgeIterable())
    {
        if (!"case".Equals(candidate.GetRelation().ToString()))
        {
            continue;
        }
        // Does the governor hang off an 'nmod' edge?
        bool governorIsNmodTarget = false;
        foreach (SemanticGraphEdge parentEdge in tree.IncomingEdgeIterable(candidate.GetGovernor()))
        {
            if ("nmod".Equals(parentEdge.GetRelation().GetShortName()))
            {
                governorIsNmodTarget = true;
                break;
            }
        }
        if (!governorIsNmodTarget)
        {
            continue;
        }
        // Only leaf dependents are stripped.
        if (tree.OutgoingEdgeIterator(candidate.GetDependent()).MoveNext())
        {
            continue;
        }
        doomed.Add(candidate);
    }
    // Pass 2: delete each collected edge and its dependent, re-checking the tree invariant every time.
    foreach (SemanticGraphEdge caseEdge in doomed)
    {
        tree.RemoveEdge(caseEdge);
        tree.RemoveVertex(caseEdge.GetDependent());
        System.Diagnostics.Debug.Assert(IsTree(tree));
    }
}
/// <summary>Builds a new graph with the same structure as the given one, sharing its node objects.</summary>
/// <remarks>
/// Vertices are added to the new graph by reference (no copies are made), the root
/// collection of the source graph is handed to the new graph, and every edge is re-added
/// with the same governor, dependent, relation, weight and "extra" flag.
/// </remarks>
/// <param name="sg">The graph to duplicate.</param>
/// <returns>The newly created graph.</returns>
public static SemanticGraph DuplicateKeepNodes(SemanticGraph sg)
{
    SemanticGraph duplicate = new SemanticGraph();
    // Same node instances, new container.
    foreach (IndexedWord word in sg.VertexSet())
    {
        duplicate.AddVertex(word);
    }
    duplicate.SetRoots(sg.GetRoots());
    // Recreate each edge between the shared nodes.
    foreach (SemanticGraphEdge original in sg.EdgeIterable())
    {
        duplicate.AddEdge(
            original.GetGovernor(),
            original.GetDependent(),
            original.GetRelation(),
            original.GetWeight(),
            original.IsExtra());
    }
    return duplicate;
}
/// <summary>
/// Like makeFromGraphs, but it makes a deep copy of the graphs and
/// renumbers the index words.
/// </summary>
/// <remarks>
/// Every vertex is copied and its index is shifted by the running token offset of the
/// sentences that precede it, so indices stay unique in the merged graph.
/// <br />
/// <paramref name="lengths"/>
/// must be a vector containing the number of
/// tokens in each sentence. This is used to reindex the tokens.
/// </remarks>
/// <param name="graphs">The graphs to merge, in sentence order.</param>
/// <param name="lengths">The token count of each sentence, parallel to <paramref name="graphs"/>.</param>
/// <returns>A new graph containing copies of all vertices, edges and roots.</returns>
/// <exception cref="AssertionError">
/// If an edge refers to a vertex index that was not copied (counting problem or broken edge).
/// </exception>
public static SemanticGraph DeepCopyFromGraphs(IList<SemanticGraph> graphs, IList<int> lengths)
{
    SemanticGraph newGraph = new SemanticGraph();
    // Maps the shifted (merged-graph) index to the copied vertex.
    IDictionary<int, IndexedWord> newWords = Generics.NewHashMap();
    IList<IndexedWord> newRoots = new List<IndexedWord>();
    int vertexOffset = 0;
    for (int i = 0; i < graphs.Count; ++i)
    {
        SemanticGraph graph = graphs[i];
        foreach (IndexedWord vertex in graph.VertexSet())
        {
            IndexedWord newVertex = new IndexedWord(vertex);
            newVertex.SetIndex(vertex.Index() + vertexOffset);
            newGraph.AddVertex(newVertex);
            newWords[newVertex.Index()] = newVertex;
        }
        foreach (SemanticGraphEdge edge in graph.EdgeIterable())
        {
            // TryGetValue rather than the indexer: a dictionary indexer may throw on a
            // missing key, which would bypass the intended AssertionError below.
            IndexedWord gov;
            IndexedWord dep;
            bool hasGov = newWords.TryGetValue(edge.GetGovernor().Index() + vertexOffset, out gov);
            bool hasDep = newWords.TryGetValue(edge.GetDependent().Index() + vertexOffset, out dep);
            if (!hasGov || !hasDep || gov == null || dep == null)
            {
                throw new AssertionError("Counting problem (or broken edge)");
            }
            newGraph.AddEdge(gov, dep, edge.GetRelation(), edge.GetWeight(), edge.IsExtra());
        }
        foreach (IndexedWord root in graph.GetRoots())
        {
            newRoots.Add(newWords[root.Index() + vertexOffset]);
        }
        // Advance by the sentence's token count so the next graph's indices do not collide.
        vertexOffset += lengths[i];
    }
    newGraph.SetRoots(newRoots);
    return newGraph;
}
/// <summary>Writes the graph as two lines: the vertices on the first, the edges on the second.</summary>
/// <remarks>
/// A null graph is written as two empty lines. The vertex line starts with the document id
/// ("-" when missing or empty) and the sentence index, both read from the first vertex.
/// </remarks>
/// <param name="graph">The graph to write; may be null.</param>
/// <param name="pw">The writer that receives the output.</param>
private static void SaveDependencyGraph(SemanticGraph graph, PrintWriter pw)
{
    if (graph == null)
    {
        pw.Println();
        pw.Println();
        return;
    }

    // ---- Line 1: header (docid, sentence index) followed by one tab-separated entry per vertex.
    // The words themselves are not written; they are recovered from the original tokens.
    bool headerPending = true;
    foreach (IndexedWord word in graph.VertexSet())
    {
        if (headerPending)
        {
            string docId = word.Get(typeof(CoreAnnotations.DocIDAnnotation));
            pw.Print(string.IsNullOrEmpty(docId) ? "-" : docId);
            pw.Print("\t");
            pw.Print(word.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)));
            headerPending = false;
        }
        pw.Print("\t");
        pw.Print(word.Index());
        // Copy counts mark copied (or virtual) nodes generated due to CCs
        // (see EnglishGrammaticalStructure). They are usually unset, so print only when present.
        bool isCopy = word.CopyCount() > 0;
        if (isCopy)
        {
            pw.Print("-");
            pw.Print(word.CopyCount());
        }
        // Roots get an "-R" suffix; a non-copy root also gets an explicit "0" copy count.
        if (graph.GetRoots().Contains(word))
        {
            pw.Print(isCopy ? "-R" : "-0-R");
        }
    }
    pw.Println();

    // ---- Line 2: tab-separated edges, each as "relation source target [extra srcCopy tgtCopy]".
    bool needsSeparator = false;
    foreach (SemanticGraphEdge arc in graph.EdgeIterable())
    {
        if (needsSeparator)
        {
            pw.Print("\t");
        }
        needsSeparator = true;

        // No spaces allowed in the relation name; they might occur due to the
        // tokenization of HTML/XML/RDF tags.
        string relationName = arc.GetRelation().ToString().ReplaceAll("\\s+", string.Empty);
        pw.Print(relationName);
        pw.Print(" ");
        pw.Print(arc.GetSource().Index());
        pw.Print(" ");
        pw.Print(arc.GetTarget().Index());

        // The optional trailing fields are written only when at least one of them is non-default.
        bool hasOptionalFields = arc.IsExtra() || arc.GetSource().CopyCount() > 0 || arc.GetTarget().CopyCount() > 0;
        if (hasOptionalFields)
        {
            pw.Print(" ");
            pw.Print(arc.IsExtra());
            pw.Print(" ");
            pw.Print(arc.GetSource().CopyCount());
            pw.Print(" ");
            pw.Print(arc.GetTarget().CopyCount());
        }
    }
    pw.Println();
}
/// <summary>Fix some bizarre peculiarities with certain trees.</summary>
/// <remarks>
/// The cleanup runs in this order:
/// <ul>
/// <li>Remove punctuation-tagged vertices that have no outgoing edges.</li>
/// <li>Remove self-edges (governor and dependent share an index), re-attaching the children
/// of copy nodes to the other endpoint, and remove "punct" edges to leaf dependents.</li>
/// <li>Where a vertex is the dependent of an extra edge, keep a single incoming edge
/// and remove the others from the tree, collecting them for the caller.</li>
/// <li>Remove edges pointing into roots, then repeatedly remove dangling vertices and
/// surplus incoming edges until nothing changes.</li>
/// <li>If a vertex has more than one "dobj" edge and an edge to the word "that",
/// re-label the edge to "that" as "mark".</li>
/// </ul>
/// </remarks>
/// <param name="tree">The tree to clean (in place!).</param>
/// <returns>
/// A list of extra edges, which are valid but were removed. Edges derived through "appos"
/// relations are appended to this list as well; those are not added to the tree.
/// </returns>
public static IList<SemanticGraphEdge> CleanTree(SemanticGraph tree)
{
    // assert !isCyclic(tree);
    // Clean nodes
    IList<IndexedWord> toDelete = new List<IndexedWord>();
    foreach (IndexedWord vertex in tree.VertexSet())
    {
        // Clean punctuation
        if (vertex.Tag() == null)
        {
            continue;
        }
        string tagText = vertex.BackingLabel().Tag();
        if (string.IsNullOrEmpty(tagText))
        {
            // Nothing to inspect; indexing [0] on an empty tag would throw.
            continue;
        }
        char tag = tagText[0];
        if (tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':')
        {
            if (!tree.OutgoingEdgeIterator(vertex).MoveNext())
            {
                // This should really never happen, but it does.
                toDelete.Add(vertex);
            }
        }
    }
    foreach (IndexedWord doomedVertex in toDelete)
    {
        tree.RemoveVertex(doomedVertex);
    }

    // Clean edges.
    // Work on a snapshot: IEnumerator offers no Remove(), and edges are removed from the
    // graph while the loop runs so that later checks see the updated graph.
    IList<SemanticGraphEdge> edgeSnapshot = new List<SemanticGraphEdge>();
    foreach (SemanticGraphEdge snapshotEdge in tree.EdgeIterable())
    {
        edgeSnapshot.Add(snapshotEdge);
    }
    IList<Triple<IndexedWord, IndexedWord, SemanticGraphEdge>> toAdd = new List<Triple<IndexedWord, IndexedWord, SemanticGraphEdge>>();
    toDelete.Clear();
    foreach (SemanticGraphEdge currentEdge in edgeSnapshot)
    {
        if (currentEdge.GetDependent().Index() == currentEdge.GetGovernor().Index())
        {
            // Clean up copy-edges: hand the copy's children over to the other endpoint.
            if (currentEdge.GetDependent().IsCopy(currentEdge.GetGovernor()))
            {
                foreach (SemanticGraphEdge toCopy in tree.OutgoingEdgeIterable(currentEdge.GetDependent()))
                {
                    toAdd.Add(Triple.MakeTriple(currentEdge.GetGovernor(), toCopy.GetDependent(), toCopy));
                }
                toDelete.Add(currentEdge.GetDependent());
            }
            if (currentEdge.GetGovernor().IsCopy(currentEdge.GetDependent()))
            {
                foreach (SemanticGraphEdge toCopy in tree.OutgoingEdgeIterable(currentEdge.GetGovernor()))
                {
                    toAdd.Add(Triple.MakeTriple(currentEdge.GetDependent(), toCopy.GetDependent(), toCopy));
                }
                toDelete.Add(currentEdge.GetGovernor());
            }
            // Clean self-edges
            tree.RemoveEdge(currentEdge);
        }
        else if (currentEdge.GetRelation().ToString().Equals("punct"))
        {
            // Clean punctuation (again)
            if (!tree.OutgoingEdgeIterator(currentEdge.GetDependent()).MoveNext())
            {
                // This should really never happen, but it does.
                tree.RemoveEdge(currentEdge);
            }
        }
    }
    foreach (IndexedWord copyVertex in toDelete)
    {
        tree.RemoveVertex(copyVertex);
    }
    // (add edges we wanted to add)
    foreach (Triple<IndexedWord, IndexedWord, SemanticGraphEdge> pending in toAdd)
    {
        tree.AddEdge(pending.first, pending.second, pending.third.GetRelation(), pending.third.GetWeight(), pending.third.IsExtra());
    }

    // Handle extra edges.
    // Two cases:
    // (1) the extra edge is a subj/obj edge and the main edge is a conj:.*
    //     in this case, keep the extra
    // (2) otherwise, delete the extra
    IList<SemanticGraphEdge> extraEdges = new List<SemanticGraphEdge>();
    foreach (SemanticGraphEdge graphEdge in tree.EdgeIterable())
    {
        if (!graphEdge.IsExtra())
        {
            continue;
        }
        IList<SemanticGraphEdge> incomingEdges = tree.IncomingEdgeList(graphEdge.GetDependent());
        SemanticGraphEdge toKeep = null;
        foreach (SemanticGraphEdge candidate in incomingEdges)
        {
            if (toKeep == null)
            {
                toKeep = candidate;
            }
            else if (toKeep.GetRelation().ToString().StartsWith("conj", System.StringComparison.Ordinal)
                     && candidate.GetRelation().ToString().Matches(".subj.*|.obj.*"))
            {
                toKeep = candidate;
            }
            else if (!candidate.IsExtra()
                     && !(candidate.GetRelation().ToString().StartsWith("conj", System.StringComparison.Ordinal)
                          && toKeep.GetRelation().ToString().Matches(".subj.*|.obj.*")))
            {
                toKeep = candidate;
            }
        }
        foreach (SemanticGraphEdge rejected in incomingEdges)
        {
            if (rejected != toKeep)
            {
                extraEdges.Add(rejected);
            }
        }
    }
    // Removal happens only after the edge walk above has finished.
    foreach (SemanticGraphEdge removedExtra in extraEdges)
    {
        tree.RemoveEdge(removedExtra);
    }

    // Add apposition edges (simple coref)
    // Iterate over a copy, since extraEdges grows inside the loop.
    foreach (SemanticGraphEdge extraEdge in new List<SemanticGraphEdge>(extraEdges))
    {
        foreach (SemanticGraphEdge apposIn in tree.IncomingEdgeIterable(extraEdge.GetDependent()))
        {
            if (apposIn.GetRelation().ToString().Equals("appos"))
            {
                extraEdges.Add(new SemanticGraphEdge(extraEdge.GetGovernor(), apposIn.GetGovernor(), extraEdge.GetRelation(), extraEdge.GetWeight(), extraEdge.IsExtra()));
            }
        }
        foreach (SemanticGraphEdge apposOut in tree.OutgoingEdgeIterable(extraEdge.GetDependent()))
        {
            if (apposOut.GetRelation().ToString().Equals("appos"))
            {
                extraEdges.Add(new SemanticGraphEdge(extraEdge.GetGovernor(), apposOut.GetDependent(), extraEdge.GetRelation(), extraEdge.GetWeight(), extraEdge.IsExtra()));
            }
        }
    }

    // Brute force ensure tree
    // Remove incoming edges from roots
    IList<SemanticGraphEdge> rootIncomingEdges = new List<SemanticGraphEdge>();
    foreach (IndexedWord root in tree.GetRoots())
    {
        foreach (SemanticGraphEdge incomingEdge in tree.IncomingEdgeIterable(root))
        {
            rootIncomingEdges.Add(incomingEdge);
        }
    }
    foreach (SemanticGraphEdge rootEdge in rootIncomingEdges)
    {
        tree.RemoveEdge(rootEdge);
    }

    // Loop until it becomes a tree.
    bool changed = true;
    while (changed)
    {
        // I just want trees to be trees; is that so much to ask!?
        changed = false;
        IList<IndexedWord> danglingNodes = new List<IndexedWord>();
        IList<SemanticGraphEdge> invalidEdges = new List<SemanticGraphEdge>();
        foreach (IndexedWord node in tree.VertexSet())
        {
            // Collect statistics.
            // MoveNext() both tests and advances: after the first call the enumerator sits on
            // the first incoming edge, after the second call on the second one (if any).
            IEnumerator<SemanticGraphEdge> incomingIter = tree.IncomingEdgeIterator(node);
            bool hasIncoming = incomingIter.MoveNext();
            bool hasMultipleIncoming = hasIncoming && incomingIter.MoveNext();
            // Register actions
            if (!hasIncoming && !tree.GetRoots().Contains(node))
            {
                danglingNodes.Add(node);
            }
            else if (hasMultipleIncoming)
            {
                // Keep the first incoming edge; every edge from the second one on is invalid.
                do
                {
                    invalidEdges.Add(incomingIter.Current);
                }
                while (incomingIter.MoveNext());
            }
        }
        // Perform actions
        foreach (IndexedWord dangling in danglingNodes)
        {
            tree.RemoveVertex(dangling);
            changed = true;
        }
        foreach (SemanticGraphEdge invalid in invalidEdges)
        {
            tree.RemoveEdge(invalid);
            changed = true;
        }
    }

    // Edge case: remove duplicate dobj to "that."
    // This is a common parse error.
    foreach (IndexedWord head in tree.VertexSet())
    {
        SemanticGraphEdge thatEdge = null;
        int dobjCount = 0;
        foreach (SemanticGraphEdge outgoing in tree.OutgoingEdgeIterable(head))
        {
            if (Sharpen.Runtime.EqualsIgnoreCase("that", outgoing.GetDependent().Word()))
            {
                thatEdge = outgoing;
            }
            if ("dobj".Equals(outgoing.GetRelation().ToString()))
            {
                dobjCount += 1;
            }
        }
        if (dobjCount > 1 && thatEdge != null)
        {
            // Case: there are two dobj edges, one of which goes to the word "that"
            // Action: rewrite the dobj edge to "that" to be a "mark" edge.
            tree.RemoveEdge(thatEdge);
            tree.AddEdge(thatEdge.GetGovernor(), thatEdge.GetDependent(), GrammaticalRelation.ValueOf(thatEdge.GetRelation().GetLanguage(), "mark"), thatEdge.GetWeight(), thatEdge.IsExtra());
        }
    }

    // Return
    System.Diagnostics.Debug.Assert(IsTree(tree));
    return extraEdges;
}