/// <summary>
/// Collapses the subgraph reachable from the node bound to <c>rootName</c> into a
/// single replacement node. The replacement's value, word and lemma are the
/// space-joined values, words and lemmas of the subgraph nodes in sorted order.
/// The replacement inherits the incoming edges of the original root node (and its
/// root status, if it was a root of <paramref name="sg"/>); every node of the
/// original subgraph is removed from the graph.
/// </summary>
/// <param name="sg">Graph to edit; it is modified in place.</param>
/// <param name="sm">Matcher used to resolve the named root node.</param>
/// <exception cref="ArgumentException">
/// Thrown when a child of the root node can reach the root node again.
/// </exception>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord rootNode = this.GetNamedNode(rootName, sm);
    ICollection<IndexedWord> subgraphNodeSet = sg.GetSubgraphVertices(rootNode);

    if (!sg.IsDag(rootNode))
    {
        /* Check if there is a cycle going back to the root. */
        foreach (IndexedWord child in sg.GetChildren(rootNode))
        {
            ICollection<IndexedWord> reachableSet = sg.GetSubgraphVertices(child);
            if (reachableSet.Contains(rootNode))
            {
                throw new ArgumentException("Subtree cannot contain cycle leading back to root node!");
            }
        }
    }

    IList<IndexedWord> sortedSubgraphNodes = Generics.NewArrayList(subgraphNodeSet);
    sortedSubgraphNodes.Sort();

    IndexedWord newNode = new IndexedWord(rootNode.DocID(), rootNode.SentIndex(), rootNode.Index());

    /* Copy all attributes from rootNode. The keys must come from rootNode: the freshly
     * built node only carries what its constructor set, so iterating its own key set
     * (as this code used to) copied next to nothing. */
    foreach (Type key in rootNode.BackingLabel().KeySet())
    {
        newNode.Set(key, rootNode.Get(key));
    }

    /* Build the merged surface forms. The previous Stream().Map(null) calls passed a
     * null mapper, so the per-field projections are spelled out here instead. */
    List<string> values = new List<string>();
    List<string> words = new List<string>();
    List<string> lemmas = new List<string>();
    foreach (IndexedWord node in sortedSubgraphNodes)
    {
        values.Add(node.Value());
        words.Add(node.Word());
        /* Fall back to the word when a node has no lemma, so the joined lemma has no gaps. */
        lemmas.Add(node.Lemma() ?? node.Word());
    }
    newNode.SetValue(string.Join(" ", values.ToArray()));
    newNode.SetWord(string.Join(" ", words.ToArray()));
    newNode.SetLemma(string.Join(" ", lemmas.ToArray()));

    /* Snapshot everything we need from the old root BEFORE touching the graph, so the
     * edge iterable is never enumerated while edges are being added. */
    bool wasRoot = sg.GetRoots().Contains(rootNode);
    List<SemanticGraphEdge> incomingEdges = new List<SemanticGraphEdge>();
    foreach (SemanticGraphEdge edge in sg.IncomingEdgeIterable(rootNode))
    {
        incomingEdges.Add(edge);
    }

    /* Remove the old subgraph first. newNode is built with the same docID, sentence
     * index and index as rootNode; NOTE(review): if vertex equality is based on those
     * fields (not visible here), removing rootNode after inserting newNode would also
     * drop the replacement. Removing first is correct under either equality. */
    if (wasRoot)
    {
        sg.GetRoots().Remove(rootNode);
    }
    foreach (IndexedWord node in sortedSubgraphNodes)
    {
        sg.RemoveVertex(node);
    }

    /* Insert the replacement: it, not the deleted rootNode, becomes the root. */
    if (wasRoot)
    {
        sg.AddRoot(newNode);
    }
    foreach (SemanticGraphEdge edge in incomingEdges)
    {
        sg.AddEdge(edge.GetGovernor(), newNode, edge.GetRelation(), edge.GetWeight(), edge.IsExtra());
    }
}
/// <summary>
/// Collects nodes of <paramref name="g"/> starting at <paramref name="vertex"/> by
/// delegating to the recursive <c>DescendantsHelper</c> depth-first search.
/// </summary>
/// <param name="g">Graph to traverse.</param>
/// <param name="vertex">Start node of the search.</param>
/// <param name="allCutOffRels">Relation constraints forwarded unchanged to the helper.</param>
/// <param name="doNotAddThese">Nodes to exclude; may be null.</param>
/// <param name="ignoreCommonTags">Forwarded unchanged to the helper.</param>
/// <param name="acceptWord">Predicate tested against a node's backing label.</param>
/// <param name="feat">Feature map forwarded unchanged to the helper.</param>
/// <returns>
/// The collected node set. It is empty when <paramref name="vertex"/> is listed in
/// <paramref name="doNotAddThese"/> or is rejected by <paramref name="acceptWord"/>.
/// </returns>
/// <exception cref="System.Exception"/>
public static ICollection<IndexedWord> Descendants(SemanticGraph g, IndexedWord vertex, IList<string> allCutOffRels, IList<IndexedWord> doNotAddThese, bool ignoreCommonTags, IPredicate<CoreLabel> acceptWord, CollectionValuedMap<int, string> feat)
{
    ICollection<IndexedWord> collected = new HashSet<IndexedWord>();

    // The predicate is consulted only when the start node is not explicitly excluded.
    bool excluded = doNotAddThese != null && doNotAddThese.Contains(vertex);
    bool searchable = !excluded && acceptWord.Test(vertex.BackingLabel());

    if (searchable)
    {
        // Fresh visited list for this traversal.
        IList<IndexedWord> visited = new List<IndexedWord>();
        DescendantsHelper(g, vertex, collected, allCutOffRels, doNotAddThese, visited, ignoreCommonTags, acceptWord, feat);
    }

    return collected;
}
/// <summary>
/// Recursive depth-first step: records <paramref name="curr"/> as seen, adds it to
/// <paramref name="descendantSet"/> unless its tag is filtered out, then recurses into
/// every child that is not excluded by <paramref name="doNotAddThese"/>, by the
/// relation constraints, or by a cut-off tag pattern.
/// </summary>
/// <param name="g">Graph being traversed.</param>
/// <param name="curr">Node visited by this call.</param>
/// <param name="descendantSet">Output set that accumulates accepted nodes.</param>
/// <param name="allCutOffRels">Relation constraints passed to <c>CheckIfSatisfiesRelConstrains</c>.</param>
/// <param name="doNotAddThese">Nodes to exclude; may be null.</param>
/// <param name="seenNodes">Nodes already visited in this traversal; updated in place.</param>
/// <param name="ignoreCommonTags">When true, nodes whose trimmed tag is in <c>ignoreTags</c> are not added (their children are still explored).</param>
/// <param name="acceptWord">Predicate tested against the node's backing label.</param>
/// <param name="feat">Per-node-index feature lists; filled through <c>GetPatternsFromDataMultiClass.GetFeatures</c>.</param>
/// <exception cref="System.Exception"/>
private static void DescendantsHelper(SemanticGraph g, IndexedWord curr, ICollection<IndexedWord> descendantSet, IList<string> allCutOffRels, IList<IndexedWord> doNotAddThese, IList<IndexedWord> seenNodes, bool ignoreCommonTags, IPredicate<CoreLabel> acceptWord, CollectionValuedMap<int, string> feat)
{
    // Each node is processed at most once per traversal.
    if (seenNodes.Contains(curr))
    {
        return;
    }
    seenNodes.Add(curr);

    // Stop at nodes that are already collected, explicitly excluded, or rejected.
    if (descendantSet.Contains(curr))
    {
        return;
    }
    if (doNotAddThese != null && doNotAddThese.Contains(curr))
    {
        return;
    }
    if (!acceptWord.Test(curr.BackingLabel()))
    {
        return;
    }

    bool filteredByTag = ignoreCommonTags && ignoreTags.Contains(curr.Tag().Trim());
    if (!filteredByTag)
    {
        descendantSet.Add(curr);
    }

    foreach (IndexedWord kid in g.GetChildren(curr))
    {
        if (doNotAddThese != null && doNotAddThese.Contains(kid))
        {
            continue;
        }

        GrammaticalRelation relation = g.Reln(curr, kid);
        if (CheckIfSatisfiesRelConstrains(g, curr, kid, allCutOffRels, relation))
        {
            continue;
        }

        // Skip the child when its tag matches any cut-off pattern.
        bool hitCutoffTag = false;
        foreach (string tagPattern in cutoffTags)
        {
            if (!kid.Tag().Matches(tagPattern))
            {
                continue;
            }
            if (Debug >= 5)
            {
                System.Console.Out.WriteLine("ignored tag " + kid + " because it satisfied " + tagPattern);
            }
            hitCutoffTag = true;
            break;
        }
        if (hitCutoffTag)
        {
            continue;
        }

        // Make sure the current node has a feature list before features are appended.
        if (!feat.Contains(curr.Index()))
        {
            feat[curr.Index()] = new List<string>();
        }
        GetPatternsFromDataMultiClass.GetFeatures(g, curr, false, feat[curr.Index()], relation);

        DescendantsHelper(g, kid, descendantSet, allCutOffRels, doNotAddThese, seenNodes, ignoreCommonTags, acceptWord, feat);
    }
}
//using quote-removed depparses
/// <summary>
/// For every quote in <paramref name="doc"/> that has no mention yet, looks for a
/// subject/verb pair in the quote's dependency graph whose verb is a common speech
/// word and whose subject and verb both fall inside the remainder-of-sentence range,
/// and fills in the quote's mention from that subject.
/// </summary>
/// <remarks>
/// Subject handling depends on its tag:
/// <list type="bullet">
/// <item><c>NNP</c>: the first scanned name span accepted by <c>RangeContainsCharIndex</c>
/// for the subject's begin position is used; the scan over the remaining
/// subject/verb pairs then continues.</item>
/// <item><c>PRP</c>: the pronoun is used and the scan over pairs stops.</item>
/// <item><c>NN</c> contained in <c>animacySet</c>: the noun is used and the scan over
/// pairs stops.</item>
/// </list>
/// </remarks>
/// <param name="doc">Annotated document holding the quotations to attribute.</param>
public virtual void DependencyParses(Annotation doc)
{
    IList<ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));
    foreach (ICoreMap quote in quotes)
    {
        // Quotes that already carry a mention are left untouched.
        if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null)
        {
            continue;
        }
        Pair<int, int> range = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
        if (range == null)
        {
            continue;
        }

        //search for mentions in the first run
        Pair<List<string>, List<Pair<int, int>>> namesAndNameIndices = ScanForNames(range);
        List<string> names = namesAndNameIndices.first;
        List<Pair<int, int>> nameIndices = namesAndNameIndices.second;

        SemanticGraph graph = quote.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
        SemgrexMatcher matcher = subjVerbPattern.Matcher(graph);
        IList<Pair<IndexedWord, IndexedWord>> subjVerbPairs = new List<Pair<IndexedWord, IndexedWord>>();

        //TODO: check and see if this is necessary
        while (matcher.Find())
        {
            IndexedWord subj = matcher.GetNode("SUBJ");
            IndexedWord verb = matcher.GetNode("VERB");
            subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(subj, verb));
        }

        // Additionally pair an nsubj child of a verb with that verb's "dep" children
        // that are themselves tagged as verbs.
        IList<IndexedWord> vbs = graph.GetAllNodesByPartOfSpeechPattern("VB.*");
        foreach (IndexedWord iw in vbs)
        {
            // does it have an nsubj child?
            ICollection<IndexedWord> children = graph.GetChildren(iw);
            IList<IndexedWord> deps = Generics.NewArrayList();
            IndexedWord nsubj = null;
            foreach (IndexedWord child in children)
            {
                SemanticGraphEdge sge = graph.GetEdge(iw, child);
                if (sge.GetRelation().GetShortName().Equals("dep") && child.Tag().StartsWith("VB"))
                {
                    deps.Add(child);
                }
                else if (sge.GetRelation().GetShortName().Equals("nsubj"))
                {
                    nsubj = child;
                }
            }
            if (nsubj != null)
            {
                foreach (IndexedWord dep in deps)
                {
                    // Explicit type arguments, consistent with the pairs added above.
                    subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(nsubj, dep));
                }
            }
        }

        //look for a speech verb
        foreach (Pair<IndexedWord, IndexedWord> SVPair in subjVerbPairs)
        {
            IndexedWord verb = SVPair.second;
            IndexedWord subj = SVPair.first;

            //check if subj and verb outside of quote
            int verbTokPos = TokenToLocation(verb.BackingLabel());
            // Fixed: this used to look up the verb a second time, so the subject's
            // position was never actually range-checked.
            int subjTokPos = TokenToLocation(subj.BackingLabel());
            if (!InRange(range, verbTokPos) || !InRange(range, subjTokPos) || !commonSpeechWords.Contains(verb.Lemma()))
            {
                continue;
            }

            if (subj.Tag().Equals("NNP"))
            {
                int startChar = subj.BeginPosition();
                for (int i = 0; i < names.Count; i++)
                {
                    Pair<int, int> nameIndex = nameIndices[i];
                    // Only use a scanned name span that RangeContainsCharIndex accepts
                    // for the subject's begin position.
                    if (RangeContainsCharIndex(nameIndex, startChar))
                    {
                        FillInMention(quote, TokenRangeToString(nameIndex), nameIndex.first, nameIndex.second, sieveName, Name);
                        break; // leaves the name scan only; remaining pairs are still examined
                    }
                }
            }
            else if (subj.Tag().Equals("PRP"))
            {
                int loc = TokenToLocation(subj.BackingLabel());
                FillInMention(quote, subj.Word(), loc, loc, sieveName, Pronoun);
                break;
            }
            else if (subj.Tag().Equals("NN") && animacySet.Contains(subj.Word()))
            {
                int loc = TokenToLocation(subj.BackingLabel());
                FillInMention(quote, subj.Word(), loc, loc, sieveName, AnimateNoun);
                break;
            }
        }
    }
}