/// <summary>Extracts features from relative and interrogative pronouns.</summary>
/// <param name="sg">Dependency graph containing <paramref name="word"/>.</param>
/// <param name="word">The token to extract features for.</param>
/// <returns>A map with a "PronType" entry (Rel/Dem/Int) for wh-words, otherwise empty.</returns>
private static Dictionary<string, string> GetRelAndIntPronFeatures(SemanticGraph sg, IndexedWord word)
{
    Dictionary<string, string> features = new Dictionary<string, string>();
    /* Only wh-words (tags starting with "W": WDT, WP, WP$, WRB) carry these features. */
    if (word.Tag().StartsWith("W"))
    {
        bool isRel = false;
        IndexedWord parent = sg.GetParent(word);
        if (parent != null)
        {
            IndexedWord parentParent = sg.GetParent(parent);
            if (parentParent != null)
            {
                SemanticGraphEdge edge = sg.GetEdge(parentParent, parent);
                /* Fix: guard against a null edge before dereferencing, consistent with
                   the null check PronounCase performs on the same GetEdge pattern. */
                isRel = edge != null && edge.GetRelation().Equals(UniversalEnglishGrammaticalRelations.RelativeClauseModifier);
            }
        }
        if (isRel)
        {
            features["PronType"] = "Rel";
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "that"))
        {
            /* "that" used as a wh-word is demonstrative rather than interrogative. */
            features["PronType"] = "Dem";
        }
        else
        {
            features["PronType"] = "Int";
        }
    }
    return features;
}
/// <summary>Determine the case of the pronoun "you" or "it".</summary>
/// <returns>"Acc" when the pronoun is an object or heads a prepositional phrase, otherwise "Nom".</returns>
private static string PronounCase(SemanticGraph sg, IndexedWord word)
{
    /* Re-resolve the node in this graph by index. */
    word = sg.GetNodeByIndex(word.Index());
    IndexedWord parent = sg.GetParent(word);
    if (parent == null)
    {
        return "Nom";
    }
    SemanticGraphEdge edge = sg.GetEdge(parent, word);
    if (edge == null)
    {
        return "Nom";
    }
    if (UniversalEnglishGrammaticalRelations.Object.IsAncestor(edge.GetRelation()))
    {
        /* The pronoun is an object. */
        return "Acc";
    }
    bool nominalOrRoot = UniversalEnglishGrammaticalRelations.NominalModifier.IsAncestor(edge.GetRelation())
        || edge.GetRelation() == GrammaticalRelation.Root;
    if (nominalOrRoot && sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.CaseMarker))
    {
        /* The pronoun is the head of a prepositional phrase. */
        return "Acc";
    }
    return "Nom";
}
/// <summary>Extracts morphological features of <paramref name="word"/> that require the dependency graph.</summary>
/// <param name="sg">Dependency graph for the sentence.</param>
/// <param name="word">The token to extract features for.</param>
/// <returns>A (possibly empty) map of feature name to feature value.</returns>
private static Dictionary<string, string> GetGraphFeatures(SemanticGraph sg, IndexedWord word)
{
    Dictionary<string, string> features = new Dictionary<string, string>();
    /* Determine the case of "you" or "it". */
    if (word.Tag().Equals("PRP") && (Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "you") || Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "it")))
    {
        features["Case"] = PronounCase(sg, word);
    }
    /* Determine the person of "was". */
    if (word.Tag().Equals("VBD") && Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "was"))
    {
        string person = WasPerson(sg, word);
        if (person != null)
        {
            features["Person"] = person;
        }
    }
    /* Determine features of relative and interrogative pronouns. */
    features.PutAll(GetRelAndIntPronFeatures(sg, word));
    /* Determine features of gerunds and present participles. */
    if (word.Tag().Equals("VBG"))
    {
        if (HasBeAux(sg, word))
        {
            features["VerbForm"] = "Part";
            features["Tense"] = "Pres";
        }
        else
        {
            features["VerbForm"] = "Ger";
        }
    }
    /* Determine whether reflexive pronoun is reflexive or intensive. */
    if (word.Value().Matches(SelfRegex) && word.Tag().Equals("PRP"))
    {
        IndexedWord parent = sg.GetParent(word);
        if (parent != null)
        {
            SemanticGraphEdge edge = sg.GetEdge(parent, word);
            /* Fix: guard against a null edge before dereferencing (consistent with
               PronounCase). An npadvmod attachment marks an intensive use
               ("he himself did it"), which is excluded here. */
            if (edge == null || edge.GetRelation() != UniversalEnglishGrammaticalRelations.NpAdverbialModifier)
            {
                features["Case"] = "Acc";
                features["Reflex"] = "Yes";
            }
        }
    }
    /* Voice feature: a past participle with an auxpass child is passive. */
    if (word.Tag().Equals("VBN"))
    {
        if (sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.AuxPassiveModifier))
        {
            features["Voice"] = "Pass";
        }
    }
    return features;
}
/// <summary>
/// Builds the feature embedding for a mention by concatenating the mention-span average,
/// local context averages, sentence and document embeddings, and individual word embeddings
/// around the mention boundaries plus the head's governor.
/// </summary>
public virtual SimpleMatrix GetMentionEmbeddings(Mention m, SimpleMatrix docEmbedding)
{
    /* First incoming dependency edge of the head word, if any. */
    IEnumerator<SemanticGraphEdge> depIterator = m.enhancedDependency.IncomingEdgeIterator(m.headIndexedWord);
    SemanticGraphEdge depRelation = depIterator.MoveNext() ? depIterator.Current : null;
    SimpleMatrix mentionAvg = GetAverageEmbedding(m.sentenceWords, m.startIndex, m.endIndex);
    SimpleMatrix leftContextAvg = GetAverageEmbedding(m.sentenceWords, m.startIndex - 5, m.startIndex);
    SimpleMatrix rightContextAvg = GetAverageEmbedding(m.sentenceWords, m.endIndex, m.endIndex + 5);
    /* NOTE(review): averages all but the last sentence token — presumably to skip
       final punctuation; confirm against the reference model. */
    SimpleMatrix sentenceAvg = GetAverageEmbedding(m.sentenceWords.SubList(0, m.sentenceWords.Count - 1));
    SimpleMatrix governorEmbedding = GetWordEmbedding(depRelation == null ? null : depRelation.GetSource().Word());
    return NeuralUtils.Concatenate(
        mentionAvg,
        leftContextAvg,
        rightContextAvg,
        sentenceAvg,
        docEmbedding,
        GetWordEmbedding(m.sentenceWords, m.headIndex),
        GetWordEmbedding(m.sentenceWords, m.startIndex),
        GetWordEmbedding(m.sentenceWords, m.endIndex - 1),
        GetWordEmbedding(m.sentenceWords, m.startIndex - 1),
        GetWordEmbedding(m.sentenceWords, m.endIndex),
        GetWordEmbedding(m.sentenceWords, m.startIndex - 2),
        GetWordEmbedding(m.sentenceWords, m.endIndex + 1),
        governorEmbedding);
}
/// <summary>
/// Removes the edge between the matched gov and dep nodes whose relation equals
/// the relation string bound to <c>edgeName</c> by the matcher.
/// </summary>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    string relation = sm.GetRelnString(edgeName);
    IndexedWord govNode = GetNamedNode(govName, sm);
    IndexedWord depNode = GetNamedNode(depName, sm);
    SemanticGraphEdge target = sg.GetEdge(govNode, depNode, GrammaticalRelation.ValueOf(relation));
    if (target == null)
    {
        /* Nothing to remove. */
        return;
    }
    sg.RemoveEdge(target);
}
/// <summary>Probability that an object edge can be deleted without changing the clause's meaning.</summary>
/// <param name="edge">The object edge under consideration.</param>
/// <param name="neighbors">Sibling edges of <paramref name="edge"/> under the same governor.</param>
/// <returns>A deletion probability in [0, 1].</returns>
public virtual double ObjDeletionProbability(SemanticGraphEdge edge, IEnumerable<SemanticGraphEdge> neighbors)
{
    // Get information about the neighbors
    // (in a totally not-creepy-stalker sort of way)
    Optional<string> subj = Optional.Empty();
    Optional<string> pp = Optional.Empty();
    foreach (SemanticGraphEdge neighbor in neighbors)
    {
        if (neighbor != edge)
        {
            string neighborRel = neighbor.GetRelation().ToString();
            if (neighborRel.Contains("subj"))
            {
                subj = Optional.Of(neighbor.GetDependent().OriginalText().ToLower());
            }
            if (neighborRel.Contains("prep"))
            {
                pp = Optional.Of(neighborRel);
            }
            if (neighborRel.Contains("obj"))
            {
                // allow deleting second object
                return 1.0;
            }
        }
    }
    string obj = edge.GetDependent().OriginalText().ToLower();
    string verb = edge.GetGovernor().OriginalText().ToLower();
    // Compute the most informative drop probability we can.
    // Fix: 'double' is a non-nullable value type in C# and cannot be assigned null;
    // use a nullable double so the "no affinity found" fallbacks work.
    double? rawScore = null;
    if (subj.IsPresent() && pp.IsPresent())
    {
        // Case: subj + pp + obj affinity
        rawScore = verbSubjPPObjAffinity[Quadruple.MakeQuadruple(verb, subj.Get(), pp.Get(), obj)];
    }
    if (rawScore == null)
    {
        rawScore = verbObjAffinity[verb];
    }
    if (rawScore == null)
    {
        // No affinity statistics: fall back to the generic per-relation probability.
        return DeletionProbability(edge.GetRelation().ToString());
    }
    return 1.0 - Math.Min(1.0, rawScore.Value / upperProbabilityCap);
}
/// <summary>
/// Creates a pronominal mention headed by <paramref name="headword"/> and adds it to
/// <paramref name="mentions"/>, extending the span for "you all"/"they both" style
/// quantified pronouns and for pronouns inside a conjunction ("you and I").
/// </summary>
private void ExtractPronounForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    IList<CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    SemanticGraph basic = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    SemanticGraph enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    if (enhanced == null)
    {
        /* Fall back to the basic graph when no enhanced dependencies exist. */
        enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    }
    /* Spans are 0-based token offsets; IndexedWord.Index() is 1-based. */
    int beginIdx = headword.Index() - 1;
    int endIdx = headword.Index();
    // handle "you all", "they both" etc
    if (sent.Count > headword.Index() && sent[headword.Index()].Word().Matches("all|both"))
    {
        IndexedWord c = dep.GetNodeByIndex(headword.Index() + 1);
        SemanticGraphEdge edge = dep.GetEdge(headword, c);
        if (edge != null)
        {
            /* "all"/"both" depends on the pronoun: include it in the span. */
            endIdx++;
        }
    }
    IntPair mSpan = new IntPair(beginIdx, endIdx);
    if (!mentionSpanSet.Contains(mSpan) && (!InsideNE(mSpan, namedEntitySpanSet)))
    {
        int dummyMentionId = -1;
        Mention m = new Mention(dummyMentionId, beginIdx, endIdx, sent, basic, enhanced, new List<CoreLabel>(sent.SubList(beginIdx, endIdx)));
        m.headIndex = headword.Index() - 1;
        m.headWord = sent[m.headIndex];
        m.headString = m.headWord.Word().ToLower(Locale.English);
        mentions.Add(m);
        mentionSpanSet.Add(mSpan);
    }
    // when pronoun is a part of conjunction (e.g., you and I)
    ICollection<IndexedWord> conjChildren = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
    if (conjChildren.Count > 0)
    {
        /* Also add a mention covering the whole coordinated NP. */
        IntPair npSpan = GetNPSpan(headword, dep, sent);
        beginIdx = npSpan.Get(0);
        endIdx = npSpan.Get(1) + 1;
        if (",".Equals(sent[endIdx - 1].Word()))
        {
            endIdx--;
        }
        // try not to have span that ends with ,
        AddMention(beginIdx, endIdx, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
    }
}
/// <summary>Return a new <see cref="ParseResult"/> constructed from <paramref name="annotation"/></summary>
/// <remarks>
/// Reads the first sentence of the annotation, captures its constituency parse
/// (minus the ROOT node), its tokens, and its dependency edges.
/// </remarks>
internal ParseResult(Annotation annotation)
{
    java.util.AbstractList sentences = annotation.get(SentencesAnnotationClass) as java.util.AbstractList;
    CoreMap sentence = sentences.get(0) as CoreMap;
    LabeledScoredTreeNode constituencyParse = sentence.get(TreeAnnotationClass) as LabeledScoredTreeNode;
    // Skip the ROOT
    Tree childOfRoot = constituencyParse.firstChild();
    Constituents = childOfRoot;
    Constituents.indexLeaves();
    // Build the collection of tokens.
    // Fix: removed the unused locals 'mentions', 'tokenMentions' and 'tokenGender'
    // (their values were fetched but never read) and the stale commented-out code.
    var parsedTokens = sentence.get(TokensAnnotationClass) as java.util.AbstractList;
    for (int tokenIndex = 0; tokenIndex < parsedTokens.size(); tokenIndex++)
    {
        CoreLabel source = parsedTokens.get(tokenIndex) as CoreLabel;
        Tokens.Add(new ParseToken
        {
            Index = source.index(),
            Word = source.word(),
            Lemma = source.lemma(),
            PartOfSpeech = source.get(PartOfSpeechAnnotationClass) as string,
            NamedEntityClass = source.get(NamedEntityTagAnnotationClass) as string,
        });
    }
    // Create the list of dependencies between tokens.
    SemanticGraph dependencyGraph = sentence.get(DependencyAnnotationClass) as SemanticGraph;
    java.util.Iterator dependencyGraphEdges = dependencyGraph.edgeIterable().iterator();
    while (dependencyGraphEdges.hasNext())
    {
        SemanticGraphEdge edge = dependencyGraphEdges.next() as SemanticGraphEdge;
        string relationName = edge.getRelation().getShortName();
        string relationSpecifier = edge.getRelation().getSpecific();
        IndexedWord governor = edge.getGovernor();
        IndexedWord dependent = edge.getDependent();
        Dependencies.Add((relationName, relationSpecifier, governor.index(), dependent.index()));
    }
}
/// <summary>
/// Probability that a subject edge can be deleted: 1.0 when a sibling edge also
/// carries a subject relation, otherwise 0.0.
/// </summary>
public virtual double SubjDeletionProbability(SemanticGraphEdge edge, IEnumerable<SemanticGraphEdge> neighbors)
{
    /* A subject may only be dropped when another subject edge is present. */
    foreach (SemanticGraphEdge sibling in neighbors)
    {
        if (sibling == edge)
        {
            continue;
        }
        if (sibling.GetRelation().ToString().Contains("subj"))
        {
            return 1.0;
        }
    }
    return 0.0;
}
/// <summary>
/// Returns true if
/// <paramref name="word"/>
/// has an auxiliary verb attached to it.
/// </summary>
private static bool HasAux(SemanticGraph sg, IndexedWord word)
{
    /* Direct aux child. */
    if (sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.AuxModifier))
    {
        return true;
    }
    /* Otherwise the word may inherit an auxiliary through a conjunct or copula link. */
    IndexedWord gov = sg.GetParent(word);
    if (gov == null)
    {
        return false;
    }
    SemanticGraphEdge edge = sg.GetEdge(gov, word);
    bool inheritsFromGov = UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(edge.GetRelation())
        || UniversalEnglishGrammaticalRelations.Copula.Equals(edge.GetRelation());
    return inheritsFromGov && HasAux(sg, gov);
}
/// <summary>
/// Removes edges matching the relation; a wildcard gov or dep name matches any
/// incoming/outgoing edge of the known endpoint.
/// </summary>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    bool govWild = govName.Equals(WildcardNode);
    bool depWild = depName.Equals(WildcardNode);
    IndexedWord govNode = GetNamedNode(govName, sm);
    IndexedWord depNode = GetNamedNode(depName, sm);
    if (govNode != null && depNode != null)
    {
        /* Both endpoints known: remove the single matching edge. */
        SemanticGraphEdge edge = sg.GetEdge(govNode, depNode, relation);
        if (edge != null)
        {
            /* Fix: dropped the unused 'successFlag' local that captured the result. */
            sg.RemoveEdge(edge);
        }
    }
    else if (depNode != null && govWild)
    {
        // dep known, wildcard gov
        foreach (SemanticGraphEdge edge in sg.IncomingEdgeIterable(depNode))
        {
            if (edge.GetRelation().Equals(relation) && sg.ContainsEdge(edge))
            {
                sg.RemoveEdge(edge);
            }
        }
    }
    else if (govNode != null && depWild)
    {
        // gov known, wildcard dep
        foreach (SemanticGraphEdge edge in sg.OutgoingEdgeIterable(govNode))
        {
            if (edge.GetRelation().Equals(relation) && sg.ContainsEdge(edge))
            {
                sg.RemoveEdge(edge);
            }
        }
    }
}
/// <summary>Dispatches to the relation-specific deletion probability model for <paramref name="edge"/>.</summary>
public virtual double DeletionProbability(SemanticGraphEdge edge, IEnumerable<SemanticGraphEdge> neighbors)
{
    string edgeRel = edge.GetRelation().ToString();
    if (edgeRel.Contains("prep"))
    {
        return PpDeletionProbability(edge, neighbors);
    }
    if (edgeRel.Contains("obj"))
    {
        return ObjDeletionProbability(edge, neighbors);
    }
    if (edgeRel.Contains("subj"))
    {
        return SubjDeletionProbability(edge, neighbors);
    }
    if (edgeRel.Equals("amod"))
    {
        /* Privative adjectives ("fake", "former", ...) cannot be dropped
           without changing the meaning of the noun phrase. */
        string word = (edge.GetDependent().Lemma() != null ? edge.GetDependent().Lemma() : edge.GetDependent().Word()).ToLower();
        return Edu.Stanford.Nlp.Naturalli.Util.PrivativeAdjectives.Contains(word) ? 0.0 : 1.0;
    }
    /* Fall back to the generic per-relation-string model. */
    return DeletionProbability(edgeRel);
}
/// <summary>
/// Extracts noun-phrase and pronoun mentions from the basic dependency graph of
/// sentence <paramref name="s"/>, skipping determiner/compound dependents.
/// </summary>
private void ExtractNPorPRPFromDependency(ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    IList<CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    SemanticGraph basic = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    // DT is for "this, these, etc"
    IList<IndexedWord> nounsOrPrp = basic.GetAllNodesByPartOfSpeechPattern("N.*|PRP.*|DT");
    // Fix: removed the unused 'tree' local (only referenced from commented-out debug
    // code, which was deleted) and the redundant 'rel' local.
    foreach (IndexedWord w in nounsOrPrp)
    {
        SemanticGraphEdge edge = basic.GetEdge(basic.GetParent(w), w);
        // A null edge means w is the root of the graph.
        string shortname = (edge != null) ? edge.GetRelation().GetShortName() : "root";
        // TODO: what to remove? remove more?
        if (shortname.Matches("det|compound"))
        {
            /* Determiners and compound modifiers are not mention heads. */
            continue;
        }
        ExtractMentionForHeadword(w, basic, s, mentions, mentionSpanSet, namedEntitySpanSet);
    }
}
/// <summary>
/// Adds an edge with the configured relation between the matched gov and dep nodes,
/// unless an identical edge already exists.
/// </summary>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord govNode = GetNamedNode(govName, sm);
    IndexedWord depNode = GetNamedNode(depName, sm);
    if (sg.GetEdge(govNode, depNode, relation) != null)
    {
        /* Edge already present; nothing to do. */
        return;
    }
    // When adding the edge, check to see if the gov/dep nodes are presently in the graph.
    if (!sg.ContainsVertex(govNode))
    {
        sg.AddVertex(govNode);
    }
    if (!sg.ContainsVertex(depNode))
    {
        sg.AddVertex(depNode);
    }
    sg.AddEdge(govNode, depNode, relation, weight, false);
}
/// <summary>Determine the person of "was".</summary>
/// <returns>"1" for first person, "3" for third, or null when undeterminable.</returns>
private static string WasPerson(SemanticGraph sg, IndexedWord word)
{
    /* Look for a nominal subject, falling back to a passive nominal subject. */
    IndexedWord subj = sg.GetChildWithReln(word, UniversalEnglishGrammaticalRelations.NominalSubject)
        ?? sg.GetChildWithReln(word, UniversalEnglishGrammaticalRelations.NominalPassiveSubject);
    if (subj != null && Sharpen.Runtime.EqualsIgnoreCase(subj.Word(), "i"))
    {
        /* "I" is the subject of "was". */
        return "1";
    }
    IndexedWord parent = sg.GetParent(word);
    SemanticGraphEdge edge = parent == null ? null : sg.GetEdge(parent, word);
    if (edge == null)
    {
        /* No governor to consult: third person if any subject was found, else unknown. */
        return subj != null ? "3" : null;
    }
    if (UniversalEnglishGrammaticalRelations.AuxModifier.Equals(edge.GetRelation())
        || UniversalEnglishGrammaticalRelations.AuxPassiveModifier.Equals(edge.GetRelation()))
    {
        /* "was" is an auxiliary: the person comes from the main verb. */
        return WasPerson(sg, parent);
    }
    if (UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(edge.GetRelation()))
    {
        /* Check if the subject of the head of a conjunction is "I". */
        return WasPerson(sg, parent);
    }
    return "3";
}
/// <summary>
/// Returns true if
/// <paramref name="word"/>
/// has an inflection of "be" as an auxiliary.
/// </summary>
private static bool HasBeAux(SemanticGraph sg, IndexedWord word)
{
    foreach (IndexedWord aux in sg.GetChildrenWithReln(word, UniversalEnglishGrammaticalRelations.AuxModifier))
    {
        if (aux.Value().Matches(BeRegex))
        {
            return true;
        }
    }
    /* The word may inherit its auxiliary from the head of a conjunction. */
    IndexedWord gov = sg.GetParent(word);
    if (gov == null)
    {
        return false;
    }
    SemanticGraphEdge edge = sg.GetEdge(gov, word);
    return UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(edge.GetRelation()) && HasBeAux(sg, gov);
}
/// <summary>
/// return the left and right most node except copula relation (nsubj &amp; cop) and some others (maybe discourse?)
/// e.g., you are the person -> return "the person"
/// </summary>
private IntPair GetNPSpan(IndexedWord headword, SemanticGraph dep, IList<CoreLabel> sent)
{
    /* 0-based token offset of the head (IndexedWord.Index() is 1-based). */
    int headwordIdx = headword.Index() - 1;
    IList<IndexedWord> children = dep.GetChildList(headword);
    // if(children.size()==0) return new IntPair(headwordIdx, headwordIdx);  // the headword is the only word
    // check if we have copula relation
    IndexedWord cop = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
    /* Children before (and including) the copula are excluded from the NP. */
    int startIdx = (cop == null) ? 0 : children.IndexOf(cop) + 1;
    // children which will be inside of NP
    IList<IndexedWord> insideNP = Generics.NewArrayList();
    for (int i = startIdx; i < children.Count; i++)
    {
        IndexedWord child = children[i];
        SemanticGraphEdge edge = dep.GetEdge(headword, child);
        if (edge.GetRelation().GetShortName().Matches("dep|discourse|punct"))
        {
            /* skip: loose/discourse/punctuation dependents are not part of the NP */
            continue;
        }
        else
        {
            insideNP.Add(child);
        }
    }
    if (insideNP.Count == 0)
    {
        // the headword is the only word
        return (new IntPair(headwordIdx, headwordIdx));
    }
    /* Expand the first and last retained children to their own leftmost/rightmost descendants. */
    Pair<IndexedWord, IndexedWord> firstChildLeftRight = SemanticGraphUtils.LeftRightMostChildVertices(insideNP[0], dep);
    Pair<IndexedWord, IndexedWord> lastChildLeftRight = SemanticGraphUtils.LeftRightMostChildVertices(insideNP[insideNP.Count - 1], dep);
    // headword can be first or last word
    int beginIdx = Math.Min(headwordIdx, firstChildLeftRight.first.Index() - 1);
    int endIdx = Math.Max(headwordIdx, lastChildLeftRight.second.Index() - 1);
    return (new IntPair(beginIdx, endIdx));
}
/// <summary>
/// Adds POS, relation and (optionally) word indicator features for the governor of
/// dependency edge <paramref name="e"/>; increments a "no-{prefix}" feature when the edge is absent.
/// </summary>
private void AddDependencyFeatures(ICounter<string> features, string prefix, SemanticGraphEdge e, bool addWord)
{
    if (e == null)
    {
        features.IncrementCount("no-" + prefix);
        return;
    }
    IndexedWord governor = e.GetSource();
    string govPOS = governor.Tag();
    //String parentDir = e.getSource().beginPosition() < e.getTarget().beginPosition()
    //    ? "right" : "left";
    if (addWord)
    {
        features.IncrementCount(prefix + "-word=" + WordIndicator(governor.Word(), govPOS));
    }
    features.IncrementCount(prefix + "-POS=" + govPOS);
    features.IncrementCount(prefix + "-relation=" + e.GetRelation().ToString());
}
/// <summary>
/// Computes the pairwise classifier features for an (antecedent, anaphor) mention pair,
/// where <paramref name="m1"/> appears before <paramref name="m2"/> in the document.
/// Covers lexical, agreement, string-match, distance, discourse/dcoref, and syntax features.
/// </summary>
private ICounter<string> GetFeatures(Document doc, Mention m1, Mention m2)
{
    System.Diagnostics.Debug.Assert((m1.AppearEarlierThan(m2)));
    ICounter<string> features = new ClassicCounter<string>();
    // global features
    features.IncrementCount("bias");
    if (useDocSource)
    {
        features.IncrementCount("doc-type=" + doc.docType);
        if (doc.docInfo != null && doc.docInfo.Contains("DOC_ID"))
        {
            features.IncrementCount("doc-source=" + doc.docInfo["DOC_ID"].Split("/")[1]);
        }
    }
    // singleton feature conjunctions
    IList<string> singletonFeatures1 = m1.GetSingletonFeatures(dictionaries);
    IList<string> singletonFeatures2 = m2.GetSingletonFeatures(dictionaries);
    foreach (KeyValuePair<int, string> e in SingletonFeatures)
    {
        if (e.Key < singletonFeatures1.Count && e.Key < singletonFeatures2.Count)
        {
            features.IncrementCount(e.Value + "=" + singletonFeatures1[e.Key] + "_" + singletonFeatures2[e.Key]);
        }
    }
    // dependency / role features
    SemanticGraphEdge p1 = GetDependencyParent(m1);
    SemanticGraphEdge p2 = GetDependencyParent(m2);
    features.IncrementCount("dep-relations=" + (p1 == null ? "null" : p1.GetRelation()) + "_" + (p2 == null ? "null" : p2.GetRelation()));
    features.IncrementCount("roles=" + GetRole(m1) + "_" + GetRole(m2));
    // head-word features
    CoreLabel headCL1 = HeadWord(m1);
    CoreLabel headCL2 = HeadWord(m2);
    string headPOS1 = GetPOS(headCL1);
    string headPOS2 = GetPOS(headCL2);
    features.IncrementCount("head-pos-s=" + headPOS1 + "_" + headPOS2);
    features.IncrementCount("head-words=" + WordIndicator("h_" + headCL1.Word().ToLower() + "_" + headCL2.Word().ToLower(), headPOS1 + "_" + headPOS2));
    // agreement features
    AddFeature(features, "animacies-agree", m2.AnimaciesAgree(m1));
    AddFeature(features, "attributes-agree", m2.AttributesAgree(m1, dictionaries));
    AddFeature(features, "entity-types-agree", m2.EntityTypesAgree(m1, dictionaries));
    AddFeature(features, "numbers-agree", m2.NumbersAgree(m1));
    AddFeature(features, "genders-agree", m2.GendersAgree(m1));
    AddFeature(features, "ner-strings-equal", m1.nerString.Equals(m2.nerString));
    // string matching features
    AddFeature(features, "antecedent-head-in-anaphor", HeadContainedIn(m1, m2));
    AddFeature(features, "anaphor-head-in-antecedent", HeadContainedIn(m2, m1));
    /* Span-level matching is only meaningful for non-pronominal mentions. */
    if (m1.mentionType != Dictionaries.MentionType.Pronominal && m2.mentionType != Dictionaries.MentionType.Pronominal)
    {
        AddFeature(features, "antecedent-in-anaphor", m2.SpanToString().ToLower().Contains(m1.SpanToString().ToLower()));
        AddFeature(features, "anaphor-in-antecedent", m1.SpanToString().ToLower().Contains(m2.SpanToString().ToLower()));
        AddFeature(features, "heads-equal", Sharpen.Runtime.EqualsIgnoreCase(m1.headString, m2.headString));
        AddFeature(features, "heads-agree", m2.HeadsAgree(m1));
        AddFeature(features, "exact-match", m1.ToString().Trim().ToLower().Equals(m2.ToString().Trim().ToLower()));
        AddFeature(features, "partial-match", RelaxedStringMatch(m1, m2));
        /* Length-normalized edit distances, both raw and bucketed to one decimal. */
        double editDistance = StringUtils.EditDistance(m1.SpanToString(), m2.SpanToString()) / (double)(m1.SpanToString().Length + m2.SpanToString().Length);
        features.IncrementCount("edit-distance", editDistance);
        features.IncrementCount("edit-distance=" + ((int)(editDistance * 10) / 10.0));
        double headEditDistance = StringUtils.EditDistance(m1.headString, m2.headString) / (double)(m1.headString.Length + m2.headString.Length);
        features.IncrementCount("head-edit-distance", headEditDistance);
        features.IncrementCount("head-edit-distance=" + ((int)(headEditDistance * 10) / 10.0));
    }
    // distance features
    AddNumeric(features, "mention-distance", m2.mentionNum - m1.mentionNum);
    AddNumeric(features, "sentence-distance", m2.sentNum - m1.sentNum);
    if (m2.sentNum == m1.sentNum)
    {
        AddNumeric(features, "word-distance", m2.startIndex - m1.endIndex);
        if (m1.endIndex > m2.startIndex)
        {
            features.IncrementCount("spans-intersect");
        }
    }
    // setup for dcoref features
    ICollection<Mention> ms1 = new HashSet<Mention>();
    ms1.Add(m1);
    ICollection<Mention> ms2 = new HashSet<Mention>();
    ms2.Add(m2);
    // NOTE(review): a fresh Random per call — presumably the cluster ids only need to
    // be arbitrary, not stable; confirm nothing downstream depends on them.
    Random r = new Random();
    CorefCluster c1 = new CorefCluster(20000 + r.NextInt(10000), ms1);
    CorefCluster c2 = new CorefCluster(10000 + r.NextInt(10000), ms2);
    string s2 = m2.LowercaseNormalizedSpanString();
    string s1 = m1.LowercaseNormalizedSpanString();
    // discourse dcoref features
    AddFeature(features, "mention-speaker-PER0", Sharpen.Runtime.EqualsIgnoreCase(m2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation)), "PER0"));
    AddFeature(features, "antecedent-is-anaphor-speaker", CorefRules.AntecedentIsMentionSpeaker(doc, m2, m1, dictionaries));
    AddFeature(features, "same-speaker", CorefRules.EntitySameSpeaker(doc, m2, m1));
    AddFeature(features, "person-disagree-same-speaker", CorefRules.EntityPersonDisagree(doc, m2, m1, dictionaries) && CorefRules.EntitySameSpeaker(doc, m2, m1));
    AddFeature(features, "antecedent-matches-anaphor-speaker", CorefRules.AntecedentMatchesMentionSpeakerAnnotation(m2, m1, doc));
    AddFeature(features, "discourse-you-PER0", m2.person == Dictionaries.Person.You && doc.docType == Document.DocType.Article && m2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation)).Equals("PER0"));
    AddFeature(features, "speaker-match-i-i", m2.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s1) && m1.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s2) && CorefRules.EntitySameSpeaker(doc, m2, m1));
    AddFeature(features, "speaker-match-speaker-i", m2.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s2) && CorefRules.AntecedentIsMentionSpeaker(doc, m2, m1, dictionaries));
    AddFeature(features, "speaker-match-i-speaker", m1.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s1) && CorefRules.AntecedentIsMentionSpeaker(doc, m1, m2, dictionaries));
    AddFeature(features, "speaker-match-you-you", dictionaries.secondPersonPronouns.Contains(s1) && dictionaries.secondPersonPronouns.Contains(s2) && CorefRules.EntitySameSpeaker(doc, m2, m1));
    AddFeature(features, "discourse-between-two-person", ((m2.person == Dictionaries.Person.I && m1.person == Dictionaries.Person.You || (m2.person == Dictionaries.Person.You && m1.person == Dictionaries.Person.I)) && (m2.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) - m1.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) == 1) && doc.docType == Document.DocType.Conversation));
    AddFeature(features, "incompatible-not-match", m1.person != Dictionaries.Person.I && m2.person != Dictionaries.Person.I && (CorefRules.AntecedentIsMentionSpeaker(doc, m1, m2, dictionaries) || CorefRules.AntecedentIsMentionSpeaker(doc, m2, m1, dictionaries)));
    int utteranceDist = Math.Abs(m1.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) - m2.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)));
    /* Adjacent utterances by different speakers: matching person pronouns should disagree. */
    if (doc.docType != Document.DocType.Article && utteranceDist == 1 && !CorefRules.EntitySameSpeaker(doc, m2, m1))
    {
        AddFeature(features, "speaker-mismatch-i-i", m1.person == Dictionaries.Person.I && m2.person == Dictionaries.Person.I);
        AddFeature(features, "speaker-mismatch-you-you", m1.person == Dictionaries.Person.You && m2.person == Dictionaries.Person.You);
        AddFeature(features, "speaker-mismatch-we-we", m1.person == Dictionaries.Person.We && m2.person == Dictionaries.Person.We);
    }
    // other dcoref features
    string firstWord1 = FirstWord(m1).Word().ToLower();
    AddFeature(features, "indefinite-article-np", (m1.appositions == null && m1.predicateNominatives == null && (firstWord1.Equals("a") || firstWord1.Equals("an"))));
    AddFeature(features, "far-this", m2.LowercaseNormalizedSpanString().Equals("this") && Math.Abs(m2.sentNum - m1.sentNum) > 3);
    AddFeature(features, "per0-you-in-article", m2.person == Dictionaries.Person.You && doc.docType == Document.DocType.Article && m2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation)).Equals("PER0"));
    AddFeature(features, "inside-in", m2.InsideIn(m1) || m1.InsideIn(m2));
    AddFeature(features, "indefinite-determiners", dictionaries.indefinitePronouns.Contains(m1.originalSpan[0].Lemma()) || dictionaries.indefinitePronouns.Contains(m2.originalSpan[0].Lemma()));
    AddFeature(features, "entity-attributes-agree", CorefRules.EntityAttributesAgree(c2, c1));
    AddFeature(features, "entity-token-distance", CorefRules.EntityTokenDistance(m2, m1));
    AddFeature(features, "i-within-i", CorefRules.EntityIWithinI(m2, m1, dictionaries));
    AddFeature(features, "exact-string-match", CorefRules.EntityExactStringMatch(c2, c1, dictionaries, doc.roleSet));
    AddFeature(features, "entity-relaxed-heads-agree", CorefRules.EntityRelaxedHeadsAgreeBetweenMentions(c2, c1, m2, m1));
    AddFeature(features, "is-acronym", CorefRules.EntityIsAcronym(doc, c2, c1));
    AddFeature(features, "demonym", m2.IsDemonym(m1, dictionaries));
    AddFeature(features, "incompatible-modifier", CorefRules.EntityHaveIncompatibleModifier(m2, m1));
    AddFeature(features, "head-lemma-match", m1.headWord.Lemma().Equals(m2.headWord.Lemma()));
    AddFeature(features, "words-included", CorefRules.EntityWordsIncluded(c2, c1, m2, m1));
    AddFeature(features, "extra-proper-noun", CorefRules.EntityHaveExtraProperNoun(m2, m1, new HashSet<string>()));
    AddFeature(features, "number-in-later-mentions", CorefRules.EntityNumberInLaterMention(m2, m1));
    AddFeature(features, "sentence-context-incompatible", CorefRules.SentenceContextIncompatible(m2, m1, dictionaries));
    // syntax features
    if (useConstituencyParse)
    {
        if (m1.sentNum == m2.sentNum)
        {
            /* Count S-nodes between the anaphor's subtree and the node dominating both mentions. */
            int clauseCount = 0;
            Tree tree = m2.contextParseTree;
            Tree current = m2.mentionSubTree;
            while (true)
            {
                current = current.Ancestor(1, tree);
                if (current.Label().Value().StartsWith("S"))
                {
                    clauseCount++;
                }
                if (current.Dominates(m1.mentionSubTree))
                {
                    break;
                }
                if (current.Label().Value().Equals("ROOT") || current.Ancestor(1, tree) == null)
                {
                    break;
                }
            }
            features.IncrementCount("clause-count", clauseCount);
            features.IncrementCount("clause-count=" + Bin(clauseCount));
        }
        if (RuleBasedCorefMentionFinder.IsPleonastic(m2, m2.contextParseTree) || RuleBasedCorefMentionFinder.IsPleonastic(m1, m1.contextParseTree))
        {
            features.IncrementCount("pleonastic-it");
        }
        if (MaximalNp(m1.mentionSubTree) == MaximalNp(m2.mentionSubTree))
        {
            features.IncrementCount("same-maximal-np");
        }
        bool m1Embedded = HeadEmbeddingLevel(m1.mentionSubTree, m1.headIndex - m1.startIndex) > 1;
        bool m2Embedded = HeadEmbeddingLevel(m2.mentionSubTree, m2.headIndex - m2.startIndex) > 1;
        features.IncrementCount("embedding=" + m1Embedded + "_" + m2Embedded);
    }
    return (features);
}
/// <summary>
/// Computes the single-mention (anaphoricity) features for <paramref name="m"/>:
/// type, length/location, lexical context, dependency path, constituency context,
/// containment, and dcoref-rule indicators.
/// </summary>
private ICounter<string> GetFeatures(Document doc, Mention m, IDictionary<int, IList<Mention>> mentionsByHeadIndex)
{
    ICounter<string> features = new ClassicCounter<string>();
    // type features
    features.IncrementCount("mention-type=" + m.mentionType);
    features.IncrementCount("gender=" + m.gender);
    features.IncrementCount("person-fine=" + m.person);
    features.IncrementCount("head-ne-type=" + m.nerString);
    IList<string> singletonFeatures = m.GetSingletonFeatures(dictionaries);
    foreach (KeyValuePair<int, string> e in SingletonFeatures)
    {
        if (e.Key < singletonFeatures.Count)
        {
            features.IncrementCount(e.Value + "=" + singletonFeatures[e.Key]);
        }
    }
    // length and location features
    AddNumeric(features, "mention-length", m.SpanToString().Length);
    AddNumeric(features, "mention-words", m.originalSpan.Count);
    AddNumeric(features, "sentence-words", m.sentenceWords.Count);
    features.IncrementCount("sentence-words=" + Bin(m.sentenceWords.Count));
    features.IncrementCount("mention-position", m.mentionNum / (double)doc.predictedMentions.Count);
    features.IncrementCount("sentence-position", m.sentNum / (double)doc.numSentences);
    // lexical features: words and POS tags in a +/-2 window around the mention
    CoreLabel firstWord = FirstWord(m);
    CoreLabel lastWord = LastWord(m);
    CoreLabel headWord = HeadWord(m);
    CoreLabel prevWord = PrevWord(m);
    CoreLabel nextWord = NextWord(m);
    CoreLabel prevprevWord = PrevprevWord(m);
    CoreLabel nextnextWord = NextnextWord(m);
    string headPOS = GetPOS(headWord);
    string firstPOS = GetPOS(firstWord);
    string lastPOS = GetPOS(lastWord);
    string prevPOS = GetPOS(prevWord);
    string nextPOS = GetPOS(nextWord);
    string prevprevPOS = GetPOS(prevprevWord);
    string nextnextPOS = GetPOS(nextnextWord);
    features.IncrementCount("first-word=" + WordIndicator(firstWord, firstPOS));
    features.IncrementCount("last-word=" + WordIndicator(lastWord, lastPOS));
    features.IncrementCount("head-word=" + WordIndicator(headWord, headPOS));
    features.IncrementCount("next-word=" + WordIndicator(nextWord, nextPOS));
    features.IncrementCount("prev-word=" + WordIndicator(prevWord, prevPOS));
    features.IncrementCount("next-bigram=" + WordIndicator(nextWord, nextnextWord, nextPOS + "_" + nextnextPOS));
    features.IncrementCount("prev-bigram=" + WordIndicator(prevprevWord, prevWord, prevprevPOS + "_" + prevPOS));
    features.IncrementCount("next-pos=" + nextPOS);
    features.IncrementCount("prev-pos=" + prevPOS);
    features.IncrementCount("first-pos=" + firstPOS);
    features.IncrementCount("last-pos=" + lastPOS);
    features.IncrementCount("next-pos-bigram=" + nextPOS + "_" + nextnextPOS);
    features.IncrementCount("prev-pos-bigram=" + prevprevPOS + "_" + prevPOS);
    AddDependencyFeatures(features, "parent", GetDependencyParent(m), true);
    AddFeature(features, "ends-with-head", m.headIndex == m.endIndex - 1);
    AddFeature(features, "is-generic", m.originalSpan.Count == 1 && firstPOS.Equals("NNS"));
    // syntax features: dependency path upward from the head, capped at 3 hops
    IndexedWord w = m.headIndexedWord;
    string depPath = string.Empty;
    int depth = 0;
    while (w != null)
    {
        SemanticGraphEdge e_1 = GetDependencyParent(m, w);
        depth++;
        if (depth <= 3 && e_1 != null)
        {
            depPath += (depPath.IsEmpty() ? string.Empty : "_") + e_1.GetRelation().ToString();
            features.IncrementCount("dep-path=" + depPath);
            w = e_1.GetSource();
        }
        else
        {
            w = null;
        }
    }
    if (useConstituencyParse)
    {
        /* Embedding depth of the head inside the full parse vs. the mention subtree. */
        int fullEmbeddingLevel = HeadEmbeddingLevel(m.contextParseTree, m.headIndex);
        int mentionEmbeddingLevel = HeadEmbeddingLevel(m.mentionSubTree, m.headIndex - m.startIndex);
        if (fullEmbeddingLevel != -1 && mentionEmbeddingLevel != -1)
        {
            features.IncrementCount("mention-embedding-level=" + Bin(fullEmbeddingLevel - mentionEmbeddingLevel));
            features.IncrementCount("head-embedding-level=" + Bin(mentionEmbeddingLevel));
        }
        else
        {
            features.IncrementCount("undetermined-embedding-level");
        }
        features.IncrementCount("num-embedded-nps=" + Bin(NumEmbeddedNps(m.mentionSubTree)));
        /* Category path from the head's preterminal toward the root, capped at depth 4 or the first S. */
        string syntaxPath = string.Empty;
        Tree tree = m.contextParseTree;
        Tree head = tree.GetLeaves()[m.headIndex].Ancestor(1, tree);
        depth = 0;
        foreach (Tree node in tree.PathNodeToNode(head, tree))
        {
            syntaxPath += node.Value() + "-";
            features.IncrementCount("syntax-path=" + syntaxPath);
            depth++;
            if (depth >= 4 || node.Value().Equals("S"))
            {
                break;
            }
        }
    }
    // mention containment features
    // NOTE(review): AnyMatch(null) looks like a transpilation artifact — the original
    // Java lambdas were not converted; both features are presumably broken. Confirm
    // against the Java source before relying on them.
    AddFeature(features, "contained-in-other-mention", mentionsByHeadIndex[m.headIndex].Stream().AnyMatch(null));
    AddFeature(features, "contains-other-mention", mentionsByHeadIndex[m.headIndex].Stream().AnyMatch(null));
    // features from dcoref rules
    AddFeature(features, "bare-plural", m.originalSpan.Count == 1 && headPOS.Equals("NNS"));
    AddFeature(features, "quantifier-start", dictionaries.quantifiers.Contains(firstWord.Word().ToLower()));
    AddFeature(features, "negative-start", firstWord.Word().ToLower().Matches("none|no|nothing|not"));
    AddFeature(features, "partitive", RuleBasedCorefMentionFinder.PartitiveRule(m, m.sentenceWords, dictionaries));
    AddFeature(features, "adjectival-demonym", dictionaries.IsAdjectivalDemonym(m.SpanToString()));
    if (doc.docType != Document.DocType.Article && m.person == Dictionaries.Person.You && nextWord != null && Sharpen.Runtime.EqualsIgnoreCase(nextWord.Word(), "know"))
    {
        /* "you know" in conversation is generic, not referential. */
        features.IncrementCount("generic-you");
    }
    return (features);
}
/// <summary>
/// Renders a <c>SemanticGraph</c> as CoNLL-U text: comment lines first, then one
/// 10-column tab-separated row per token, terminated by a blank line.
/// </summary>
/// <param name="sg">The dependency graph to print; vertices are emitted in sorted order.</param>
/// <param name="unescapeParenthesis">If true, rewrites LRB/RRB bracket tokens back to literal "(" / ")" in the word and lemma columns.</param>
/// <returns>The CoNLL-U string for the whole graph (never null).</returns>
public virtual string PrintSemanticGraph(SemanticGraph sg, bool unescapeParenthesis)
{
    // A pure tree needs no DEPS column content; "_" is emitted instead of extra deps.
    bool isTree = SemanticGraphUtils.IsTree(sg);
    StringBuilder sb = new StringBuilder();
    /* Print comments. */
    foreach (string comment in sg.GetComments())
    {
        sb.Append(comment).Append("\n");
    }
    foreach (IndexedWord token in sg.VertexListSorted())
    {
        /* Check for multiword tokens: emit the "start-end" range row once, at the
         * first token of the span (all other columns are underscores per CoNLL-U). */
        if (token.ContainsKey(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation)))
        {
            IntPair tokenSpan = token.Get(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation));
            if (tokenSpan.GetSource() == token.Index())
            {
                // NOTE(review): "%d-%d"/"%s"/"%n" are Java Formatter specifiers — presumably
                // string.Format here is a Sharpen shim with Java semantics; TODO confirm.
                string range = string.Format("%d-%d", tokenSpan.GetSource(), tokenSpan.GetTarget());
                sb.Append(string.Format("%s\t%s\t_\t_\t_\t_\t_\t_\t_\t_%n", range, token.OriginalText()));
            }
        }
        /* Try to find main governor and additional dependencies. The first non-extra
         * incoming edge becomes the basic HEAD/DEPREL; every incoming edge (extra or
         * not) is recorded in the enhanced-dependency map. */
        string govIdx = null;
        GrammaticalRelation reln = null;
        Dictionary<string, string> enhancedDependencies = new Dictionary<string, string>();
        foreach (IndexedWord parent in sg.GetParents(token))
        {
            SemanticGraphEdge edge = sg.GetEdge(parent, token);
            if (govIdx == null && !edge.IsExtra())
            {
                govIdx = parent.ToCopyIndex();
                reln = edge.GetRelation();
            }
            enhancedDependencies[parent.ToCopyIndex()] = edge.GetRelation().ToString();
        }
        string additionalDepsString = isTree ? "_" : CoNLLUUtils.ToExtraDepsString(enhancedDependencies);
        string word = token.Word();
        string featuresString = CoNLLUUtils.ToFeatureString(token.Get(typeof(CoreAnnotations.CoNLLUFeats)));
        // "_" is the CoNLL-U placeholder used when an annotation is absent.
        string pos = token.GetString<CoreAnnotations.PartOfSpeechAnnotation>("_");
        string upos = token.GetString<CoreAnnotations.CoarseTagAnnotation>("_");
        string misc = token.GetString<CoreAnnotations.CoNLLUMisc>("_");
        string lemma = token.GetString<CoreAnnotations.LemmaAnnotation>("_");
        string relnName = reln == null ? "_" : reln.ToString();
        /* Root: a token with no (non-extra) governor that the graph lists as a root
         * gets HEAD=0 and the root relation; any other governor-less token gets "_". */
        if (govIdx == null && sg.GetRoots().Contains(token))
        {
            govIdx = "0";
            relnName = GrammaticalRelation.Root.ToString();
            additionalDepsString = isTree ? "_" : "0:" + relnName;
        }
        else
        {
            if (govIdx == null)
            {
                govIdx = "_";
                relnName = "_";
            }
        }
        if (unescapeParenthesis)
        {
            word = word.ReplaceAll(LrbPattern, "(");
            word = word.ReplaceAll(RrbPattern, ")");
            lemma = lemma.ReplaceAll(LrbPattern, "(");
            lemma = lemma.ReplaceAll(RrbPattern, ")");
        }
        // ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC — the 10 CoNLL-U columns.
        sb.Append(string.Format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n", token.ToCopyIndex(), word, lemma, upos, pos, featuresString, govIdx, relnName, additionalDepsString, misc));
    }
    sb.Append("\n");
    return (sb.ToString());
}
/// <summary>The search algorithm, starting with a full sentence and iteratively shortening it to its entailed sentences.</summary>
/// <remarks>
/// NOTE(review): this is machine-converted code; several lambda arguments were lost in
/// translation and appear as literal <c>null</c> (e.g. <c>Filter(null)</c>, <c>ForEach(null)</c>,
/// <c>Lazy.Of(null)</c>). Those calls will not behave as the original Java did — verify
/// against the Java source before relying on this method.
/// </remarks>
/// <returns>A list of search results, corresponding to shortenings of the sentence.</returns>
private IList<ForwardEntailerSearchProblem.SearchResult> SearchImplementation()
{
    // Pre-process the tree: work on a copy so the field's tree is untouched.
    SemanticGraph parseTree = new SemanticGraph(this.parseTree);
    System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(parseTree));
    // (remove common determiners)
    // NOTE(review): the filter/action lambdas were dropped by the converter (null args).
    IList<string> determinerRemovals = new List<string>();
    parseTree.GetLeafVertices().Stream().Filter(null).ForEach(null);
    // (cut conj_and nodes) — remember each removed conj:and edge so it can be re-added
    // to result trees later.
    ICollection<SemanticGraphEdge> andsToAdd = new HashSet<SemanticGraphEdge>();
    foreach (IndexedWord vertex in parseTree.VertexSet())
    {
        if (parseTree.InDegree(vertex) > 1)
        {
            SemanticGraphEdge conjAnd = null;
            foreach (SemanticGraphEdge edge in parseTree.IncomingEdgeIterable(vertex))
            {
                if ("conj:and".Equals(edge.GetRelation().ToString()))
                {
                    conjAnd = edge;
                }
            }
            if (conjAnd != null)
            {
                parseTree.RemoveEdge(conjAnd);
                System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(parseTree));
                andsToAdd.Add(conjAnd);
            }
        }
    }
    // Clean the tree
    Edu.Stanford.Nlp.Naturalli.Util.CleanTree(parseTree);
    System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(parseTree));
    // Find the subject / object split.
    // This takes max O(n^2) time, expected O(n*log(n)) time.
    // Optimal is O(n), but I'm too lazy to implement it.
    // isSubject bit i is set when token with 1-based Index i+1 lies under a *subj edge.
    BitSet isSubject = new BitSet(256);
    foreach (IndexedWord vertex_1 in parseTree.VertexSet())
    {
        // Search up the tree for a subj node; if found, mark that vertex as a subject.
        IEnumerator<SemanticGraphEdge> incomingEdges = parseTree.IncomingEdgeIterator(vertex_1);
        SemanticGraphEdge edge = null;
        if (incomingEdges.MoveNext())
        {
            edge = incomingEdges.Current;
        }
        int numIters = 0;
        while (edge != null)
        {
            if (edge.GetRelation().ToString().EndsWith("subj"))
            {
                System.Diagnostics.Debug.Assert(vertex_1.Index() > 0);
                isSubject.Set(vertex_1.Index() - 1);
                break;
            }
            incomingEdges = parseTree.IncomingEdgeIterator(edge.GetGovernor());
            if (incomingEdges.MoveNext())
            {
                edge = incomingEdges.Current;
            }
            else
            {
                edge = null;
            }
            numIters += 1;
            // Safety valve: bail out entirely if the "tree" has an absurd apparent depth
            // (i.e. it is probably cyclic).
            if (numIters > 100)
            {
                // log.error("tree has apparent depth > 100");
                return (Java.Util.Collections.EmptyList);
            }
        }
    }
    // Outputs
    IList<ForwardEntailerSearchProblem.SearchResult> results = new List<ForwardEntailerSearchProblem.SearchResult>();
    if (!determinerRemovals.IsEmpty())
    {
        if (andsToAdd.IsEmpty())
        {
            // Score the determiner-only deletion: det-deletion probability, once per removal.
            double score = Math.Pow(weights.DeletionProbability("det"), (double)determinerRemovals.Count);
            System.Diagnostics.Debug.Assert(!double.IsNaN(score));
            System.Diagnostics.Debug.Assert(!double.IsInfinite(score));
            results.Add(new ForwardEntailerSearchProblem.SearchResult(parseTree, determinerRemovals, score));
        }
        else
        {
            // Re-add the cut conj:and edges before registering the result tree.
            SemanticGraph treeWithAnds = new SemanticGraph(parseTree);
            System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(treeWithAnds));
            foreach (SemanticGraphEdge and in andsToAdd)
            {
                treeWithAnds.AddEdge(and.GetGovernor(), and.GetDependent(), and.GetRelation(), double.NegativeInfinity, false);
            }
            System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(treeWithAnds));
            results.Add(new ForwardEntailerSearchProblem.SearchResult(treeWithAnds, determinerRemovals, Math.Pow(weights.DeletionProbability("det"), (double)determinerRemovals.Count)));
        }
    }
    // Initialize the search
    System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(parseTree));
    IList<IndexedWord> topologicalVertices;
    try
    {
        topologicalVertices = parseTree.TopologicalSort();
    }
    catch (InvalidOperationException)
    {
        log.info("Could not topologically sort the vertices! Using left-to-right traversal.");
        topologicalVertices = parseTree.VertexListSorted();
    }
    if (topologicalVertices.IsEmpty())
    {
        return (results);
    }
    Stack<ForwardEntailerSearchProblem.SearchState> fringe = new Stack<ForwardEntailerSearchProblem.SearchState>();
    fringe.Push(new ForwardEntailerSearchProblem.SearchState(new BitSet(256), 0, parseTree, null, null, 1.0));
    // Start the search
    int numTicks = 0;
    while (!fringe.IsEmpty())
    {
        // Overhead with popping a node: enforce global tick and result-count budgets.
        if (numTicks >= maxTicks)
        {
            return (results);
        }
        numTicks += 1;
        if (results.Count >= maxResults)
        {
            return (results);
        }
        ForwardEntailerSearchProblem.SearchState state = fringe.Pop();
        System.Diagnostics.Debug.Assert(state.score > 0.0);
        IndexedWord currentWord = topologicalVertices[state.currentIndex];
        // Push the case where we don't delete: advance to the next not-yet-deleted vertex.
        int nextIndex = state.currentIndex + 1;
        int numIters = 0;
        while (nextIndex < topologicalVertices.Count)
        {
            IndexedWord nextWord = topologicalVertices[nextIndex];
            System.Diagnostics.Debug.Assert(nextWord.Index() > 0);
            if (!state.deletionMask.Get(nextWord.Index() - 1))
            {
                fringe.Push(new ForwardEntailerSearchProblem.SearchState(state.deletionMask, nextIndex, state.tree, null, state, state.score));
                break;
            }
            else
            {
                nextIndex += 1;
            }
            numIters += 1;
            if (numIters > 10000)
            {
                // log.error("logic error (apparent infinite loop); returning");
                return (results);
            }
        }
        // Check if we can delete this subtree: never the root, never a child of a cardinal
        // number, and only when every incoming edge's deletion projects to a valid entailment.
        bool canDelete = !state.tree.GetFirstRoot().Equals(currentWord);
        foreach (SemanticGraphEdge edge in state.tree.IncomingEdgeIterable(currentWord))
        {
            if ("CD".Equals(edge.GetGovernor().Tag()))
            {
                canDelete = false;
            }
            else
            {
                // Get token information
                CoreLabel token = edge.GetDependent().BackingLabel();
                OperatorSpec @operator;
                NaturalLogicRelation lexicalRelation;
                Polarity tokenPolarity = token.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation));
                if (tokenPolarity == null)
                {
                    tokenPolarity = Polarity.Default;
                }
                // Get the relation for this deletion: operator tokens carry their own
                // delete-relation; otherwise derive it from the dependency label.
                if ((@operator = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation))) != null)
                {
                    lexicalRelation = @operator.instance.deleteRelation;
                }
                else
                {
                    System.Diagnostics.Debug.Assert(edge.GetDependent().Index() > 0);
                    lexicalRelation = NaturalLogicRelation.ForDependencyDeletion(edge.GetRelation().ToString(), isSubject.Get(edge.GetDependent().Index() - 1));
                }
                NaturalLogicRelation projectedRelation = tokenPolarity.ProjectLexicalRelation(lexicalRelation);
                // Make sure this is a valid entailment
                if (!projectedRelation.ApplyToTruthValue(truthOfPremise).IsTrue())
                {
                    canDelete = false;
                }
            }
        }
        if (canDelete)
        {
            // Register the deletion.
            // NOTE(review): the supplier lambda was lost in conversion (Lazy.Of(null)) —
            // Get() below would not compute the deleted tree/mask as the Java original did.
            Lazy<Pair<SemanticGraph, BitSet>> treeWithDeletionsAndNewMask = Lazy.Of(null);
            // Compute the score of the sentence
            double newScore = state.score;
            foreach (SemanticGraphEdge edge_1 in state.tree.IncomingEdgeIterable(currentWord))
            {
                double multiplier = weights.DeletionProbability(edge_1, state.tree.OutgoingEdgeIterable(edge_1.GetGovernor()));
                System.Diagnostics.Debug.Assert(!double.IsNaN(multiplier));
                System.Diagnostics.Debug.Assert(!double.IsInfinite(multiplier));
                newScore *= multiplier;
            }
            // Register the result
            if (newScore > 0.0)
            {
                SemanticGraph resultTree = new SemanticGraph(treeWithDeletionsAndNewMask.Get().first);
                // NOTE(review): converter artifact — filter/action lambdas are null here too.
                andsToAdd.Stream().Filter(null).ForEach(null);
                results.Add(new ForwardEntailerSearchProblem.SearchResult(resultTree, AggregateDeletedEdges(state, state.tree.IncomingEdgeIterable(currentWord), determinerRemovals), newScore));
                // Push the state with this subtree deleted
                nextIndex = state.currentIndex + 1;
                numIters = 0;
                while (nextIndex < topologicalVertices.Count)
                {
                    IndexedWord nextWord = topologicalVertices[nextIndex];
                    BitSet newMask = treeWithDeletionsAndNewMask.Get().second;
                    SemanticGraph treeWithDeletions = treeWithDeletionsAndNewMask.Get().first;
                    if (!newMask.Get(nextWord.Index() - 1))
                    {
                        System.Diagnostics.Debug.Assert(treeWithDeletions.ContainsVertex(topologicalVertices[nextIndex]));
                        fringe.Push(new ForwardEntailerSearchProblem.SearchState(newMask, nextIndex, treeWithDeletions, null, state, newScore));
                        break;
                    }
                    else
                    {
                        nextIndex += 1;
                    }
                    numIters += 1;
                    if (numIters > 10000)
                    {
                        // log.error("logic error (apparent infinite loop); returning");
                        return (results);
                    }
                }
            }
        }
    }
    // Return
    return (results);
}
//using quote-removed depparses
/// <summary>
/// Quote-attribution sieve: for each unattributed quote, finds subject–verb pairs in the
/// quote-removed dependency parse and, when the verb is a known speech verb and both
/// subject and verb lie in the sentence remainder outside the quote, fills in the quote's
/// mention with the subject (proper name, pronoun, or animate common noun).
/// </summary>
/// <param name="doc">The annotated document; quotes with an existing MentionAnnotation are skipped.</param>
public virtual void DependencyParses(Annotation doc)
{
    IList<ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));
    foreach (ICoreMap quote in quotes)
    {
        // Already attributed by an earlier sieve.
        if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null)
        {
            continue;
        }
        // Token range of the sentence remainder (the part outside the quote).
        Pair<int, int> range = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
        if (range == null)
        {
            continue;
        }
        //search for mentions in the first run
        Pair<List<string>, List<Pair<int, int>>> namesAndNameIndices = ScanForNames(range);
        List<string> names = namesAndNameIndices.first;
        List<Pair<int, int>> nameIndices = namesAndNameIndices.second;
        SemanticGraph graph = quote.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
        SemgrexMatcher matcher = subjVerbPattern.Matcher(graph);
        IList<Pair<IndexedWord, IndexedWord>> subjVerbPairs = new List<Pair<IndexedWord, IndexedWord>>();
        //TODO: check and see if this is necessary
        while (matcher.Find())
        {
            IndexedWord subj = matcher.GetNode("SUBJ");
            IndexedWord verb = matcher.GetNode("VERB");
            subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(subj, verb));
        }
        // Also recover subj-verb pairs hidden behind flat "dep" edges: a verb with an
        // nsubj child shares that subject with its VB* "dep" children.
        IList<IndexedWord> vbs = graph.GetAllNodesByPartOfSpeechPattern("VB.*");
        foreach (IndexedWord iw in vbs)
        {
            // does it have an nsubj child?
            ICollection<IndexedWord> children = graph.GetChildren(iw);
            IList<IndexedWord> deps = Generics.NewArrayList();
            IndexedWord nsubj = null;
            foreach (IndexedWord child in children)
            {
                SemanticGraphEdge sge = graph.GetEdge(iw, child);
                if (sge.GetRelation().GetShortName().Equals("dep") && child.Tag().StartsWith("VB"))
                {
                    deps.Add(child);
                }
                else
                {
                    if (sge.GetRelation().GetShortName().Equals("nsubj"))
                    {
                        nsubj = child;
                    }
                }
            }
            if (nsubj != null)
            {
                foreach (IndexedWord dep in deps)
                {
                    subjVerbPairs.Add(new Pair(nsubj, dep));
                }
            }
        }
        //look for a speech verb
        foreach (Pair<IndexedWord, IndexedWord> SVPair in subjVerbPairs)
        {
            IndexedWord verb = SVPair.second;
            IndexedWord subj = SVPair.first;
            //check if subj and verb outside of quote
            int verbTokPos = TokenToLocation(verb.BackingLabel());
            // BUG FIX: the original computed the subject position from the VERB's label,
            // making the subject's in-range check vacuously equal to the verb's.
            int subjTokPos = TokenToLocation(subj.BackingLabel());
            if (InRange(range, verbTokPos) && InRange(range, subjTokPos) && commonSpeechWords.Contains(verb.Lemma()))
            {
                if (subj.Tag().Equals("NNP"))
                {
                    int startChar = subj.BeginPosition();
                    for (int i = 0; i < names.Count; i++)
                    {
                        Pair<int, int> nameIndex = nameIndices[i];
                        //avoid names that don't actually exist in the remainder text
                        if (RangeContainsCharIndex(nameIndex, startChar))
                        {
                            FillInMention(quote, TokenRangeToString(nameIndex), nameIndex.first, nameIndex.second, sieveName, Name);
                            break;
                        }
                    }
                }
                else
                {
                    if (subj.Tag().Equals("PRP"))
                    {
                        int loc = TokenToLocation(subj.BackingLabel());
                        FillInMention(quote, subj.Word(), loc, loc, sieveName, Pronoun);
                        break;
                    }
                    else
                    {
                        if (subj.Tag().Equals("NN") && animacySet.Contains(subj.Word()))
                        {
                            int loc = TokenToLocation(subj.BackingLabel());
                            FillInMention(quote, subj.Word(), loc, loc, sieveName, AnimateNoun);
                            break;
                        }
                    }
                }
            }
        }
    }
}
/// <summary>Fix some bizarre peculiarities with certain trees.</summary>
/// <remarks>
/// Fix some bizarre peculiarities with certain trees.
/// So far, these include:
/// <ul>
/// <li>Sometimes there's a node from a word to itself. This seems wrong.</li>
/// </ul>
/// NOTE(review): several <c>ForEach(null)</c> calls below are converter artifacts — the
/// action lambdas (which performed the actual deletions) were lost in translation from Java.
/// </remarks>
/// <param name="tree">The tree to clean (in place!).</param>
/// <returns>A list of extra edges, which are valid but were removed.</returns>
public static IList<SemanticGraphEdge> CleanTree(SemanticGraph tree)
{
    // assert !isCyclic(tree);
    // Clean nodes: collect leaf punctuation vertices for deletion.
    IList<IndexedWord> toDelete = new List<IndexedWord>();
    foreach (IndexedWord vertex in tree.VertexSet())
    {
        // Clean punctuation
        if (vertex.Tag() == null)
        {
            continue;
        }
        char tag = vertex.BackingLabel().Tag()[0];
        if (tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':')
        {
            if (!tree.OutgoingEdgeIterator(vertex).MoveNext())
            {
                // This should really never happen, but it does.
                toDelete.Add(vertex);
            }
        }
    }
    toDelete.ForEach(null);
    // Clean edges: self-loops and punct edges to leaves.
    IEnumerator<SemanticGraphEdge> iter = tree.EdgeIterable().GetEnumerator();
    IList<Triple<IndexedWord, IndexedWord, SemanticGraphEdge>> toAdd = new List<Triple<IndexedWord, IndexedWord, SemanticGraphEdge>>();
    toDelete.Clear();
    while (iter.MoveNext())
    {
        SemanticGraphEdge edge = iter.Current;
        if (edge.GetDependent().Index() == edge.GetGovernor().Index())
        {
            // Clean up copy-edges: reroute the copy node's children to its original,
            // then schedule the copy for deletion.
            if (edge.GetDependent().IsCopy(edge.GetGovernor()))
            {
                foreach (SemanticGraphEdge toCopy in tree.OutgoingEdgeIterable(edge.GetDependent()))
                {
                    toAdd.Add(Triple.MakeTriple(edge.GetGovernor(), toCopy.GetDependent(), toCopy));
                }
                toDelete.Add(edge.GetDependent());
            }
            if (edge.GetGovernor().IsCopy(edge.GetDependent()))
            {
                foreach (SemanticGraphEdge toCopy in tree.OutgoingEdgeIterable(edge.GetGovernor()))
                {
                    toAdd.Add(Triple.MakeTriple(edge.GetDependent(), toCopy.GetDependent(), toCopy));
                }
                toDelete.Add(edge.GetGovernor());
            }
            // Clean self-edges
            iter.Remove();
        }
        else
        {
            if (edge.GetRelation().ToString().Equals("punct"))
            {
                // Clean punctuation (again)
                if (!tree.OutgoingEdgeIterator(edge.GetDependent()).MoveNext())
                {
                    // This should really never happen, but it does.
                    iter.Remove();
                }
            }
        }
    }
    // (add edges we wanted to add)
    toDelete.ForEach(null);
    foreach (Triple<IndexedWord, IndexedWord, SemanticGraphEdge> edge_1 in toAdd)
    {
        tree.AddEdge(edge_1.first, edge_1.second, edge_1.third.GetRelation(), edge_1.third.GetWeight(), edge_1.third.IsExtra());
    }
    // Handle extra edges.
    // Two cases:
    // (1) the extra edge is a subj/obj edge and the main edge is a conj:.*
    //     in this case, keep the extra
    // (2) otherwise, delete the extra
    IList<SemanticGraphEdge> extraEdges = new List<SemanticGraphEdge>();
    foreach (SemanticGraphEdge edge_2 in tree.EdgeIterable())
    {
        if (edge_2.IsExtra())
        {
            IList<SemanticGraphEdge> incomingEdges = tree.IncomingEdgeList(edge_2.GetDependent());
            SemanticGraphEdge toKeep = null;
            foreach (SemanticGraphEdge candidate in incomingEdges)
            {
                if (toKeep == null)
                {
                    toKeep = candidate;
                }
                else
                {
                    if (toKeep.GetRelation().ToString().StartsWith("conj") && candidate.GetRelation().ToString().Matches(".subj.*|.obj.*"))
                    {
                        toKeep = candidate;
                    }
                    else
                    {
                        if (!candidate.IsExtra() && !(candidate.GetRelation().ToString().StartsWith("conj") && toKeep.GetRelation().ToString().Matches(".subj.*|.obj.*")))
                        {
                            toKeep = candidate;
                        }
                    }
                }
            }
            foreach (SemanticGraphEdge candidate_1 in incomingEdges)
            {
                if (candidate_1 != toKeep)
                {
                    extraEdges.Add(candidate_1);
                }
            }
        }
    }
    extraEdges.ForEach(null);
    // Add apposition edges (simple coref)
    foreach (SemanticGraphEdge extraEdge in new List<SemanticGraphEdge>(extraEdges))
    {
        // note[gabor] prevent concurrent modification exception
        foreach (SemanticGraphEdge candidateAppos in tree.IncomingEdgeIterable(extraEdge.GetDependent()))
        {
            if (candidateAppos.GetRelation().ToString().Equals("appos"))
            {
                extraEdges.Add(new SemanticGraphEdge(extraEdge.GetGovernor(), candidateAppos.GetGovernor(), extraEdge.GetRelation(), extraEdge.GetWeight(), extraEdge.IsExtra()));
            }
        }
        foreach (SemanticGraphEdge candidateAppos_1 in tree.OutgoingEdgeIterable(extraEdge.GetDependent()))
        {
            if (candidateAppos_1.GetRelation().ToString().Equals("appos"))
            {
                extraEdges.Add(new SemanticGraphEdge(extraEdge.GetGovernor(), candidateAppos_1.GetDependent(), extraEdge.GetRelation(), extraEdge.GetWeight(), extraEdge.IsExtra()));
            }
        }
    }
    // Brute force ensure tree
    // Remove incoming edges from roots
    IList<SemanticGraphEdge> rootIncomingEdges = new List<SemanticGraphEdge>();
    foreach (IndexedWord root in tree.GetRoots())
    {
        foreach (SemanticGraphEdge incomingEdge in tree.IncomingEdgeIterable(root))
        {
            rootIncomingEdges.Add(incomingEdge);
        }
    }
    rootIncomingEdges.ForEach(null);
    // Loop until it becomes a tree.
    bool changed = true;
    while (changed)
    {
        // I just want trees to be trees; is that so much to ask!?
        changed = false;
        IList<IndexedWord> danglingNodes = new List<IndexedWord>();
        IList<SemanticGraphEdge> invalidEdges = new List<SemanticGraphEdge>();
        foreach (IndexedWord vertex_1 in tree.VertexSet())
        {
            // Collect statistics
            IEnumerator<SemanticGraphEdge> incomingIter = tree.IncomingEdgeIterator(vertex_1);
            bool hasIncoming = incomingIter.MoveNext();
            bool hasMultipleIncoming = false;
            if (hasIncoming)
            {
                // BUG FIX: the original had a bare `incomingIter.Current;` statement here
                // (illegal C#, CS0201, and a no-op since Current does not advance); removed.
                hasMultipleIncoming = incomingIter.MoveNext();
            }
            // Register actions
            if (!hasIncoming && !tree.GetRoots().Contains(vertex_1))
            {
                danglingNodes.Add(vertex_1);
            }
            else
            {
                if (hasMultipleIncoming)
                {
                    foreach (SemanticGraphEdge edge in new IterableIterator<SemanticGraphEdge>(incomingIter))
                    {
                        // BUG FIX: the original added `edge_2` — the loop variable of an
                        // earlier, already-closed foreach — instead of the current edge.
                        invalidEdges.Add(edge);
                    }
                }
            }
        }
        // Perform actions
        foreach (IndexedWord vertex_2 in danglingNodes)
        {
            tree.RemoveVertex(vertex_2);
            changed = true;
        }
        foreach (SemanticGraphEdge edge_3 in invalidEdges)
        {
            tree.RemoveEdge(edge_3);
            changed = true;
        }
    }
    // Edge case: remove duplicate dobj to "that."
    // This is a common parse error.
    foreach (IndexedWord vertex_3 in tree.VertexSet())
    {
        SemanticGraphEdge thatEdge = null;
        int dobjCount = 0;
        foreach (SemanticGraphEdge edge in tree.OutgoingEdgeIterable(vertex_3))
        {
            // BUG FIX: the original referenced `edge_2` (out of scope) instead of `edge`.
            if (Sharpen.Runtime.EqualsIgnoreCase("that", edge.GetDependent().Word()))
            {
                thatEdge = edge;
            }
            if ("dobj".Equals(edge.GetRelation().ToString()))
            {
                dobjCount += 1;
            }
        }
        if (dobjCount > 1 && thatEdge != null)
        {
            // Case: there are two dobj edges, one of which goes to the word "that"
            // Action: rewrite the dobj edge to "that" to be a "mark" edge.
            tree.RemoveEdge(thatEdge);
            tree.AddEdge(thatEdge.GetGovernor(), thatEdge.GetDependent(), GrammaticalRelation.ValueOf(thatEdge.GetRelation().GetLanguage(), "mark"), thatEdge.GetWeight(), thatEdge.IsExtra());
        }
    }
    // Return
    System.Diagnostics.Debug.Assert(IsTree(tree));
    return (extraEdges);
}
/// <summary>
/// Serializes one document for training: writes the gold coreference clusters to
/// <c>goldClusterWriter</c> and a JSON record of sentences, mentions, pairwise features,
/// and pair labels to <c>dataWriter</c>.
/// </summary>
/// <param name="id">Numeric document id, used as the JSON key and "doc_id" value.</param>
/// <param name="document">The coref document to serialize.</param>
public virtual void Process(int id, Document document)
{
    /* Gold clusters: a JSON array of arrays of mention ids, keyed by the document id. */
    IJsonArrayBuilder goldClusterArray = Javax.Json.Json.CreateArrayBuilder();
    foreach (CorefCluster goldCluster in document.goldCorefClusters.Values)
    {
        IJsonArrayBuilder clusterMentionIds = Javax.Json.Json.CreateArrayBuilder();
        foreach (Mention goldMention in goldCluster.corefMentions)
        {
            clusterMentionIds.Add(goldMention.mentionID);
        }
        goldClusterArray.Add(clusterMentionIds.Build());
    }
    goldClusterWriter.Println(Javax.Json.Json.CreateObjectBuilder().Add(id.ToString(), goldClusterArray.Build()).Build());
    /* Labeled mention pairs, plus mentions grouped by their head token index. */
    IDictionary<Pair<int, int>, bool> mentionPairs = CorefUtils.GetLabeledMentionPairs(document);
    IList<Mention> sortedMentions = CorefUtils.GetSortedMentions(document);
    IDictionary<int, IList<Mention>> mentionsByHeadIndex = new Dictionary<int, IList<Mention>>();
    foreach (Mention sortedMention in sortedMentions)
    {
        IList<Mention> sameHead = mentionsByHeadIndex.ComputeIfAbsent(sortedMention.headIndex, null);
        sameHead.Add(sortedMention);
    }
    /* Document-level features. */
    IJsonObjectBuilder docFeatures = Javax.Json.Json.CreateObjectBuilder();
    docFeatures.Add("doc_id", id);
    docFeatures.Add("type", document.docType == Document.DocType.Article ? 1 : 0);
    docFeatures.Add("source", document.docInfo["DOC_ID"].Split("/")[0]);
    /* One token array per sentence. */
    IJsonArrayBuilder sentences = Javax.Json.Json.CreateArrayBuilder();
    foreach (ICoreMap sentence in document.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        sentences.Add(GetSentenceArray(sentence.Get(typeof(CoreAnnotations.TokensAnnotation))));
    }
    /* One JSON record per predicted mention, keyed by its mention number. */
    IJsonObjectBuilder mentions = Javax.Json.Json.CreateObjectBuilder();
    foreach (Mention predicted in document.predictedMentionsByID.Values)
    {
        // The head word's governing edge (if any) supplies the dependency features.
        IEnumerator<SemanticGraphEdge> headEdgeIter = predicted.enhancedDependency.IncomingEdgeIterator(predicted.headIndexedWord);
        SemanticGraphEdge headEdge = headEdgeIter.MoveNext() ? headEdgeIter.Current : null;
        string depRelation = headEdge == null ? "no-parent" : headEdge.GetRelation().ToString();
        string depParent = headEdge == null ? "<missing>" : headEdge.GetSource().Word();
        IJsonObjectBuilder mentionRecord = Javax.Json.Json.CreateObjectBuilder()
            .Add("doc_id", id)
            .Add("mention_id", predicted.mentionID)
            .Add("mention_num", predicted.mentionNum)
            .Add("sent_num", predicted.sentNum)
            .Add("start_index", predicted.startIndex)
            .Add("end_index", predicted.endIndex)
            .Add("head_index", predicted.headIndex)
            .Add("mention_type", predicted.mentionType.ToString())
            .Add("dep_relation", depRelation)
            .Add("dep_parent", depParent)
            .Add("sentence", GetSentenceArray(predicted.sentenceWords))
            .Add("contained-in-other-mention", mentionsByHeadIndex[predicted.headIndex].Stream().AnyMatch(null) ? 1 : 0);
        mentions.Add(predicted.mentionNum.ToString(), mentionRecord.Build());
    }
    /* Fixed ordering of the pairwise feature names. */
    IJsonArrayBuilder featureNames = Javax.Json.Json.CreateArrayBuilder()
        .Add("same-speaker")
        .Add("antecedent-is-mention-speaker")
        .Add("mention-is-antecedent-speaker")
        .Add("relaxed-head-match")
        .Add("exact-string-match")
        .Add("relaxed-string-match");
    /* Pairwise feature vectors and coreference labels, keyed by "num1 num2". */
    IJsonObjectBuilder features = Javax.Json.Json.CreateObjectBuilder();
    IJsonObjectBuilder labels = Javax.Json.Json.CreateObjectBuilder();
    foreach (KeyValuePair<Pair<int, int>, bool> labeledPair in mentionPairs)
    {
        Mention m1 = document.predictedMentionsByID[labeledPair.Key.first];
        Mention m2 = document.predictedMentionsByID[labeledPair.Key.second];
        string pairKey = m1.mentionNum + " " + m2.mentionNum;
        IJsonArrayBuilder pairFeatureValues = Javax.Json.Json.CreateArrayBuilder();
        foreach (int featureValue in CategoricalFeatureExtractor.PairwiseFeatures(document, m1, m2, dictionaries, conll))
        {
            pairFeatureValues.Add(featureValue);
        }
        features.Add(pairKey, pairFeatureValues.Build());
        labels.Add(pairKey, labeledPair.Value ? 1 : 0);
    }
    /* Assemble and emit the full document record. */
    IJsonObject docData = Javax.Json.Json.CreateObjectBuilder()
        .Add("sentences", sentences.Build())
        .Add("mentions", mentions.Build())
        .Add("labels", labels.Build())
        .Add("pair_feature_names", featureNames.Build())
        .Add("pair_features", features.Build())
        .Add("document_features", docFeatures.Build())
        .Build();
    dataWriter.Println(docData);
}