/// <summary>
/// Decides whether the pronoun "you" or "it" is nominative or accusative,
/// based on the edge that attaches it to its parent.
/// </summary>
/// <param name="sg">Dependency graph containing the pronoun.</param>
/// <param name="word">The pronoun; it is looked up again in <paramref name="sg"/> by its index.</param>
/// <returns>
/// "Acc" when the pronoun is an object, or when it is a nominal modifier / root
/// that has a case-marker child; "Nom" in every other situation.
/// </returns>
private static string PronounCase(SemanticGraph sg, IndexedWord word)
{
    // Re-resolve the node through the graph before querying its edges.
    word = sg.GetNodeByIndex(word.Index());

    IndexedWord governor = sg.GetParent(word);
    if (governor == null)
    {
        return("Nom");
    }

    SemanticGraphEdge incoming = sg.GetEdge(governor, word);
    if (incoming == null)
    {
        return("Nom");
    }

    if (UniversalEnglishGrammaticalRelations.Object.IsAncestor(incoming.GetRelation()))
    {
        /* "you" is an object. */
        return("Acc");
    }

    bool nominalModifierOrRoot =
        UniversalEnglishGrammaticalRelations.NominalModifier.IsAncestor(incoming.GetRelation()) ||
        incoming.GetRelation() == GrammaticalRelation.Root;
    if (nominalModifierOrRoot && sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.CaseMarker))
    {
        /* "you" is the head of a prepositional phrase. */
        return("Acc");
    }

    return("Nom");
}
/// <summary>
/// Builds the "PronType" feature for words whose tag starts with "W".
/// </summary>
/// <param name="sg">Dependency graph containing the word.</param>
/// <param name="word">The word to classify.</param>
/// <returns>
/// An empty map for non-"W" tags; otherwise a map with "PronType" set to "Rel" when the
/// word's parent is attached to its own parent as a relative clause modifier, "Dem" when
/// the word is "that" (case-insensitive), and "Int" otherwise.
/// </returns>
private static Dictionary<string, string> GetRelAndIntPronFeatures(SemanticGraph sg, IndexedWord word)
{
    Dictionary<string, string> result = new Dictionary<string, string>();
    if (!word.Tag().StartsWith("W"))
    {
        return(result);
    }

    // The pronoun is relative when the edge above its parent is a relative clause modifier.
    bool relative = false;
    IndexedWord head = sg.GetParent(word);
    IndexedWord grandHead = (head != null) ? sg.GetParent(head) : null;
    if (grandHead != null)
    {
        SemanticGraphEdge upperEdge = sg.GetEdge(grandHead, head);
        relative = upperEdge.GetRelation().Equals(UniversalEnglishGrammaticalRelations.RelativeClauseModifier);
    }

    string pronType;
    if (relative)
    {
        pronType = "Rel";
    }
    else
    {
        pronType = Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "that") ? "Dem" : "Int";
    }
    result["PronType"] = pronType;
    return(result);
}
/// <summary>
/// Collects the morphological features of <paramref name="word"/> that depend on its
/// position in the dependency graph: case of "you"/"it", person of "was", pronoun type,
/// verb form of VBG tokens, reflexive marking, and passive voice of VBN tokens.
/// </summary>
/// <param name="sg">Dependency graph containing the word.</param>
/// <param name="word">The word whose features are computed.</param>
/// <returns>A map from feature name to feature value; empty when no rule applies.</returns>
private static Dictionary<string, string> GetGraphFeatures(SemanticGraph sg, IndexedWord word)
{
    Dictionary<string, string> result = new Dictionary<string, string>();
    string tag = word.Tag();

    /* Determine the case of "you". */
    if (tag.Equals("PRP"))
    {
        bool youOrIt = Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "you") ||
                       Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "it");
        if (youOrIt)
        {
            result["Case"] = PronounCase(sg, word);
        }
    }

    /* Determine the person of "was". */
    if (tag.Equals("VBD") && Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "was"))
    {
        string wasPerson = WasPerson(sg, word);
        if (wasPerson != null)
        {
            result["Person"] = wasPerson;
        }
    }

    /* Determine features of relative and interrogative pronouns. */
    result.PutAll(GetRelAndIntPronFeatures(sg, word));

    /* Determine features of gerunds and present participles. */
    if (tag.Equals("VBG"))
    {
        if (!HasBeAux(sg, word))
        {
            result["VerbForm"] = "Ger";
        }
        else
        {
            result["VerbForm"] = "Part";
            result["Tense"] = "Pres";
        }
    }

    /* Determine whether reflexive pronoun is reflexive or intensive. */
    if (word.Value().Matches(SelfRegex) && tag.Equals("PRP"))
    {
        IndexedWord governor = sg.GetParent(word);
        if (governor != null)
        {
            SemanticGraphEdge incoming = sg.GetEdge(governor, word);
            // An NP-adverbial attachment is left unmarked; anything else is tagged reflexive.
            bool npAdverbial = incoming.GetRelation() == UniversalEnglishGrammaticalRelations.NpAdverbialModifier;
            if (!npAdverbial)
            {
                result["Case"] = "Acc";
                result["Reflex"] = "Yes";
            }
        }
    }

    /* Voice feature. */
    if (tag.Equals("VBN") && sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.AuxPassiveModifier))
    {
        result["Voice"] = "Pass";
    }

    return(result);
}
/// <summary>
/// Removes the edge between the named governor and dependent whose relation is the one
/// the matcher bound to <c>edgeName</c>. Does nothing when no such edge exists.
/// </summary>
/// <param name="sg">Graph to modify.</param>
/// <param name="sm">Matcher supplying the named nodes and the named relation string.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    string relnString = sm.GetRelnString(edgeName);
    IndexedWord governor = GetNamedNode(govName, sm);
    IndexedWord dependent = GetNamedNode(depName, sm);

    GrammaticalRelation reln = GrammaticalRelation.ValueOf(relnString);
    SemanticGraphEdge target = sg.GetEdge(governor, dependent, reln);
    if (target == null)
    {
        return;
    }
    sg.RemoveEdge(target);
}
/// <summary>
/// Adds a mention for the pronoun <paramref name="headword"/> and, when the pronoun has
/// conjunct children (e.g., "you and I"), a second mention covering the span returned by
/// <c>GetNPSpan</c>.
/// </summary>
/// <param name="headword">The pronoun node; its token position is <c>Index() - 1</c>.</param>
/// <param name="dep">Dependency graph used for edge and conjunct lookups.</param>
/// <param name="s">Sentence annotation providing tokens and dependency graphs.</param>
/// <param name="mentions">List that receives newly created mentions.</param>
/// <param name="mentionSpanSet">Spans already used by a mention; updated on insertion.</param>
/// <param name="namedEntitySpanSet">Named-entity spans; a pronoun span inside one is skipped.</param>
private void ExtractPronounForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    IList<CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    SemanticGraph basic = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    SemanticGraph enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    if (enhanced == null)
    {
        // Fall back to the basic dependencies when no enhanced graph is attached.
        enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    }

    int nodeIndex = headword.Index();
    int spanStart = nodeIndex - 1;
    int spanEnd = nodeIndex;

    // handle "you all", "they both" etc
    bool followedByQuantifier = sent.Count > nodeIndex && sent[nodeIndex].Word().Matches("all|both");
    if (followedByQuantifier)
    {
        IndexedWord next = dep.GetNodeByIndex(nodeIndex + 1);
        if (dep.GetEdge(headword, next) != null)
        {
            spanEnd++;
        }
    }

    IntPair pronounSpan = new IntPair(spanStart, spanEnd);
    bool alreadyCovered = mentionSpanSet.Contains(pronounSpan) || InsideNE(pronounSpan, namedEntitySpanSet);
    if (!alreadyCovered)
    {
        int placeholderId = -1;
        List<CoreLabel> spanTokens = new List<CoreLabel>(sent.SubList(spanStart, spanEnd));
        Mention created = new Mention(placeholderId, spanStart, spanEnd, sent, basic, enhanced, spanTokens);
        created.headIndex = nodeIndex - 1;
        created.headWord = sent[created.headIndex];
        created.headString = created.headWord.Word().ToLower(Locale.English);
        mentions.Add(created);
        mentionSpanSet.Add(pronounSpan);
    }

    // when pronoun is a part of conjunction (e.g., you and I)
    ICollection<IndexedWord> conjuncts = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
    if (conjuncts.Count == 0)
    {
        return;
    }

    IntPair npSpan = GetNPSpan(headword, dep, sent);
    int npStart = npSpan.Get(0);
    int npEnd = npSpan.Get(1) + 1;
    // try not to have span that ends with ,
    if (",".Equals(sent[npEnd - 1].Word()))
    {
        npEnd--;
    }
    AddMention(npStart, npEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
}
/// <summary>
/// Returns true if <paramref name="word"/> has an auxiliary verb attached to it, either
/// directly or through the word it is attached to via a conjunct or copula edge.
/// </summary>
/// <param name="sg">Dependency graph containing the word.</param>
/// <param name="word">The word to inspect.</param>
private static bool HasAux(SemanticGraph sg, IndexedWord word)
{
    if (sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.AuxModifier))
    {
        return(true);
    }

    IndexedWord governor = sg.GetParent(word);
    if (governor == null)
    {
        return(false);
    }

    SemanticGraphEdge incoming = sg.GetEdge(governor, word);
    bool inheritsFromGovernor =
        UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(incoming.GetRelation()) ||
        UniversalEnglishGrammaticalRelations.Copula.Equals(incoming.GetRelation());

    // Only conjunct and copula attachments pass the question up to the governor.
    return(inheritsFromGovernor ? HasAux(sg, governor) : false);
}
/// <summary>
/// Removes edges labelled with this operation's <c>relation</c>.
/// When both named nodes resolve, the single edge between them is removed. When only one
/// resolves and the other name is the wildcard, every edge with the relation entering
/// (wildcard governor) or leaving (wildcard dependent) the known node is removed.
/// </summary>
/// <param name="sg">Graph to modify.</param>
/// <param name="sm">Matcher supplying the named nodes.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    bool govWild = govName.Equals(WildcardNode);
    bool depWild = depName.Equals(WildcardNode);
    IndexedWord govNode = GetNamedNode(govName, sm);
    IndexedWord depNode = GetNamedNode(depName, sm);

    if (govNode != null && depNode != null)
    {
        SemanticGraphEdge edge = sg.GetEdge(govNode, depNode, relation);
        if (edge != null)
        {
            sg.RemoveEdge(edge);
        }
        return;
    }

    // Copy the candidate edges before removing anything: removing from the graph while
    // still enumerating its edge iterable would mutate the collection being iterated.
    System.Collections.Generic.List<SemanticGraphEdge> candidates = new System.Collections.Generic.List<SemanticGraphEdge>();
    if (depNode != null && govWild)
    {
        // dep known, wildcard gov
        foreach (SemanticGraphEdge incoming in sg.IncomingEdgeIterable(depNode))
        {
            candidates.Add(incoming);
        }
    }
    else if (govNode != null && depWild)
    {
        // gov known, wildcard dep
        foreach (SemanticGraphEdge outgoing in sg.OutgoingEdgeIterable(govNode))
        {
            candidates.Add(outgoing);
        }
    }

    foreach (SemanticGraphEdge candidate in candidates)
    {
        if (candidate.GetRelation().Equals(relation) && sg.ContainsEdge(candidate))
        {
            sg.RemoveEdge(candidate);
        }
    }
}
/// <summary>
/// Runs headword-based mention extraction for every node of the sentence's basic
/// dependency graph whose part of speech matches <c>N.*|PRP.*|DT</c>, skipping nodes
/// attached to their parent by a "det" or "compound" relation.
/// </summary>
/// <param name="s">Sentence annotation providing the basic dependency graph.</param>
/// <param name="mentions">List that receives extracted mentions.</param>
/// <param name="mentionSpanSet">Spans already used by a mention.</param>
/// <param name="namedEntitySpanSet">Spans of named entities.</param>
private void ExtractNPorPRPFromDependency(ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    SemanticGraph basic = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    // DT is for "this, these, etc"
    IList<IndexedWord> nounsOrPrp = basic.GetAllNodesByPartOfSpeechPattern("N.*|PRP.*|DT");

    foreach (IndexedWord w in nounsOrPrp)
    {
        SemanticGraphEdge edge = basic.GetEdge(basic.GetParent(w), w);
        // if edge is null, it's root
        string shortname = (edge != null) ? edge.GetRelation().GetShortName() : "root";

        // TODO: what to remove? remove more?
        if (shortname.Matches("det|compound"))
        {
            continue;
        }
        ExtractMentionForHeadword(w, basic, s, mentions, mentionSpanSet, namedEntitySpanSet);
    }
}
/// <summary>
/// Adds an edge with this operation's <c>relation</c> and <c>weight</c> from the named
/// governor to the named dependent, unless such an edge already exists. Either endpoint
/// is added to the graph first when it is not yet a vertex.
/// </summary>
/// <param name="sg">Graph to modify.</param>
/// <param name="sm">Matcher supplying the named nodes.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord governor = GetNamedNode(govName, sm);
    IndexedWord dependent = GetNamedNode(depName, sm);

    if (sg.GetEdge(governor, dependent, relation) != null)
    {
        // The edge is already present; nothing to add.
        return;
    }

    // When adding the edge, check to see if the gov/dep nodes are presently in the graph.
    if (!sg.ContainsVertex(governor))
    {
        sg.AddVertex(governor);
    }
    if (!sg.ContainsVertex(dependent))
    {
        sg.AddVertex(dependent);
    }
    sg.AddEdge(governor, dependent, relation, weight, false);
}
/// <summary>
/// Returns true if <paramref name="word"/> has an auxiliary whose value matches
/// <c>BeRegex</c>, either directly or on the word it is attached to by a conjunct edge.
/// </summary>
/// <param name="sg">Dependency graph containing the word.</param>
/// <param name="word">The word to inspect.</param>
private static bool HasBeAux(SemanticGraph sg, IndexedWord word)
{
    foreach (IndexedWord auxiliary in sg.GetChildrenWithReln(word, UniversalEnglishGrammaticalRelations.AuxModifier))
    {
        if (auxiliary.Value().Matches(BeRegex))
        {
            return(true);
        }
    }

    /* Check if head of conjunction has an auxiliary in case the word is part of a conjunction */
    IndexedWord governor = sg.GetParent(word);
    if (governor == null)
    {
        return(false);
    }

    SemanticGraphEdge incoming = sg.GetEdge(governor, word);
    if (!UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(incoming.GetRelation()))
    {
        return(false);
    }
    return(HasBeAux(sg, governor));
}
/// <summary>
/// Determines the grammatical person of "was" from its subject.
/// </summary>
/// <param name="sg">Dependency graph containing the verb.</param>
/// <param name="word">The "was" token, or a word reached from it while walking up the graph.</param>
/// <returns>
/// "1" when the nominal (or passive nominal) subject is "I"; the result for the parent when
/// the word is attached as an auxiliary, passive auxiliary or conjunct; <c>null</c> when
/// there is neither a subject nor a parent edge; "3" otherwise.
/// </returns>
private static string WasPerson(SemanticGraph sg, IndexedWord word)
{
    IndexedWord subject = sg.GetChildWithReln(word, UniversalEnglishGrammaticalRelations.NominalSubject);
    if (subject == null)
    {
        subject = sg.GetChildWithReln(word, UniversalEnglishGrammaticalRelations.NominalPassiveSubject);
    }

    if (subject != null && Sharpen.Runtime.EqualsIgnoreCase(subject.Word(), "i"))
    {
        /* "I" is the subject of "was". */
        return("1");
    }

    IndexedWord governor = sg.GetParent(word);
    SemanticGraphEdge incoming = (governor != null) ? sg.GetEdge(governor, word) : null;
    if (incoming == null)
    {
        // Nowhere further up to look: third person if some subject exists, unknown otherwise.
        return(subject != null ? "3" : null);
    }

    bool isAuxiliary =
        UniversalEnglishGrammaticalRelations.AuxModifier.Equals(incoming.GetRelation()) ||
        UniversalEnglishGrammaticalRelations.AuxPassiveModifier.Equals(incoming.GetRelation());
    /* For a conjunct, check if the subject of the head of the conjunction is "I". */
    if (isAuxiliary || UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(incoming.GetRelation()))
    {
        return(WasPerson(sg, governor));
    }

    return("3");
}
/// <summary>
/// Returns the leftmost and rightmost token positions of the noun phrase headed by
/// <paramref name="headword"/>, excluding the copula, the children listed before it, and
/// children attached by "dep", "discourse" or "punct".
/// e.g., you are the person -> return "the person"
/// </summary>
/// <param name="headword">Head of the noun phrase; its token position is <c>Index() - 1</c>.</param>
/// <param name="dep">Dependency graph used to find children and their extents.</param>
/// <param name="sent">Sentence tokens (not read by this method).</param>
/// <returns>An inclusive (begin, end) pair; both equal the head position when no child qualifies.</returns>
private IntPair GetNPSpan(IndexedWord headword, SemanticGraph dep, IList<CoreLabel> sent)
{
    int headPos = headword.Index() - 1;
    IList<IndexedWord> kids = dep.GetChildList(headword);

    // check if we have copula relation
    IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
    int firstCandidate = 0;
    if (copula != null)
    {
        firstCandidate = kids.IndexOf(copula) + 1;
    }

    // children which will be inside of NP
    IList<IndexedWord> insideNP = Generics.NewArrayList();
    for (int k = firstCandidate; k < kids.Count; k++)
    {
        IndexedWord kid = kids[k];
        string shortName = dep.GetEdge(headword, kid).GetRelation().GetShortName();
        if (!shortName.Matches("dep|discourse|punct"))
        {
            insideNP.Add(kid);
        }
    }

    if (insideNP.Count == 0)
    {
        // the headword is the only word
        return(new IntPair(headPos, headPos));
    }

    Pair<IndexedWord, IndexedWord> firstExtent = SemanticGraphUtils.LeftRightMostChildVertices(insideNP[0], dep);
    Pair<IndexedWord, IndexedWord> lastExtent = SemanticGraphUtils.LeftRightMostChildVertices(insideNP[insideNP.Count - 1], dep);

    // headword can be first or last word
    int spanBegin = Math.Min(headPos, firstExtent.first.Index() - 1);
    int spanEnd = Math.Max(headPos, lastExtent.second.Index() - 1);
    return(new IntPair(spanBegin, spanEnd));
}
/// <summary>
/// Attributes a mention to each not-yet-attributed quote using quote-removed dependency
/// parses: subject/verb pairs are collected from the quote's enhanced++ graph, and the
/// first pair whose verb lemma is a common speech word and whose subject and verb both lie
/// in the remainder-of-sentence range supplies the mention (name, pronoun or animate noun).
/// </summary>
/// <param name="doc">Document annotation providing the quotations.</param>
public virtual void DependencyParses(Annotation doc)
{
    IList<ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));
    foreach (ICoreMap quote in quotes)
    {
        // Skip quotes that already carry a mention.
        if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null)
        {
            continue;
        }
        Pair<int, int> range = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
        if (range == null)
        {
            continue;
        }

        //search for mentions in the first run
        Pair<List<string>, List<Pair<int, int>>> namesAndNameIndices = ScanForNames(range);
        List<string> names = namesAndNameIndices.first;
        List<Pair<int, int>> nameIndices = namesAndNameIndices.second;

        SemanticGraph graph = quote.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
        SemgrexMatcher matcher = subjVerbPattern.Matcher(graph);
        IList<Pair<IndexedWord, IndexedWord>> subjVerbPairs = new List<Pair<IndexedWord, IndexedWord>>();

        //TODO: check and see if this is necessary
        while (matcher.Find())
        {
            IndexedWord subj = matcher.GetNode("SUBJ");
            IndexedWord verb = matcher.GetNode("VERB");
            subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(subj, verb));
        }

        IList<IndexedWord> vbs = graph.GetAllNodesByPartOfSpeechPattern("VB.*");
        foreach (IndexedWord iw in vbs)
        {
            // does it have an nsubj child?
            ICollection<IndexedWord> children = graph.GetChildren(iw);
            IList<IndexedWord> deps = Generics.NewArrayList();
            IndexedWord nsubj = null;
            foreach (IndexedWord child in children)
            {
                SemanticGraphEdge sge = graph.GetEdge(iw, child);
                if (sge.GetRelation().GetShortName().Equals("dep") && child.Tag().StartsWith("VB"))
                {
                    deps.Add(child);
                }
                else if (sge.GetRelation().GetShortName().Equals("nsubj"))
                {
                    nsubj = child;
                }
            }
            if (nsubj != null)
            {
                // Pair the verb's subject with each verb attached to it by "dep".
                foreach (IndexedWord dep in deps)
                {
                    subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(nsubj, dep));
                }
            }
        }

        //look for a speech verb
        foreach (Pair<IndexedWord, IndexedWord> SVPair in subjVerbPairs)
        {
            IndexedWord verb = SVPair.second;
            IndexedWord subj = SVPair.first;

            //check if subj and verb outside of quote
            int verbTokPos = TokenToLocation(verb.BackingLabel());
            // Use the subject's own token here; the verb's position is already in verbTokPos.
            int subjTokPos = TokenToLocation(subj.BackingLabel());
            if (!(InRange(range, verbTokPos) && InRange(range, subjTokPos) && commonSpeechWords.Contains(verb.Lemma())))
            {
                continue;
            }

            if (subj.Tag().Equals("NNP"))
            {
                int startChar = subj.BeginPosition();
                for (int i = 0; i < names.Count; i++)
                {
                    Pair<int, int> nameIndex = nameIndices[i];
                    //avoid names that don't actually exist in
                    if (RangeContainsCharIndex(nameIndex, startChar))
                    {
                        FillInMention(quote, TokenRangeToString(nameIndex), nameIndex.first, nameIndex.second, sieveName, Name);
                        // NOTE(review): this only leaves the name loop; the remaining
                        // subject/verb pairs are still examined, unlike the branches below.
                        break;
                    }
                }
            }
            else if (subj.Tag().Equals("PRP"))
            {
                int loc = TokenToLocation(subj.BackingLabel());
                FillInMention(quote, subj.Word(), loc, loc, sieveName, Pronoun);
                break;
            }
            else if (subj.Tag().Equals("NN") && animacySet.Contains(subj.Word()))
            {
                int loc = TokenToLocation(subj.BackingLabel());
                FillInMention(quote, subj.Word(), loc, loc, sieveName, AnimateNoun);
                break;
            }
        }
    }
}
/// <summary>
/// Renders <paramref name="sg"/> as tab-separated, ten-column text: the graph's comments,
/// then one row per vertex in sorted order (preceded by a range row for multiword tokens),
/// then a blank line.
/// </summary>
/// <param name="sg">Graph to print.</param>
/// <param name="unescapeParenthesis">
/// When true, matches of <c>LrbPattern</c> / <c>RrbPattern</c> in the word and lemma are
/// replaced with "(" and ")".
/// </param>
/// <returns>The rendered text.</returns>
public virtual string PrintSemanticGraph(SemanticGraph sg, bool unescapeParenthesis)
{
    bool isTree = SemanticGraphUtils.IsTree(sg);
    StringBuilder sb = new StringBuilder();

    /* Print comments. */
    foreach (string comment in sg.GetComments())
    {
        sb.Append(comment).Append("\n");
    }

    foreach (IndexedWord token in sg.VertexListSorted())
    {
        /* Check for multiword tokens. */
        if (token.ContainsKey(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation)))
        {
            IntPair tokenSpan = token.Get(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation));
            if (tokenSpan.GetSource() == token.Index())
            {
                // Build the row by concatenation: string.Format does not interpret
                // printf-style "%d"/"%s"/"%n" and would emit the pattern itself.
                string range = tokenSpan.GetSource() + "-" + tokenSpan.GetTarget();
                sb.Append(range).Append("\t").Append(token.OriginalText()).Append("\t_\t_\t_\t_\t_\t_\t_\t_").AppendLine();
            }
        }

        /* Try to find main governor and additional dependencies. */
        string govIdx = null;
        GrammaticalRelation reln = null;
        Dictionary<string, string> enhancedDependencies = new Dictionary<string, string>();
        foreach (IndexedWord parent in sg.GetParents(token))
        {
            SemanticGraphEdge edge = sg.GetEdge(parent, token);
            // The first non-extra edge supplies the main governor and relation.
            if (govIdx == null && !edge.IsExtra())
            {
                govIdx = parent.ToCopyIndex();
                reln = edge.GetRelation();
            }
            enhancedDependencies[parent.ToCopyIndex()] = edge.GetRelation().ToString();
        }

        string additionalDepsString = isTree ? "_" : CoNLLUUtils.ToExtraDepsString(enhancedDependencies);
        string word = token.Word();
        string featuresString = CoNLLUUtils.ToFeatureString(token.Get(typeof(CoreAnnotations.CoNLLUFeats)));
        string pos = token.GetString<CoreAnnotations.PartOfSpeechAnnotation>("_");
        string upos = token.GetString<CoreAnnotations.CoarseTagAnnotation>("_");
        string misc = token.GetString<CoreAnnotations.CoNLLUMisc>("_");
        string lemma = token.GetString<CoreAnnotations.LemmaAnnotation>("_");
        string relnName = reln == null ? "_" : reln.ToString();

        /* Root. */
        if (govIdx == null && sg.GetRoots().Contains(token))
        {
            govIdx = "0";
            relnName = GrammaticalRelation.Root.ToString();
            additionalDepsString = isTree ? "_" : "0:" + relnName;
        }
        else if (govIdx == null)
        {
            // Neither a governed node nor a root: leave both columns unspecified.
            govIdx = "_";
            relnName = "_";
        }

        if (unescapeParenthesis)
        {
            word = word.ReplaceAll(LrbPattern, "(");
            word = word.ReplaceAll(RrbPattern, ")");
            lemma = lemma.ReplaceAll(LrbPattern, "(");
            lemma = lemma.ReplaceAll(RrbPattern, ")");
        }

        string[] columns = new string[]
        {
            token.ToCopyIndex(), word, lemma, upos, pos,
            featuresString, govIdx, relnName, additionalDepsString, misc
        };
        // AppendLine terminates the row with the platform line separator, which is what
        // the "%n" in the former format pattern stood for.
        sb.Append(string.Join("\t", columns)).AppendLine();
    }

    sb.Append("\n");
    return(sb.ToString());
}