/// <summary>
/// Older variant of the noun-phrase span computation for a head word.
/// </summary>
/// <remarks>
/// All positions are derived from <c>Index() - 1</c> (presumably converting 1-based token
/// indices to 0-based list positions; confirm against callers).
/// Without a copula child the span runs from the left-most to the right-most descendant of
/// the head word (or the head word itself, whichever is further out). With a copula child,
/// the start is taken from the child that follows the copula in the child list, or from the
/// head word itself when the copula is the last child.
/// </remarks>
/// <param name="headword">Head word of the candidate noun phrase.</param>
/// <param name="dep">Dependency graph containing <paramref name="headword"/>.</param>
/// <param name="sent">Sentence tokens; not read by this method.</param>
/// <returns>Pair of (begin, end) positions of the span.</returns>
private IntPair GetNPSpanOld(IndexedWord headword, SemanticGraph dep, IList<CoreLabel> sent)
{
    int headPos = headword.Index() - 1;
    IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
    Pair<IndexedWord, IndexedWord> extremes = SemanticGraphUtils.LeftRightMostChildVertices(headword, dep);

    // The head word can itself be the first or the last word of the span.
    int spanStart = Math.Min(headPos, extremes.first.Index() - 1);
    int spanEnd = Math.Max(headPos, extremes.second.Index() - 1);

    if (copula != null)
    {
        // With a copula, restart the span at whatever child follows the copula.
        IList<IndexedWord> kids = dep.GetChildList(headword);
        int afterCopula = kids.IndexOf(copula) + 1;
        spanStart = afterCopula < kids.Count
            ? Math.Min(headPos, SemanticGraphUtils.LeftMostChildVertice(kids[afterCopula], dep).Index() - 1)
            : headPos;
    }

    return new IntPair(spanStart, spanEnd);
}
/// <summary>
/// Creates a new node from the stored prototype and attaches it to the matched governor node.
/// </summary>
/// <remarks>
/// The new node copies the governor's document id and sentence index, and takes the index of
/// the governor's left-most child vertex.
/// TODO: figure out how to specify where in the sentence this node goes.
/// TODO: determine if we should be copying an IndexedWord, or working just with a FeatureLabel.
/// TODO: bombproof if this gov, dep, and reln already exist.
/// </remarks>
/// <param name="sg">Graph that receives the new vertex and edge.</param>
/// <param name="sm">Matcher from which the governor node is looked up by name.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord governor = sm.GetNode(govNodeName);
    IndexedWord inserted = new IndexedWord(newNodePrototype);

    // Cheap En-specific hack for placing copula (beginning of governing phrase).
    int insertionIndex = SemanticGraphUtils.LeftMostChildVertice(governor, sg).Index();

    inserted.SetDocID(governor.DocID());
    inserted.SetIndex(insertionIndex);
    inserted.SetSentIndex(governor.SentIndex());

    sg.AddVertex(inserted);
    sg.AddEdge(governor, inserted, relation, weight, false);
}
/// <summary>
/// Builds the graph <c>[ate subj&gt;Bill]</c>, converts it to a Semgrex pattern string with
/// <c>SemanticGraphUtils.SemgrexFromGraphOrderedNodes</c>, and compares the trimmed result
/// against the expected pattern text.
/// </summary>
public virtual void TestCreateSemgrexPattern()
{
    try
    {
        SemanticGraph graph = SemanticGraph.ValueOf("[ate subj>Bill]");
        Func<IndexedWord, string> transformNode = null;
        string pat = SemanticGraphUtils.SemgrexFromGraphOrderedNodes(graph, null, null, transformNode);
        NUnit.Framework.Assert.AreEqual("{word: ate; tag: null; ner: null}=ate >subj=E1 {word: bill; tag: null; ner: null}=Bill", pat.Trim());
    }
    catch (Exception e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
        // An NUnit assertion failure is itself an Exception, so swallowing it here would make
        // this test pass no matter what. Rethrow (preserving the stack trace) so failures are reported.
        throw;
    }
}
/// <summary>
/// Similar to ExpandFromPatterns, but performs an exhaustive search, applying
/// simplifications to the graphs until exhausted, then removes duplicate results.
/// </summary>
/// <remarks>
/// Delegates to the three-argument overload with <c>1</c> as its third argument. When more
/// than one graph comes back, the results are passed through
/// <c>SemanticGraphUtils.RemoveDuplicates</c> together with the source graph.
/// TODO: ensure cycles do not occur
/// NOTE: put in an arbitrary depth limit of 3, to prevent churning way too much (heuristic)
/// </remarks>
/// <param name="patternList">Edit patterns to apply.</param>
/// <param name="sg">Source graph to expand.</param>
/// <returns>The generated graphs, de-duplicated when there is more than one.</returns>
/// <exception cref="System.Exception"/>
public virtual ICollection<SemanticGraph> ExhaustFromPatterns(IList<SsurgeonPattern> patternList, SemanticGraph sg)
{
    ICollection<SemanticGraph> results = ExhaustFromPatterns(patternList, sg, 1);

    // Zero or one result cannot contain duplicates; hand it back untouched.
    if (results.Count <= 1)
    {
        return results;
    }

    if (log != null)
    {
        log.Info("Before remove dupe, size=" + results.Count);
    }

    results = SemanticGraphUtils.RemoveDuplicates(results, sg);

    if (log != null)
    {
        log.Info("AFTER remove dupe, size=" + results.Count);
    }

    return results;
}
/// <summary>
/// Given a list of SsurgeonPattern edit scripts, and a SemanticGraph
/// to operate over, returns a list of expansions of that graph, with
/// the result of each edit applied against a copy of the graph.
/// </summary>
/// <remarks>
/// Every graph produced by a pattern is appended to the returned list and echoed to standard
/// output. Duplicate removal affects only what is written to the log: the returned list is
/// filled before de-duplication and therefore still contains every generated graph.
/// </remarks>
/// <param name="patternList">Edit patterns to execute, in order.</param>
/// <param name="sg">Source graph each pattern is executed against.</param>
/// <returns>All graphs generated by all patterns, in generation order.</returns>
/// <exception cref="System.Exception"/>
public virtual IList<SemanticGraph> ExpandFromPatterns(IList<SsurgeonPattern> patternList, SemanticGraph sg)
{
    IList<SemanticGraph> retList = new List<SemanticGraph>();
    foreach (SsurgeonPattern pattern in patternList)
    {
        ICollection<SemanticGraph> generated = pattern.Execute(sg);
        foreach (SemanticGraph orderedGraph in generated)
        {
            retList.Add(orderedGraph);
            System.Console.Out.WriteLine("\ncompact = " + orderedGraph.ToCompactString());
            System.Console.Out.WriteLine("regular=" + orderedGraph);
        }

        if (generated.Count == 0)
        {
            continue;
        }

        if (log != null)
        {
            log.Info("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
            log.Info("Pre remove duplicates, num=" + generated.Count);
        }

        // Keep the collection RemoveDuplicates hands back. Previously the result was dropped,
        // so the match count and REWRITE entries logged below described the un-deduplicated set.
        generated = SemanticGraphUtils.RemoveDuplicates(generated, sg);

        if (log != null)
        {
            log.Info("Expand from patterns");
            if (logPrefix != null)
            {
                log.Info(logPrefix);
            }
            log.Info("Pattern = '" + pattern.GetUID() + "' generated " + generated.Count + " matches");
            log.Info("= = = = = = = = = =\nSrc graph:\n" + sg + "\n= = = = = = = = = =\n");
            int index = 1;
            foreach (SemanticGraph genSg in generated)
            {
                log.Info("REWRITE " + (index++));
                log.Info(genSg.ToString());
                log.Info(". . . . .\n");
            }
        }
    }
    return retList;
}
/// <summary>
/// Computes the noun-phrase span around a head word, leaving out material attached through
/// a copula construction and children attached with dep, discourse or punct relations.
/// E.g., "you are the person" yields "the person".
/// </summary>
/// <remarks>
/// Only children that come after the copula child (if any) in the child list are considered.
/// Positions are <c>Index() - 1</c> (presumably 0-based token positions; confirm against callers).
/// </remarks>
/// <param name="headword">Head word of the candidate noun phrase.</param>
/// <param name="dep">Dependency graph containing <paramref name="headword"/>.</param>
/// <param name="sent">Sentence tokens; not read by this method.</param>
/// <returns>Pair of (begin, end) positions; both equal the head position when no child qualifies.</returns>
private IntPair GetNPSpan(IndexedWord headword, SemanticGraph dep, IList<CoreLabel> sent)
{
    int headPos = headword.Index() - 1;
    IList<IndexedWord> kids = dep.GetChildList(headword);

    // When a copula child exists, start scanning right after it.
    IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
    int firstCandidate = 0;
    if (copula != null)
    {
        firstCandidate = kids.IndexOf(copula) + 1;
    }

    // Children that will be inside of the NP.
    IList<IndexedWord> npChildren = Generics.NewArrayList();
    for (int k = firstCandidate; k < kids.Count; k++)
    {
        IndexedWord kid = kids[k];
        SemanticGraphEdge link = dep.GetEdge(headword, kid);
        bool skipped = link.GetRelation().GetShortName().Matches("dep|discourse|punct");
        if (!skipped)
        {
            npChildren.Add(kid);
        }
    }

    // The head word is the only word.
    if (npChildren.Count == 0)
    {
        return new IntPair(headPos, headPos);
    }

    Pair<IndexedWord, IndexedWord> firstExtent = SemanticGraphUtils.LeftRightMostChildVertices(npChildren[0], dep);
    Pair<IndexedWord, IndexedWord> lastExtent = SemanticGraphUtils.LeftRightMostChildVertices(npChildren[npChildren.Count - 1], dep);

    // The head word can be the first or the last word of the span.
    int spanStart = Math.Min(headPos, firstExtent.first.Index() - 1);
    int spanEnd = Math.Max(headPos, lastExtent.second.Index() - 1);
    return new IntPair(spanStart, spanEnd);
}
/// <summary>
/// Renders a semantic graph as tab-separated text: the graph's comment lines first, then one
/// ten-column row per token (plus a range row for multiword tokens), then a blank line.
/// </summary>
/// <param name="sg">Graph to print.</param>
/// <param name="unescapeParenthesis">
/// When true, matches of <c>LrbPattern</c> / <c>RrbPattern</c> in the word and lemma columns
/// are replaced by "(" and ")".
/// </param>
/// <returns>The rendered text.</returns>
public virtual string PrintSemanticGraph(SemanticGraph sg, bool unescapeParenthesis)
{
    bool isTree = SemanticGraphUtils.IsTree(sg);
    StringBuilder sb = new StringBuilder();

    /* Print comments. */
    foreach (string comment in sg.GetComments())
    {
        sb.Append(comment).Append("\n");
    }

    foreach (IndexedWord token in sg.VertexListSorted())
    {
        /* Check for multiword tokens. */
        if (token.ContainsKey(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation)))
        {
            IntPair tokenSpan = token.Get(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation));
            if (tokenSpan.GetSource() == token.Index())
            {
                // .NET composite formatting uses {n} placeholders; the Java-style %d/%s/%n
                // specifiers used previously would have been emitted verbatim.
                string range = string.Format("{0}-{1}", tokenSpan.GetSource(), tokenSpan.GetTarget());
                sb.Append(string.Format("{0}\t{1}\t_\t_\t_\t_\t_\t_\t_\t_", range, token.OriginalText()));
                // Java's %n is the platform line separator.
                sb.Append(System.Environment.NewLine);
            }
        }

        /* Try to find main governor and additional dependencies. */
        string govIdx = null;
        GrammaticalRelation reln = null;
        Dictionary<string, string> enhancedDependencies = new Dictionary<string, string>();
        foreach (IndexedWord parent in sg.GetParents(token))
        {
            SemanticGraphEdge edge = sg.GetEdge(parent, token);
            // The first non-extra incoming edge supplies the main governor and relation.
            if (govIdx == null && !edge.IsExtra())
            {
                govIdx = parent.ToCopyIndex();
                reln = edge.GetRelation();
            }
            enhancedDependencies[parent.ToCopyIndex()] = edge.GetRelation().ToString();
        }

        string additionalDepsString = isTree ? "_" : CoNLLUUtils.ToExtraDepsString(enhancedDependencies);
        string word = token.Word();
        string featuresString = CoNLLUUtils.ToFeatureString(token.Get(typeof(CoreAnnotations.CoNLLUFeats)));
        string pos = token.GetString<CoreAnnotations.PartOfSpeechAnnotation>("_");
        string upos = token.GetString<CoreAnnotations.CoarseTagAnnotation>("_");
        string misc = token.GetString<CoreAnnotations.CoNLLUMisc>("_");
        string lemma = token.GetString<CoreAnnotations.LemmaAnnotation>("_");
        string relnName = reln == null ? "_" : reln.ToString();

        /* Root. */
        if (govIdx == null && sg.GetRoots().Contains(token))
        {
            govIdx = "0";
            relnName = GrammaticalRelation.Root.ToString();
            additionalDepsString = isTree ? "_" : "0:" + relnName;
        }
        else
        {
            // No governor found and not a root: leave both columns unspecified.
            if (govIdx == null)
            {
                govIdx = "_";
                relnName = "_";
            }
        }

        if (unescapeParenthesis)
        {
            word = word.ReplaceAll(LrbPattern, "(");
            word = word.ReplaceAll(RrbPattern, ")");
            lemma = lemma.ReplaceAll(LrbPattern, "(");
            lemma = lemma.ReplaceAll(RrbPattern, ")");
        }

        sb.Append(string.Format("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}", token.ToCopyIndex(), word, lemma, upos, pos, featuresString, govIdx, relnName, additionalDepsString, misc));
        sb.Append(System.Environment.NewLine);
    }

    sb.Append("\n");
    return sb.ToString();
}
/// <summary>
/// Extracts the mention(s) headed by <paramref name="headword"/> and registers them via
/// <c>AddMention</c>.
/// </summary>
/// <remarks>
/// Head words whose tag starts with "PRP" are handed to <c>ExtractPronounForHeadword</c>.
/// Otherwise the span from <c>GetNPSpan</c> is added after trimming a trailing "," token and
/// a leading token tagged "IN". If the head word has conjunct children, a second, shorter
/// mention ending before the earliest conjunct is also added (for "A and B", this is "A").
/// </remarks>
/// <param name="headword">Head word of the candidate mention.</param>
/// <param name="dep">Dependency graph used for span and conjunct lookup.</param>
/// <param name="s">Sentence annotation providing tokens and the basic/enhanced dependency graphs.</param>
/// <param name="mentions">Passed through to the mention-adding helpers.</param>
/// <param name="mentionSpanSet">Passed through to the mention-adding helpers.</param>
/// <param name="namedEntitySpanSet">Passed through to the mention-adding helpers.</param>
private void ExtractMentionForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    IList<CoreLabel> tokens = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    SemanticGraph basicDeps = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    SemanticGraph enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    if (enhancedDeps == null)
    {
        // Fall back to the basic dependencies when no enhanced graph is attached.
        enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    }

    // Pronoun.
    if (headword.Tag().StartsWith("PRP"))
    {
        ExtractPronounForHeadword(headword, dep, s, mentions, mentionSpanSet, namedEntitySpanSet);
        return;
    }

    // Add NP mention; the end position is made exclusive here.
    IntPair span = GetNPSpan(headword, dep, tokens);
    int spanStart = span.Get(0);
    int spanEnd = span.Get(1) + 1;

    // Try not to have a span that ends with ",".
    if (",".Equals(tokens[spanEnd - 1].Word()))
    {
        spanEnd--;
    }

    // Try to remove a leading IN.
    if ("IN".Equals(tokens[spanStart].Tag()))
    {
        spanStart++;
    }

    AddMention(spanStart, spanEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, tokens, basicDeps, enhancedDeps);

    // Extract the first element in a conjunction (A and B -> extract A here; "A and B" and "B"
    // are extracted elsewhere).
    ICollection<IndexedWord> conjuncts = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
    if (conjuncts.Count == 0)
    {
        return;
    }

    // Make sure we work from the earliest conjunct.
    IndexedWord earliest = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
    foreach (IndexedWord candidate in conjuncts)
    {
        if (candidate.Index() < earliest.Index())
        {
            earliest = candidate;
        }
    }

    IndexedWord leftEdge = SemanticGraphUtils.LeftMostChildVertice(earliest, dep);

    // Walk backwards over any run of CC / "," tagged tokens preceding the conjunct.
    int firstElementEnd = leftEdge.Index() - 1;
    while (firstElementEnd > spanStart && tokens[firstElementEnd - 1].Tag().Matches("CC|,"))
    {
        firstElementEnd--;
    }

    // Only add the shorter mention if it is non-empty and still contains the head word.
    if (firstElementEnd > spanStart && headword.Index() - 1 < firstElementEnd)
    {
        AddMention(spanStart, firstElementEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, tokens, basicDeps, enhancedDeps);
    }
}