/// <summary>
/// Builds a timex map for <paramref name="node"/>, appends it to the sentence's
/// <c>TimexAnnotations</c> list (creating the list when absent) and marks every covered
/// token as a DATE carrying the timex.
/// </summary>
/// <param name="node">Timex node supplying the timex object and its text contents.</param>
/// <param name="tokens">Tokens covered by the timex; the first and last element are read, so the list must be non-empty.</param>
/// <param name="sentence">Sentence whose timex list receives the new map.</param>
/// <returns>The newly created timex map.</returns>
private ICoreMap MakeTimexMap(HeidelTimeKBPAnnotator.HeidelTimeOutputReader.TimexNode node, IList<CoreLabel> tokens, ICoreMap sentence)
{
    ICoreMap result = new ArrayCoreMap();

    // Timex payload and surface text.
    result.Set(typeof(TimeAnnotations.TimexAnnotation), node.timex);
    result.Set(typeof(CoreAnnotations.TextAnnotation), node.contents);

    // Character and token extents are taken from the first and last covered token.
    result.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), BeginOffset(tokens[0]));
    result.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), EndOffset(tokens[tokens.Count - 1]));
    result.Set(typeof(CoreAnnotations.TokenBeginAnnotation), tokens[0].Index());
    result.Set(typeof(CoreAnnotations.TokenEndAnnotation), tokens[tokens.Count - 1].Index());
    result.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);

    // Lazily create the per-sentence timex list, then register the new map.
    if (sentence.Get(typeof(TimeAnnotations.TimexAnnotations)) == null)
    {
        sentence.Set(typeof(TimeAnnotations.TimexAnnotations), new List<ICoreMap>());
    }
    sentence.Get(typeof(TimeAnnotations.TimexAnnotations)).Add(result);

    // Update NER information on each covered token.
    foreach (CoreLabel covered in tokens)
    {
        covered.Set(typeof(CoreAnnotations.NamedEntityTagAnnotation), "DATE");
        covered.Set(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation), node.timex.Value());
        covered.Set(typeof(TimeAnnotations.TimexAnnotation), node.timex);
    }

    return result;
}
/// <summary>
/// Adds a mention for each "m1" / "m2" subtree matched by <c>enumerationsMentionPattern</c>
/// whose token span is neither already known nor inside a named entity.
/// </summary>
/// <param name="s">Sentence providing tokens, parse tree and enhanced dependencies.</param>
/// <param name="mentions">Receives the newly created mentions.</param>
/// <param name="mentionSpanSet">Spans already turned into mentions; extended with each new span.</param>
/// <param name="namedEntitySpanSet">Named-entity spans used by the <c>InsideNE</c> filter.</param>
protected internal static void ExtractEnumerations(ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    IList<CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    Tree tree = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
    SemanticGraph dependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

    // Pass 1: collect the span of every matched conjunct, keyed by (begin, end).
    IDictionary<IntPair, Tree> spanToMentionSubTree = Generics.NewHashMap();
    TregexMatcher matcher = enumerationsMentionPattern.Matcher(tree);
    while (matcher.Find())
    {
        matcher.GetMatch();
        Tree[] conjuncts = new Tree[] { matcher.GetNode("m1"), matcher.GetNode("m2") };
        foreach (Tree conjunct in conjuncts)
        {
            IList<Tree> leaves = conjunct.GetLeaves();
            // IndexAnnotation is used as 1-based here: begin is shifted to 0-based, end stays exclusive.
            int spanBegin = ((CoreLabel)leaves[0].Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
            int spanEnd = ((CoreLabel)leaves[leaves.Count - 1].Label()).Get(typeof(CoreAnnotations.IndexAnnotation));
            spanToMentionSubTree[new IntPair(spanBegin, spanEnd)] = conjunct;
        }
    }

    // Pass 2: turn every new, non-NE-internal span into a mention.
    foreach (IntPair span in spanToMentionSubTree.Keys)
    {
        if (mentionSpanSet.Contains(span) || InsideNE(span, namedEntitySpanSet))
        {
            continue;
        }
        int dummyMentionId = -1;
        int from = span.Get(0);
        int to = span.Get(1);
        Mention created = new Mention(dummyMentionId, from, to, dependency, new List<CoreLabel>(sent.SubList(from, to)), spanToMentionSubTree[span]);
        mentions.Add(created);
        mentionSpanSet.Add(span);
    }
}
/// <summary>
/// Runs <c>featureAnnotator</c> over one sentence, using its basic dependency graph and parse tree.
/// </summary>
/// <param name="annotation">Enclosing annotation; not read by this method.</param>
/// <param name="sentence">Sentence supplying the dependency graph and the tree.</param>
protected internal override void DoOneSentence(Annotation annotation, ICoreMap sentence)
{
    SemanticGraph basicDependencies = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    Tree parseTree = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));

    featureAnnotator.AddFeatures(basicDependencies, parseTree, false, true);
}
/// <summary>
/// Returns the only parent of this object, or <c>null</c> when it has none.
/// </summary>
/// <remarks>
/// When more than one parent exists, the parents, the document id, the sentence tokens and the
/// sentence's event mentions are logged before a debug assertion checks the single-parent expectation.
/// </remarks>
/// <param name="sentence">Sentence used only for the diagnostic output.</param>
/// <returns>The first parent yielded by <c>GetParents()</c>, or <c>null</c>.</returns>
public virtual ExtractionObject GetSingleParent(ICoreMap sentence)
{
    if (GetParents().Count > 1)
    {
        log.Info("This event has multiple parents: " + this);

        ICollection<ExtractionObject> allParents = GetParents();
        int parentNo = 0;
        foreach (ExtractionObject parent in allParents)
        {
            parentNo++;
            log.Info("PARENT #" + parentNo + ": " + parent);
        }

        log.Info("DOC " + sentence.Get(typeof(CoreAnnotations.DocIDAnnotation)));

        log.Info("SENTENCE:");
        foreach (CoreLabel token in sentence.Get(typeof(CoreAnnotations.TokensAnnotation)))
        {
            log.Info(" " + token.Word());
        }

        log.Info("EVENTS IN SENTENCE:");
        int eventNo = 0;
        foreach (Edu.Stanford.Nlp.IE.Machinereading.Structure.EventMention mention in sentence.Get(typeof(MachineReadingAnnotations.EventMentionsAnnotation)))
        {
            eventNo++;
            log.Info("EVENT #" + eventNo + ": " + mention);
        }
    }

    System.Diagnostics.Debug.Assert((GetParents().Count <= 1));

    // Hand back the first parent the collection yields, if any.
    foreach (ExtractionObject first in GetParents())
    {
        return first;
    }
    return null;
}
/// <summary>
/// Creates a mention for every subtree matched by <c>npOrPrpMentionPattern</c> whose token span is
/// neither already known nor inside a named entity.
/// </summary>
/// <param name="s">Sentence providing tokens, parse tree and enhanced dependencies.</param>
/// <param name="mentions">Receives the newly created mentions.</param>
/// <param name="mentionSpanSet">Spans already turned into mentions; extended with each new span.</param>
/// <param name="namedEntitySpanSet">Named-entity spans used by the <c>InsideNE</c> filter.</param>
protected internal static void ExtractNPorPRP(ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    IList<CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    Tree tree = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
    tree.IndexLeaves();
    SemanticGraph dependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

    TregexMatcher matcher = npOrPrpMentionPattern.Matcher(tree);
    while (matcher.Find())
    {
        Tree matched = matcher.GetMatch();
        IList<Tree> leaves = matched.GetLeaves();
        CoreLabel firstLeaf = (CoreLabel)leaves[0].Label();
        CoreLabel lastLeaf = (CoreLabel)leaves[leaves.Count - 1].Label();

        // Begin is shifted to 0-based; end stays exclusive.
        int from = firstLeaf.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
        int to = lastLeaf.Get(typeof(CoreAnnotations.IndexAnnotation));

        // Try not to have a span that ends with a comma.
        if (",".Equals(sent[to - 1].Word()))
        {
            to--;
        }

        IntPair span = new IntPair(from, to);
        if (mentionSpanSet.Contains(span) || InsideNE(span, namedEntitySpanSet))
        {
            continue;
        }

        int dummyMentionId = -1;
        Mention created = new Mention(dummyMentionId, from, to, dependency, new List<CoreLabel>(sent.SubList(from, to)), matched);
        mentions.Add(created);
        mentionSpanSet.Add(span);
    }
}
/// <summary>
/// Looks in the last sentence of <paramref name="paragraph"/> for a "report"/"say" verb whose
/// <c>nsubj</c> dependent is the head of a known mention with an NER string starting with "PER",
/// and returns that mention's id as a string.
/// </summary>
/// <param name="paragraph">Sentences of the paragraph; only the last one is inspected.</param>
/// <param name="paragraphOffset">Added to the last sentence's position in the paragraph to form the sentence index used in the head-position lookup.</param>
/// <param name="dict">Not read by this method.</param>
/// <returns>
/// The mention id of the speaker, or the empty string when none is found. When several
/// candidates qualify, the last one encountered wins.
/// </returns>
private string FindNextParagraphSpeaker(IList<ICoreMap> paragraph, int paragraphOffset, Dictionaries dict)
{
    ICoreMap lastSent = paragraph[paragraph.Count - 1];
    string speaker = string.Empty;
    foreach (CoreLabel w in lastSent.Get(typeof(CoreAnnotations.LemmaAnnotation) == null ? typeof(CoreAnnotations.TokensAnnotation) : typeof(CoreAnnotations.TokensAnnotation)))
    {
        // Literal-first comparison: a token without a lemma is skipped instead of
        // triggering a null dereference.
        string lemma = w.Get(typeof(CoreAnnotations.LemmaAnnotation));
        if (!"report".Equals(lemma) && !"say".Equals(lemma))
        {
            continue;
        }

        string word = w.Get(typeof(CoreAnnotations.TextAnnotation));
        SemanticGraph dependency = lastSent.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
        IndexedWord t = dependency.GetNodeByWordPattern(word);
        foreach (Pair<GrammaticalRelation, IndexedWord> child in dependency.ChildPairs(t))
        {
            if (!child.First().GetShortName().Equals("nsubj"))
            {
                continue;
            }

            // Token indices start from 1; head positions are stored 0-based.
            int subjectIndex = child.Second().Index();
            IntTuple headPosition = new IntTuple(2);
            headPosition.Set(0, paragraph.Count - 1 + paragraphOffset);
            headPosition.Set(1, subjectIndex - 1);

            if (mentionheadPositions.Contains(headPosition) && mentionheadPositions[headPosition].nerString.StartsWith("PER"))
            {
                // Instance ToString(): the previous `int.ToString(value)` form is a Java idiom
                // (Integer.toString) and has no static counterpart in C#.
                speaker = mentionheadPositions[headPosition].mentionID.ToString();
            }
        }
    }
    return speaker;
}
/// <summary>
/// Decides whether <paramref name="potentialCanonicalMention"/> is a better canonical mention than
/// <paramref name="entityMention"/>: it is when its text is longer, or equally long and it starts
/// at an earlier character offset.
/// </summary>
/// <param name="entityMention">The mention currently under consideration.</param>
/// <param name="potentialCanonicalMention">The candidate replacement.</param>
/// <returns><c>true</c> when the candidate is longer, or of the same length and earlier.</returns>
public virtual bool MoreCanonicalMention(ICoreMap entityMention, ICoreMap potentialCanonicalMention)
{
    // Surface text of both mentions.
    string currentText = entityMention.Get(typeof(CoreAnnotations.TextAnnotation));
    string candidateText = potentialCanonicalMention.Get(typeof(CoreAnnotations.TextAnnotation));

    // Character start offsets of both mentions.
    int currentStart = entityMention.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
    int candidateStart = potentialCanonicalMention.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));

    int candidateLength = candidateText.Length;
    int currentLength = currentText.Length;

    if (candidateLength != currentLength)
    {
        // Different lengths: the longer text wins outright.
        return candidateLength > currentLength;
    }

    // Same length: the earlier mention wins.
    return candidateStart < currentStart;
}
// throw e;
/// <summary>
/// Extracts the publication date of <paramref name="document"/> as a string, preferring the
/// calendar annotation over the doc-date annotation.
/// </summary>
/// <param name="document">Document carrying a Calendar and/or DocDate annotation.</param>
/// <returns>
/// The formatted calendar when one is set, otherwise the doc-date string when it is non-null,
/// otherwise an empty optional.
/// </returns>
/// <exception cref="ArgumentException">Neither annotation key is present on the document.</exception>
private Optional<string> GetPubDate(ICoreMap document)
{
    // Error check (not strictly necessary, technically): at least one of the two keys must exist.
    if (!(document.ContainsKey(typeof(CoreAnnotations.CalendarAnnotation)) || document.ContainsKey(typeof(CoreAnnotations.DocDateAnnotation))))
    {
        throw new ArgumentException("CoreMap must have either a Calendar or DocDate annotation");
    }

    // Case: calendar annotation.
    Calendar calendar = document.Get(typeof(CoreAnnotations.CalendarAnnotation));
    if (calendar != null)
    {
        // NOTE(review): "%TF" is a java.util.Formatter date conversion; .NET composite formatting
        // does not interpret it, so this presumably yields the literal text "%TF" - confirm and port.
        return Optional.Of(string.Format("%TF", calendar));
    }

    // Case: doc-date annotation.
    string docDate = document.Get(typeof(CoreAnnotations.DocDateAnnotation));
    if (docDate != null)
    {
        return Optional.Of(docDate);
    }

    return Optional.Empty();
}
// nothing to do by default
/// <summary>
/// Converts NamedEntityTagAnnotation tags into
/// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EntityMention"/>s.
/// Each maximal run of consecutive tokens tagged <paramref name="nerTag"/> becomes one mention,
/// which is appended to the sentence's existing entity-mention list.
/// </summary>
/// <param name="sentence">
/// A sentence, ideally annotated with NamedEntityTagAnnotation. Tokens without a tag are treated
/// as not matching. Its token list and entity-mention list are expected to be non-null.
/// </param>
/// <param name="nerTag">The name of the NER tag to copy, e.g. "DATE".</param>
/// <param name="entityType">
/// The type of the
/// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EntityMention"/>
/// objects created.
/// </param>
public virtual void MakeAnnotationFromGivenNERTag(ICoreMap sentence, string nerTag, string entityType)
{
    IList<CoreLabel> words = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
    IList<EntityMention> mentions = sentence.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation));
    System.Diagnostics.Debug.Assert(words != null);
    System.Diagnostics.Debug.Assert(mentions != null);

    for (int start = 0; start < words.Count; start++)
    {
        // Find the first token at or after start that isn't tagged nerTag.
        int end;
        for (end = start; end < words.Count; end++)
        {
            string ne = words[end].Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
            // An untagged token (null) ends the run instead of causing a null dereference.
            if (ne == null || !ne.Equals(nerTag))
            {
                break;
            }
        }

        if (end > start)
        {
            // Found a match: tokens [start, end) all carry nerTag.
            EntityMention m = entityMentionFactory.ConstructEntityMention(EntityMention.MakeUniqueId(), sentence, new Span(start, end), new Span(start, end), entityType, null, null);
            logger.Info("Created " + entityType + " entity mention: " + m);
            // Resume scanning right after the run (the loop increment adds the missing 1).
            start = end - 1;
            mentions.Add(m);
        }
    }

    sentence.Set(typeof(MachineReadingAnnotations.EntityMentionsAnnotation), mentions);
}
/// <summary>
/// Creates a mention for every subtree matched by <c>npOrPrpMentionPattern</c> whose token span is
/// not already in <paramref name="mentionSpanSet"/>. Spans inside a named entity are skipped,
/// except when <c>lang</c> is Chinese, where that filter is bypassed.
/// </summary>
/// <param name="s">Sentence providing tokens, parse tree and dependency graphs.</param>
/// <param name="mentions">Receives the newly created mentions.</param>
/// <param name="mentionSpanSet">Spans already turned into mentions; extended with each new span.</param>
/// <param name="namedEntitySpanSet">Named-entity spans used by the <c>InsideNE</c> filter.</param>
public virtual void ExtractNPorPRP(ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    IList<CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    Tree tree = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
    tree.IndexLeaves();

    SemanticGraph basicDependency = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    SemanticGraph enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    if (enhancedDependency == null)
    {
        // No enhanced graph available: fall back to the basic dependencies.
        enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    }

    TregexMatcher matcher = npOrPrpMentionPattern.Matcher(tree);
    while (matcher.Find())
    {
        Tree matched = matcher.GetMatch();
        IList<Tree> leaves = matched.GetLeaves();

        // Begin is shifted to 0-based; end stays exclusive.
        // (Trimming a trailing "," from the span is intentionally not done in this variant.)
        int from = ((CoreLabel)leaves[0].Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
        int to = ((CoreLabel)leaves[leaves.Count - 1].Label()).Get(typeof(CoreAnnotations.IndexAnnotation));

        IntPair span = new IntPair(from, to);
        if (mentionSpanSet.Contains(span))
        {
            continue;
        }
        if (!(lang == Locale.Chinese || !InsideNE(span, namedEntitySpanSet)))
        {
            continue;
        }

        int dummyMentionId = -1;
        Mention created = new Mention(dummyMentionId, from, to, sent, basicDependency, enhancedDependency, new List<CoreLabel>(sent.SubList(from, to)), matched);
        mentions.Add(created);
        mentionSpanSet.Add(span);
    }
}
/// <summary>
/// Finalises parsing of one sentence: optionally maps the trees through <c>treeMap</c>, fills in
/// the parse annotations, optionally stores a binarized copy of the first tree, and back-fills
/// missing sentence indices on the collapsed dependency graph's nodes.
/// </summary>
/// <param name="sentence">Sentence receiving the annotations.</param>
/// <param name="trees">Parse trees for the sentence; the first one is binarized when <c>saveBinaryTrees</c> is set.</param>
private void FinishSentence(ICoreMap sentence, IList<Tree> trees)
{
    if (treeMap != null)
    {
        IList<Tree> mappedTrees = Generics.NewLinkedList();
        foreach (Tree source in trees)
        {
            Tree mapped = treeMap.Apply(source);
            mappedTrees.Add(mapped);
        }
        trees = mappedTrees;
    }

    ParserAnnotatorUtils.FillInParseAnnotations(Verbose, BuildGraphs, gsf, sentence, trees, extraDependencies);

    if (saveBinaryTrees)
    {
        TreeBinarizer binarizer = TreeBinarizer.SimpleTreeBinarizer(parser.GetTLPParams().HeadFinder(), parser.TreebankLanguagePack());
        Tree binaryTree = binarizer.TransformTree(trees[0]);
        Edu.Stanford.Nlp.Trees.Trees.ConvertToCoreLabels(binaryTree);
        sentence.Set(typeof(TreeCoreAnnotations.BinarizedTreeAnnotation), binaryTree);
    }

    // For some reason, in some corner cases nodes do not have sentenceIndex set;
    // do a pass and make sure every node has it whenever the sentence itself has one.
    SemanticGraph collapsed = sentence.Get(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation));
    if (collapsed == null)
    {
        return;
    }
    foreach (IndexedWord node in collapsed.VertexSet())
    {
        if (iwHasIndex(node))
        {
            continue;
        }
        if (sentence.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)) == null)
        {
            continue;
        }
        node.SetSentIndex(sentence.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)));
    }
}

/// <summary>Tells whether <paramref name="node"/> already carries a sentence index annotation.</summary>
private static bool iwHasIndex(IndexedWord node)
{
    return node.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)) != null;
}
/// <summary>
/// Assigns head index, head word and head string to every mention of a sentence.
/// </summary>
/// <remarks>
/// Chinese is delegated to <c>FindHeadChinese</c>; otherwise the syntactic head found in the parse
/// tree is used. When the resulting head lies outside the mention's original span, the problem is
/// logged and the head falls back to the first token of the mention.
/// </remarks>
/// <param name="s">Sentence providing the parse tree and tokens.</param>
/// <param name="mentions">Mentions to update in place.</param>
public virtual void FindHead(ICoreMap s, IList<Mention> mentions)
{
    Tree tree = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
    IList<CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    tree.IndexSpans(0);

    foreach (Mention mention in mentions)
    {
        if (lang == Locale.Chinese)
        {
            FindHeadChinese(sent, mention);
        }
        else
        {
            CoreLabel headLabel = (CoreLabel)FindSyntacticHead(mention, tree, sent).Label();
            // IndexAnnotation is shifted by one to address the token list.
            mention.headIndex = headLabel.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
            mention.headWord = sent[mention.headIndex];
            mention.headString = mention.headWord.Get(typeof(CoreAnnotations.TextAnnotation)).ToLower(Locale.English);
        }

        // Position of the head relative to the start of the mention.
        int start = mention.headIndex - mention.startIndex;
        bool headInsideSpan = start >= 0 && start < mention.originalSpan.Count;
        if (headInsideSpan)
        {
            continue;
        }

        Redwood.Log("Invalid index for head " + start + "=" + mention.headIndex + "-" + mention.startIndex + ": originalSpan=[" + StringUtils.JoinWords(mention.originalSpan, " ") + "], head=" + mention.headWord);
        Redwood.Log("Setting head string to entire mention");
        mention.headIndex = mention.startIndex;
        if (mention.originalSpan.Count > 0)
        {
            mention.headWord = mention.originalSpan[0];
        }
        else
        {
            mention.headWord = sent[mention.startIndex];
        }
        mention.headString = mention.originalSpan.ToString();
    }
}
/// <summary>
/// Runs <paramref name="pipeline"/> over a single sentence by wrapping it in a temporary
/// <see cref="Annotation"/>.
/// </summary>
/// <remarks>
/// The wrapper's text is the sentence's tokens joined with single spaces; it shares the sentence's
/// token list and contains the sentence as its only entry in the sentences list.
/// </remarks>
/// <param name="sentence">The sentence to annotate; it must carry a tokens annotation.</param>
/// <param name="pipeline">The pipeline to run over the wrapper.</param>
public static void Annotate(ICoreMap sentence, AnnotationPipeline pipeline)
{
    string joinedText = StringUtils.Join(sentence.Get(typeof(CoreAnnotations.TokensAnnotation)), " ");

    Annotation wrapper = new Annotation(joinedText);
    wrapper.Set(typeof(CoreAnnotations.TokensAnnotation), sentence.Get(typeof(CoreAnnotations.TokensAnnotation)));
    wrapper.Set(typeof(CoreAnnotations.SentencesAnnotation), Java.Util.Collections.SingletonList(sentence));

    pipeline.Annotate(wrapper);
}
/// <summary>
/// Creates an instance from a sentence, taking its tokens and its collapsed, CC-processed
/// dependency graph.
/// </summary>
/// <param name="s">Sentence supplying both annotations.</param>
public DataInstanceDep(ICoreMap s)
{
    // Only the collapsed CC-processed graph is used; the collapsed and basic graphs are not read.
    tokens = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    graph = s.Get(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation));
}
/// <summary>
/// Packs the mention annotations stored on <paramref name="quote"/> (text, begin, end and type)
/// into a <see cref="Sieve.MentionData"/>.
/// </summary>
/// <param name="quote">Quote carrying the four mention annotations.</param>
/// <returns>A new mention-data object built from those annotations.</returns>
protected internal virtual Sieve.MentionData GetMentionData(ICoreMap quote)
{
    string mentionText = quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation));
    int mentionBegin = quote.Get(typeof(QuoteAttributionAnnotator.MentionBeginAnnotation));
    int mentionEnd = quote.Get(typeof(QuoteAttributionAnnotator.MentionEndAnnotation));
    string mentionType = quote.Get(typeof(QuoteAttributionAnnotator.MentionTypeAnnotation));

    Sieve.MentionData data = new Sieve.MentionData(this, mentionBegin, mentionEnd, mentionText, mentionType);
    return data;
}
/// <summary>
/// Builds a <see cref="Sieve.MentionData"/> from the mention annotations on <paramref name="q"/>,
/// or a placeholder when the quote has no mention annotation.
/// </summary>
/// <param name="q">Quote that may carry mention annotations.</param>
/// <returns>
/// The quote's mention data, or a placeholder with offsets <c>-1</c> and <c>null</c> text and type.
/// </returns>
public virtual Sieve.MentionData MakeMentionData(ICoreMap q)
{
    bool hasMention = q.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null;
    if (!hasMention)
    {
        // No mention recorded for this quote.
        return new Sieve.MentionData(this, -1, -1, null, null);
    }

    return new Sieve.MentionData(
        this,
        q.Get(typeof(QuoteAttributionAnnotator.MentionBeginAnnotation)),
        q.Get(typeof(QuoteAttributionAnnotator.MentionEndAnnotation)),
        q.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)),
        q.Get(typeof(QuoteAttributionAnnotator.MentionTypeAnnotation)));
}
/// <summary>
/// Produces a list in which every run of elements of <paramref name="list"/> covered by a matched
/// expression is replaced by that expression's annotation.
/// </summary>
/// <remarks>
/// Expression token offsets are translated to list positions through the elements' token begin/end
/// annotations (or through the list position itself for elements lacking them). Expressions whose
/// offsets do not align with an element boundary, or that start before the end of an already
/// merged expression, are skipped. <paramref name="matchedExprs"/> is sorted in place with
/// <c>ExprTokenOffsetComparator</c>.
/// </remarks>
/// <param name="list">Elements to merge over.</param>
/// <param name="matchedExprs">Matched expressions; when <c>null</c>, <paramref name="list"/> is returned as is.</param>
/// <returns>The merged list.</returns>
public static IList<ICoreMap> ReplaceMergedUsingTokenOffsets<_T0, _T1>(IList<_T0> list, IList<_T1> matchedExprs)
    where _T0 : ICoreMap
    where _T1 : MatchedExpression
{
    if (matchedExprs == null)
    {
        return list;
    }

    // Token offset -> list position (begin) and token offset -> list position one past (end).
    IDictionary<int, int> tokenBeginToListIndexMap = new Dictionary<int, int>();
    IDictionary<int, int> tokenEndToListIndexMap = new Dictionary<int, int>();
    for (int i = 0; i < list.Count; i++)
    {
        ICoreMap cm = list[i];
        if (cm.ContainsKey(typeof(CoreAnnotations.TokenBeginAnnotation)) && cm.ContainsKey(typeof(CoreAnnotations.TokenEndAnnotation)))
        {
            tokenBeginToListIndexMap[cm.Get(typeof(CoreAnnotations.TokenBeginAnnotation))] = i;
            tokenEndToListIndexMap[cm.Get(typeof(CoreAnnotations.TokenEndAnnotation))] = i + 1;
        }
        else
        {
            // No token offsets on the element: use its list position as its offset.
            tokenBeginToListIndexMap[i] = i;
            tokenEndToListIndexMap[i + 1] = i + 1;
        }
    }

    matchedExprs.Sort(ExprTokenOffsetComparator);

    IList<ICoreMap> merged = new List<ICoreMap>(list.Count); // Approximate size
    int last = 0;
    foreach (MatchedExpression expr in matchedExprs)
    {
        int start = expr.tokenOffsets.First();
        int end = expr.tokenOffsets.Second();

        // TryGetValue instead of the indexer: the indexer throws KeyNotFoundException for an
        // unaligned offset, and comparing an int to null can never detect a missing entry.
        int istart;
        int iend;
        if (!tokenBeginToListIndexMap.TryGetValue(start, out istart) || !tokenEndToListIndexMap.TryGetValue(end, out iend))
        {
            continue;
        }

        if (istart >= last)
        {
            // Copy the untouched elements before the expression, then the expression itself.
            Sharpen.Collections.AddAll(merged, list.SubList(last, istart));
            ICoreMap m = expr.GetAnnotation();
            merged.Add(m);
            last = iend;
        }
    }

    // Add rest of elements
    if (last < list.Count)
    {
        Sharpen.Collections.AddAll(merged, list.SubList(last, list.Count));
    }
    return merged;
}
// if(m.originalSpan.size() > 1) {
//   boolean isNE = true;
//   for(CoreLabel cl : m.originalSpan) {
//     if(!cl.tag().startsWith("NNP")) isNE = false;
//   }
//   if(isNE) {
//     namedEntitySpanSet.add(mSpan);
//   }
// }
/// <summary>
/// Creates a mention for every maximal run of tokens sharing the same NER label, except runs
/// labelled O, QUANTITY, CARDINAL, PERCENT, DATE, DURATION, TIME or SET.
/// </summary>
/// <remarks>
/// A possessive "'s" (tag POS) directly following a run is attached to it. Each new span is added
/// to both <paramref name="mentionSpanSet"/> and <paramref name="namedEntitySpanSet"/>.
/// </remarks>
/// <param name="s">Sentence providing tokens and dependency graphs.</param>
/// <param name="mentions">Receives the newly created mentions.</param>
/// <param name="mentionSpanSet">Spans already turned into mentions; extended with each new span.</param>
/// <param name="namedEntitySpanSet">Receives the span of every mention created here.</param>
protected internal static void ExtractNamedEntityMentions(ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    IList<CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    SemanticGraph basicDependency = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    SemanticGraph enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    if (enhancedDependency == null)
    {
        // No enhanced graph available: fall back to the basic dependencies.
        enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    }

    string previousLabel = "O";
    int runStart = -1;
    foreach (CoreLabel token in sent)
    {
        string currentLabel = token.Ner();
        if (currentLabel.Equals(previousLabel))
        {
            // Still inside the same run.
            continue;
        }

        // The label changed: the previous run ends just before this token.
        int runEnd = token.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
        if (!previousLabel.Matches("O|QUANTITY|CARDINAL|PERCENT|DATE|DURATION|TIME|SET"))
        {
            bool isPossessive = token.Get(typeof(CoreAnnotations.TextAnnotation)).Equals("'s") && token.Tag().Equals("POS");
            if (isPossessive)
            {
                runEnd++;
            }

            IntPair runSpan = new IntPair(runStart, runEnd);
            // Need to check runStart < runEnd because, for example, there could be a 's
            // mislabeled by the NER and attached to the previous NER by the heuristic above.
            if (runStart < runEnd && !mentionSpanSet.Contains(runSpan))
            {
                int dummyMentionId = -1;
                Mention created = new Mention(dummyMentionId, runStart, runEnd, sent, basicDependency, enhancedDependency, new List<CoreLabel>(sent.SubList(runStart, runEnd)));
                mentions.Add(created);
                mentionSpanSet.Add(runSpan);
                namedEntitySpanSet.Add(runSpan);
            }
        }

        runStart = runEnd;
        previousLabel = currentLabel;
    }

    // NE at the end of sentence: the final run is closed by the sentence boundary.
    if (previousLabel.Matches("O|QUANTITY|CARDINAL|PERCENT|DATE|DURATION|TIME|SET"))
    {
        return;
    }
    IntPair tailSpan = new IntPair(runStart, sent.Count);
    if (!mentionSpanSet.Contains(tailSpan))
    {
        int dummyMentionId = -1;
        Mention tailMention = new Mention(dummyMentionId, runStart, sent.Count, sent, basicDependency, enhancedDependency, new List<CoreLabel>(sent.SubList(runStart, sent.Count)));
        mentions.Add(tailMention);
        mentionSpanSet.Add(tailSpan);
        namedEntitySpanSet.Add(tailSpan);
    }
}
/// <summary>
/// Converts the gold entity mentions of sentence <paramref name="s"/> into <see cref="Mention"/>
/// objects and appends them, as one new list, to <paramref name="allGoldMentions"/>.
/// </summary>
/// <remarks>
/// Mentions are processed in the order defined by <paramref name="comparator"/>. The mention id is
/// the number after the last "-" of the object id, and the gold cluster id is the number after the
/// last "-E" of the coref id. <c>maxID</c> is raised to the largest mention id seen.
/// </remarks>
/// <param name="s">Sentence carrying the gold entity mentions and dependency graphs.</param>
/// <param name="allGoldMentions">Gold mentions of all sentences so far; a list for this sentence is appended.</param>
/// <param name="comparator">Ordering used to sort this sentence's entity mentions.</param>
private void ExtractGoldMentions(ICoreMap s, IList<IList<Mention>> allGoldMentions, ACEMentionExtractor.EntityComparator comparator)
{
    IList<Mention> goldMentions = new List<Mention>();
    allGoldMentions.Add(goldMentions);

    IList<EntityMention> goldMentionList = s.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation));
    TreeSet<EntityMention> treeForSortGoldMentions = new TreeSet<EntityMention>(comparator);
    if (goldMentionList != null)
    {
        Sharpen.Collections.AddAll(treeForSortGoldMentions, goldMentionList);
    }

    foreach (EntityMention e in treeForSortGoldMentions)
    {
        Mention men = new Mention();
        men.dependency = s.Get(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation));
        if (men.dependency == null)
        {
            men.dependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
        }
        men.startIndex = e.GetExtentTokenStart();
        men.endIndex = e.GetExtentTokenEnd();

        string[] parseID = e.GetObjectId().Split("-");
        men.mentionID = System.Convert.ToInt32(parseID[parseID.Length - 1]);
        string[] parseCorefID = e.GetCorefID().Split("-E");
        men.goldCorefClusterID = System.Convert.ToInt32(parseCorefID[parseCorefID.Length - 1]);

        // Scan all earlier gold mentions from last to first without stopping, so the value left in
        // originalRef is the id of the EARLIEST mention of the same cluster (-1 when there is none).
        men.originalRef = -1;
        for (int j = allGoldMentions.Count - 1; j >= 0; j--)
        {
            IList<Mention> l = allGoldMentions[j];
            for (int k = l.Count - 1; k >= 0; k--)
            {
                Mention m = l[k];
                if (men.goldCorefClusterID == m.goldCorefClusterID)
                {
                    men.originalRef = m.mentionID;
                }
            }
        }

        goldMentions.Add(men);
        if (men.mentionID > maxID)
        {
            maxID = men.mentionID;
        }

        // The former "set ner type" loop only computed two unused locals and never wrote
        // anything back to the tokens, so it has been removed.
    }
}
/// <summary>
/// Merges the chunk of <paramref name="source"/> delimited by <c>chunkOffsets</c> into a single
/// annotation, derives this object's character offsets, token offsets and text from it, and then
/// runs <c>extractFunc</c> on the same chunk.
/// </summary>
/// <param name="source">Elements the chunk offsets refer to.</param>
/// <param name="aggregator">Aggregator used to merge the chunk.</param>
/// <returns>Always <c>true</c>.</returns>
protected internal virtual bool ExtractAnnotation<_T0>(IList<_T0> source, CoreMapAggregator aggregator)
    where _T0 : ICoreMap
{
    annotation = aggregator.Merge(source, chunkOffsets.GetBegin(), chunkOffsets.GetEnd());

    // Both intervals are built with the open-end flag.
    charOffsets = Interval.ToInterval(
        annotation.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation)),
        annotation.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation)),
        Interval.IntervalOpenEnd);
    tokenOffsets = Interval.ToInterval(
        annotation.Get(typeof(CoreAnnotations.TokenBeginAnnotation)),
        annotation.Get(typeof(CoreAnnotations.TokenEndAnnotation)),
        Interval.IntervalOpenEnd);

    text = annotation.Get(typeof(CoreAnnotations.TextAnnotation));

    extractFunc.Annotate(this, source.SubList(chunkOffsets.GetBegin(), chunkOffsets.GetEnd()));
    return true;
}
/// <summary>Annotate a single sentence.</summary>
/// <remarks>
/// This annotator will, in particular, set the
/// <see cref="EntailedSentencesAnnotation"/>
/// and
/// <see cref="RelationTriplesAnnotation"/>
/// annotations. Sentences with fewer than two tokens receive empty annotations without being
/// processed. When <c>stripEntailments</c> is set, the entailed-sentences annotation is not kept.
/// </remarks>
/// <param name="sentence">The sentence to annotate in place.</param>
/// <param name="canonicalMentionMap">Map from tokens to their canonical mention tokens, used when <c>resolveCoref</c> is set and the map is non-empty.</param>
/// <exception cref="InvalidOperationException">The sentence has neither enhanced++ nor basic dependencies.</exception>
public virtual void AnnotateSentence(ICoreMap sentence, IDictionary<CoreLabel, IList<CoreLabel>> canonicalMentionMap)
{
    IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

    if (tokens.Count < 2)
    {
        // Short sentence. Skip annotating it.
        sentence.Set(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation), Java.Util.Collections.EmptyList());
        if (!stripEntailments)
        {
            sentence.Set(typeof(NaturalLogicAnnotations.EntailedSentencesAnnotation), Java.Util.Collections.EmptySet());
        }
        return;
    }

    // Get the dependency tree, preferring enhanced++ over basic dependencies.
    SemanticGraph parse = sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
    if (parse == null)
    {
        parse = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
        if (parse == null)
        {
            throw new InvalidOperationException("Cannot run OpenIE without a parse tree!");
        }
    }

    // Clean a copy of the tree, leaving the sentence's own graph untouched.
    parse = new SemanticGraph(parse);
    Edu.Stanford.Nlp.Naturalli.Util.CleanTree(parse);

    // Resolve coreference.
    SemanticGraph canonicalizedParse = parse;
    if (resolveCoref && !canonicalMentionMap.IsEmpty())
    {
        canonicalizedParse = CanonicalizeCoref(parse, canonicalMentionMap);
    }

    // Run OpenIE.
    // (clauses) note: uses the coref-canonicalized parse
    IList<SentenceFragment> clauses = ClausesInSentence(canonicalizedParse, true);
    // (entailment)
    ICollection<SentenceFragment> fragments = EntailmentsFromClauses(clauses);
    // (segment) note: uses the non-coref-canonicalized parse!
    IList<RelationTriple> extractions = segmenter.Extract(parse, tokens);
    Sharpen.Collections.AddAll(extractions, RelationsInFragments(fragments, sentence));

    // Set the annotations.
    sentence.Set(typeof(NaturalLogicAnnotations.EntailedClausesAnnotation), new HashSet<SentenceFragment>(clauses));
    sentence.Set(typeof(NaturalLogicAnnotations.EntailedSentencesAnnotation), fragments);
    // Round-trip through a set to uniq the extractions.
    HashSet<RelationTriple> uniqueExtractions = new HashSet<RelationTriple>(extractions);
    sentence.Set(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation), new List<RelationTriple>(uniqueExtractions));

    if (stripEntailments)
    {
        sentence.Remove(typeof(NaturalLogicAnnotations.EntailedSentencesAnnotation));
    }
}
/// <summary>
/// Convert an
/// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceEntityMention"/>
/// to an
/// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EntityMention"/>.
/// </summary>
/// <remarks>
/// Extent and head spans that fall outside the sentence are logged and clamped to it; the head
/// span is additionally clamped to lie within the extent.
/// </remarks>
/// <param name="entityMention">
/// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Domains.Ace.Reader.AceEntityMention"/>
/// to convert
/// </param>
/// <param name="docId">ID of the document containing this entity mention</param>
/// <param name="sentence">Sentence containing the mention; its token count bounds the spans.</param>
/// <param name="tokenOffset">
/// An offset in the calculations of position of the extent to sentence boundary
/// (the ace.reader stores absolute token offset from the beginning of the document, but
/// we need token offsets from the beginning of the sentence => adjust by tokenOffset)
/// </param>
/// <returns>
/// entity as an
/// <see cref="Edu.Stanford.Nlp.IE.Machinereading.Structure.EntityMention"/>
/// </returns>
private EntityMention ConvertAceEntityMention(AceEntityMention entityMention, string docId, ICoreMap sentence, int tokenOffset)
{
    AceCharSeq extent = entityMention.GetExtent();
    AceCharSeq headSeq = entityMention.GetHead();

    // note: the ace.reader stores absolute token offsets from the beginning of the document, but
    // we need token offsets from the beginning of the sentence => adjust by tokenOffset
    // note: in ace.reader the end token position is inclusive, but
    // in our setup the end token position is exclusive => add 1 to end
    int extStart = extent.GetTokenStart() - tokenOffset;
    int extEnd = extent.GetTokenEnd() - tokenOffset + 1;

    if (extStart < 0)
    {
        logger.Severe("READER ERROR: Invalid extent start " + extStart + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
        logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
        extStart = 0;
    }
    if (extEnd > sentence.Get(typeof(CoreAnnotations.TokensAnnotation)).Count)
    {
        logger.Severe("READER ERROR: Invalid extent end " + extEnd + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
        logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
        extEnd = sentence.Get(typeof(CoreAnnotations.TokensAnnotation)).Count;
    }

    int headStart = headSeq.GetTokenStart() - tokenOffset;
    int headEnd = headSeq.GetTokenEnd() - tokenOffset + 1;

    if (headStart < 0)
    {
        logger.Severe("READER ERROR: Invalid head start " + headStart + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
        logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
        headStart = 0;
    }
    if (headEnd > sentence.Get(typeof(CoreAnnotations.TokensAnnotation)).Count)
    {
        logger.Severe("READER ERROR: Invalid head end " + headEnd + " for entity mention " + entityMention.GetId() + " in document " + docId + " in sentence " + sentence);
        logger.Severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
        headEnd = sentence.Get(typeof(CoreAnnotations.TokensAnnotation)).Count;
    }

    // Must adjust due to possible incorrect EOS detection: keep the head inside the extent.
    headStart = System.Math.Max(headStart, extStart);
    headEnd = System.Math.Min(headEnd, extEnd);
    System.Diagnostics.Debug.Assert((headStart < headEnd));

    Span extentSpan = new Span(extStart, extEnd);
    Span headSpan = new Span(headStart, headEnd);
    return new EntityMention(entityMention.GetId(), sentence, extentSpan, headSpan, entityMention.GetParent().GetType(), entityMention.GetParent().GetSubtype(), entityMention.GetLdctype());
}
/// <summary>
/// Creates a mention for every subtree matched by <c>npOrPrpMentionPattern</c> whose token span is
/// not already known. Spans inside a named entity are skipped unless the matched node's value
/// starts with "PRP".
/// </summary>
/// <remarks>
/// A new multi-token mention whose tokens all have a tag starting with "NNP" is also recorded in
/// <paramref name="namedEntitySpanSet"/>.
/// </remarks>
/// <param name="s">Sentence providing tokens, parse tree and dependency graphs.</param>
/// <param name="mentions">Receives the newly created mentions.</param>
/// <param name="mentionSpanSet">Spans already turned into mentions; extended with each new span.</param>
/// <param name="namedEntitySpanSet">Named-entity spans; read by the filter and possibly extended.</param>
private static void ExtractNPorPRP(ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    IList<CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    Tree tree = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
    tree.IndexLeaves();

    SemanticGraph basicDependency = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    SemanticGraph enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    if (enhancedDependency == null)
    {
        // No enhanced graph available: fall back to the basic dependencies.
        enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    }

    TregexMatcher matcher = npOrPrpMentionPattern.Matcher(tree);
    while (matcher.Find())
    {
        Tree matched = matcher.GetMatch();
        IList<Tree> leaves = matched.GetLeaves();

        // Begin is shifted to 0-based; end stays exclusive.
        int from = ((CoreLabel)leaves[0].Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
        int to = ((CoreLabel)leaves[leaves.Count - 1].Label()).Get(typeof(CoreAnnotations.IndexAnnotation));

        // Try not to have a span that ends with a comma.
        if (",".Equals(sent[to - 1].Word()))
        {
            to--;
        }

        IntPair span = new IntPair(from, to);
        if (mentionSpanSet.Contains(span))
        {
            continue;
        }
        if (!(!InsideNE(span, namedEntitySpanSet) || matched.Value().StartsWith("PRP")))
        {
            continue;
        }

        int dummyMentionId = -1;
        Mention created = new Mention(dummyMentionId, from, to, sent, basicDependency, enhancedDependency, new List<CoreLabel>(sent.SubList(from, to)), matched);
        mentions.Add(created);
        mentionSpanSet.Add(span);

        if (created.originalSpan.Count <= 1)
        {
            continue;
        }

        // Multi-token mention made up solely of NNP* tokens: record it as a named entity too.
        bool allProperNouns = true;
        foreach (CoreLabel spanToken in created.originalSpan)
        {
            if (!spanToken.Tag().StartsWith("NNP"))
            {
                allProperNouns = false;
            }
        }
        if (allProperNouns)
        {
            namedEntitySpanSet.Add(span);
        }
    }
}
/// <summary>
/// Assigns a speaker prediction to every quote that has no
/// <c>QuoteAttributionAnnotator.SpeakerAnnotation</c> yet, based on the mentions found in a
/// token window before and after the quote.
/// </summary>
/// <remarks>
/// Candidates are tried in this order and the first one accepted by <c>UpdatePredictions</c>
/// wins: conversational-next, conversational-previous, family/animate vocative, and finally
/// the first entry of the ranked top-speaker list.
/// </remarks>
/// <param name="doc">Document providing the token list and the quotations.</param>
public virtual void TopSpeakerInRange(Annotation doc)
{
    IList <CoreLabel> toks = doc.Get(typeof(CoreAnnotations.TokensAnnotation));
    IList <ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));
    for (int quoteIndex = 0; quoteIndex < quotes.Count; quoteIndex++)
    {
        ICoreMap quote = quotes[quoteIndex];
        if (quote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation)) != null)
        {
            // Already attributed; nothing to do.
            continue;
        }

        int quoteBegin = quote.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
        int quoteEnd = quote.Get(typeof(CoreAnnotations.TokenEndAnnotation));

        // First pass: regular window around the quote.
        Pair <int, int> backwardSpan = new Pair <int, int>(Math.Max(0, quoteBegin - BackwardWindow), quoteBegin - 1);
        IList <Sieve.MentionData> backwardMentions = FindClosestMentionsInSpanBackward(backwardSpan);
        Pair <int, int> forwardSpan = new Pair <int, int>(quoteEnd + 1, Math.Min(quoteEnd + ForwardWindow, toks.Count - 1));
        IList <Sieve.MentionData> nearbyMentions = FindClosestMentionsInSpanForward(forwardSpan);
        Sharpen.Collections.AddAll(nearbyMentions, backwardMentions);

        Person.Gender gender = GetGender(MakeMentionData(quote));
        IList <string> topSpeakers = Counters.ToSortedList(GetTopSpeakers(nearbyMentions, backwardMentions, gender, quote, false));

        // If none found, try again with the bigger window.
        if (topSpeakers.IsEmpty())
        {
            backwardSpan = new Pair <int, int>(Math.Max(0, quoteBegin - BackwardWindowBig), quoteBegin - 1);
            backwardMentions = FindClosestMentionsInSpanBackward(backwardSpan);
            forwardSpan = new Pair <int, int>(quoteEnd + 1, Math.Min(quoteEnd + ForwardWindowBig, toks.Count - 1));
            nearbyMentions = FindClosestMentionsInSpanForward(forwardSpan);
            // NOTE(review): unlike the first pass, the backward mentions are not merged into
            // the forward list here - confirm whether that is intended.
            topSpeakers = Counters.ToSortedList(GetTopSpeakers(nearbyMentions, backwardMentions, gender, quote, true));
        }

        if (topSpeakers.IsEmpty())
        {
            log.Warn("Watch out, there's an empty top speakers list!");
            continue;
        }

        topSpeakers = RemoveQuoteNames(topSpeakers, quote);
        string fallbackSpeaker = topSpeakers[0];

        // Short-circuit evaluation keeps each later prediction from being computed once an
        // earlier one has been accepted.
        bool resolved =
            UpdatePredictions(quote, GetConversationalNextPrediction(quotes, quoteIndex, gender))
            || UpdatePredictions(quote, GetConversationalPreviousPrediction(quotes, quoteIndex, gender))
            || UpdatePredictions(quote, GetFamilyAnimateVocative(quotes, quoteIndex, gender, topSpeakers));

        if (!resolved)
        {
            UpdatePredictions(quote, new Pair <string, string>(fallbackSpeaker, string.Empty));
        }
    }
}
/// <summary>
/// Creates a single-token mention for every token accepted by <c>KbpIsPronominalMention</c>,
/// tags it as a person, and appends it to its sentence's <c>MentionsAnnotation</c> list.
/// </summary>
/// <remarks>
/// "she" and "he" (compared after lower-casing) additionally receive a gender of
/// <c>"FEMALE"</c> / <c>"MALE"</c> on both the mention and its tokens.
/// </remarks>
/// <param name="ann">The document.</param>
/// <returns>The pronominal mentions that were created, in document order.</returns>
private static IList <ICoreMap> AnnotatePronominalMentions(Annotation ann)
{
    IList <ICoreMap> found = new List <ICoreMap>();
    IList <ICoreMap> sentences = ann.Get(typeof(CoreAnnotations.SentencesAnnotation));
    for (int sentenceIndex = 0; sentenceIndex < sentences.Count; sentenceIndex++)
    {
        ICoreMap sentence = sentences[sentenceIndex];
        int annoTokenBegin = sentence.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
        // NOTE(review): comparing an int with null looks like a conversion artifact; the
        // apparent intent is to default a missing TokenBeginAnnotation to 0 - confirm.
        if (annoTokenBegin == null)
        {
            annoTokenBegin = 0;
        }

        IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
        for (int tokenIndex = 0; tokenIndex < tokens.Count; tokenIndex++)
        {
            if (!KbpIsPronominalMention(tokens[tokenIndex]))
            {
                continue;
            }

            // The chunk covers exactly this one token.
            ICoreMap pronoun = ChunkAnnotationUtils.GetAnnotatedChunk(tokens, tokenIndex, tokenIndex + 1, annoTokenBegin, null, typeof(CoreAnnotations.TextAnnotation), null);
            pronoun.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentenceIndex);
            pronoun.Set(typeof(CoreAnnotations.NamedEntityTagAnnotation), KBPRelationExtractor.NERTag.Person.name);
            pronoun.Set(typeof(CoreAnnotations.EntityTypeAnnotation), KBPRelationExtractor.NERTag.Person.name);

            // Set gender.
            string loweredText = pronoun.Get(typeof(CoreAnnotations.TextAnnotation)).ToLower();
            string pronounGender = null;
            switch (loweredText)
            {
                case "she":
                    pronounGender = "FEMALE";
                    break;
                case "he":
                    pronounGender = "MALE";
                    break;
            }

            if (pronounGender != null)
            {
                pronoun.Set(typeof(CoreAnnotations.GenderAnnotation), pronounGender);
                foreach (CoreLabel pronounToken in pronoun.Get(typeof(CoreAnnotations.TokensAnnotation)))
                {
                    pronounToken.Set(typeof(CoreAnnotations.GenderAnnotation), pronounGender);
                }
            }

            sentence.Get(typeof(CoreAnnotations.MentionsAnnotation)).Add(pronoun);
            found.Add(pronoun);
        }
    }
    return(found);
}
/// <summary>
/// Adds a mention for the pronoun at <paramref name="headword"/> and, when the pronoun has
/// conjunct children, a second mention spanning the enclosing noun phrase.
/// </summary>
/// <param name="headword">The pronoun node; its <c>Index()</c> minus one is used as the token position.</param>
/// <param name="dep">Dependency graph used for the edge and conjunct lookups.</param>
/// <param name="s">Sentence supplying the tokens and the basic/enhanced dependency graphs.</param>
/// <param name="mentions">Receives the newly created mentions.</param>
/// <param name="mentionSpanSet">Spans that already have a mention; new spans are added to it.</param>
/// <param name="namedEntitySpanSet">Named-entity spans; a pronoun span inside one of them is skipped.</param>
private void ExtractPronounForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
{
    IList <CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    SemanticGraph basic = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    // Fall back to the basic dependencies when the sentence carries no enhanced graph.
    SemanticGraph enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation))
        ?? s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));

    int headPosition = headword.Index();
    int spanStart = headPosition - 1;
    int spanEnd = headPosition;

    // Handle "you all", "they both" etc.: extend the span by one token when the next word
    // is "all"/"both" and the graph has an edge from the pronoun to it.
    bool followedByQuantifier = sent.Count > headPosition && sent[headPosition].Word().Matches("all|both");
    if (followedByQuantifier)
    {
        IndexedWord following = dep.GetNodeByIndex(headPosition + 1);
        if (dep.GetEdge(headword, following) != null)
        {
            spanEnd++;
        }
    }

    IntPair pronounSpan = new IntPair(spanStart, spanEnd);
    if (!mentionSpanSet.Contains(pronounSpan) && !InsideNE(pronounSpan, namedEntitySpanSet))
    {
        const int placeholderId = -1;
        Mention pronounMention = new Mention(placeholderId, spanStart, spanEnd, sent, basic, enhanced, new List <CoreLabel>(sent.SubList(spanStart, spanEnd)));
        pronounMention.headIndex = headPosition - 1;
        pronounMention.headWord = sent[pronounMention.headIndex];
        pronounMention.headString = pronounMention.headWord.Word().ToLower(Locale.English);
        mentions.Add(pronounMention);
        mentionSpanSet.Add(pronounSpan);
    }

    // When the pronoun is part of a conjunction (e.g., "you and I").
    ICollection <IndexedWord> conjuncts = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
    if (conjuncts.Count == 0)
    {
        return;
    }

    IntPair npSpan = GetNPSpan(headword, dep, sent);
    int conjStart = npSpan.Get(0);
    int conjEnd = npSpan.Get(1) + 1;
    // Try not to have a span that ends with a comma.
    if (",".Equals(sent[conjEnd - 1].Word()))
    {
        conjEnd--;
    }
    AddMention(conjStart, conjEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
}
/// <summary>
/// Groups the document's quotes by the <c>ParagraphIndexAnnotation</c> of the sentence in
/// which each quote begins.
/// </summary>
/// <param name="doc">Document providing the quotations and the sentences.</param>
/// <returns>Map from paragraph index to the quotes starting in that paragraph, in document order.</returns>
private static IDictionary <int, IList <ICoreMap> > GetQuotesInParagraph(Annotation doc)
{
    IList <ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));
    IList <ICoreMap> sentences = doc.Get(typeof(CoreAnnotations.SentencesAnnotation));
    IDictionary <int, IList <ICoreMap> > byParagraph = new Dictionary <int, IList <ICoreMap> >();
    foreach (ICoreMap quote in quotes)
    {
        // The quote is filed under the paragraph of its first sentence.
        ICoreMap firstSentence = sentences[quote.Get(typeof(CoreAnnotations.SentenceBeginAnnotation))];
        int paragraph = firstSentence.Get(typeof(CoreAnnotations.ParagraphIndexAnnotation));

        IList <ICoreMap> bucket;
        if (!byParagraph.TryGetValue(paragraph, out bucket))
        {
            bucket = new List <ICoreMap>();
            byParagraph[paragraph] = bucket;
        }
        bucket.Add(quote);
    }
    return(byParagraph);
}
/// <summary>
/// Proposes a speaker for a quote by looking at the quotes located two paragraphs after it.
/// </summary>
/// <remarks>
/// A later quote qualifies when it already carries a speaker annotation and either
/// <paramref name="gender"/> is <c>Person.Gender.Unk</c> or the gender computed for that
/// later quote equals <paramref name="gender"/>. When several quotes qualify, the last one
/// in list order wins.
/// </remarks>
/// <param name="quotes">The quotes to search; must contain <paramref name="quoteIndex"/>.</param>
/// <param name="quoteIndex">Index in <paramref name="quotes"/> of the quote being labelled.</param>
/// <param name="gender">Gender the candidate must not disagree with.</param>
/// <returns>
/// The pair (speaker, <c>" conversation - next"</c>), or a pair of nulls when no quote qualifies.
/// </returns>
public virtual Pair <string, string> GetConversationalNextPrediction(IList <ICoreMap> quotes, int quoteIndex, Person.Gender gender)
{
    string topSpeaker = null;
    string modifier = null;
    // If the quotes in paragraph n + 2 are labelled with a speaker and that speaker's
    // gender does not disagree, label with that speaker.
    int targetParagraph = GetQuoteParagraph(quotes[quoteIndex]) + 2;
    for (int j = quoteIndex + 1; j < quotes.Count; j++)
    {
        ICoreMap nextNextQuote = quotes[j];
        if (GetQuoteParagraph(nextNextQuote) != targetParagraph)
        {
            continue;
        }

        string speakerName = nextNextQuote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation));
        // The speaker check must guard BOTH gender alternatives. Because && binds tighter
        // than ||, writing "speaker != null && unk || sameGender" would let an unlabelled,
        // gender-matching quote replace an already found speaker with null.
        if (speakerName != null
            && (gender == Person.Gender.Unk || GetGender(MakeMentionData(nextNextQuote)) == gender))
        {
            topSpeaker = speakerName;
            modifier = " conversation - next";
        }
    }
    return(new Pair <string, string>(topSpeaker, modifier));
}
/// <summary>
/// Proposes a speaker for a quote by looking at the quotes located two paragraphs before it.
/// </summary>
/// <remarks>
/// An earlier quote qualifies when it already carries a speaker annotation and either
/// <paramref name="gender"/> is <c>Person.Gender.Unk</c> or the gender computed for that
/// earlier quote equals <paramref name="gender"/>. Quotes are scanned from
/// <paramref name="quoteIndex"/> - 1 down to 0 and the last qualifying one visited wins.
/// </remarks>
/// <param name="quotes">The quotes to search; must contain <paramref name="quoteIndex"/>.</param>
/// <param name="quoteIndex">Index in <paramref name="quotes"/> of the quote being labelled.</param>
/// <param name="gender">Gender the candidate must not disagree with.</param>
/// <returns>
/// The pair (speaker, <c>" conversation - prev"</c>), or a pair of nulls when no quote qualifies.
/// </returns>
public virtual Pair <string, string> GetConversationalPreviousPrediction(IList <ICoreMap> quotes, int quoteIndex, Person.Gender gender)
{
    string topSpeaker = null;
    string modifier = null;
    // If the quotes in paragraph n - 2 are labelled with a speaker and that speaker's
    // gender does not disagree, label with that speaker.
    int targetParagraph = GetQuoteParagraph(quotes[quoteIndex]) - 2;
    for (int j = quoteIndex - 1; j >= 0; j--)
    {
        ICoreMap prevPrevQuote = quotes[j];
        if (GetQuoteParagraph(prevPrevQuote) != targetParagraph)
        {
            continue;
        }

        string speakerName = prevPrevQuote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation));
        // The speaker check must guard BOTH gender alternatives. Because && binds tighter
        // than ||, writing "speaker != null && unk || sameGender" would let an unlabelled,
        // gender-matching quote replace an already found speaker with null.
        if (speakerName != null
            && (gender == Person.Gender.Unk || GetGender(MakeMentionData(prevPrevQuote)) == gender))
        {
            topSpeaker = speakerName;
            modifier = " conversation - prev";
        }
    }
    return(new Pair <string, string>(topSpeaker, modifier));
}
/// <summary>
/// Returns whichever of the closest mention before the quote and the closest mention after
/// the quote lies nearer to it, measured in tokens.
/// </summary>
/// <param name="quote">Quote carrying <c>TokenBeginAnnotation</c> and <c>TokenEndAnnotation</c>.</param>
/// <returns>
/// The preceding mention when it is strictly closer; otherwise the following mention
/// (ties go to the following mention).
/// </returns>
public virtual Sieve.MentionData GetClosestMention(ICoreMap quote)
{
    // Search backward over tokens [0, quoteBegin - 1].
    int quoteBegin = quote.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
    Sieve.MentionData preceding = FindClosestMentionInSpanBackward(new Pair <int, int>(0, quoteBegin - 1));

    // Search forward over tokens [quoteEnd, last token of the document].
    int quoteEnd = quote.Get(typeof(CoreAnnotations.TokenEndAnnotation));
    int lastTokenIndex = doc.Get(typeof(CoreAnnotations.TokensAnnotation)).Count - 1;
    Sieve.MentionData following = FindClosestMentionInSpanForward(new Pair <int, int>(quoteEnd, lastTokenIndex));

    int backDistance = quoteBegin - preceding.end;
    int forwardDistance = following.begin - quoteEnd + 1;
    return(backDistance < forwardDistance ? preceding : following);
}
/// <summary>
/// Copy constructor: sizes the key and value arrays to the number of keys in
/// <paramref name="other"/> and stores every key together with the value
/// <paramref name="other"/> returns for it. Values are stored as returned, without cloning.
/// </summary>
/// <param name="other">The map to copy. It may not be null.</param>
public ArrayCoreMap(ICoreMap other)
{
    var sourceKeys = other.KeySet();

    psize = sourceKeys.Count;
    keys = new Type[psize];
    values = new Object[psize];

    // Fill both arrays in the key set's enumeration order.
    int slot = 0;
    foreach (var sourceKey in sourceKeys)
    {
        keys[slot] = sourceKey;
        values[slot++] = other.Get(sourceKey);
    }
}