/// <summary>Mark twin mentions: All mention boundaries should be matched</summary>
/// <remarks>
/// For every sentence, a predicted mention whose (startIndex, endIndex) pair equals that of a
/// gold mention which has not yet been paired takes over that gold mention's ID, and both
/// mentions are flagged as not twinless. Each gold mention is paired at most once. Predicted
/// mentions that are still twinless afterwards get 10000 added to their mention ID.
/// </remarks>
private void FindTwinMentionsStrict()
{
    for (int sentNum = 0; sentNum < goldOrderedMentionsBySentence.Count; sentNum++)
    {
        IList<Mention> golds = goldOrderedMentionsBySentence[sentNum];
        IList<Mention> predicts = predictedOrderedMentionsBySentence[sentNum];

        // For CoNLL training there are some documents with gold mentions with the same position offsets
        // See /scr/nlp/data/conll-2011/v2/data/train/data/english/annotations/nw/wsj/09/wsj_0990.v2_auto_conll
        // (Packwood - Roth)
        CollectionValuedMap<IntPair, Mention> goldMentionPositions = new CollectionValuedMap<IntPair, Mention>();
        foreach (Mention g in golds)
        {
            IntPair ip = new IntPair(g.startIndex, g.endIndex);
            if (goldMentionPositions.Contains(ip))
            {
                StringBuilder existingMentions = new StringBuilder();
                foreach (Mention eg in goldMentionPositions[ip])
                {
                    if (existingMentions.Length > 0)
                    {
                        existingMentions.Append(",");
                    }
                    existingMentions.Append(eg.mentionID);
                }
                SieveCoreferenceSystem.logger.Warning("WARNING: gold mentions with the same offsets: " + ip + " mentions=" + g.mentionID + "," + existingMentions + ", " + g.SpanToString());
            }
            // Duplicate offsets are tolerated (only warned about above), so the mention is
            // always added rather than asserting uniqueness.
            goldMentionPositions.Add(ip, g);
        }

        foreach (Mention p in predicts)
        {
            IntPair pos = new IntPair(p.startIndex, p.endIndex);
            if (!goldMentionPositions.Contains(pos))
            {
                continue;
            }
            ICollection<Mention> cm = goldMentionPositions[pos];

            // Take the first remaining gold mention at this position. An enumerator must be
            // advanced before Current is valid, so iterate instead of reading Current directly.
            Mention twin = null;
            foreach (Mention candidate in cm)
            {
                twin = candidate;
                break;
            }
            if (twin == null)
            {
                // Every gold mention at these offsets has already been paired.
                continue;
            }

            // Consume the gold mention so that it cannot be paired with another prediction.
            cm.Remove(twin);
            p.mentionID = twin.mentionID;
            p.twinless = false;
            twin.twinless = false;
        }

        // temp: for making easy to recognize twinless mention
        foreach (Mention unmatched in predicts)
        {
            if (unmatched.twinless)
            {
                unmatched.mentionID += 10000;
            }
        }
    }
}
/// <summary>
/// Recursively walks the children of <paramref name="curr"/> in <paramref name="g"/>, adding
/// accepted nodes to <paramref name="descendantSet"/> and collecting features for each node
/// whose child is followed.
/// </summary>
/// <param name="g">Graph whose children and relations are queried.</param>
/// <param name="curr">Node currently being visited.</param>
/// <param name="descendantSet">Receives the visited nodes that pass all filters.</param>
/// <param name="allCutOffRels">Forwarded unchanged to <c>CheckIfSatisfiesRelConstrains</c>.</param>
/// <param name="doNotAddThese">Nodes to exclude; may be null, in which case nothing is excluded.</param>
/// <param name="seenNodes">Nodes already visited; updated here to stop repeated visits.</param>
/// <param name="ignoreCommonTags">When true, nodes whose trimmed tag is in <c>ignoreTags</c> are not added (their children are still explored).</param>
/// <param name="acceptWord">Predicate applied to the node's backing label; a rejected node ends the walk on that branch.</param>
/// <param name="feat">Per-node-index feature lists, filled through <c>GetPatternsFromDataMultiClass.GetFeatures</c>.</param>
/// <exception cref="System.Exception"/>
private static void DescendantsHelper(SemanticGraph g, IndexedWord curr, ICollection<IndexedWord> descendantSet, IList<string> allCutOffRels, IList<IndexedWord> doNotAddThese, IList<IndexedWord> seenNodes, bool ignoreCommonTags, IPredicate<CoreLabel> acceptWord, CollectionValuedMap<int, string> feat)
{
    // Visit every node at most once.
    if (seenNodes.Contains(curr))
    {
        return;
    }
    seenNodes.Add(curr);

    // Stop on this branch when the node is already collected, excluded, or rejected.
    if (descendantSet.Contains(curr))
    {
        return;
    }
    if (doNotAddThese != null && doNotAddThese.Contains(curr))
    {
        return;
    }
    if (!acceptWord.Test(curr.BackingLabel()))
    {
        return;
    }

    bool skipAsCommonTag = ignoreCommonTags && ignoreTags.Contains(curr.Tag().Trim());
    if (!skipAsCommonTag)
    {
        descendantSet.Add(curr);
    }

    foreach (IndexedWord kid in g.GetChildren(curr))
    {
        if (doNotAddThese != null && doNotAddThese.Contains(kid))
        {
            continue;
        }

        GrammaticalRelation relation = g.Reln(curr, kid);
        if (CheckIfSatisfiesRelConstrains(g, curr, kid, allCutOffRels, relation))
        {
            continue;
        }

        // Drop the child when its tag matches any of the cut-off tag patterns.
        bool tagIsCutOff = false;
        foreach (string tagPattern in cutoffTags)
        {
            if (!kid.Tag().Matches(tagPattern))
            {
                continue;
            }
            if (Debug >= 5)
            {
                System.Console.Out.WriteLine("ignored tag " + kid + " because it satisfied " + tagPattern);
            }
            tagIsCutOff = true;
            break;
        }
        if (tagIsCutOff)
        {
            continue;
        }

        // Features are recorded under the current node's index, then the walk descends.
        int nodeIndex = curr.Index();
        if (!feat.Contains(nodeIndex))
        {
            feat[nodeIndex] = new List<string>();
        }
        GetPatternsFromDataMultiClass.GetFeatures(g, curr, false, feat[nodeIndex], relation);
        DescendantsHelper(g, kid, descendantSet, allCutOffRels, doNotAddThese, seenNodes, ignoreCommonTags, acceptWord, feat);
    }
}