Beispiel #1
0
 /// <summary>Mark twin mentions: All mention boundaries should be matched</summary>
 /// <remarks>
 /// For every sentence, each predicted mention is paired with a gold mention that has exactly the same
 /// (startIndex, endIndex) span. A paired predicted mention takes over the gold mention's ID and both are
 /// flagged as not twinless. Predicted mentions that are still flagged twinless afterwards get 10000 added
 /// to their ID.
 /// </remarks>
 private void FindTwinMentionsStrict()
 {
     for (int sentNum = 0; sentNum < goldOrderedMentionsBySentence.Count; sentNum++)
     {
         IList <Mention> golds    = goldOrderedMentionsBySentence[sentNum];
         IList <Mention> predicts = predictedOrderedMentionsBySentence[sentNum];
         // For CoNLL training there are some documents with gold mentions with the same position offsets
         // See /scr/nlp/data/conll-2011/v2/data/train/data/english/annotations/nw/wsj/09/wsj_0990.v2_auto_conll
         //  (Packwood - Roth)
         // Hence a multi-valued map: one span may hold several gold mentions.
         CollectionValuedMap <IntPair, Mention> goldMentionPositions = new CollectionValuedMap <IntPair, Mention>();
         foreach (Mention g in golds)
         {
             IntPair ip = new IntPair(g.startIndex, g.endIndex);
             if (goldMentionPositions.Contains(ip))
             {
                 // Duplicate span: log the IDs already registered at this position.
                 StringBuilder existingMentions = new StringBuilder();
                 foreach (Mention eg in goldMentionPositions[ip])
                 {
                     if (existingMentions.Length > 0)
                     {
                         existingMentions.Append(",");
                     }
                     existingMentions.Append(eg.mentionID);
                 }
                 SieveCoreferenceSystem.logger.Warning("WARNING: gold mentions with the same offsets: " + ip + " mentions=" + g.mentionID + "," + existingMentions + ", " + g.SpanToString());
             }
             goldMentionPositions.Add(ip, g);
         }
         foreach (Mention p in predicts)
         {
             IntPair pos = new IntPair(p.startIndex, p.endIndex);
             if (!goldMentionPositions.Contains(pos))
             {
                 continue;
             }
             ICollection <Mention> cm = goldMentionPositions[pos];
             Mention g_1;
             // An enumerator's Current is only valid after MoveNext() has returned true, so advance it
             // explicitly. The enumerator is disposed before the collection is mutated below.
             using (IEnumerator <Mention> it = cm.GetEnumerator())
             {
                 if (!it.MoveNext())
                 {
                     // No unpaired gold mention is left at this span; p stays twinless.
                     continue;
                 }
                 g_1 = it.Current;
             }
             // Consume the gold mention so it cannot be paired with a second predicted mention.
             cm.Remove(g_1);
             p.mentionID  = g_1.mentionID;
             p.twinless   = false;
             g_1.twinless = false;
         }
         // temp: for making easy to recognize twinless mention
         foreach (Mention p_1 in predicts)
         {
             if (p_1.twinless)
             {
                 p_1.mentionID += 10000;
             }
         }
     }
 }
 /// <summary>
 /// Recursive worker that walks <paramref name="g"/> from <paramref name="curr"/> through
 /// <c>g.GetChildren</c>, adding accepted nodes to <paramref name="descendantSet"/> and recording
 /// features for <paramref name="curr"/> each time one of its children is followed.
 /// </summary>
 /// <param name="g">Graph being traversed.</param>
 /// <param name="curr">Node visited by this call.</param>
 /// <param name="descendantSet">Output collection; a node already present here is not expanded again.</param>
 /// <param name="allCutOffRels">Forwarded, with the edge relation, to <c>CheckIfSatisfiesRelConstrains</c>; a true result skips the child.</param>
 /// <param name="doNotAddThese">Nodes that are neither added nor descended into; may be null.</param>
 /// <param name="seenNodes">Nodes already visited; updated by this call.</param>
 /// <param name="ignoreCommonTags">When true, a node whose trimmed tag is in <c>ignoreTags</c> is not added, but its children are still visited.</param>
 /// <param name="acceptWord">Tested against <c>curr.BackingLabel()</c>; a rejected node stops the walk at that node.</param>
 /// <param name="feat">Feature map keyed by node index, filled via <c>GetPatternsFromDataMultiClass.GetFeatures</c>.</param>
 /// <exception cref="System.Exception"/>
 private static void DescendantsHelper(SemanticGraph g, IndexedWord curr, ICollection <IndexedWord> descendantSet, IList <string> allCutOffRels, IList <IndexedWord> doNotAddThese, IList <IndexedWord> seenNodes, bool ignoreCommonTags,
                                       IPredicate <CoreLabel> acceptWord, CollectionValuedMap <int, string> feat)
 {
     // Each node is processed at most once.
     if (seenNodes.Contains(curr))
     {
         return;
     }
     seenNodes.Add(curr);

     // Any of these conditions ends the walk at this node without looking at its children.
     if (descendantSet.Contains(curr))
     {
         return;
     }
     if (doNotAddThese != null && doNotAddThese.Contains(curr))
     {
         return;
     }
     if (!acceptWord.Test(curr.BackingLabel()))
     {
         return;
     }

     // A node with an ignored tag is left out of the result, but its children are still explored.
     bool hasIgnoredTag = ignoreCommonTags && ignoreTags.Contains(curr.Tag().Trim());
     if (!hasIgnoredTag)
     {
         descendantSet.Add(curr);
     }

     foreach (IndexedWord kid in g.GetChildren(curr))
     {
         if (doNotAddThese != null && doNotAddThese.Contains(kid))
         {
             continue;
         }

         GrammaticalRelation edge = g.Reln(curr, kid);
         if (CheckIfSatisfiesRelConstrains(g, curr, kid, allCutOffRels, edge))
         {
             continue;
         }

         // Skip the child when its tag matches any cut-off pattern.
         bool tagIsCutOff = false;
         foreach (string tagPattern in cutoffTags)
         {
             if (!kid.Tag().Matches(tagPattern))
             {
                 continue;
             }
             if (Debug >= 5)
             {
                 System.Console.Out.WriteLine("ignored tag " + kid + " because it satisfied " + tagPattern);
             }
             tagIsCutOff = true;
             break;
         }
         if (tagIsCutOff)
         {
             continue;
         }

         // Make sure the current node has a feature bucket before collecting into it.
         if (!feat.Contains(curr.Index()))
         {
             feat[curr.Index()] = new List <string>();
         }
         GetPatternsFromDataMultiClass.GetFeatures(g, curr, false, feat[curr.Index()], edge);
         DescendantsHelper(g, kid, descendantSet, allCutOffRels, doNotAddThese, seenNodes, ignoreCommonTags, acceptWord, feat);
     }
 }