/// <summary>
/// Builds the single-element identifier list for a mention from its feature keys and its mention type.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item>Pronominal mention: the first feature key (in <c>KeySet()</c> iteration order) that starts with
/// <c>head-word=</c>, with that marker text removed.</item>
/// <item>Proper mention while <c>neTypeConjuntion</c> is set: the mention type name, an underscore, and the
/// first feature key that starts with <c>head-ne-type=</c> with that marker text removed.</item>
/// <item>Every other case, including when no matching feature key exists: the mention type name alone.</item>
/// </list>
/// </remarks>
/// <param name="features">Counter whose keys are the feature strings that are scanned.</param>
/// <param name="mentionType">Type of the mention the identifiers are built for.</param>
/// <returns>A new list holding exactly one identifier.</returns>
private IList <string> Identifiers(ICounter <string> features, Dictionaries.MentionType mentionType)
        {
            const string headWordPrefix   = "head-word=";
            const string headNeTypePrefix = "head-ne-type=";

            IList <string> identifiers = new List <string>();

            if (mentionType == Dictionaries.MentionType.Pronominal)
            {
                foreach (string feature in features.KeySet())
                {
                    // Feature keys are machine-generated, so the prefix is matched ordinally
                    // instead of with the current culture's linguistic rules.
                    if (feature.StartsWith(headWordPrefix, System.StringComparison.Ordinal))
                    {
                        identifiers.Add(feature.Replace(headWordPrefix, string.Empty));
                        return(identifiers);
                    }
                }
            }
            else if (neTypeConjuntion && mentionType == Dictionaries.MentionType.Proper)
            {
                foreach (string feature in features.KeySet())
                {
                    if (feature.StartsWith(headNeTypePrefix, System.StringComparison.Ordinal))
                    {
                        identifiers.Add(mentionType.ToString() + "_" + feature.Replace(headNeTypePrefix, string.Empty));
                        return(identifiers);
                    }
                }
            }

            // Fallback: no specialised identifier applies (or no matching feature was found).
            identifiers.Add(mentionType.ToString());
            return(identifiers);
        }
Exemplo n.º 2
0
 /// <summary>
 /// Creates a labelled example from an existing pair, keeping only the document id and the
 /// second mention's id and type.
 /// </summary>
 /// <param name="pair">Example whose document id and second-mention data are copied.</param>
 /// <param name="isPositive">When true the label is set to 1, otherwise to 0.</param>
 public Example(Edu.Stanford.Nlp.Coref.Statistical.Example pair, bool isPositive)
 {
     // Values carried over from the source pair.
     docId        = pair.docId;
     mentionId2   = pair.mentionId2;
     mentionType2 = pair.mentionType2;

     // The first-mention slots and the pairwise features are left unset.
     mentionId1       = -1;
     mentionType1     = null;
     pairwiseFeatures = null;

     if (isPositive)
     {
         label = 1;
     }
     else
     {
         label = 0;
     }
 }
Exemplo n.º 3
0
 /// <summary>
 /// Creates an example for a pair of mentions, recording each mention's id and type together
 /// with the document id, the label and the pairwise feature vector.
 /// </summary>
 /// <param name="docId">Id of the document the mentions belong to.</param>
 /// <param name="m1">First mention of the pair.</param>
 /// <param name="m2">Second mention of the pair.</param>
 /// <param name="label">Label stored for the pair.</param>
 /// <param name="pairwiseFeatures">Feature vector stored for the pair.</param>
 public Example(int docId, Mention m1, Mention m2, double label, CompressedFeatureVector pairwiseFeatures)
 {
     // First mention.
     this.mentionId1   = m1.mentionID;
     this.mentionType1 = m1.mentionType;

     // Second mention.
     this.mentionId2   = m2.mentionID;
     this.mentionType2 = m2.mentionType;

     // Pair-level data, stored as passed in.
     this.docId            = docId;
     this.label            = label;
     this.pairwiseFeatures = pairwiseFeatures;
 }
        /// <summary>
        /// Scores candidate mention pairs of <paramref name="document"/> with the pairwise classifier and merges
        /// the coreference clusters of the pairs whose score exceeds the threshold for their mention types.
        /// </summary>
        /// <remarks>
        /// Candidate pairs come from <c>CorefUtils.HeuristicFilter</c>; each pair is (candidate, anaphor).
        /// Every anaphor is decided at most once, on the first pair encountered for it. Pairs are therefore
        /// visited from highest to lowest classifier score, so that the pair tested against the threshold is
        /// the anaphor's best-scoring candidate rather than an arbitrary one.
        /// </remarks>
        /// <param name="document">Document whose predicted mentions are clustered; it is updated through
        /// <c>CorefUtils.MergeCoreferenceClusters</c>.</param>
        /// <exception cref="RuntimeInterruptedException">Thrown when <c>Thread.Interrupted()</c> reports true at
        /// one of the check points.</exception>
        public virtual void RunCoref(Document document)
        {
            Compressor <string> compressor = new Compressor <string>();

            if (Thread.Interrupted())
            {
                // Allow interrupting
                throw new RuntimeInterruptedException();
            }
            IDictionary <Pair <int, int>, bool> pairs = new Dictionary <Pair <int, int>, bool>();

            // Each entry maps an anaphor id (key) to the ids of its candidate first mentions (value).
            foreach (KeyValuePair <int, IList <int> > e in CorefUtils.HeuristicFilter(CorefUtils.GetSortedMentions(document), maxMentionDistance, maxMentionDistanceWithStringMatch))
            {
                foreach (int m1 in e.Value)
                {
                    pairs[new Pair <int, int>(m1, e.Key)] = true;
                }
            }
            DocumentExamples            examples       = extractor.Extract(0, document, pairs, compressor);
            ICounter <Pair <int, int> > pairwiseScores = new ClassicCounter <Pair <int, int> >();

            foreach (Example mentionPair in examples.examples)
            {
                if (Thread.Interrupted())
                {
                    // Allow interrupting
                    throw new RuntimeInterruptedException();
                }
                pairwiseScores.IncrementCount(new Pair <int, int>(mentionPair.mentionId1, mentionPair.mentionId2), classifier.Predict(mentionPair, examples.mentionFeatures, compressor));
            }

            // Rank the pairs best-first by classifier score. Sorting with a null comparer would order them by
            // the pairs' natural order and ignore the scores entirely. OrderByDescending is a stable sort, so
            // pairs with equal scores keep their key-set order.
            List <Pair <int, int> >  scoredPairs  = new List <Pair <int, int> >(pairwiseScores.KeySet());
            IList <Pair <int, int> > mentionPairs = new List <Pair <int, int> >(
                System.Linq.Enumerable.OrderByDescending(scoredPairs, candidate => pairwiseScores.GetCount(candidate)));
            ICollection <int> seenAnaphors = new HashSet <int>();

            foreach (Pair <int, int> pair in mentionPairs)
            {
                // Only the first (best-scoring) pair of each anaphor is considered.
                if (seenAnaphors.Contains(pair.second))
                {
                    continue;
                }
                if (Thread.Interrupted())
                {
                    // Allow interrupting
                    throw new RuntimeInterruptedException();
                }
                seenAnaphors.Add(pair.second);
                Dictionaries.MentionType mt1 = document.predictedMentionsByID[pair.first].mentionType;
                Dictionaries.MentionType mt2 = document.predictedMentionsByID[pair.second].mentionType;
                // The threshold is looked up by whether each of the two mentions is pronominal.
                if (pairwiseScores.GetCount(pair) > thresholds[new Pair <bool, bool>(mt1 == Dictionaries.MentionType.Pronominal, mt2 == Dictionaries.MentionType.Pronominal)])
                {
                    CorefUtils.MergeCoreferenceClusters(pair, document);
                }
            }
        }
        /// <summary>Loads the CorefChain objects from the serialized buffer.</summary>
        /// <remarks>
        /// Layout as consumed by this method: one line with the cluster count; then, for each cluster, a header
        /// line <c>clusterId mentionCount</c> followed by one line per mention; finally one more line that is
        /// read and discarded. Fields on a mention line are separated by single ASCII whitespace characters:
        /// 0-1 key (two ints), 2 representative flag (<c>1</c> = representative), 3 mention type, 4 number,
        /// 5 gender, 6 animacy, 7 start index, 8 end index, 9 head index, 10 cluster id, 11 mention id,
        /// 12 sentence number, 13 position length <c>n</c>, 14..14+n-1 position elements, 14+n the escaped span.
        /// If several mentions of a cluster are flagged as representative, the last one read wins.
        /// </remarks>
        /// <param name="reader">the buffer</param>
        /// <returns>A map from cluster id to clusters, or null when the first line is empty after trimming</returns>
        /// <exception cref="System.IO.IOException"/>
        private static IDictionary <int, CorefChain> LoadCorefChains(BufferedReader reader)
        {
            // Field separator: one ASCII whitespace character, applied as a regular expression.
            // string.Split("\\s") would treat the two characters '\' and 's' as a literal separator.
            // The class is ASCII-only on purpose so non-ASCII spaces inside a span do not split it.
            const string fieldSeparator = "[ \\t\\n\\x0B\\f\\r]";

            // The serialized numbers are machine-readable, so parse them independently of the current culture.
            System.IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

            string line = reader.ReadLine().Trim();

            if (line.Length == 0)
            {
                return(null);
            }
            int clusterCount = System.Convert.ToInt32(line, invariant);
            IDictionary <int, CorefChain> chains = Generics.NewHashMap();

            // read each cluster
            for (int c = 0; c < clusterCount; c++)
            {
                line = reader.ReadLine().Trim();
                string[] bits         = System.Text.RegularExpressions.Regex.Split(line, fieldSeparator);
                int      cid          = System.Convert.ToInt32(bits[0], invariant);
                int      mentionCount = System.Convert.ToInt32(bits[1], invariant);
                IDictionary <IntPair, ICollection <CorefChain.CorefMention> > mentionMap = Generics.NewHashMap();
                CorefChain.CorefMention representative = null;
                // read each mention in this cluster
                for (int m = 0; m < mentionCount; m++)
                {
                    line = reader.ReadLine();
                    bits = System.Text.RegularExpressions.Regex.Split(line, fieldSeparator);
                    IntPair key = new IntPair(System.Convert.ToInt32(bits[0], invariant), System.Convert.ToInt32(bits[1], invariant));
                    bool    rep = bits[2].Equals("1");
                    Dictionaries.MentionType mentionType = ParseMentionType(bits[3]);
                    Dictionaries.Number      number      = ParseNumber(bits[4]);
                    Dictionaries.Gender      gender      = ParseGender(bits[5]);
                    Dictionaries.Animacy     animacy     = ParseAnimacy(bits[6]);
                    int   startIndex = System.Convert.ToInt32(bits[7], invariant);
                    int   endIndex   = System.Convert.ToInt32(bits[8], invariant);
                    int   headIndex  = System.Convert.ToInt32(bits[9], invariant);
                    int   clusterID  = System.Convert.ToInt32(bits[10], invariant);
                    int   mentionID  = System.Convert.ToInt32(bits[11], invariant);
                    int   sentNum    = System.Convert.ToInt32(bits[12], invariant);
                    int   posLen     = System.Convert.ToInt32(bits[13], invariant);
                    int[] posElems   = new int[posLen];
                    for (int i = 0; i < posLen; i++)
                    {
                        posElems[i] = System.Convert.ToInt32(bits[14 + i], invariant);
                    }
                    IntTuple position = new IntTuple(posElems);
                    string   span     = UnescapeSpace(bits[14 + posLen]);
                    CorefChain.CorefMention mention = new CorefChain.CorefMention(mentionType, number, gender, animacy, startIndex, endIndex, headIndex, clusterID, mentionID, sentNum, position, span);

                    // Look the key up without the indexer: an IDictionary indexer throws
                    // KeyNotFoundException for a missing key instead of returning null.
                    ICollection <CorefChain.CorefMention> mentionsWithThisHead;
                    if (!mentionMap.TryGetValue(key, out mentionsWithThisHead) || mentionsWithThisHead == null)
                    {
                        mentionsWithThisHead = Generics.NewHashSet();
                        mentionMap[key]      = mentionsWithThisHead;
                    }
                    mentionsWithThisHead.Add(mention);
                    if (rep)
                    {
                        representative = mention;
                    }
                }
                // construct the cluster
                CorefChain chain = new CorefChain(cid, mentionMap, representative);
                chains[cid] = chain;
            }
            // Consume the line that follows the last cluster; its content is not used.
            reader.ReadLine();
            return(chains);
        }