// Applies the action at the current mention pair: if isMerge, c2 is merged into c1,
// then the index advances past any pairs whose mentions already share a cluster.
public virtual void DoAction(bool isMerge)
{
    if (isMerge)
    {
        // Always merge the smaller cluster into the larger one.
        if (c2.Size() > c1.Size())
        {
            Clusterer.Cluster tmp = c1;
            c1 = c2;
            c2 = tmp;
        }
        // Remove both clusters' contributions from the state hash before merging...
        hash ^= 7 * c1.hash;
        hash ^= 7 * c2.hash;
        c1.Merge(c2);
        foreach (int m in c2.mentions)
        {
            mentionToCluster[m] = c1;
        }
        clusters.Remove(c2);
        // ...and add back the merged cluster's contribution.
        hash ^= 7 * c1.hash;
    }
    currentIndex++;
    if (!IsComplete())
    {
        SetClusters();
    }
    // Skip ahead over pairs whose mentions are already in the same cluster.
    while (c1 == c2)
    {
        currentIndex++;
        if (IsComplete())
        {
            break;
        }
        SetClusters();
    }
}
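// Points c1 and c2 at the clusters containing the current mention pair.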
public virtual void SetClusters()
{
    Pair<int, int> currentPair = mentionPairs[currentIndex];
    c1 = mentionToCluster[currentPair.first];
    c2 = mentionToCluster[currentPair.second];
}
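// Builds the initial state for a document: one singleton cluster per mention,
// candidate pairs pruned by pairwise score, and per-pair global features precomputed.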
public State(ClustererDataLoader.ClustererDoc doc)
{
    currentDocId      = doc.id;
    this.doc          = doc;
    this.hashedScores = new Dictionary<Clusterer.MergeKey, bool>();
    this.hashedCosts  = new Dictionary<long, double>();
    this.clusters     = new List<Clusterer.Cluster>();
    this.hash         = 0;
    mentionToCluster  = new Dictionary<int, Clusterer.Cluster>();
    // Start with a singleton cluster for every mention in the document.
    foreach (int m in doc.mentions)
    {
        Clusterer.Cluster c = new Clusterer.Cluster(m);
        clusters.Add(c);
        mentionToCluster[m] = c;
        hash ^= c.hash * 7;
    }
    List<Pair<int, int>> allPairs = new List<Pair<int, int>>(doc.classificationScores.KeySet());
    ICounter<Pair<int, int>> scores = UseRanking ? doc.rankingScores : doc.classificationScores;
    // Sort candidate pairs by descending score; the pruning loop below relies on this ordering.
    allPairs.Sort((p1, p2) => scores.GetCount(p2).CompareTo(scores.GetCount(p1)));
    // Keep only the highest-scoring pairs: stop once scores fall below MinPairwiseScore
    // (after at least MinPairs pairs) or once the early-stopping ratio is exceeded.
    int i;
    for (i = 0; i < allPairs.Count; i++)
    {
        double score = scores.GetCount(allPairs[i]);
        if (score < MinPairwiseScore && i > MinPairs)
        {
            break;
        }
        if (i >= EarlyStopThreshold && i / score > EarlyStopVal)
        {
            break;
        }
    }
    mentionPairs = allPairs.SubList(0, i);
    ICounter<int> seenAnaphors    = new ClassicCounter<int>();
    ICounter<int> seenAntecedents = new ClassicCounter<int>();
    // Precompute the global features attached to each candidate pair.
    globalFeatures = new List<Clusterer.GlobalFeatures>();
    for (int j = 0; j < allPairs.Count; j++)
    {
        Pair<int, int> mentionPair = allPairs[j];
        Clusterer.GlobalFeatures gf = new Clusterer.GlobalFeatures();
        gf.currentIndex = j;
        gf.anaphorSeen  = seenAnaphors.ContainsKey(mentionPair.second);
        gf.size         = mentionPairs.Count;
        gf.docSize      = doc.mentions.Count / 300.0;
        globalFeatures.Add(gf);
        seenAnaphors.IncrementCount(mentionPair.second);
        seenAntecedents.IncrementCount(mentionPair.first);
    }
    currentIndex = 0;
    SetClusters();
}
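// Returns the mention in cluster c that appears earliest in the document.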
private static int EarliestMention(Clusterer.Cluster c, ClustererDataLoader.ClustererDoc doc)
{
    int earliest = -1;
    foreach (int m in c.mentions)
    {
        int pos = doc.mentionIndices[m];
        if (earliest == -1 || pos < doc.mentionIndices[earliest])
        {
            earliest = m;
        }
    }
    return earliest;
}
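// Copy constructor: clusters and the mention-to-cluster map are deep-copied so the new
// state can be mutated independently; caches, scores, and the pair list are shared.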
public State(Clusterer.State state)
{
    this.hashedScores     = state.hashedScores;
    this.hashedCosts      = state.hashedCosts;
    this.doc              = state.doc;
    this.hash             = state.hash;
    this.mentionPairs     = state.mentionPairs;
    this.currentIndex     = state.currentIndex;
    this.globalFeatures   = state.globalFeatures;
    this.clusters         = new List<Clusterer.Cluster>();
    this.mentionToCluster = new Dictionary<int, Clusterer.Cluster>();
    foreach (Clusterer.Cluster c in state.clusters)
    {
        Clusterer.Cluster copy = new Clusterer.Cluster(c);
        clusters.Add(copy);
        foreach (int m in copy.mentions)
        {
            mentionToCluster[m] = copy;
        }
    }
    SetClusters();
}
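// Computes (or fetches from the compressed cache) the feature vector for merging c1 and
// c2, combining global features with pairwise classification and ranking scores.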
private static ICounter<string> GetFeatures(ClustererDataLoader.ClustererDoc doc, Clusterer.Cluster c1, Clusterer.Cluster c2, Clusterer.GlobalFeatures gf)
{
    Clusterer.MergeKey key = new Clusterer.MergeKey(c1, c2, gf.currentIndex);
    CompressedFeatureVector cfv = featuresCache[key];
    ICounter<string> features = cfv == null ? null : compressor.Uncompress(cfv);
    if (features != null)
    {
        // Cache statistics only accumulate during training (isTraining acts as a 0/1 flag).
        featuresCacheHits += isTraining;
        return features;
    }
    featuresCacheMisses += isTraining;
    features = new ClassicCounter<string>();
    if (gf.anaphorSeen)
    {
        features.IncrementCount("anaphorSeen");
    }
    features.IncrementCount("docSize", gf.docSize);
    features.IncrementCount("percentComplete", gf.currentIndex / (double)gf.size);
    features.IncrementCount("bias", 1.0);
    // Order the two clusters' earliest mentions so earliest2 is the later one in the document.
    int earliest1 = EarliestMention(c1, doc);
    int earliest2 = EarliestMention(c2, doc);
    if (doc.mentionIndices[earliest1] > doc.mentionIndices[earliest2])
    {
        int tmp = earliest1;
        earliest1 = earliest2;
        earliest2 = tmp;
    }
    features.IncrementCount("anaphoricity", doc.anaphoricityScores.GetCount(earliest2));
    if (c1.mentions.Count == 1 && c2.mentions.Count == 1)
    {
        // Both clusters are singletons: use the single mention pair's scores directly.
        Pair<int, int> mentionPair = new Pair<int, int>(c1.mentions[0], c2.mentions[0]);
        features.AddAll(AddSuffix(GetFeatures(doc, mentionPair, doc.classificationScores), "-classification"));
        features.AddAll(AddSuffix(GetFeatures(doc, mentionPair, doc.rankingScores), "-ranking"));
        features = AddSuffix(features, "-single");
    }
    else
    {
        // Otherwise aggregate scores over all cross-cluster mention pairs.
        IList<Pair<int, int>> between = new List<Pair<int, int>>();
        foreach (int m1 in c1.mentions)
        {
            foreach (int m2 in c2.mentions)
            {
                between.Add(new Pair<int, int>(m1, m2));
            }
        }
        features.AddAll(AddSuffix(GetFeatures(doc, between, doc.classificationScores), "-classification"));
        features.AddAll(AddSuffix(GetFeatures(doc, between, doc.rankingScores), "-ranking"));
    }
    featuresCache[key] = compressor.Compress(features);
    return features;
}
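// Absorbs the mentions of c into this cluster; XOR keeps the hash independent of merge order.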
public virtual void Merge(Clusterer.Cluster c)
{
    Sharpen.Collections.AddAll(mentions, c.mentions);
    hash ^= c.hash;
}
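// Copy constructor: clones the mention list and reuses the existing hash.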
public Cluster(Clusterer.Cluster c)
{
    mentions = new List<int>(c.mentions);
    hash     = c.hash;
}
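// Cache key for a candidate merge of c1 and c2 at pair index ind. The document id is
// folded in, presumably so cached entries from different documents do not collide.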
public MergeKey(Clusterer.Cluster c1, Clusterer.Cluster c2, int ind)
{
    hash = (int)(c1.hash ^ c2.hash) + (2003 * ind) + currentDocId;
}