コード例 #1
0
            /// <summary>
            /// Builds the initial clustering state for <paramref name="doc"/>: one singleton cluster per
            /// mention, the list of candidate mention pairs, and one
            /// <see cref="Clusterer.GlobalFeatures"/> entry per scored pair.
            /// </summary>
            /// <param name="doc">Document supplying the mentions and the pairwise score counters.</param>
            public State(ClustererDataLoader.ClustererDoc doc)
            {
                currentDocId      = doc.id;
                this.doc          = doc;
                this.hashedScores = new Dictionary<Clusterer.MergeKey, bool>();
                this.hashedCosts  = new Dictionary<long, double>();
                this.clusters     = new List<Clusterer.Cluster>();
                this.hash         = 0;
                mentionToCluster  = new Dictionary<int, Clusterer.Cluster>();

                // Every mention starts in its own cluster; the state hash folds in each cluster's hash.
                foreach (int m in doc.mentions)
                {
                    Clusterer.Cluster c = new Clusterer.Cluster(m);
                    clusters.Add(c);
                    mentionToCluster[m] = c;
                    hash ^= c.hash * 7;
                }

                ICounter<Pair<int, int>> scores = UseRanking ? doc.rankingScores : doc.classificationScores;

                // Sort highest-scoring pairs first. The cut-off loop below stops at the first pair whose
                // score is too low, which is only meaningful when scores are non-increasing. The previous
                // Sort(null) call ignored `scores` entirely.
                // A concrete List<T> is used so the BCL Sort(Comparison<T>) overload is selected.
                List<Pair<int, int>> sortedPairs = new List<Pair<int, int>>(doc.classificationScores.KeySet());
                sortedPairs.Sort((p1, p2) => scores.GetCount(p2).CompareTo(scores.GetCount(p1)));
                IList<Pair<int, int>> allPairs = sortedPairs;

                // Find the index of the first pair that is not kept.
                int i;
                for (i = 0; i < allPairs.Count; i++)
                {
                    double score = scores.GetCount(allPairs[i]);
                    if (score < MinPairwiseScore && i > MinPairs)
                    {
                        break;
                    }
                    if (i >= EarlyStopThreshold && i / score > EarlyStopVal)
                    {
                        break;
                    }
                }
                mentionPairs = allPairs.SubList(0, i);

                // Tracks which second-elements of a pair have already appeared earlier in the ordering.
                ICounter<int> seenAnaphors = new ClassicCounter<int>();

                // Both values are identical for every entry, so compute them once.
                int    keptPairCount = mentionPairs.Count;
                double docSize       = doc.mentions.Count / 300.0;

                // Note: features are generated for every scored pair, not only the kept prefix.
                globalFeatures = new List<Clusterer.GlobalFeatures>();
                for (int j = 0; j < allPairs.Count; j++)
                {
                    Pair<int, int>           mentionPair = allPairs[j];
                    Clusterer.GlobalFeatures gf          = new Clusterer.GlobalFeatures();
                    gf.currentIndex = j;
                    gf.anaphorSeen  = seenAnaphors.ContainsKey(mentionPair.second);
                    gf.size         = keptPairCount;
                    gf.docSize      = docSize;
                    globalFeatures.Add(gf);
                    seenAnaphors.IncrementCount(mentionPair.second);
                }
                currentIndex = 0;
                SetClusters();
            }
コード例 #2
0
        /// <summary>
        /// Returns the feature counter for merging clusters <paramref name="c1"/> and
        /// <paramref name="c2"/>, reading it from <c>featuresCache</c> when present and otherwise
        /// computing it and storing the compressed result.
        /// </summary>
        /// <param name="doc">Document the clusters belong to; supplies mention indices and score counters.</param>
        /// <param name="c1">First cluster of the candidate merge.</param>
        /// <param name="c2">Second cluster of the candidate merge.</param>
        /// <param name="gf">Global features for the current pair index.</param>
        /// <returns>The feature counter for this merge.</returns>
        private static ICounter<string> GetFeatures(ClustererDataLoader.ClustererDoc doc, Clusterer.Cluster c1, Clusterer.Cluster c2, Clusterer.GlobalFeatures gf)
        {
            Clusterer.MergeKey key = new Clusterer.MergeKey(c1, c2, gf.currentIndex);

            // TryGetValue rather than the indexer: a dictionary indexer throws KeyNotFoundException on
            // a missing key, whereas this code expects "no entry" to behave like a null entry.
            CompressedFeatureVector cfv;
            ICounter<string>        features = null;
            if (featuresCache.TryGetValue(key, out cfv) && cfv != null)
            {
                features = compressor.Uncompress(cfv);
            }
            if (features != null)
            {
                featuresCacheHits += isTraining;
                return(features);
            }
            featuresCacheMisses += isTraining;

            features = new ClassicCounter<string>();
            if (gf.anaphorSeen)
            {
                features.IncrementCount("anaphorSeen");
            }
            features.IncrementCount("docSize", gf.docSize);
            features.IncrementCount("percentComplete", gf.currentIndex / (double)gf.size);
            features.IncrementCount("bias", 1.0);

            // Of the two clusters' earliest mentions, use the one with the larger mention index
            // (the second cluster's on a tie) for the anaphoricity score.
            int earliest1    = EarliestMention(c1, doc);
            int earliest2    = EarliestMention(c2, doc);
            int laterMention = doc.mentionIndices[earliest1] > doc.mentionIndices[earliest2] ? earliest1 : earliest2;
            features.IncrementCount("anaphoricity", doc.anaphoricityScores.GetCount(laterMention));

            if (c1.mentions.Count == 1 && c2.mentions.Count == 1)
            {
                // Two singletons: use the single-pair features, then tag every feature
                // collected so far with "-single".
                Pair<int, int> mentionPair = new Pair<int, int>(c1.mentions[0], c2.mentions[0]);
                features.AddAll(AddSuffix(GetFeatures(doc, mentionPair, doc.classificationScores), "-classification"));
                features.AddAll(AddSuffix(GetFeatures(doc, mentionPair, doc.rankingScores), "-ranking"));
                features = AddSuffix(features, "-single");
            }
            else
            {
                // Otherwise use features over every cross-cluster mention pair.
                IList<Pair<int, int>> between = new List<Pair<int, int>>();
                foreach (int m1 in c1.mentions)
                {
                    foreach (int m2 in c2.mentions)
                    {
                        between.Add(new Pair<int, int>(m1, m2));
                    }
                }
                features.AddAll(AddSuffix(GetFeatures(doc, between, doc.classificationScores), "-classification"));
                features.AddAll(AddSuffix(GetFeatures(doc, between, doc.rankingScores), "-ranking"));
            }
            featuresCache[key] = compressor.Compress(features);
            return(features);
        }