/// <summary>
/// Returns the feature counter for a candidate merge of <paramref name="c1"/> and
/// <paramref name="c2"/>, serving it from <c>featuresCache</c> when an entry for the
/// merge key exists and computing (then caching) it otherwise.
/// </summary>
/// <param name="doc">Document supplying mention indices, anaphoricity scores and pairwise scores.</param>
/// <param name="c1">First cluster of the candidate merge.</param>
/// <param name="c2">Second cluster of the candidate merge.</param>
/// <param name="gf">Global state; its <c>currentIndex</c> is part of the cache key.</param>
/// <returns>The uncompressed feature counter for the merge.</returns>
private static ICounter <string> GetFeatures(ClustererDataLoader.ClustererDoc doc, Clusterer.Cluster c1, Clusterer.Cluster c2, Clusterer.GlobalFeatures gf)
        {
            Clusterer.MergeKey key = new Clusterer.MergeKey(c1, c2, gf.currentIndex);

            // Probe with TryGetValue: a dictionary indexer throws KeyNotFoundException for an
            // absent key, which would make the cache-miss path below unreachable.
            // NOTE(review): assumes featuresCache exposes IDictionary-style TryGetValue - confirm.
            CompressedFeatureVector cfv;
            if (featuresCache.TryGetValue(key, out cfv) && cfv != null)
            {
                ICounter <string> cached = compressor.Uncompress(cfv);
                if (cached != null)
                {
                    featuresCacheHits += isTraining;
                    return(cached);
                }
            }
            featuresCacheMisses += isTraining;

            ICounter <string> features = new ClassicCounter <string>();
            if (gf.anaphorSeen)
            {
                features.IncrementCount("anaphorSeen");
            }
            features.IncrementCount("docSize", gf.docSize);
            // The cast forces floating-point division.
            features.IncrementCount("percentComplete", gf.currentIndex / (double)gf.size);
            features.IncrementCount("bias", 1.0);

            // Of the two clusters' earliest mentions, the anaphoricity score is read for the one
            // with the larger mention index (the second cluster's mention wins ties).
            int earliest1 = EarliestMention(c1, doc);
            int earliest2 = EarliestMention(c2, doc);
            int laterMention = doc.mentionIndices[earliest1] > doc.mentionIndices[earliest2] ? earliest1 : earliest2;
            features.IncrementCount("anaphoricity", doc.anaphoricityScores.GetCount(laterMention));

            if (c1.mentions.Count == 1 && c2.mentions.Count == 1)
            {
                // Two singleton clusters: use the single mention pair and tag every feature
                // gathered so far (including the global ones above) with "-single".
                Pair <int, int> mentionPair = new Pair <int, int>(c1.mentions[0], c2.mentions[0]);
                features.AddAll(AddSuffix(GetFeatures(doc, mentionPair, doc.classificationScores), "-classification"));
                features.AddAll(AddSuffix(GetFeatures(doc, mentionPair, doc.rankingScores), "-ranking"));
                features = AddSuffix(features, "-single");
            }
            else
            {
                // Otherwise use every cross-cluster mention pair.
                IList <Pair <int, int> > between = new List <Pair <int, int> >();
                foreach (int m1 in c1.mentions)
                {
                    foreach (int m2 in c2.mentions)
                    {
                        between.Add(new Pair <int, int>(m1, m2));
                    }
                }
                features.AddAll(AddSuffix(GetFeatures(doc, between, doc.classificationScores), "-classification"));
                features.AddAll(AddSuffix(GetFeatures(doc, between, doc.rankingScores), "-ranking"));
            }

            featuresCache[key] = compressor.Compress(features);
            return(features);
        }
 /// <summary>
 /// Fills the static lookup tables of this class: the set of quantifiable tags, the
 /// numeral and unit value counters, the currency-word maps, the year and month/day
 /// modifier maps, and the full-width to half-width digit map.
 /// </summary>
 static ChineseQuantifiableEntityNormalizer()
 {
     // NOTE(review): the TODO below concerns number patterns that are not built in this
     // constructor; presumably they are declared as fields elsewhere in the class - confirm.
     // TODO (yuhao): here we are not considering 1) negative numbers, 2) Chinese traditional characters

     // Entity types that are quantifiable
     quantifiable = Generics.NewHashSet();
     quantifiable.Add(NumberTag);
     quantifiable.Add(DateTag);
     quantifiable.Add(TimeTag);
     quantifiable.Add(MoneyTag);
     quantifiable.Add(PercentTag);
     quantifiable.Add(OrdinalTag);

     // Unit characters and the values assigned to them
     quantityUnitToValues = new ClassicCounter <string>();
     quantityUnitToValues.SetCount("十", 10.0);
     quantityUnitToValues.SetCount("百", 100.0);
     quantityUnitToValues.SetCount("千", 1000.0);
     quantityUnitToValues.SetCount("万", 10000.0);
     quantityUnitToValues.SetCount("亿", 100000000.0);

     // Literal number characters and their values
     wordsToValues = new ClassicCounter <string>();
     wordsToValues.SetCount("零", 0.0);
     wordsToValues.SetCount("〇", 0.0);
     wordsToValues.SetCount("一", 1.0);
     wordsToValues.SetCount("二", 2.0);
     wordsToValues.SetCount("两", 2.0);
     wordsToValues.SetCount("三", 3.0);
     wordsToValues.SetCount("四", 4.0);
     wordsToValues.SetCount("五", 5.0);
     wordsToValues.SetCount("六", 6.0);
     wordsToValues.SetCount("七", 7.0);
     wordsToValues.SetCount("八", 8.0);
     wordsToValues.SetCount("九", 9.0);
     // all units are also quantifiable individually
     wordsToValues.AddAll(quantityUnitToValues);

     // Currency words (used by money), mapped to a currency symbol
     multiCharCurrencyWords       = Generics.NewHashMap();
     multiCharCurrencyWords["美元"] = '$';
     multiCharCurrencyWords["美分"] = '$';
     multiCharCurrencyWords["英镑"] = '£';
     multiCharCurrencyWords["先令"] = '£';
     multiCharCurrencyWords["便士"] = '£';
     multiCharCurrencyWords["欧元"] = '€';
     multiCharCurrencyWords["日元"] = '¥';
     multiCharCurrencyWords["韩元"] = '₩';
     oneCharCurrencyWords         = Generics.NewHashMap();
     oneCharCurrencyWords["刀"]    = '$';
     oneCharCurrencyWords["镑"]    = '£';
     // We follow the tradition in English to use 元 instead of ¥ for RMB
     oneCharCurrencyWords["元"]    = '元';
     // For all other currency, we use default currency symbol $

     // Year modifier characters mapped to an integer
     // (presumably an offset in years from the reference year - confirm against callers).
     yearModifiers      = Generics.NewHashMap();
     yearModifiers["前"] = -2;
     yearModifiers["去"] = -1;
     yearModifiers["上"] = -1;
     yearModifiers["今"] = 0;
     yearModifiers["同"] = 0;
     yearModifiers["此"] = 0;
     yearModifiers["该"] = 0;
     yearModifiers["本"] = 0;
     yearModifiers["明"] = 1;
     yearModifiers["来"] = 1;
     yearModifiers["下"] = 1;
     yearModifiers["后"] = 2;

     // Month/day modifier characters mapped to an integer
     // (presumably an offset from the reference month or day - confirm against callers).
     monthDayModifiers      = Generics.NewHashMap();
     monthDayModifiers["昨"] = -1;
     monthDayModifiers["上"] = -1;
     monthDayModifiers["今"] = 0;
     monthDayModifiers["同"] = 0;
     monthDayModifiers["此"] = 0;
     monthDayModifiers["该"] = 0;
     monthDayModifiers["本"] = 0;
     monthDayModifiers["来"] = 1;
     monthDayModifiers["明"] = 1;
     monthDayModifiers["下"] = 1;

     // Full-width digits (U+FF10..U+FF19) mapped to their ASCII counterparts. The keys are
     // generated from code points so that they cannot be silently width-normalised in the
     // source text, which would degrade the table into an identity map.
     fullDigitToHalfDigit = Generics.NewHashMap();
     for (int digit = 0; digit <= 9; digit++)
     {
         string fullWidth = ((char)('\uFF10' + digit)).ToString();
         string halfWidth = ((char)('0' + digit)).ToString();
         fullDigitToHalfDigit[fullWidth] = halfWidth;
     }
 }
        /// <summary>
        /// Assembles the feature counter for <paramref name="example"/> from its pairwise
        /// features and the per-mention features of the mentions it refers to, adding the
        /// conjunctions enabled in <c>pairConjunctions</c> and <c>singleConjunctions</c>.
        /// For a new-link example only the second mention contributes, and every resulting
        /// feature name receives the suffix "_NEW".
        /// </summary>
        /// <param name="example">The example to featurize.</param>
        /// <param name="mentionFeatures">Compressed per-mention features, looked up by mention id.</param>
        /// <param name="compressor">Used to uncompress the stored feature vectors.</param>
        /// <returns>The assembled feature counter.</returns>
        public virtual ICounter <string> GetFeatures(Example example, IDictionary <int, CompressedFeatureVector> mentionFeatures, Compressor <string> compressor)
        {
            ICounter <string> combined = new ClassicCounter <string>();
            ICounter <string> pairwise;
            ICounter <string> mention1;
            ICounter <string> mention2 = compressor.Uncompress(mentionFeatures[example.mentionId2]);

            if (example.IsNewLink())
            {
                // No first mention and no pairwise features: both stay empty.
                pairwise = new ClassicCounter <string>();
                mention1 = new ClassicCounter <string>();
                mention2.IncrementCount("bias");
            }
            else
            {
                System.Diagnostics.Debug.Assert((!anaphoricityClassifier));
                pairwise = compressor.Uncompress(example.pairwiseFeatures);
                mention1 = compressor.Uncompress(mentionFeatures[example.mentionId1]);
            }

            if (!disallowedPrefixes.IsEmpty())
            {
                mention1 = FilterOut(mention1, disallowedPrefixes);
                mention2 = FilterOut(mention2, disallowedPrefixes);
                pairwise = FilterOut(pairwise, disallowedPrefixes);
            }

            IList <string> firstIds;
            if (example.IsNewLink())
            {
                firstIds = new List <string>();
            }
            else
            {
                firstIds = Identifiers(mention1, example.mentionType1);
            }
            IList <string> secondIds = Identifiers(mention2, example.mentionType2);

            combined.AddAll(pairwise);

            // One pass per (first id, second id) combination; firstIds is empty for a new
            // link, so the loop body is skipped entirely in that case.
            foreach (string first in firstIds)
            {
                foreach (string second in secondIds)
                {
                    string bothIds = first + "_" + second;

                    if (pairConjunctions.Contains(MetaFeatureExtractor.PairConjunction.First))
                    {
                        combined.AddAll(GetConjunction(pairwise, "_m1=" + first));
                    }
                    if (pairConjunctions.Contains(MetaFeatureExtractor.PairConjunction.Last))
                    {
                        combined.AddAll(GetConjunction(pairwise, "_m2=" + second));
                    }
                    if (pairConjunctions.Contains(MetaFeatureExtractor.PairConjunction.Both))
                    {
                        combined.AddAll(GetConjunction(pairwise, "_ms=" + bothIds));
                    }
                    if (singleConjunctions.Contains(MetaFeatureExtractor.SingleConjunction.Index))
                    {
                        combined.AddAll(GetConjunction(mention1, "_1"));
                        combined.AddAll(GetConjunction(mention2, "_2"));
                    }
                    if (singleConjunctions.Contains(MetaFeatureExtractor.SingleConjunction.IndexCurrent))
                    {
                        combined.AddAll(GetConjunction(mention1, "_1" + "_m=" + first));
                        combined.AddAll(GetConjunction(mention2, "_2" + "_m=" + second));
                    }
                    if (singleConjunctions.Contains(MetaFeatureExtractor.SingleConjunction.IndexLast))
                    {
                        combined.AddAll(GetConjunction(mention1, "_1" + "_m2=" + second));
                        combined.AddAll(GetConjunction(mention2, "_2" + "_m2=" + second));
                    }
                    if (singleConjunctions.Contains(MetaFeatureExtractor.SingleConjunction.IndexOther))
                    {
                        combined.AddAll(GetConjunction(mention1, "_1" + "_m=" + second));
                        combined.AddAll(GetConjunction(mention2, "_2" + "_m=" + first));
                    }
                    if (singleConjunctions.Contains(MetaFeatureExtractor.SingleConjunction.IndexBoth))
                    {
                        combined.AddAll(GetConjunction(mention1, "_1" + "_ms=" + bothIds));
                        combined.AddAll(GetConjunction(mention2, "_2" + "_ms=" + bothIds));
                    }
                }
            }

            if (!example.IsNewLink())
            {
                return(combined);
            }

            // New link: add the second mention's own features plus their conjunction with its
            // first identifier, then rename every feature with the "_NEW" suffix.
            combined.AddAll(mention2);
            combined.AddAll(GetConjunction(mention2, "_m=" + secondIds[0]));
            ICounter <string> renamed = new ClassicCounter <string>();
            foreach (KeyValuePair <string, double> entry in combined.EntrySet())
            {
                renamed.IncrementCount(entry.Key + "_NEW", entry.Value);
            }
            return(renamed);
        }