Пример #1
0
        /// <summary>
        /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelIn</c>, <c>AfterEffect.NoAfterEffect</c>
        /// and <c>NormalizationH2</c> to <c>CorrectnessTestCore</c>, together with a reference score
        /// computed inline from the class-level constants (declared outside this method).
        /// </summary>
        public virtual void TestIn2()
        {
            // tfn term; the original annotation records a value of about 8.1894750101.
            float normalizedTf = (float)(FREQ * SimilarityBase.Log2(1 + AVG_FIELD_LENGTH / DOC_LEN));

            // Reference score; the original annotation records a value of about 26.7459577898.
            float expectedScore = (float)(normalizedTf * SimilarityBase.Log2((NUMBER_OF_DOCUMENTS + 1) / (DOC_FREQ + 0.5)));

            SimilarityBase similarity = new DFRSimilarity(
                new BasicModelIn(),
                new AfterEffect.NoAfterEffect(),
                new NormalizationH2());

            CorrectnessTestCore(similarity, expectedScore);
        }
Пример #2
0
        /// <summary>
        /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelIF</c>, <c>AfterEffectB</c> and
        /// <c>Normalization.NoNormalization</c> to <c>CorrectnessTestCore</c>, together with a reference
        /// score computed inline as the product of two factors.
        /// </summary>
        public virtual void TestIFB()
        {
            // First factor; the original annotation records 0.8875.
            float afterEffectFactor = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (FREQ + 1));

            // Second factor; the original annotation records about 8.97759389642.
            float basicModelFactor = (float)(FREQ * SimilarityBase.Log2(1 + (NUMBER_OF_DOCUMENTS + 1) / (TOTAL_TERM_FREQ + 0.5)));

            // Product of the two factors; the original annotation records about 7.96761458307.
            float expectedScore = afterEffectFactor * basicModelFactor;

            SimilarityBase similarity = new DFRSimilarity(
                new BasicModelIF(),
                new AfterEffectB(),
                new Normalization.NoNormalization());

            CorrectnessTestCore(similarity, expectedScore);
        }
Пример #3
0
        /// <summary>
        /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelP</c>, <c>AfterEffectL</c> and
        /// <c>NormalizationH2</c> to <c>CorrectnessTestCore</c>, together with a reference score
        /// computed inline from the class-level constants (declared outside this method).
        /// </summary>
        public virtual void TestPL2()
        {
            // tfn term; annotated by the original author as about 8.1894750101.
            float normalizedTf = (float)(FREQ * SimilarityBase.Log2(1 + AVG_FIELD_LENGTH / DOC_LEN));

            // 1 / (tfn + 1); annotated as about 0.108820144666.
            float afterEffectFactor = 1.0f / (normalizedTf + 1.0f);

            // (1 + total term frequency) / (1 + number of documents); annotated as about 0.7029703.
            float meanRate = (1.0f + TOTAL_TERM_FREQ) / (1f + NUMBER_OF_DOCUMENTS);

            // Three-term sum evaluated in the same order as written originally; annotated as about 21.065619.
            float basicModelFactor = (float)(normalizedTf * SimilarityBase.Log2(normalizedTf / meanRate)
                                             + (meanRate + 1 / (12 * normalizedTf) - normalizedTf) * SimilarityBase.Log2(Math.E)
                                             + 0.5 * SimilarityBase.Log2(2 * Math.PI * normalizedTf));

            // Product of the two factors; annotated as about 2.2923636.
            float expectedScore = afterEffectFactor * basicModelFactor;

            SimilarityBase similarity = new DFRSimilarity(
                new BasicModelP(),
                new AfterEffectL(),
                new NormalizationH2());

            CorrectnessTestCore(similarity, expectedScore);
        }
Пример #4
0
        /// <summary>
        /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelD</c>, <c>AfterEffect.NoAfterEffect</c>
        /// and <c>Normalization.NoNormalization</c> to <c>CorrectnessTestCore</c>, together with a
        /// reference score computed inline in double precision and narrowed to <c>float</c> at the end.
        /// </summary>
        public virtual void TestD()
        {
            double adjustedTotalTermFreq = TOTAL_TERM_FREQ + FREQ + 1;

            // 1 / (number of documents + 1); annotated by the original author as 0.009900990099009901.
            double prior = 1.0 / (NUMBER_OF_DOCUMENTS + 1);

            // FREQ relative to the adjusted total; annotated as 0.08974358974358974.
            double ratio = FREQ / adjustedTotalTermFreq;

            // Two-term log2 expression over ratio and prior; annotated as 0.17498542370019005.
            double divergence = ratio * SimilarityBase.Log2(ratio / prior)
                                + (1 - ratio) * SimilarityBase.Log2((1 - ratio) / (1 - prior));

            // Reference score; annotated as about 16.328257.
            float expectedScore = (float)(adjustedTotalTermFreq * divergence
                                          + 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * FREQ * (1 - ratio)));

            SimilarityBase similarity = new DFRSimilarity(
                new BasicModelD(),
                new AfterEffect.NoAfterEffect(),
                new Normalization.NoNormalization());

            CorrectnessTestCore(similarity, expectedScore);
        }
Пример #5
0
        /// <summary>
        /// Asserts that a <c>DefaultSimilarity</c> and a <c>DFRSimilarity</c> return equal
        /// <c>ComputeNorm</c> results for the same <c>FieldInvertState</c>, first with
        /// <c>DiscountOverlaps</c> set to <c>false</c> on both and then with it set to <c>true</c>.
        /// </summary>
        public virtual void TestDiscountOverlapsBoost()
        {
            DefaultSimilarity reference = new DefaultSimilarity();
            SimilarityBase    candidate = new DFRSimilarity(new BasicModelIne(), new AfterEffectB(), new NormalizationH2());

            // One shared state for field "foo"; properties are assigned in the same order as before.
            FieldInvertState invertState = new FieldInvertState("foo")
            {
                Length     = 5,
                NumOverlap = 2,
                Boost      = 3
            };

            // Check the disabled setting first, then the enabled one.
            foreach (bool discount in new[] { false, true })
            {
                reference.DiscountOverlaps = discount;
                candidate.DiscountOverlaps = discount;
                Assert.AreEqual(reference.ComputeNorm(invertState), candidate.ComputeNorm(invertState));
            }
        }
Пример #6
0
        /// <summary>
        /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelBE</c>, <c>AfterEffectB</c> and
        /// <c>NormalizationH1</c> to <c>CorrectnessTestCore</c>, together with a reference score
        /// computed inline as the product of two factors.
        /// </summary>
        public virtual void TestBEB1()
        {
            // tfn term; annotated by the original author as 8.75.
            float normalizedTf = FREQ * AVG_FIELD_LENGTH / DOC_LEN;

            // First factor; annotated as about 0.67132866.
            float afterEffectFactor = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (normalizedTf + 1));

            double totalFreq = TOTAL_TERM_FREQ + 1 + normalizedTf;
            double docsPlusFreq = totalFreq + NUMBER_OF_DOCUMENTS;

            // Arguments of the two bracketed groups below; annotated as 258.5, 248.75, 79.75 and 71.0.
            double firstN = docsPlusFreq + totalFreq - 1;
            double firstM = docsPlusFreq + totalFreq - normalizedTf - 2;
            double secondN = totalFreq;
            double secondM = totalFreq - normalizedTf;

            // Leading log terms, plus the first bracketed group, minus the second bracketed group.
            // The original annotation lists 67.26544321004599, 91.9620374903885 and -8.924494472554715
            // as intermediate values and about 15.7720995 as the result.
            float basicModelFactor = (float)(-SimilarityBase.Log2(docsPlusFreq - 1) - SimilarityBase.Log2(Math.E)
                                             + ((firstM + 0.5f) * SimilarityBase.Log2(firstN / firstM) + (firstN - firstM) * SimilarityBase.Log2(firstN))
                                             - ((secondM + 0.5f) * SimilarityBase.Log2(secondN / secondM) + (secondN - secondM) * SimilarityBase.Log2(secondN)));

            // Product of the two factors; annotated as about 10.588263.
            float expectedScore = afterEffectFactor * basicModelFactor;

            SimilarityBase similarity = new DFRSimilarity(
                new BasicModelBE(),
                new AfterEffectB(),
                new NormalizationH1());

            CorrectnessTestCore(similarity, expectedScore);
        }
Пример #7
0
        /// <summary>
        /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelG</c>, <c>AfterEffectL</c> and
        /// <c>NormalizationH1</c> to <c>CorrectnessTestCore</c> with a hard-coded reference score.
        /// </summary>
        public virtual void TestGL1()
        {
            // Reference score supplied as a literal rather than derived in the test.
            const float expectedScore = 1.6390540599822998f;

            SimilarityBase similarity = new DFRSimilarity(
                new BasicModelG(),
                new AfterEffectL(),
                new NormalizationH1());

            CorrectnessTestCore(similarity, expectedScore);
        }
Пример #8
0
        /// <summary>
        /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelIne</c>, <c>AfterEffectB</c> and
        /// <c>NormalizationH2</c> to <c>CorrectnessTestCore</c> with a hard-coded reference score.
        /// </summary>
        public virtual void TestIneB2()
        {
            // Reference score supplied as a literal rather than derived in the test.
            const float expectedScore = 5.747603416442871f;

            SimilarityBase similarity = new DFRSimilarity(
                new BasicModelIne(),
                new AfterEffectB(),
                new NormalizationH2());

            CorrectnessTestCore(similarity, expectedScore);
        }
Пример #9
0
 /// <summary>
 /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelP</c>, <c>AfterEffectL</c> and
 /// <c>NormalizationH2</c> to <c>CorrectnessTestCore</c>, together with a reference score
 /// computed inline from the class-level constants (declared outside this method).
 /// </summary>
 public virtual void TestPL2()
 {
     // tfn term; annotated by the original author as about 8.1894750101.
     float normalizedTf = (float)(FREQ * SimilarityBase.Log2(1 + AVG_FIELD_LENGTH / DOC_LEN));

     // 1 / (tfn + 1); annotated as about 0.108820144666.
     float afterEffectFactor = 1.0f / (normalizedTf + 1.0f);

     // (1 + total term frequency) / (1 + number of documents); annotated as about 0.7029703.
     float meanRate = (1.0f + TOTAL_TERM_FREQ) / (1f + NUMBER_OF_DOCUMENTS);

     // Three-term sum evaluated in the same order as written originally; annotated as about 21.065619.
     float basicModelFactor = (float)(normalizedTf * SimilarityBase.Log2(normalizedTf / meanRate)
                                      + (meanRate + 1 / (12 * normalizedTf) - normalizedTf) * SimilarityBase.Log2(Math.E)
                                      + 0.5 * SimilarityBase.Log2(2 * Math.PI * normalizedTf));

     // Product of the two factors; annotated as about 2.2923636.
     float expectedScore = afterEffectFactor * basicModelFactor;

     SimilarityBase similarity = new DFRSimilarity(
         new BasicModelP(),
         new AfterEffectL(),
         new NormalizationH2());

     CorrectnessTestCore(similarity, expectedScore);
 }
Пример #10
0
 /// <summary>
 /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelIn</c>, <c>AfterEffect.NoAfterEffect</c>
 /// and <c>NormalizationH2</c> to <c>CorrectnessTestCore</c>, together with a reference score
 /// computed inline from the class-level constants (declared outside this method).
 /// </summary>
 public virtual void TestIn2()
 {
     // tfn term; the original annotation records a value of about 8.1894750101.
     float normalizedTf = (float)(FREQ * SimilarityBase.Log2(1 + AVG_FIELD_LENGTH / DOC_LEN));

     // Reference score; the original annotation records a value of about 26.7459577898.
     float expectedScore = (float)(normalizedTf * SimilarityBase.Log2((NUMBER_OF_DOCUMENTS + 1) / (DOC_FREQ + 0.5)));

     SimilarityBase similarity = new DFRSimilarity(
         new BasicModelIn(),
         new AfterEffect.NoAfterEffect(),
         new NormalizationH2());

     CorrectnessTestCore(similarity, expectedScore);
 }
Пример #11
0
 /// <summary>
 /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelIne</c>, <c>AfterEffectB</c> and
 /// <c>NormalizationH2</c> to <c>CorrectnessTestCore</c> with a hard-coded reference score.
 /// </summary>
 public virtual void TestIneB2()
 {
     // Reference score supplied as a literal rather than derived in the test.
     const float expectedScore = 5.747603416442871f;

     SimilarityBase similarity = new DFRSimilarity(
         new BasicModelIne(),
         new AfterEffectB(),
         new NormalizationH2());

     CorrectnessTestCore(similarity, expectedScore);
 }
Пример #12
0
 /// <summary>
 /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelIF</c>, <c>AfterEffectB</c> and
 /// <c>Normalization.NoNormalization</c> to <c>CorrectnessTestCore</c>, together with a reference
 /// score computed inline as the product of two factors.
 /// </summary>
 public virtual void TestIFB()
 {
     // First factor; the original annotation records 0.8875.
     float afterEffectFactor = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (FREQ + 1));

     // Second factor; the original annotation records about 8.97759389642.
     float basicModelFactor = (float)(FREQ * SimilarityBase.Log2(1 + (NUMBER_OF_DOCUMENTS + 1) / (TOTAL_TERM_FREQ + 0.5)));

     // Product of the two factors; the original annotation records about 7.96761458307.
     float expectedScore = afterEffectFactor * basicModelFactor;

     SimilarityBase similarity = new DFRSimilarity(
         new BasicModelIF(),
         new AfterEffectB(),
         new Normalization.NoNormalization());

     CorrectnessTestCore(similarity, expectedScore);
 }
Пример #13
0
 /// <summary>
 /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelG</c>, <c>AfterEffectL</c> and
 /// <c>NormalizationH1</c> to <c>CorrectnessTestCore</c> with a hard-coded reference score.
 /// </summary>
 public virtual void TestGL1()
 {
     // Reference score supplied as a literal rather than derived in the test.
     const float expectedScore = 1.6390540599822998f;

     SimilarityBase similarity = new DFRSimilarity(
         new BasicModelG(),
         new AfterEffectL(),
         new NormalizationH1());

     CorrectnessTestCore(similarity, expectedScore);
 }
Пример #14
0
 /// <summary>
 /// Asserts that a <c>DefaultSimilarity</c> and a <c>DFRSimilarity</c> return equal
 /// <c>ComputeNorm</c> results for the same <c>FieldInvertState</c>, first with
 /// <c>DiscountOverlaps</c> set to <c>false</c> on both and then with it set to <c>true</c>.
 /// </summary>
 public virtual void TestDiscountOverlapsBoost()
 {
     DefaultSimilarity reference = new DefaultSimilarity();
     SimilarityBase candidate = new DFRSimilarity(new BasicModelIne(), new AfterEffectB(), new NormalizationH2());

     // One shared state for field "foo"; properties are assigned in the same order as before.
     FieldInvertState invertState = new FieldInvertState("foo")
     {
         Length = 5,
         NumOverlap = 2,
         Boost = 3
     };

     // Check the disabled setting first, then the enabled one.
     foreach (bool discount in new[] { false, true })
     {
         reference.DiscountOverlaps = discount;
         candidate.DiscountOverlaps = discount;
         Assert.AreEqual(reference.ComputeNorm(invertState), candidate.ComputeNorm(invertState));
     }
 }
Пример #15
0
 /// <summary>
 /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelD</c>, <c>AfterEffect.NoAfterEffect</c>
 /// and <c>Normalization.NoNormalization</c> to <c>CorrectnessTestCore</c>, together with a
 /// reference score computed inline in double precision and narrowed to <c>float</c> at the end.
 /// </summary>
 public virtual void TestD()
 {
     double adjustedTotalTermFreq = TOTAL_TERM_FREQ + FREQ + 1;

     // 1 / (number of documents + 1); annotated by the original author as 0.009900990099009901.
     double prior = 1.0 / (NUMBER_OF_DOCUMENTS + 1);

     // FREQ relative to the adjusted total; annotated as 0.08974358974358974.
     double ratio = FREQ / adjustedTotalTermFreq;

     // Two-term log2 expression over ratio and prior; annotated as 0.17498542370019005.
     double divergence = ratio * SimilarityBase.Log2(ratio / prior)
                         + (1 - ratio) * SimilarityBase.Log2((1 - ratio) / (1 - prior));

     // Reference score; annotated as about 16.328257.
     float expectedScore = (float)(adjustedTotalTermFreq * divergence
                                   + 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * FREQ * (1 - ratio)));

     SimilarityBase similarity = new DFRSimilarity(
         new BasicModelD(),
         new AfterEffect.NoAfterEffect(),
         new Normalization.NoNormalization());

     CorrectnessTestCore(similarity, expectedScore);
 }
Пример #16
0
 /// <summary>
 /// Passes a <c>DFRSimilarity</c> composed of <c>BasicModelBE</c>, <c>AfterEffectB</c> and
 /// <c>NormalizationH1</c> to <c>CorrectnessTestCore</c>, together with a reference score
 /// computed inline as the product of two factors.
 /// </summary>
 public virtual void TestBEB1()
 {
     // tfn term; annotated by the original author as 8.75.
     float normalizedTf = FREQ * AVG_FIELD_LENGTH / DOC_LEN;

     // First factor; annotated as about 0.67132866.
     float afterEffectFactor = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (normalizedTf + 1));

     double totalFreq = TOTAL_TERM_FREQ + 1 + normalizedTf;
     double docsPlusFreq = totalFreq + NUMBER_OF_DOCUMENTS;

     // Arguments of the two bracketed groups below; annotated as 258.5, 248.75, 79.75 and 71.0.
     double firstN = docsPlusFreq + totalFreq - 1;
     double firstM = docsPlusFreq + totalFreq - normalizedTf - 2;
     double secondN = totalFreq;
     double secondM = totalFreq - normalizedTf;

     // Leading log terms, plus the first bracketed group, minus the second bracketed group.
     // The original annotation lists 67.26544321004599, 91.9620374903885 and -8.924494472554715
     // as intermediate values and about 15.7720995 as the result.
     float basicModelFactor = (float)(-SimilarityBase.Log2(docsPlusFreq - 1) - SimilarityBase.Log2(Math.E)
                                      + ((firstM + 0.5f) * SimilarityBase.Log2(firstN / firstM) + (firstN - firstM) * SimilarityBase.Log2(firstN))
                                      - ((secondM + 0.5f) * SimilarityBase.Log2(secondN / secondM) + (secondN - secondM) * SimilarityBase.Log2(secondN)));

     // Product of the two factors; annotated as about 10.588263.
     float expectedScore = afterEffectFactor * basicModelFactor;

     SimilarityBase similarity = new DFRSimilarity(
         new BasicModelBE(),
         new AfterEffectB(),
         new NormalizationH1());

     CorrectnessTestCore(similarity, expectedScore);
 }