Beispiel #1
0
        /// <summary>
        /// Computes <c>tfn * log2(1 + (N + 1) / (F + 0.5))</c>, where <c>N</c> is
        /// <c>stats.NumberOfDocuments</c> and <c>F</c> is <c>stats.TotalTermFreq</c>.
        /// </summary>
        /// <param name="stats">Statistics supplying the document count and the total term frequency.</param>
        /// <param name="tfn">Term frequency value that scales the logarithmic term.</param>
        /// <returns>The product of <paramref name="tfn"/> and the logarithmic term.</returns>
        public override sealed float Score(BasicStats stats, float tfn)
        {
            long docCount = stats.NumberOfDocuments;
            long totalTermFreq = stats.TotalTermFreq;

            // The 0.5 literal promotes the division to double before the logarithm is taken.
            double ratio = (docCount + 1) / (totalTermFreq + 0.5);
            float logTerm = (float)SimilarityBase.Log2(1 + ratio);

            return tfn * logTerm;
        }
Beispiel #2
0
        /// <summary>
        /// Computes <c>tfn * log2((N + 1) / (n + 0.5))</c>, where <c>N</c> is
        /// <c>stats.NumberOfDocuments</c> and <c>n</c> is <c>stats.DocFreq</c>.
        /// </summary>
        /// <param name="stats">Statistics supplying the document count and the document frequency.</param>
        /// <param name="tfn">Term frequency value that scales the logarithmic term.</param>
        /// <returns>The product of <paramref name="tfn"/> and the logarithmic term.</returns>
        public override sealed float Score(BasicStats stats, float tfn)
        {
            long docCount = stats.NumberOfDocuments;
            long docFreq = stats.DocFreq;

            // The 0.5 literal promotes the division to double before the logarithm is taken.
            double ratio = (docCount + 1) / (docFreq + 0.5);
            float logTerm = (float)SimilarityBase.Log2(ratio);

            return tfn * logTerm;
        }
Beispiel #3
0
        /// <summary>
        /// Shared core of the correctness tests: scores <paramref name="sim"/> using the
        /// statistics produced by <c>CreateStats()</c> and asserts that the result equals
        /// <paramref name="gold"/> within <c>FLOAT_EPSILON</c>.
        /// </summary>
        /// <param name="sim">The similarity whose score is checked.</param>
        /// <param name="gold">The expected score, computed by the calling test.</param>
        private void CorrectnessTestCore(SimilarityBase sim, float gold)
        {
            BasicStats template = CreateStats();

            // Let the similarity build its own weight from the template's boost,
            // collection statistics and term statistics.
            var weight = sim.ComputeWeight(template.TotalBoost, ToCollectionStats(template), ToTermStats(template));
            BasicStats computedStats = (BasicStats)weight;

            float actual = sim.Score(computedStats, FREQ, DOC_LEN);
            string failureMessage = sim.ToString() + " score not correct.";

            Assert.AreEqual(gold, actual, FLOAT_EPSILON, failureMessage);
        }
Beispiel #4
0
        /// <summary>
        /// Computes <c>-log2((N - 1) * e) + f(N + F - 1, N + F - tfn - 2) - f(F, F - tfn)</c>,
        /// where <c>F = stats.TotalTermFreq + 1 + tfn</c>, <c>N = F + stats.NumberOfDocuments</c>
        /// and <c>f</c> is this class's <c>F(double, double)</c> helper.
        /// </summary>
        /// <param name="stats">Statistics supplying the total term frequency and the document count.</param>
        /// <param name="tfn">Term frequency value used in the formula.</param>
        /// <returns>The value of the formula, narrowed to <see cref="float"/>.</returns>
        public override sealed float Score(BasicStats stats, float tfn)
        {
            double termFreq = stats.TotalTermFreq + 1 + tfn;

            // approximation only holds true when F << N, so F is added to the document count
            double docCount = termFreq + stats.NumberOfDocuments;

            double logTerm = -SimilarityBase.Log2((docCount - 1) * Math.E);
            double upper = this.F(docCount + termFreq - 1, docCount + termFreq - tfn - 2);
            double lower = this.F(termFreq, termFreq - tfn);

            return (float)(logTerm + upper - lower);
        }
Beispiel #5
0
        /// <summary>
        /// Computes <c>tfn * log2((N + 1) / (ne + 0.5))</c>, where
        /// <c>ne = N * (1 - ((N - 1) / N)^F)</c>, <c>N</c> is <c>stats.NumberOfDocuments</c>
        /// and <c>F</c> is <c>stats.TotalTermFreq</c>.
        /// </summary>
        /// <param name="stats">Statistics supplying the document count and the total term frequency.</param>
        /// <param name="tfn">Term frequency value that scales the logarithmic term.</param>
        /// <returns>The product of <paramref name="tfn"/> and the logarithmic term.</returns>
        public override sealed float Score(BasicStats stats, float tfn)
        {
            long docCount = stats.NumberOfDocuments;
            long totalTermFreq = stats.TotalTermFreq;

            // The cast to double keeps (N - 1) / N from being truncated by integer division.
            double decay = Math.Pow((docCount - 1) / (double)docCount, totalTermFreq);
            double expectedDocs = docCount * (1 - decay);

            float logTerm = (float)SimilarityBase.Log2((docCount + 1) / (expectedDocs + 0.5));

            return tfn * logTerm;
        }
Beispiel #6
0
        public virtual void TestIn2()
        {
            // Checks DFRSimilarity(BasicModelIn, NoAfterEffect, NormalizationH2) against a
            // hand-computed score. Numeric annotations are carried over from the original comments.
            SimilarityBase similarity = new DFRSimilarity(new BasicModelIn(), new AfterEffect.NoAfterEffect(), new NormalizationH2());

            double lengthFactor = SimilarityBase.Log2(1 + AVG_FIELD_LENGTH / DOC_LEN);
            float tfn = (float)(FREQ * lengthFactor); // 8.1894750101

            double logTerm = SimilarityBase.Log2((NUMBER_OF_DOCUMENTS + 1) / (DOC_FREQ + 0.5));
            float expected = (float)(tfn * logTerm); // 26.7459577898

            CorrectnessTestCore(similarity, expected);
        }
Beispiel #7
0
        public virtual void TestIFB()
        {
            // Checks DFRSimilarity(BasicModelIF, AfterEffectB, NoNormalization) against a
            // hand-computed score: the expected value is the after-effect factor times the
            // basic-model term.
            SimilarityBase similarity = new DFRSimilarity(new BasicModelIF(), new AfterEffectB(), new Normalization.NoNormalization());

            float afterEffect = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (FREQ + 1));

            double logTerm = SimilarityBase.Log2(1 + (NUMBER_OF_DOCUMENTS + 1) / (TOTAL_TERM_FREQ + 0.5));
            float basicModel = (float)(FREQ * logTerm);

            float expected = afterEffect * basicModel;

            CorrectnessTestCore(similarity, expected);
        }
Beispiel #8
0
        /// <summary>
        /// Computes <c>log2(lambda + 1) + tfn * log2((1 + lambda) / lambda)</c>, where
        /// <c>lambda = F / (N + F)</c>, <c>F = stats.TotalTermFreq + 1</c> and
        /// <c>N = stats.NumberOfDocuments</c>.
        /// </summary>
        /// <param name="stats">Statistics supplying the total term frequency and the document count.</param>
        /// <param name="tfn">Term frequency value that scales the second logarithmic term.</param>
        /// <returns>The value of the formula, narrowed to <see cref="float"/>.</returns>
        public override sealed float Score(BasicStats stats, float tfn)
        {
            double termFreq = stats.TotalTermFreq + 1;
            double docCount = stats.NumberOfDocuments;

            // just like in BE, the approximation only holds true when F << N,
            // so lambda is F / (N + F) rather than F / N
            double lambda = termFreq / (docCount + termFreq);

            // -log(1 / (lambda + 1)) simplifies to log(lambda + 1)
            double constantTerm = SimilarityBase.Log2(lambda + 1);
            double perOccurrence = SimilarityBase.Log2((1 + lambda) / lambda);

            return (float)(constantTerm + tfn * perOccurrence);
        }
Beispiel #9
0
        public virtual void TestD()
        {
            // Checks DFRSimilarity(BasicModelD, NoAfterEffect, NoNormalization) against a
            // hand-computed score. Numeric annotations are carried over from the original comments.
            SimilarityBase similarity = new DFRSimilarity(new BasicModelD(), new AfterEffect.NoAfterEffect(), new Normalization.NoNormalization());

            double normalizedTermFreq = TOTAL_TERM_FREQ + FREQ + 1;
            double prior = 1.0 / (NUMBER_OF_DOCUMENTS + 1); // 0.009900990099009901
            double phi = FREQ / normalizedTermFreq;         // 0.08974358974358974
            double nphi = 1 - phi;

            double divergence = phi * SimilarityBase.Log2(phi / prior) + nphi * SimilarityBase.Log2(nphi / (1 - prior)); // 0.17498542370019005
            double correction = 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * FREQ * (1 - phi));

            float expected = (float)(normalizedTermFreq * divergence + correction); // 16.328257

            CorrectnessTestCore(similarity, expected);
        }
Beispiel #10
0
        public virtual void TestPL2()
        {
            // Checks DFRSimilarity(BasicModelP, AfterEffectL, NormalizationH2) against a
            // hand-computed score. Numeric annotations are carried over from the original comments.
            SimilarityBase similarity = new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH2());

            double lengthFactor = SimilarityBase.Log2(1 + AVG_FIELD_LENGTH / DOC_LEN);
            float tfn = (float)(FREQ * lengthFactor);                             // 8.1894750101
            float afterEffect = 1.0f / (tfn + 1.0f);                              // 0.108820144666
            float lambda = (1.0f + TOTAL_TERM_FREQ) / (1f + NUMBER_OF_DOCUMENTS); // 0.7029703

            // The basic-model value is the sum of these three terms, added left to right.
            double ratioTerm = tfn * SimilarityBase.Log2(tfn / lambda);
            double linearTerm = (lambda + 1 / (12 * tfn) - tfn) * SimilarityBase.Log2(Math.E);
            double rootTerm = 0.5 * SimilarityBase.Log2(2 * Math.PI * tfn);
            float basicModel = (float)(ratioTerm + linearTerm + rootTerm);        // 21.065619

            float expected = afterEffect * basicModel;                            // 2.2923636

            CorrectnessTestCore(similarity, expected);
        }
Beispiel #11
0
        /// <summary>
        /// Computes <c>D * F + 0.5 * log2(1 + 2 * pi * tfn * (1 - phi))</c>, where
        /// <c>F = stats.TotalTermFreq + 1 + tfn</c>, <c>phi = tfn / F</c>,
        /// <c>p = 1 / (stats.NumberOfDocuments + 1)</c> and
        /// <c>D = phi * log2(phi / p) + (1 - phi) * log2((1 - phi) / (1 - p))</c>.
        /// </summary>
        /// <param name="stats">Statistics supplying the total term frequency and the document count.</param>
        /// <param name="tfn">Term frequency value used in the formula.</param>
        /// <returns>The value of the formula, narrowed to <see cref="float"/>.</returns>
        public override sealed float Score(BasicStats stats, float tfn)
        {
            // phi must always stay below 1 for tiny total term frequencies, otherwise
            // (1 - phi) can go negative and produce NaN. tfn is therefore always added
            // to the total term frequency to form a 'normalized' F.
            double normalizedTermFreq = stats.TotalTermFreq + 1 + tfn;
            double phi = (double)tfn / normalizedTermFreq;
            double complement = 1 - phi;
            double prior = 1.0 / (stats.NumberOfDocuments + 1);

            double divergence = phi * SimilarityBase.Log2(phi / prior)
                                + complement * SimilarityBase.Log2(complement / (1 - prior));
            double correction = 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * tfn * complement);

            return (float)(divergence * normalizedTermFreq + correction);
        }
Beispiel #12
0
        public virtual void TestBEB1()
        {
            // Checks DFRSimilarity(BasicModelBE, AfterEffectB, NormalizationH1) against a
            // hand-computed score. Numeric annotations are carried over from the original comments.
            SimilarityBase similarity = new DFRSimilarity(new BasicModelBE(), new AfterEffectB(), new NormalizationH1());

            float tfn = FREQ * AVG_FIELD_LENGTH / DOC_LEN;                                // 8.75
            float afterEffect = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (tfn + 1)); // 0.67132866

            double termFreq = TOTAL_TERM_FREQ + 1 + tfn;
            double docCount = termFreq + NUMBER_OF_DOCUMENTS;

            // Arguments of the two f(n, m) evaluations.
            double upperN = docCount + termFreq - 1;       // 258.5
            double upperM = docCount + termFreq - tfn - 2; // 248.75
            double lowerN = termFreq;                      // 79.75
            double lowerM = termFreq - tfn;                // 71.0

            // -log2(n - 1) - log2(e)
            double logPrefix = -SimilarityBase.Log2(docCount - 1) - SimilarityBase.Log2(Math.E); // -8.924494472554715

            // f(n1, m1)
            double upperF = (upperM + 0.5f) * SimilarityBase.Log2(upperN / upperM) + (upperN - upperM) * SimilarityBase.Log2(upperN); // 91.9620374903885

            // f(n2, m2)
            double lowerF = (lowerM + 0.5f) * SimilarityBase.Log2(lowerN / lowerM) + (lowerN - lowerM) * SimilarityBase.Log2(lowerN); // 67.26544321004599

            float basicModel = (float)(logPrefix + upperF - lowerF); // 15.7720995
            float expected = afterEffect * basicModel;               // 10.588263

            CorrectnessTestCore(similarity, expected);
        }
Beispiel #13
0
 /// <summary>
 /// Computes <c>tf * log2(1 + c * stats.AvgFieldLength / len)</c>.
 /// </summary>
 /// <param name="stats">Statistics supplying the average field length.</param>
 /// <param name="tf">Term frequency to be scaled.</param>
 /// <param name="len">Length value that divides the scaled average field length.</param>
 /// <returns>The scaled term frequency, narrowed to <see cref="float"/>.</returns>
 public override sealed float Tfn(BasicStats stats, float tf, float len)
 {
     double lengthFactor = SimilarityBase.Log2(1 + c * stats.AvgFieldLength / len);

     return (float)(tf * lengthFactor);
 }
 /// <summary>
 /// Shared core of the correctness tests: scores <paramref name="sim"/> using the
 /// statistics produced by <c>CreateStats()</c> and asserts that the result equals
 /// <paramref name="gold"/> within <c>FLOAT_EPSILON</c>.
 /// </summary>
 /// <param name="sim">The similarity whose score is checked.</param>
 /// <param name="gold">The expected score, computed by the calling test.</param>
 private void CorrectnessTestCore(SimilarityBase sim, float gold)
 {
     BasicStats template = CreateStats();

     // Let the similarity build its own weight from the template's boost,
     // collection statistics and term statistics.
     var weight = sim.ComputeWeight(template.TotalBoost, ToCollectionStats(template), ToTermStats(template));
     BasicStats computedStats = (BasicStats)weight;

     float actual = sim.Score(computedStats, FREQ, DOC_LEN);
     string failureMessage = sim.ToString() + " score not correct.";

     Assert.AreEqual(gold, actual, FLOAT_EPSILON, failureMessage);
 }
Beispiel #15
0
 /// <summary>
 /// Creates a scorer that stores the given similarity, statistics and norms
 /// in its <c>OuterInstance</c>, <c>Stats</c> and <c>Norms</c> members.
 /// </summary>
 /// <param name="outerInstance">Value stored in <c>OuterInstance</c>.</param>
 /// <param name="stats">Value stored in <c>Stats</c>.</param>
 /// <param name="norms">Value stored in <c>Norms</c>.</param>
 internal BasicSimScorer(SimilarityBase outerInstance, BasicStats stats, NumericDocValues norms)
 {
     OuterInstance = outerInstance;
     Stats = stats;
     Norms = norms;
 }
Beispiel #16
0
        /// <summary>
        /// Computes
        /// <c>tfn * log2(tfn / lambda) + (lambda + 1 / (12 * tfn) - tfn) * LOG2_E + 0.5 * log2(2 * pi * tfn)</c>,
        /// where <c>lambda = (stats.TotalTermFreq + 1) / (stats.NumberOfDocuments + 1)</c>.
        /// </summary>
        /// <param name="stats">Statistics supplying the total term frequency and the document count.</param>
        /// <param name="tfn">Term frequency value used in the formula.</param>
        /// <returns>The value of the formula, narrowed to <see cref="float"/>.</returns>
        public override sealed float Score(BasicStats stats, float tfn)
        {
            // The float cast on the numerator keeps the division from being done on integers.
            float lambda = (float)(stats.TotalTermFreq + 1) / (stats.NumberOfDocuments + 1);

            // The result is the sum of these three terms, added left to right.
            double ratioTerm = tfn * SimilarityBase.Log2(tfn / lambda);
            double linearTerm = (lambda + 1 / (12 * tfn) - tfn) * LOG2_E;
            double rootTerm = 0.5 * SimilarityBase.Log2(2 * Math.PI * tfn);

            return (float)(ratioTerm + linearTerm + rootTerm);
        }
Beispiel #17
0
 /// <summary>
 /// The <em>f</em> helper function defined for <em>B<sub>E</sub></em>:
 /// <c>(m + 0.5) * log2(n / m) + (n - m) * log2(n)</c>.
 /// </summary>
 /// <param name="n">First argument of <em>f</em>.</param>
 /// <param name="m">Second argument of <em>f</em>.</param>
 /// <returns>The value of <em>f(n, m)</em>.</returns>
 private double F(double n, double m)
 {
     double ratioTerm = (m + 0.5) * SimilarityBase.Log2(n / m);
     double differenceTerm = (n - m) * SimilarityBase.Log2(n);

     return ratioTerm + differenceTerm;
 }
Beispiel #18
0
 /// <summary>
 /// Creates a scorer that stores the given similarity, statistics and norms
 /// in its <c>OuterInstance</c>, <c>Stats</c> and <c>Norms</c> members.
 /// </summary>
 /// <param name="outerInstance">Value stored in <c>OuterInstance</c>.</param>
 /// <param name="stats">Value stored in <c>Stats</c>.</param>
 /// <param name="norms">Value stored in <c>Norms</c>.</param>
 internal BasicSimScorer(SimilarityBase outerInstance, BasicStats stats, NumericDocValues norms)
 {
     OuterInstance = outerInstance;
     Stats = stats;
     Norms = norms;
 }