/// <summary>
/// Scores the term as <c>tfn * log2(1 + (N + 1) / (F + 0.5))</c>, where
/// <c>N</c> is the number of documents and <c>F</c> is the total term frequency.
/// </summary>
/// <param name="stats">Collection-level statistics for the term.</param>
/// <param name="tfn">The normalized term frequency.</param>
/// <returns>The informative content contribution of the term.</returns>
public override sealed float Score(BasicStats stats, float tfn)
{
    long docCount = stats.NumberOfDocuments;
    long totalFreq = stats.TotalTermFreq;
    // (docCount + 1) is a long, (totalFreq + 0.5) promotes the division to double.
    double ratio = (docCount + 1) / (totalFreq + 0.5);
    return tfn * (float)SimilarityBase.Log2(1 + ratio);
}
/// <summary>
/// Scores the term as <c>tfn * log2((N + 1) / (n + 0.5))</c>, where
/// <c>N</c> is the number of documents and <c>n</c> is the document frequency.
/// </summary>
/// <param name="stats">Collection-level statistics for the term.</param>
/// <param name="tfn">The normalized term frequency.</param>
/// <returns>The informative content contribution of the term.</returns>
public override sealed float Score(BasicStats stats, float tfn)
{
    long docCount = stats.NumberOfDocuments;
    long docFreq = stats.DocFreq;
    // (docFreq + 0.5) promotes the quotient to double before taking log2.
    double idfLike = SimilarityBase.Log2((docCount + 1) / (docFreq + 0.5));
    return tfn * (float)idfLike;
}
/// <summary>
/// The generic test core called by all correctness test methods. It calls the
/// <seealso cref="SimilarityBase#score(BasicStats, float, float)"/> method of all
/// Similarities in <seealso cref="#sims"/> and compares the score against the manually
/// computed {@code gold}.
/// </summary>
/// <param name="sim">The similarity under test.</param>
/// <param name="gold">The manually computed expected score.</param>
private void CorrectnessTestCore(SimilarityBase sim, float gold)
{
    BasicStats baseStats = CreateStats();
    // Round-trip the stats through ComputeWeight so the similarity sees
    // exactly what it would see in a real search.
    BasicStats weighted = (BasicStats)sim.ComputeWeight(
        baseStats.TotalBoost,
        ToCollectionStats(baseStats),
        ToTermStats(baseStats));
    float actual = sim.Score(weighted, FREQ, DOC_LEN);
    Assert.AreEqual(gold, actual, FLOAT_EPSILON, sim.ToString() + " score not correct.");
}
/// <summary>
/// Scores the term with the Bose-Einstein model using the <see cref="F"/> helper:
/// <c>-log2((N - 1) * e) + F(N + F - 1, N + F - tfn - 2) - F(F, F - tfn)</c>.
/// </summary>
/// <param name="stats">Collection-level statistics for the term.</param>
/// <param name="tfn">The normalized term frequency.</param>
/// <returns>The informative content contribution of the term.</returns>
public override sealed float Score(BasicStats stats, float tfn)
{
    double f = stats.TotalTermFreq + 1 + tfn;
    // approximation only holds true when F << N, so we use N += F
    double n = f + stats.NumberOfDocuments;
    double first = -SimilarityBase.Log2((n - 1) * Math.E);
    double second = this.F(n + f - 1, n + f - tfn - 2);
    double third = this.F(f, f - tfn);
    return (float)(first + second - third);
}
/// <summary>
/// Scores the term as <c>tfn * log2((N + 1) / (ne + 0.5))</c>, where
/// <c>ne = N * (1 - ((N - 1) / N)^F)</c> replaces the raw document frequency.
/// </summary>
/// <param name="stats">Collection-level statistics for the term.</param>
/// <param name="tfn">The normalized term frequency.</param>
/// <returns>The informative content contribution of the term.</returns>
public override sealed float Score(BasicStats stats, float tfn)
{
    long docCount = stats.NumberOfDocuments;
    long totalFreq = stats.TotalTermFreq;
    // Probability that a given document contains none of the F occurrences,
    // assuming occurrences fall into documents uniformly at random.
    double pMiss = Math.Pow((docCount - 1) / (double)docCount, totalFreq);
    double ne = docCount * (1 - pMiss);
    return tfn * (float)SimilarityBase.Log2((docCount + 1) / (ne + 0.5));
}
/// <summary>
/// Verifies the In basic model combined with the H2 normalization
/// (no after-effect) against a manually computed gold score.
/// </summary>
public virtual void TestIn2()
{
    SimilarityBase similarity = new DFRSimilarity(
        new BasicModelIn(),
        new AfterEffect.NoAfterEffect(),
        new NormalizationH2());
    float normTf = (float)(FREQ * SimilarityBase.Log2(1 + AVG_FIELD_LENGTH / DOC_LEN)); // 8.1894750101
    float expected = (float)(normTf * SimilarityBase.Log2((NUMBER_OF_DOCUMENTS + 1) / (DOC_FREQ + 0.5))); // 26.7459577898
    CorrectnessTestCore(similarity, expected);
}
/// <summary>
/// Verifies the IF basic model combined with the B after-effect
/// (no normalization) against a manually computed gold score.
/// </summary>
public virtual void TestIFB()
{
    SimilarityBase similarity = new DFRSimilarity(
        new BasicModelIF(),
        new AfterEffectB(),
        new Normalization.NoNormalization());
    float bernoulli = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (FREQ + 1)); // 0.8875
    float invFreq = (float)(FREQ * SimilarityBase.Log2(1 + (NUMBER_OF_DOCUMENTS + 1) / (TOTAL_TERM_FREQ + 0.5))); // 8.97759389642
    float expected = bernoulli * invFreq; // 7.96761458307
    CorrectnessTestCore(similarity, expected);
}
/// <summary>
/// Scores the term with the geometric (G) model:
/// <c>log2(lambda + 1) + tfn * log2((1 + lambda) / lambda)</c>.
/// </summary>
/// <param name="stats">Collection-level statistics for the term.</param>
/// <param name="tfn">The normalized term frequency.</param>
/// <returns>The informative content contribution of the term.</returns>
public override sealed float Score(BasicStats stats, float tfn)
{
    // just like in BE, approximation only holds true when F << N, so we use lambda = F / (N + F)
    double totalFreq = stats.TotalTermFreq + 1;
    double docCount = stats.NumberOfDocuments;
    double lambda = totalFreq / (docCount + totalFreq);
    // -log(1 / (lambda + 1)) -> log(lambda + 1)
    double rarity = SimilarityBase.Log2(lambda + 1);
    double informativeness = tfn * SimilarityBase.Log2((1 + lambda) / lambda);
    return (float)(rarity + informativeness);
}
/// <summary>
/// Verifies the D basic model (no after-effect, no normalization)
/// against a manually computed gold score.
/// </summary>
public virtual void TestD()
{
    SimilarityBase similarity = new DFRSimilarity(
        new BasicModelD(),
        new AfterEffect.NoAfterEffect(),
        new Normalization.NoNormalization());
    double normTotalFreq = TOTAL_TERM_FREQ + FREQ + 1;
    double priorP = 1.0 / (NUMBER_OF_DOCUMENTS + 1); // 0.009900990099009901
    double phi = FREQ / normTotalFreq; // 0.08974358974358974
    // KL divergence between the observed ratio phi and the prior p.
    double divergence = phi * SimilarityBase.Log2(phi / priorP)
        + (1 - phi) * SimilarityBase.Log2((1 - phi) / (1 - priorP)); // 0.17498542370019005
    float expected = (float)(normTotalFreq * divergence
        + 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * FREQ * (1 - phi))); // 16.328257
    CorrectnessTestCore(similarity, expected);
}
/// <summary>
/// Verifies the P basic model combined with the L after-effect and the
/// H2 normalization against a manually computed gold score.
/// </summary>
public virtual void TestPL2()
{
    SimilarityBase similarity = new DFRSimilarity(
        new BasicModelP(),
        new AfterEffectL(),
        new NormalizationH2());
    float normTf = (float)(FREQ * SimilarityBase.Log2(1 + AVG_FIELD_LENGTH / DOC_LEN)); // 8.1894750101
    float laplace = 1.0f / (normTf + 1.0f); // 0.108820144666
    float lambda = (1.0f + TOTAL_TERM_FREQ) / (1f + NUMBER_OF_DOCUMENTS); // 0.7029703
    // Poisson model score, including the Stirling correction terms.
    float poisson = (float)(normTf * SimilarityBase.Log2(normTf / lambda)
        + (lambda + 1 / (12 * normTf) - normTf) * SimilarityBase.Log2(Math.E)
        + 0.5 * SimilarityBase.Log2(2 * Math.PI * normTf)); // 21.065619
    float expected = laplace * poisson; // 2.2923636
    CorrectnessTestCore(similarity, expected);
}
/// <summary>
/// Scores the term with the divergence (D) model: a KL-divergence term
/// <c>D</c> scaled by the normalized total term frequency, plus a
/// Stirling-style correction.
/// </summary>
/// <param name="stats">Collection-level statistics for the term.</param>
/// <param name="tfn">The normalized term frequency.</param>
/// <returns>The informative content contribution of the term.</returns>
public override sealed float Score(BasicStats stats, float tfn)
{
    // we have to ensure phi is always < 1 for tiny TTF values, otherwise nphi can go negative,
    // resulting in NaN. cleanest way is to unconditionally always add tfn to totalTermFreq
    // to create a 'normalized' F.
    double normTotalFreq = stats.TotalTermFreq + 1 + tfn;
    double phi = (double)tfn / normTotalFreq;
    double nphi = 1 - phi;
    double priorP = 1.0 / (stats.NumberOfDocuments + 1);
    double divergence = phi * SimilarityBase.Log2(phi / priorP)
        + nphi * SimilarityBase.Log2(nphi / (1 - priorP));
    return (float)(divergence * normTotalFreq
        + 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * tfn * nphi));
}
/// <summary>
/// Verifies the BE basic model combined with the B after-effect and the
/// H1 normalization against a manually computed gold score.
/// </summary>
public virtual void TestBEB1()
{
    SimilarityBase similarity = new DFRSimilarity(
        new BasicModelBE(),
        new AfterEffectB(),
        new NormalizationH1());
    float tfn = FREQ * AVG_FIELD_LENGTH / DOC_LEN; // 8.75
    float b = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (tfn + 1)); // 0.67132866
    double f = TOTAL_TERM_FREQ + 1 + tfn;
    double n = f + NUMBER_OF_DOCUMENTS;
    // Arguments of the two applications of the BE helper function F.
    double n1 = n + f - 1; // 258.5
    double m1 = n + f - tfn - 2; // 248.75
    double n2 = f; // 79.75
    double m2 = f - tfn; // 71.0
    float be = (float)(-SimilarityBase.Log2(n - 1)
        - SimilarityBase.Log2(Math.E)
        + ((m1 + 0.5f) * SimilarityBase.Log2(n1 / m1) + (n1 - m1) * SimilarityBase.Log2(n1))
        - ((m2 + 0.5f) * SimilarityBase.Log2(n2 / m2) + (n2 - m2) * SimilarityBase.Log2(n2)));
    // 67.26544321004599 - 91.9620374903885 - -8.924494472554715
    // 15.7720995
    float expected = b * be; // 10.588263
    CorrectnessTestCore(similarity, expected);
}
/// <summary>
/// Normalizes the raw term frequency as
/// <c>tf * log2(1 + c * avgFieldLength / len)</c> (H2 normalization).
/// </summary>
/// <param name="stats">Collection-level statistics (supplies the average field length).</param>
/// <param name="tf">The raw term frequency.</param>
/// <param name="len">The length of the current field.</param>
/// <returns>The normalized term frequency.</returns>
public override sealed float Tfn(BasicStats stats, float tf, float len)
{
    // Keep the inner expression in float, exactly as the single-precision
    // formula specifies, before widening for log2.
    float lengthBoost = c * stats.AvgFieldLength / len;
    return (float)(tf * SimilarityBase.Log2(1 + lengthBoost));
}
/// <summary>
/// Creates a scorer bound to the given similarity, per-term statistics,
/// and (possibly null) length normalization values.
/// </summary>
/// <param name="outerInstance">The owning <see cref="SimilarityBase"/> instance.</param>
/// <param name="stats">Precomputed statistics for the term being scored.</param>
/// <param name="norms">Document length norms for the field.</param>
internal BasicSimScorer(SimilarityBase outerInstance, BasicStats stats, NumericDocValues norms)
{
    this.Norms = norms;
    this.Stats = stats;
    this.OuterInstance = outerInstance;
}
/// <summary>
/// Scores the term with the Poisson (P) model:
/// <c>tfn * log2(tfn / lambda) + (lambda + 1/(12*tfn) - tfn) * log2(e) + 0.5 * log2(2*pi*tfn)</c>.
/// </summary>
/// <param name="stats">Collection-level statistics for the term.</param>
/// <param name="tfn">The normalized term frequency.</param>
/// <returns>The informative content contribution of the term.</returns>
public override sealed float Score(BasicStats stats, float tfn)
{
    // Mean term frequency per document under the Poisson assumption.
    float lambda = (float)(stats.TotalTermFreq + 1) / (stats.NumberOfDocuments + 1);
    double core = tfn * SimilarityBase.Log2(tfn / lambda);
    double correction = (lambda + 1 / (12 * tfn) - tfn) * LOG2_E;
    double stirling = 0.5 * SimilarityBase.Log2(2 * Math.PI * tfn);
    return (float)(core + correction + stirling);
}
/// <summary>
/// The <em>f</em> helper function defined for <em>B<sub>E</sub></em>:
/// <c>(m + 0.5) * log2(n / m) + (n - m) * log2(n)</c>.
/// </summary>
/// <param name="n">The first argument of the helper.</param>
/// <param name="m">The second argument of the helper.</param>
private double F(double n, double m)
{
    double logRatio = SimilarityBase.Log2(n / m);
    double logN = SimilarityBase.Log2(n);
    return (m + 0.5) * logRatio + (n - m) * logN;
}