/// <summary>
/// Computes
/// <c>-log2((N - 1) * e) + f(N + F - 1, N + F - tfn - 2) - f(F, F - tfn)</c>,
/// where <c>F = TotalTermFreq + 1 + tfn</c>, <c>N = F + NumberOfDocuments</c>
/// and <c>f</c> is this class's private <c>F(double, double)</c> helper.
/// </summary>
/// <param name="stats">Statistics object; only <c>TotalTermFreq</c> and
/// <c>NumberOfDocuments</c> are read.</param>
/// <param name="tfn">Term-frequency value supplied by the caller
/// (presumably the normalized term frequency; confirm against the base class).</param>
/// <returns>The value of the expression above, narrowed to <see cref="float"/>.</returns>
public override sealed float Score(BasicStats stats, float tfn)
{
    double F = stats.TotalTermFreq + 1 + tfn;
    // approximation only holds true when F << N, so we use N += F
    double N = F + stats.NumberOfDocuments;
    return (float)(-SimilarityBase.Log2((N - 1) * Math.E)
        + this.F(N + F - 1, N + F - tfn - 2)
        - this.F(F, F - tfn));
}
/// <summary>
/// Computes <c>tfn * log2((N + 1) / (ne + 0.5))</c>, where
/// <c>N = NumberOfDocuments</c>, <c>F = TotalTermFreq</c> and
/// <c>ne = N * (1 - ((N - 1) / N)^F)</c>.
/// </summary>
/// <param name="stats">Statistics object; only <c>NumberOfDocuments</c> and
/// <c>TotalTermFreq</c> are read.</param>
/// <param name="tfn">Term-frequency value supplied by the caller; it scales the logarithm.</param>
/// <returns>The product of <paramref name="tfn"/> and the logarithm, as a <see cref="float"/>.</returns>
public override sealed float Score(BasicStats stats, float tfn)
{
    long docCount = stats.NumberOfDocuments;
    long totalTermFreq = stats.TotalTermFreq;

    // ((N - 1) / N) raised to the total term frequency; the division is done in double.
    double decay = Math.Pow((docCount - 1) / (double)docCount, totalTermFreq);
    double ne = docCount * (1 - decay);

    double logRatio = SimilarityBase.Log2((docCount + 1) / (ne + 0.5));

    // The logarithm is narrowed to float before the multiplication, as in the original expression.
    return tfn * (float)logRatio;
}
/// <summary>
/// Computes <c>D * F + 0.5 * log2(1 + 2 * pi * tfn * nphi)</c>, where
/// <c>F = TotalTermFreq + 1 + tfn</c>, <c>phi = tfn / F</c>, <c>nphi = 1 - phi</c>,
/// <c>p = 1 / (NumberOfDocuments + 1)</c> and
/// <c>D = phi * log2(phi / p) + nphi * log2(nphi / (1 - p))</c>.
/// </summary>
/// <param name="stats">Statistics object; only <c>TotalTermFreq</c> and
/// <c>NumberOfDocuments</c> are read.</param>
/// <param name="tfn">Term-frequency value supplied by the caller
/// (presumably the normalized term frequency; confirm against the base class).</param>
/// <returns>The value of the expression above, narrowed to <see cref="float"/>.</returns>
public override sealed float Score(BasicStats stats, float tfn)
{
    // we have to ensure phi is always < 1 for tiny TTF values, otherwise nphi can go negative,
    // resulting in NaN. cleanest way is to unconditionally always add tfn to totalTermFreq
    // to create a 'normalized' F.
    double F = stats.TotalTermFreq + 1 + tfn;
    double phi = (double)tfn / F;
    double nphi = 1 - phi;
    double p = 1.0 / (stats.NumberOfDocuments + 1);
    double D = phi * SimilarityBase.Log2(phi / p) + nphi * SimilarityBase.Log2(nphi / (1 - p));
    return (float)(D * F + 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * tfn * nphi));
}
/// <summary>
/// The <em>f</em> helper function defined for <em>B<sub>E</sub></em>:
/// <c>(m + 0.5) * log2(n / m) + (n - m) * log2(n)</c>.
/// </summary>
/// <param name="n">First argument; used as the numerator of <c>n / m</c> and inside <c>log2(n)</c>.</param>
/// <param name="m">Second argument; used as the divisor of <c>n / m</c>. No guard against zero is applied.</param>
/// <returns>The value of the expression above.</returns>
private double F(double n, double m)
{
    return (m + 0.5) * SimilarityBase.Log2(n / m) + (n - m) * SimilarityBase.Log2(n);
}
/// <summary>
/// Computes
/// <c>tfn * log2(tfn / lambda) + (lambda + 1 / (12 * tfn) - tfn) * LOG2_E + 0.5 * log2(2 * pi * tfn)</c>,
/// where <c>lambda = (TotalTermFreq + 1) / (NumberOfDocuments + 1)</c>.
/// </summary>
/// <param name="stats">Statistics object; only <c>TotalTermFreq</c> and
/// <c>NumberOfDocuments</c> are read.</param>
/// <param name="tfn">Term-frequency value supplied by the caller; used as a divisor in
/// <c>1 / (12 * tfn)</c> with no guard against zero.</param>
/// <returns>The sum of the three terms, narrowed to <see cref="float"/>.</returns>
public override sealed float Score(BasicStats stats, float tfn)
{
    // Smoothed ratio of total term frequency to document count, computed in single precision.
    float avgFreqPerDoc = (float)(stats.TotalTermFreq + 1) / (stats.NumberOfDocuments + 1);

    double logTerm = tfn * SimilarityBase.Log2(tfn / avgFreqPerDoc);

    // LOG2_E is declared outside this method (presumably log2(e); confirm at its declaration).
    double correctionTerm = (avgFreqPerDoc + 1 / (12 * tfn) - tfn) * LOG2_E;

    double halfLogTerm = 0.5 * SimilarityBase.Log2(2 * Math.PI * tfn);

    // Summed left to right, matching the original expression's evaluation order.
    return (float)(logTerm + correctionTerm + halfLogTerm);
}