/// <summary>
/// Correctness check for a <c>DFRSimilarity</c> built from <c>BasicModelIn</c>,
/// <c>AfterEffect.NoAfterEffect</c> and <c>NormalizationH2</c>.
/// The gold score is computed by hand from the fixture constants and passed,
/// together with the similarity, to <c>CorrectnessTestCore</c>.
/// </summary>
/// <remarks>
/// Numeric annotations are carried over from the original comments; they depend on
/// fixture constants declared outside this method.
/// </remarks>
public virtual void TestIn2()
{
    SimilarityBase sim = new DFRSimilarity(
        new BasicModelIn(),
        new AfterEffect.NoAfterEffect(),
        new NormalizationH2());

    // Normalized term frequency: FREQ * log2(1 + avgFieldLength / docLen).
    // Annotated value: 8.1894750101
    float tfn = (float)(FREQ * SimilarityBase.Log2(1 + AVG_FIELD_LENGTH / DOC_LEN));

    // Expected score: tfn * log2((numDocs + 1) / (docFreq + 0.5)).
    // Annotated value: 26.7459577898
    float gold = (float)(tfn * SimilarityBase.Log2((NUMBER_OF_DOCUMENTS + 1) / (DOC_FREQ + 0.5)));

    CorrectnessTestCore(sim, gold);
}
/// <summary>
/// Correctness check for a <c>DFRSimilarity</c> built from <c>BasicModelIF</c>,
/// <c>AfterEffectB</c> and <c>Normalization.NoNormalization</c>.
/// The gold score is the product of the hand-computed after-effect factor and the
/// hand-computed basic-model value, passed to <c>CorrectnessTestCore</c>.
/// </summary>
/// <remarks>
/// Numeric annotations are carried over from the original comments; they depend on
/// fixture constants declared outside this method.
/// NOTE(review): the 0.8875 and 7.96761458307 annotations look like they may predate
/// the "+ 1" smoothing terms now present in the B formula; re-derive them against the
/// fixture constants before relying on them.
/// </remarks>
public virtual void TestIFB()
{
    SimilarityBase sim = new DFRSimilarity(
        new BasicModelIF(),
        new AfterEffectB(),
        new Normalization.NoNormalization());

    // After-effect factor: (totalTermFreq + 1 + 1) / ((docFreq + 1) * (FREQ + 1)).
    // Annotated value: 0.8875 (see review note above)
    float B = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (FREQ + 1));

    // Basic-model value: FREQ * log2(1 + (numDocs + 1) / (totalTermFreq + 0.5)).
    // Annotated value: 8.97759389642
    float IF = (float)(FREQ * SimilarityBase.Log2(1 + (NUMBER_OF_DOCUMENTS + 1) / (TOTAL_TERM_FREQ + 0.5)));

    // Annotated value: 7.96761458307 (see review note above)
    float gold = B * IF;

    CorrectnessTestCore(sim, gold);
}
/// <summary>
/// Correctness check for a <c>DFRSimilarity</c> built from <c>BasicModelP</c>,
/// <c>AfterEffectL</c> and <c>NormalizationH2</c>.
/// The gold score is the product of the hand-computed after-effect factor <c>l</c>
/// and the hand-computed basic-model value <c>p</c>, passed to <c>CorrectnessTestCore</c>.
/// </summary>
/// <remarks>
/// Numeric annotations are carried over from the original comments; they depend on
/// fixture constants declared outside this method.
/// </remarks>
public virtual void TestPL2()
{
    SimilarityBase sim = new DFRSimilarity(
        new BasicModelP(),
        new AfterEffectL(),
        new NormalizationH2());

    // Normalized term frequency: FREQ * log2(1 + avgFieldLength / docLen).
    // Annotated value: 8.1894750101
    float tfn = (float)(FREQ * SimilarityBase.Log2(1 + AVG_FIELD_LENGTH / DOC_LEN));

    // After-effect factor: 1 / (tfn + 1). Annotated value: 0.108820144666
    float l = 1.0f / (tfn + 1.0f);

    // (1 + totalTermFreq) / (1 + numDocs). Annotated value: 0.7029703
    float lambda = (1.0f + TOTAL_TERM_FREQ) / (1f + NUMBER_OF_DOCUMENTS);

    // Basic-model value, written as three summands. Annotated value: 21.065619
    float p = (float)(
        tfn * SimilarityBase.Log2(tfn / lambda)
        + (lambda + 1 / (12 * tfn) - tfn) * SimilarityBase.Log2(Math.E)
        + 0.5 * SimilarityBase.Log2(2 * Math.PI * tfn));

    // Annotated value: 2.2923636
    float gold = l * p;

    CorrectnessTestCore(sim, gold);
}
/// <summary>
/// Correctness check for a <c>DFRSimilarity</c> built from <c>BasicModelD</c>,
/// <c>AfterEffect.NoAfterEffect</c> and <c>Normalization.NoNormalization</c>.
/// The gold score is computed by hand in double precision, narrowed to <c>float</c>,
/// and passed to <c>CorrectnessTestCore</c>.
/// </summary>
/// <remarks>
/// Numeric annotations are carried over from the original comments; they depend on
/// fixture constants declared outside this method.
/// </remarks>
public virtual void TestD()
{
    SimilarityBase sim = new DFRSimilarity(
        new BasicModelD(),
        new AfterEffect.NoAfterEffect(),
        new Normalization.NoNormalization());

    double totalTermFreqNorm = TOTAL_TERM_FREQ + FREQ + 1;

    // 1 / (numDocs + 1). Annotated value: 0.009900990099009901
    double p = 1.0 / (NUMBER_OF_DOCUMENTS + 1);

    // FREQ / totalTermFreqNorm. Annotated value: 0.08974358974358974
    double phi = FREQ / totalTermFreqNorm;

    // phi * log2(phi / p) + (1 - phi) * log2((1 - phi) / (1 - p)).
    // Annotated value: 0.17498542370019005
    double D = phi * SimilarityBase.Log2(phi / p)
               + (1 - phi) * SimilarityBase.Log2((1 - phi) / (1 - p));

    // Annotated value: 16.328257
    float gold = (float)(totalTermFreqNorm * D
                         + 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * FREQ * (1 - phi)));

    CorrectnessTestCore(sim, gold);
}
/// <summary>
/// Asserts that a <c>DFRSimilarity</c> and a <c>DefaultSimilarity</c> return equal
/// <c>ComputeNorm</c> results for the same <c>FieldInvertState</c>, first with
/// <c>DiscountOverlaps</c> disabled on both and then with it enabled on both.
/// </summary>
public virtual void TestDiscountOverlapsBoost()
{
    DefaultSimilarity expected = new DefaultSimilarity();
    SimilarityBase actual = new DFRSimilarity(
        new BasicModelIne(),
        new AfterEffectB(),
        new NormalizationH2());

    // Field state with a non-zero overlap count and a non-default boost.
    FieldInvertState state = new FieldInvertState("foo")
    {
        Length = 5,
        NumOverlap = 2,
        Boost = 3
    };

    // Same comparison for both settings of the flag: false first, then true.
    foreach (bool discountOverlaps in new[] { false, true })
    {
        expected.DiscountOverlaps = discountOverlaps;
        actual.DiscountOverlaps = discountOverlaps;
        Assert.AreEqual(expected.ComputeNorm(state), actual.ComputeNorm(state));
    }
}
/// <summary>
/// Correctness check for a <c>DFRSimilarity</c> built from <c>BasicModelBE</c>,
/// <c>AfterEffectB</c> and <c>NormalizationH1</c>.
/// The gold score is the product of the hand-computed after-effect factor <c>b</c>
/// and the hand-computed basic-model value <c>be</c>, passed to <c>CorrectnessTestCore</c>.
/// </summary>
/// <remarks>
/// Numeric annotations are carried over from the original comments; they depend on
/// fixture constants declared outside this method.
/// </remarks>
public virtual void TestBEB1()
{
    SimilarityBase sim = new DFRSimilarity(
        new BasicModelBE(),
        new AfterEffectB(),
        new NormalizationH1());

    // Normalized term frequency: FREQ * avgFieldLength / docLen. Annotated value: 8.75
    float tfn = FREQ * AVG_FIELD_LENGTH / DOC_LEN;

    // After-effect factor: (totalTermFreq + 1 + 1) / ((docFreq + 1) * (tfn + 1)).
    // Annotated value: 0.67132866
    float b = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (tfn + 1));

    double f = TOTAL_TERM_FREQ + 1 + tfn;
    double n = f + NUMBER_OF_DOCUMENTS;
    double n1 = n + f - 1;       // 258.5
    double m1 = n + f - tfn - 2; // 248.75
    double n2 = f;               // 79.75
    double m2 = f - tfn;         // 71.0

    // Three partial sums; the original annotations give them as
    // -8.924494472554715, 91.9620374903885 and 67.26544321004599 respectively.
    float be = (float)(-SimilarityBase.Log2(n - 1) - SimilarityBase.Log2(Math.E)
                       + ((m1 + 0.5f) * SimilarityBase.Log2(n1 / m1) + (n1 - m1) * SimilarityBase.Log2(n1))
                       - ((m2 + 0.5f) * SimilarityBase.Log2(n2 / m2) + (n2 - m2) * SimilarityBase.Log2(n2)));
    // Annotated value of be: 15.7720995

    // Annotated value: 10.588263
    float gold = b * be;

    CorrectnessTestCore(sim, gold);
}
/// <summary>
/// Correctness check for a <c>DFRSimilarity</c> built from <c>BasicModelG</c>,
/// <c>AfterEffectL</c> and <c>NormalizationH1</c>, using a hard-coded gold score
/// (its derivation is not shown in this method).
/// </summary>
public virtual void TestGL1()
{
    const float gold = 1.6390540599822998f;

    SimilarityBase sim = new DFRSimilarity(
        new BasicModelG(),
        new AfterEffectL(),
        new NormalizationH1());

    CorrectnessTestCore(sim, gold);
}
/// <summary>
/// Correctness check for a <c>DFRSimilarity</c> built from <c>BasicModelIne</c>,
/// <c>AfterEffectB</c> and <c>NormalizationH2</c>, using a hard-coded gold score
/// (its derivation is not shown in this method).
/// </summary>
public virtual void TestIneB2()
{
    const float gold = 5.747603416442871f;

    SimilarityBase sim = new DFRSimilarity(
        new BasicModelIne(),
        new AfterEffectB(),
        new NormalizationH2());

    CorrectnessTestCore(sim, gold);
}