Пример #1
0
 /// <summary>
 /// Computes <c>tfn * log2((N + 1) / (ne + 0.5))</c>, where <c>N</c> is the number of
 /// documents, <c>F</c> the total term frequency and <c>ne = N * (1 - ((N - 1) / N)^F)</c>.
 /// </summary>
 /// <param name="stats"> supplies <c>NumberOfDocuments</c> and <c>TotalTermFreq</c>. </param>
 /// <param name="tfn"> the normalized term frequency; multiplies the logarithmic term. </param>
 /// <returns> the score as a single-precision value. </returns>
 public override sealed float Score(BasicStats stats, float tfn)
 {
     long docCount = stats.NumberOfDocuments;
     long termFreqTotal = stats.TotalTermFreq;

     // ne = N * (1 - ((N - 1) / N)^F), evaluated in double precision.
     double missRatio = (docCount - 1) / (double)docCount;
     double ne = docCount * (1 - Math.Pow(missRatio, termFreqTotal));

     double information = SimilarityBase.Log2((docCount + 1) / (ne + 0.5));
     return tfn * (float)information;
 }
Пример #2
0
 /// <summary>
 /// Computes the score from a smoothed total term frequency <c>F</c> and an inflated
 /// document count <c>N</c>, combining a logarithmic term with two calls to the helper <c>f</c>.
 /// </summary>
 /// <param name="stats"> supplies <c>TotalTermFreq</c> and <c>NumberOfDocuments</c>. </param>
 /// <param name="tfn"> the normalized term frequency. </param>
 /// <returns> <c>-log2((N - 1) * e) + f(N + F - 1, N + F - tfn - 2) - f(F, F - tfn)</c>. </returns>
 public override sealed float Score(BasicStats stats, float tfn)
 {
     // F: total term frequency plus one plus tfn.
     double smoothedFreq = stats.TotalTermFreq + 1 + tfn;
     // The approximation only holds true when F << N, so N is enlarged by F.
     double inflatedDocs = smoothedFreq + stats.NumberOfDocuments;

     double leading = -SimilarityBase.Log2((inflatedDocs - 1) * Math.E);
     double upper = f(inflatedDocs + smoothedFreq - 1, inflatedDocs + smoothedFreq - tfn - 2);
     double lower = f(smoothedFreq, smoothedFreq - tfn);
     return (float)(leading + upper - lower);
 }
Пример #3
0
 /// <summary>
 /// Computes <c>-ln((lambda^(tfn / (tfn + 1)) - lambda) / (1 - lambda))</c>.
 /// A <paramref name="lambda"/> of exactly 1 is replaced by 0.99 so the denominator is not zero.
 /// </summary>
 /// <param name="stats"> not read by this implementation. </param>
 /// <param name="tfn"> the normalized term frequency. </param>
 /// <param name="lambda"> the lambda parameter of the formula. </param>
 /// <returns> the score as a single-precision value. </returns>
 public override sealed float Score(BasicStats stats, float tfn, float lambda)
 {
     // 1 - lambda would be zero for lambda == 1, so step slightly below it.
     float safeLambda = lambda == 1f ? 0.99f : lambda;

     double numerator = Math.Pow(safeLambda, tfn / (tfn + 1)) - safeLambda;
     double denominator = 1 - safeLambda;
     return (float)-Math.Log(numerator / denominator);
 }
Пример #4
0
        /// <summary>
        /// Runs the generic score checks on statistics where the field token count is
        /// two thirds of the document count (integer arithmetic) and the total term
        /// frequency equals the document frequency.
        /// </summary>
        public virtual void TestVerySparseDocuments()
        {
            BasicStats sparse = CreateStats();

            sparse.TotalTermFreq = sparse.DocFreq;
            sparse.NumberOfFieldTokens = sparse.NumberOfDocuments * 2 / 3;
            // The average length is derived from the token count assigned just above.
            sparse.AvgFieldLength = (float)sparse.NumberOfFieldTokens / sparse.NumberOfDocuments;

            UnitTestCore(sparse, FREQ, DOC_LEN);
        }
Пример #5
0
        /// <summary>
        /// Runs the generic score checks on statistics where the document frequency is
        /// 60% of the document count and the total term frequency is scaled by the same ratio.
        /// </summary>
        public virtual void TestMostDocumentsRelevant()
        {
            BasicStats relevant = CreateStats();

            // Scale factor taken against the document frequency before it is overwritten.
            float scale = (0.6f * relevant.NumberOfDocuments) / relevant.DocFreq;

            relevant.DocFreq = (int)(relevant.NumberOfDocuments * 0.6);
            relevant.TotalTermFreq = (int)(relevant.TotalTermFreq * scale);

            UnitTestCore(relevant, FREQ, DOC_LEN);
        }
Пример #6
0
        /// <summary>
        /// Runs the generic score checks twice on statistics whose total term frequency
        /// equals the number of field tokens: once as created, once with the average
        /// field length set to <c>DOC_LEN + 10</c>.
        /// </summary>
        public virtual void TestAllTermsRelevant()
        {
            BasicStats allRelevant = CreateStats();
            allRelevant.TotalTermFreq = allRelevant.NumberOfFieldTokens;

            // First pass: default average field length.
            UnitTestCore(allRelevant, DOC_LEN, DOC_LEN);

            // Second pass: same statistics with a larger average field length.
            allRelevant.AvgFieldLength = DOC_LEN + 10;
            UnitTestCore(allRelevant, DOC_LEN, DOC_LEN);
        }
 /// <summary>
 /// Adds a "boost" detail (only when the total boost differs from 1) and a "lambda"
 /// detail to <paramref name="expl"/>, then delegates to the base implementation.
 /// </summary>
 protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
 {
     float boost = stats.TotalBoost;
     if (boost != 1.0f)
     {
         expl.AddDetail(new Explanation(boost, "boost"));
     }

     Explanation lambdaDetail = new Explanation(lambda, "lambda");
     expl.AddDetail(lambdaDetail);

     base.Explain(expl, stats, doc, freq, docLen);
 }
Пример #8
0
        /// <summary>
        /// Builds an explanation whose value is <c>Score(stats, tfn)</c>, described by this
        /// type's name, with <paramref name="tfn"/> as its only detail.
        /// </summary>
        public override sealed Explanation Explain(BasicStats stats, float tfn)
        {
            Explanation explanation = new Explanation
            {
                Description = GetType().Name + ", computed from: ",
                Value = Score(stats, tfn)
            };
            explanation.AddDetail(new Explanation(tfn, "tfn"));
            return explanation;
        }
Пример #9
0
        /// <summary>
        /// Computes <c>log2(lambda + 1) + tfn * log2((1 + lambda) / lambda)</c> with
        /// <c>lambda = F / (N + F)</c>, <c>F = TotalTermFreq + 1</c> and <c>N = NumberOfDocuments</c>.
        /// </summary>
        /// <param name="stats"> supplies <c>TotalTermFreq</c> and <c>NumberOfDocuments</c>. </param>
        /// <param name="tfn"> the normalized term frequency. </param>
        /// <returns> the score as a single-precision value. </returns>
        public override sealed float Score(BasicStats stats, float tfn)
        {
            // The approximation only holds true when F << N, hence lambda = F / (N + F).
            double termFreqPlusOne = stats.TotalTermFreq + 1;
            double docCount = stats.NumberOfDocuments;
            double lambda = termFreqPlusOne / (docCount + termFreqPlusOne);

            // -log(1 / (lambda + 1)) is written as log(lambda + 1).
            double constantPart = SimilarityBase.Log2(lambda + 1);
            double perOccurrence = SimilarityBase.Log2((1 + lambda) / lambda);
            return (float)(constantPart + tfn * perOccurrence);
        }
Пример #10
0
        /// <summary>
        /// Runs the generic score checks on statistics describing a single token: one
        /// field token, document frequency 1 and total term frequency 1.
        /// </summary>
        public virtual void TestOnlyOneTerm()
        {
            BasicStats single = CreateStats();

            // Counts: exactly one occurrence in exactly one document.
            single.DocFreq = 1;
            single.TotalTermFreq = 1;
            single.NumberOfFieldTokens = 1;
            // Average length: one token spread over all documents.
            single.AvgFieldLength = 1.0f / single.NumberOfDocuments;

            UnitTestCore(single, 1, DOC_LEN);
        }
Пример #11
0
 /// <summary>
 /// Creates one statistics object per entry of <paramref name="termStats"/> via
 /// <c>NewStats</c> and <c>FillBasicStats</c>. A single entry is returned directly;
 /// otherwise the entries are wrapped in a <c>MultiSimilarity.MultiStats</c>.
 /// </summary>
 public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     int termCount = termStats.Length;
     BasicStats[] perTerm = new BasicStats[termCount];

     for (int t = 0; t < termCount; t++)
     {
         BasicStats current = NewStats(collectionStats.Field(), queryBoost);
         FillBasicStats(current, collectionStats, termStats[t]);
         perTerm[t] = current;
     }

     if (termCount == 1)
     {
         return perTerm[0];
     }
     return new MultiSimilarity.MultiStats(perTerm);
 }
Пример #12
0
        /// <summary>
        /// Builds an explanation whose value is <c>CalculateLambda(stats)</c>, with the total
        /// term frequency and the number of documents attached as details.
        /// </summary>
        public override sealed Explanation Explain(BasicStats stats)
        {
            Explanation explanation = new Explanation
            {
                Description = GetType().Name + ", computed from: ",
                Value = CalculateLambda(stats)
            };

            explanation.AddDetail(new Explanation(stats.TotalTermFreq, "totalTermFreq"));
            explanation.AddDetail(new Explanation(stats.NumberOfDocuments, "numberOfDocuments"));
            return explanation;
        }
Пример #13
0
 /// <summary>
 /// Builds the weight for a query: one filled statistics object per term statistic.
 /// Exactly one term yields that object itself; any other count yields a
 /// <c>MultiSimilarity.MultiStats</c> over all of them.
 /// </summary>
 public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     BasicStats[] filled = new BasicStats[termStats.Length];

     int index = 0;
     while (index < termStats.Length)
     {
         filled[index] = NewStats(collectionStats.Field(), queryBoost);
         FillBasicStats(filled[index], collectionStats, termStats[index]);
         index++;
     }

     SimWeight weight;
     if (filled.Length == 1)
     {
         weight = filled[0];
     }
     else
     {
         weight = new MultiSimilarity.MultiStats(filled);
     }
     return weight;
 }
Пример #14
0
        /// <summary>
        /// Builds a generic explanation of the normalized term frequency.
        /// <para>The explanation's value is <c>Tfn(stats, tf, len)</c>; its details list the
        /// term frequency, the average field length read from <paramref name="stats"/> and
        /// the field length. Subclasses that use other statistics must override this
        /// method.</para>
        /// </summary>
        /// <param name="stats"> the statistics passed on to <c>Tfn</c>. </param>
        /// <param name="tf"> the term frequency. </param>
        /// <param name="len"> the field length. </param>
        /// <returns> the explanation. </returns>
        public virtual Explanation Explain(BasicStats stats, float tf, float len)
        {
            Explanation explanation = new Explanation
            {
                Description = GetType().Name + ", computed from: ",
                Value = Tfn(stats, tf, len)
            };

            explanation.AddDetail(new Explanation(tf, "tf"));
            explanation.AddDetail(new Explanation(stats.AvgFieldLength, "avgFieldLength"));
            explanation.AddDetail(new Explanation(len, "len"));
            return explanation;
        }
Пример #15
0
        /// <summary>
        /// Creates the default statistics object that the individual tests modify:
        /// field "spoof", boost 1, and the counts taken from the test constants.
        /// </summary>
        private BasicStats CreateStats()
        {
            return new BasicStats("spoof", 1)
            {
                NumberOfDocuments = NUMBER_OF_DOCUMENTS,
                NumberOfFieldTokens = NUMBER_OF_FIELD_TOKENS,
                AvgFieldLength = AVG_FIELD_LENGTH,
                DocFreq = DOC_FREQ,
                TotalTermFreq = TOTAL_TERM_FREQ
            };
        }
Пример #16
0
        /// <summary>
        /// Runs the generic score checks on a 10-document collection of 50 field tokens
        /// in which all 50 tokens are the term and occur in a single document.
        /// </summary>
        public virtual void TestAllTermsRelevantOnlyOneDocument()
        {
            BasicStats concentrated = CreateStats();

            // Collection shape.
            concentrated.NumberOfDocuments = 10;
            concentrated.AvgFieldLength = 5;
            concentrated.NumberOfFieldTokens = 50;

            // Term shape: every token, all in one document.
            concentrated.TotalTermFreq = 50;
            concentrated.DocFreq = 1;

            UnitTestCore(concentrated, 50, 50);
        }
Пример #17
0
        /// <summary>
        /// Builds a generic explanation of the score.
        /// <para>The explanation's value is <c>Score(stats, tfn)</c>; its details list the
        /// normalized term frequency, the number of documents and the total term
        /// frequency. Subclasses that use other statistics must override this
        /// method.</para>
        /// </summary>
        /// <param name="stats"> the statistics passed on to <c>Score</c>. </param>
        /// <param name="tfn"> the normalized term frequency. </param>
        /// <returns> the explanation. </returns>
        public virtual Explanation Explain(BasicStats stats, float tfn)
        {
            Explanation explanation = new Explanation
            {
                Description = GetType().Name + ", computed from: ",
                Value = Score(stats, tfn)
            };

            explanation.AddDetail(new Explanation(tfn, "tfn"));
            explanation.AddDetail(new Explanation(stats.NumberOfDocuments, "numberOfDocuments"));
            explanation.AddDetail(new Explanation(stats.TotalTermFreq, "totalTermFreq"));
            return explanation;
        }
Пример #18
0
        /// <summary>
        /// Runs the generic score checks on a collection consisting of a single document
        /// of length <c>DOC_LEN</c> in which the term occurs <c>FREQ</c> times.
        /// </summary>
        public virtual void TestOneDocument()
        {
            BasicStats lone = CreateStats();

            // Term shape.
            lone.DocFreq = 1;
            lone.TotalTermFreq = (int)FREQ;

            // Collection shape: the one document is the whole field.
            lone.NumberOfDocuments = 1;
            lone.AvgFieldLength = DOC_LEN;
            lone.NumberOfFieldTokens = DOC_LEN;

            UnitTestCore(lone, FREQ, DOC_LEN);
        }
Пример #19
0
        /// <summary>
        /// Explains the score. The result is described as
        /// <em>score(name-of-similarity, doc=doc-id, freq=term-frequency), computed from:</em>,
        /// carries the value returned by <c>Score(BasicStats, float, float)</c> and has the
        /// term frequency explanation as its first detail. Subclasses content with this
        /// format may add further details in
        /// <c>Explain(Explanation, BasicStats, int, float, float)</c>, which is called last.
        /// </summary>
        /// <param name="stats"> the corpus level statistics. </param>
        /// <param name="doc"> the document id. </param>
        /// <param name="freq"> the term frequency and its explanation. </param>
        /// <param name="docLen"> the document length. </param>
        /// <returns> the explanation. </returns>
        public virtual Explanation Explain(BasicStats stats, int doc, Explanation freq, float docLen)
        {
            Explanation explanation = new Explanation
            {
                Value = Score(stats, freq.Value, docLen),
                Description = "score(" + GetType().Name + ", doc=" + doc + ", freq=" + freq.Value + "), computed from:"
            };
            explanation.AddDetail(freq);

            // Let the concrete similarity append its own details.
            Explain(explanation, stats, doc, freq.Value, docLen);
            return explanation;
        }
Пример #20
0
 /// <summary>
 /// Computes <c>D * F + 0.5 * log2(1 + 2 * pi * tfn * (1 - phi))</c>, where
 /// <c>F = TotalTermFreq + 1 + tfn</c>, <c>phi = tfn / F</c>, <c>p = 1 / (NumberOfDocuments + 1)</c>
 /// and <c>D = phi * log2(phi / p) + (1 - phi) * log2((1 - phi) / (1 - p))</c>.
 /// </summary>
 /// <param name="stats"> supplies <c>TotalTermFreq</c> and <c>NumberOfDocuments</c>. </param>
 /// <param name="tfn"> the normalized term frequency. </param>
 /// <returns> the score as a single-precision value. </returns>
 public override sealed float Score(BasicStats stats, float tfn)
 {
     // phi must stay < 1 for tiny totalTermFreq values, otherwise 1 - phi can go
     // negative and produce NaN; tfn is therefore always added to form a 'normalized' F.
     double normalizedFreq = stats.TotalTermFreq + 1 + tfn;
     double phi = (double)tfn / normalizedFreq;
     double complement = 1 - phi;
     double prior = 1.0 / (stats.NumberOfDocuments + 1);

     double divergence = phi * SimilarityBase.Log2(phi / prior) + complement * SimilarityBase.Log2(complement / (1 - prior));
     double correction = 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * tfn * complement);
     return (float)(divergence * normalizedFreq + correction);
 }
Пример #21
0
 /// <summary>
 /// The generic test core called by the unit test methods. For every similarity in
 /// <c>Sims</c> it computes the weight, the score and the explained score, and asserts
 /// that the score is not infinite, not NaN, not negative, and equal to the
 /// explanation's value within <c>FLOAT_EPSILON</c>.
 /// </summary>
 private void UnitTestCore(BasicStats stats, float freq, int docLen)
 {
     foreach (SimilarityBase sim in Sims)
     {
         BasicStats realStats = (BasicStats)sim.ComputeWeight(stats.TotalBoost, ToCollectionStats(stats), ToTermStats(stats));

         float score = sim.Score(realStats, freq, docLen);
         Explanation explained = sim.Explain(realStats, 1, new Explanation(freq, "freq"), docLen);
         float explScore = explained.Value;

         bool isInfinite = float.IsInfinity(score);
         Assert.IsFalse(isInfinite, "Score infinite: " + sim.ToString());

         bool isNaN = float.IsNaN(score);
         Assert.IsFalse(isNaN, "Score NaN: " + sim.ToString());

         Assert.IsTrue(score >= 0, "Score negative: " + sim.ToString());
         Assert.AreEqual(score, explScore, FLOAT_EPSILON, "score() and explain() return different values: " + sim.ToString());
     }
 }
Пример #22
0
        /// <summary>
        /// Computes <c>D * F + 0.5 * log2(1 + 2 * pi * tfn * nphi)</c> from a normalized
        /// total term frequency <c>F</c>, the ratio <c>phi = tfn / F</c>, its complement
        /// <c>nphi</c> and the prior <c>p = 1 / (NumberOfDocuments + 1)</c>.
        /// </summary>
        /// <param name="stats"> supplies <c>TotalTermFreq</c> and <c>NumberOfDocuments</c>. </param>
        /// <param name="tfn"> the normalized term frequency. </param>
        /// <returns> the score as a single-precision value. </returns>
        public override sealed float Score(BasicStats stats, float tfn)
        {
            // tfn is unconditionally added to totalTermFreq so that phi is always < 1;
            // otherwise nphi could become negative for tiny values and yield NaN.
            double freqNorm = stats.TotalTermFreq + 1 + tfn;
            double phi = (double)tfn / freqNorm;
            double nphi = 1 - phi;
            double p = 1.0 / (stats.NumberOfDocuments + 1);

            double presentTerm = phi * SimilarityBase.Log2(phi / p);
            double absentTerm = nphi * SimilarityBase.Log2(nphi / (1 - p));
            double divergence = presentTerm + absentTerm;

            return (float)(divergence * freqNorm + 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * tfn * nphi));
        }
Пример #23
0
        /// <summary>
        /// Adds a "boost" detail (only when the total boost differs from 1), followed by
        /// the normalization, lambda and distribution explanations. The distribution
        /// explanation is computed from the values of the other two.
        /// </summary>
        protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
        {
            float boost = stats.TotalBoost;
            if (boost != 1.0f)
            {
                expl.AddDetail(new Explanation(boost, "boost"));
            }

            Explanation normalizationDetail = m_normalization.Explain(stats, freq, docLen);
            Explanation lambdaDetail = m_lambda.Explain(stats);
            expl.AddDetail(normalizationDetail);
            expl.AddDetail(lambdaDetail);

            float tfn = normalizationDetail.Value;
            float lambda = lambdaDetail.Value;
            expl.AddDetail(m_distribution.Explain(stats, tfn, lambda));
        }
Пример #24
0
        /// <summary>
        /// Adds a "boost" detail (only when the total boost differs from 1), followed by
        /// the normalization, basic model and after effect explanations. The last two are
        /// computed from the normalization explanation's value.
        /// </summary>
        protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
        {
            float boost = stats.TotalBoost;
            if (boost != 1.0f)
            {
                expl.AddDetail(new Explanation(boost, "boost"));
            }

            Explanation normalizationDetail = m_normalization.Explain(stats, freq, docLen);
            expl.AddDetail(normalizationDetail);

            // The normalized term frequency feeds both remaining components.
            float normalizedFreq = normalizationDetail.Value;
            Explanation modelDetail = m_basicModel.Explain(stats, normalizedFreq);
            expl.AddDetail(modelDetail);
            Explanation afterEffectDetail = m_afterEffect.Explain(stats, normalizedFreq);
            expl.AddDetail(afterEffectDetail);
        }
Пример #25
0
        /// <summary>
        /// Adds a "boost" detail (only when the total boost differs from 1), then the "mu",
        /// "term weight" and "document norm" details, and finally delegates to the base
        /// implementation. <paramref name="stats"/> is cast to <c>LMStats</c> to read the
        /// collection probability.
        /// </summary>
        protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
        {
            float boost = stats.TotalBoost;
            if (boost != 1.0f)
            {
                expl.AddDetail(new Explanation(boost, "boost"));
            }

            expl.AddDetail(new Explanation(mu, "mu"));

            // ln(1 + freq / (mu * collection probability))
            float termWeight = (float)Math.Log(1 + freq / (mu * ((LMStats)stats).CollectionProbability));
            Explanation weightDetail = new Explanation
            {
                Value = termWeight,
                Description = "term weight"
            };
            expl.AddDetail(weightDetail);

            // ln(mu / (docLen + mu))
            float documentNorm = (float)Math.Log(mu / (docLen + mu));
            expl.AddDetail(new Explanation(documentNorm, "document norm"));

            base.Explain(expl, stats, doc, freq, docLen);
        }
Пример #26
0
        /// <summary>
        /// Fills all member fields defined in <c>BasicStats</c> in <paramref name="stats"/>.
        /// Subclasses can override this method to fill additional stats.
        /// </summary>
        /// <param name="stats"> the statistics object to populate. </param>
        /// <param name="collectionStats"> source of the document count and the field's token total. </param>
        /// <param name="termStats"> source of the term's document frequency and total term frequency. </param>
        protected internal virtual void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats)
        {
            // #positions(field) must be >= #positions(term)
            Debug.Assert(collectionStats.SumTotalTermFreq() == -1 || collectionStats.SumTotalTermFreq() >= termStats.TotalTermFreq());

            long docCount = collectionStats.MaxDoc;
            long docFreq = termStats.DocFreq();

            // A reported totalTermFreq of -1 means the codec does not supply it: use docFreq.
            long reportedTermFreq = termStats.TotalTermFreq();
            long totalTermFreq = reportedTermFreq == -1 ? docFreq : reportedTermFreq;

            // A non-positive token total means the field does not exist, the codec does not
            // supply the measure, or frequencies were omitted. Something must still be
            // provided, because negative values cause NaN/Inf for some scorers.
            long fieldTokenSum = collectionStats.SumTotalTermFreq();
            bool hasFieldTokens = fieldTokenSum > 0;
            long numberOfFieldTokens = hasFieldTokens ? fieldTokenSum : docFreq;
            float avgFieldLength = hasFieldTokens ? (float)fieldTokenSum / docCount : 1f;

            // TODO: add sumDocFreq for field (numberOfFieldPostings)
            stats.NumberOfDocuments = docCount;
            stats.NumberOfFieldTokens = numberOfFieldTokens;
            stats.AvgFieldLength = avgFieldLength;
            stats.DocFreq = docFreq;
            stats.TotalTermFreq = totalTermFreq;
        }
Пример #27
0
 /// <summary>
 /// Creates the scorer for <paramref name="stats"/>. A <c>MultiSimilarity.MultiStats</c>
 /// yields a <c>MultiSimilarity.MultiSimScorer</c> over one <c>BasicSimScorer</c> per
 /// sub-statistic; anything else is cast to <c>BasicStats</c> and scored by a single
 /// <c>BasicSimScorer</c>. Norm values are looked up by each statistic's field.
 /// </summary>
 public override SimScorer DoSimScorer(SimWeight stats, AtomicReaderContext context)
 {
     MultiSimilarity.MultiStats multi = stats as MultiSimilarity.MultiStats;
     if (multi == null)
     {
         // Single term.
         BasicStats single = (BasicStats)stats;
         return new BasicSimScorer(this, single, context.AtomicReader.GetNormValues(single.field));
     }

     // A multi term query (e.g. phrase): return the summation,
     // scoring almost as if it were a boolean query.
     SimWeight[] parts = multi.SubStats;
     SimScorer[] partScorers = new SimScorer[parts.Length];
     for (int p = 0; p < partScorers.Length; p++)
     {
         BasicStats part = (BasicStats)parts[p];
         partScorers[p] = new BasicSimScorer(this, part, context.AtomicReader.GetNormValues(part.field));
     }
     return new MultiSimilarity.MultiSimScorer(partScorers);
 }
Пример #28
0
        /// <summary>
        /// Populates every member of <c>BasicStats</c> on <paramref name="stats"/> from the
        /// collection-level and term-level statistics. Subclasses can override this method
        /// to fill additional stats.
        /// </summary>
        /// <param name="stats"> the statistics object to populate. </param>
        /// <param name="collectionStats"> source of the document count and the field's token total. </param>
        /// <param name="termStats"> source of the term's document frequency and total term frequency. </param>
        protected internal virtual void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats)
        {
            // #positions(field) must be >= #positions(term)
            Debug.Assert(collectionStats.SumTotalTermFreq() == -1 || collectionStats.SumTotalTermFreq() >= termStats.TotalTermFreq());

            long maxDoc = collectionStats.MaxDoc;
            long termDocFreq = termStats.DocFreq();

            long termTotalFreq = termStats.TotalTermFreq();
            if (termTotalFreq == -1)
            {
                // codec does not supply totalTermFreq: substitute docFreq
                termTotalFreq = termDocFreq;
            }

            long fieldTokens;
            float averageLength;
            long sumTotalTermFreq = collectionStats.SumTotalTermFreq();
            if (sumTotalTermFreq > 0)
            {
                fieldTokens = sumTotalTermFreq;
                averageLength = (float)fieldTokens / maxDoc;
            }
            else
            {
                // Field does not exist, the codec does not supply these measures, or
                // frequencies were omitted for the field. Something has to be provided:
                // negative values cause NaN/Inf for some scorers.
                fieldTokens = termDocFreq;
                averageLength = 1;
            }

            // TODO: add sumDocFreq for field (numberOfFieldPostings)
            stats.NumberOfDocuments = maxDoc;
            stats.NumberOfFieldTokens = fieldTokens;
            stats.AvgFieldLength = averageLength;
            stats.DocFreq = termDocFreq;
            stats.TotalTermFreq = termTotalFreq;
        }
Пример #29
0
 /// <summary>
 /// Fills the usual statistics through the base implementation, then stores the
 /// collection probability of the current term, as computed by the collection
 /// model, on <paramref name="stats"/> (which is cast to <c>LMStats</c>).
 /// </summary>
 protected internal override void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats)
 {
     base.FillBasicStats(stats, collectionStats, termStats);

     // The probability is computed from the statistics filled in just above.
     ((LMStats)stats).CollectionProbability = collectionModel.ComputeProbability(stats);
 }
Пример #30
0
 /// <summary>
 /// Extension hook that subclasses override to explain the score. According to the
 /// contract of the calling explain method, <paramref name="expl"/> already contains
 /// the score, the name of the class and the doc id, as well as the term frequency
 /// and its explanation; subclasses can add additional clauses to explain details of
 /// their scoring formulae.
 /// <para>The default implementation does nothing.</para>
 /// </summary>
 /// <param name="expl"> the explanation to extend with details. </param>
 /// <param name="stats"> the corpus level statistics. </param>
 /// <param name="doc"> the document id. </param>
 /// <param name="freq"> the term frequency. </param>
 /// <param name="docLen"> the document length. </param>
 protected internal virtual void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
 {
     // Intentionally empty: the default adds no details.
 }
Пример #31
0
 /// <summary>
 /// Shared verification routine for the unit tests. Each similarity in <c>Sims</c>
 /// scores the given statistics; the score must not be infinite, NaN or negative,
 /// and must equal the value of the corresponding explanation within
 /// <c>FLOAT_EPSILON</c>.
 /// </summary>
 private void UnitTestCore(BasicStats stats, float freq, int docLen)
 {
     foreach (SimilarityBase similarity in Sims)
     {
         SimWeight weight = similarity.ComputeWeight(stats.TotalBoost, ToCollectionStats(stats), ToTermStats(stats));
         BasicStats realStats = (BasicStats)weight;

         float score = similarity.Score(realStats, freq, docLen);
         float explScore = similarity.Explain(realStats, 1, new Explanation(freq, "freq"), docLen).Value;

         // Validity of the raw score.
         Assert.IsFalse(float.IsInfinity(score), "Score infinite: " + similarity.ToString());
         Assert.IsFalse(float.IsNaN(score), "Score NaN: " + similarity.ToString());
         Assert.IsTrue(score >= 0, "Score negative: " + similarity.ToString());

         // Agreement between scoring and explaining.
         Assert.AreEqual(score, explScore, FLOAT_EPSILON, "score() and explain() return different values: " + similarity.ToString());
     }
 }
Пример #32
0
 /// <summary>
 /// Returns <paramref name="tf"/> unchanged; <paramref name="stats"/> and
 /// <paramref name="len"/> are not read.
 /// </summary>
 public override float Tfn(BasicStats stats, float tf, float len)
 {
     float unnormalized = tf;
     return unnormalized;
 }
Пример #33
0
 /// <summary>
 /// Computes <c>(TotalTermFreq + 1) / (NumberOfDocuments + 1)</c> in single precision.
 /// </summary>
 public override sealed float CalculateLambda(BasicStats stats)
 {
     float numerator = stats.TotalTermFreq + 1F;
     float denominator = stats.NumberOfDocuments + 1F;
     return numerator / denominator;
 }
Пример #34
0
 /// <summary>
 /// Computes <c>tfn * log2(tfn / lambda) + (lambda + 1 / (12 * tfn) - tfn) * LOG2_E
 /// + 0.5 * log2(2 * pi * tfn)</c> with <c>lambda = (TotalTermFreq + 1) / (NumberOfDocuments + 1)</c>.
 /// </summary>
 /// <param name="stats"> supplies <c>TotalTermFreq</c> and <c>NumberOfDocuments</c>. </param>
 /// <param name="tfn"> the normalized term frequency. </param>
 /// <returns> the score as a single-precision value. </returns>
 public override sealed float Score(BasicStats stats, float tfn)
 {
     float lambda = (float)(stats.TotalTermFreq + 1) / (stats.NumberOfDocuments + 1);

     double ratioTerm = tfn * SimilarityBase.Log2(tfn / lambda);
     float residual = lambda + 1 / (12 * tfn) - tfn;
     double rootTerm = 0.5 * SimilarityBase.Log2(2 * Math.PI * tfn);

     return (float)(ratioTerm + residual * LOG2_E + rootTerm);
 }
Пример #35
0
 /// <summary>
 /// Stores the owning similarity, the statistics and the norm values on this scorer.
 /// </summary>
 internal BasicSimScorer(SimilarityBase outerInstance, BasicStats stats, NumericDocValues norms)
 {
     OuterInstance = outerInstance;
     Stats = stats;
     Norms = norms;
 }
Пример #36
0
 /// <summary>
 /// Creates the default statistics object that the specific tests modify. It is
 /// built for field "spoof" with boost 1 and populated from the test constants.
 /// </summary>
 private BasicStats CreateStats()
 {
     BasicStats defaults = new BasicStats("spoof", 1)
     {
         NumberOfDocuments = NUMBER_OF_DOCUMENTS,
         NumberOfFieldTokens = NUMBER_OF_FIELD_TOKENS,
         AvgFieldLength = AVG_FIELD_LENGTH,
         DocFreq = DOC_FREQ,
         TotalTermFreq = TOTAL_TERM_FREQ
     };
     return defaults;
 }
Пример #37
0
 /// <summary>
 /// Adds a "collection probability" detail whose value is computed by the collection
 /// model from <paramref name="stats"/>.
 /// </summary>
 protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
 {
     float probability = collectionModel.ComputeProbability(stats);
     Explanation probabilityDetail = new Explanation(probability, "collection probability");
     expl.AddDetail(probabilityDetail);
 }
Пример #38
0
        /// <summary>
        /// Appends the details of the scoring formula to <paramref name="expl"/>: an optional
        /// "boost" (only when the total boost differs from 1), "mu", the "term weight", and
        /// the "document norm"; then delegates to the base implementation.
        /// </summary>
        protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
        {
            bool boosted = stats.TotalBoost != 1.0f;
            if (boosted)
            {
                expl.AddDetail(new Explanation(stats.TotalBoost, "boost"));
            }

            Explanation muDetail = new Explanation(Mu_Renamed, "mu");
            expl.AddDetail(muDetail);

            // Term weight: ln(1 + freq / (mu * collection probability)).
            Explanation weightDetail = new Explanation
            {
                Value = (float)Math.Log(1 + freq / (Mu_Renamed * ((LMStats)stats).CollectionProbability)),
                Description = "term weight"
            };
            expl.AddDetail(weightDetail);

            // Document norm: ln(mu / (docLen + mu)).
            Explanation normDetail = new Explanation((float)Math.Log(Mu_Renamed / (docLen + Mu_Renamed)), "document norm");
            expl.AddDetail(normDetail);

            base.Explain(expl, stats, doc, freq, docLen);
        }
Пример #39
0
 /// <summary>
 /// Computes <c>(TotalTermFreq + 1) / (NumberOfFieldTokens + 1)</c> in single precision.
 /// </summary>
 public float ComputeProbability(BasicStats stats)
 {
     float termOccurrences = stats.TotalTermFreq + 1F;
     float fieldTokens = stats.NumberOfFieldTokens + 1F;
     return termOccurrences / fieldTokens;
 }
Пример #40
0
 /// <summary>
 /// Builds term statistics for the term "spoofyText" from the document frequency and
 /// total term frequency of <paramref name="stats"/>.
 /// </summary>
 private TermStatistics ToTermStats(BasicStats stats)
 {
     BytesRef term = new BytesRef("spoofyText");
     TermStatistics termStats = new TermStatistics(term, stats.DocFreq, stats.TotalTermFreq);
     return termStats;
 }
 /// <summary>
 /// Computes <c>boost * ln(1 + ((1 - lambda) * freq / docLen) / (lambda * collectionProbability))</c>.
 /// <paramref name="stats"/> is cast to <c>LMStats</c> to read the collection probability.
 /// </summary>
 public override float Score(BasicStats stats, float freq, float docLen)
 {
     float boost = stats.TotalBoost;
     var documentPart = (1 - Lambda_Renamed) * freq / docLen;
     var collectionPart = Lambda_Renamed * ((LMStats)stats).CollectionProbability;
     return boost * (float)Math.Log(1 + documentPart / collectionPart);
 }
Пример #42
0
 /// <summary>
 /// Returns the normalized term frequency. </summary>
 /// <param name="stats"> the statistics made available to the normalization. </param>
 /// <param name="tf"> the term frequency to normalize. </param>
 /// <param name="len"> the field length.  </param>
 /// <returns> the normalized term frequency. </returns>
 public abstract float Tfn(BasicStats stats, float tf, float len);
Пример #43
0
        /// <summary>
        /// Normalizes the term frequency, then returns the product of the total boost, the
        /// basic model score and the after effect score for that normalized frequency.
        /// </summary>
        public override float Score(BasicStats stats, float freq, float docLen)
        {
            float normalizedFreq = m_normalization.Tfn(stats, freq, docLen);

            float boost = stats.TotalBoost;
            float modelScore = m_basicModel.Score(stats, normalizedFreq);
            float afterEffect = m_afterEffect.Score(stats, normalizedFreq);
            return boost * modelScore * afterEffect;
        }
Пример #44
0
        /// <summary>
        /// Explains the score. The implementation here provides a basic explanation in the
        /// format <em>score(name-of-similarity, doc=doc-id, freq=term-frequency), computed
        /// from:</em>, attaches the score (computed via <c>Score(BasicStats, float, float)</c>)
        /// and the explanation for the term frequency. Subclasses content with this format
        /// may add additional details in
        /// <c>Explain(Explanation, BasicStats, int, float, float)</c>.
        /// </summary>
        /// <param name="stats"> the corpus level statistics. </param>
        /// <param name="doc"> the document id. </param>
        /// <param name="freq"> the term frequency and its explanation. </param>
        /// <param name="docLen"> the document length. </param>
        /// <returns> the explanation. </returns>
        public virtual Explanation Explain(BasicStats stats, int doc, Explanation freq, float docLen)
        {
            Explanation scoreExplanation = new Explanation();

            float scoreValue = Score(stats, freq.Value, docLen);
            scoreExplanation.Value = scoreValue;

            string similarityName = this.GetType().Name;
            scoreExplanation.Description = "score(" + similarityName + ", doc=" + doc + ", freq=" + freq.Value + "), computed from:";

            scoreExplanation.AddDetail(freq);
            Explain(scoreExplanation, stats, doc, freq.Value, docLen);
            return scoreExplanation;
        }
Пример #45
0
 /// <summary>
 /// Computes <c>-ln(lambda / (tfn + lambda))</c>; <paramref name="stats"/> is not read.
 /// </summary>
 public override sealed float Score(BasicStats stats, float tfn, float lambda)
 {
     float ratio = lambda / (tfn + lambda);
     return (float)-Math.Log(ratio);
 }
Пример #46
0
 /// <summary>
 /// Builds collection statistics from the field, document count and field token count
 /// of <paramref name="stats"/>; the third and fifth constructor arguments are passed as -1.
 /// </summary>
 private CollectionStatistics ToCollectionStats(BasicStats stats)
 {
     CollectionStatistics collection = new CollectionStatistics(
         stats.Field,
         stats.NumberOfDocuments,
         -1,
         stats.NumberOfFieldTokens,
         -1);
     return collection;
 }
Пример #47
0
 /// <summary>
 /// Returns the negated natural logarithm of <c>lambda / (tfn + lambda)</c>.
 /// <paramref name="stats"/> is not read.
 /// </summary>
 public override sealed float Score(BasicStats stats, float tfn, float lambda)
 {
     double logOfRatio = Math.Log(lambda / (tfn + lambda));
     return (float)-logOfRatio;
 }
 /// <summary>
 /// Adds a "boost" detail (only when the total boost differs from 1) and a "lambda"
 /// detail to <paramref name="expl"/>, then delegates to the base implementation.
 /// </summary>
 protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
 {
     bool boosted = stats.TotalBoost != 1.0f;
     if (boosted)
     {
         Explanation boostDetail = new Explanation(stats.TotalBoost, "boost");
         expl.AddDetail(boostDetail);
     }

     Explanation lambdaDetail = new Explanation(Lambda_Renamed, "lambda");
     expl.AddDetail(lambdaDetail);

     base.Explain(expl, stats, doc, freq, docLen);
 }
Пример #49
0
 /// <summary>
 /// Computes <c>tf * (avgFieldLength / len)^z</c>.
 /// </summary>
 public override float Tfn(BasicStats stats, float tf, float len)
 {
     double lengthFactor = Math.Pow(stats.m_avgFieldLength / len, z);
     return (float)(tf * lengthFactor);
 }
Пример #50
0
 /// <summary>
 /// Scores a document from the given statistics, term frequency and document length.
 /// <para>Subclasses must apply their scoring formula in this method.</para> </summary>
 /// <param name="stats"> the corpus level statistics. </param>
 /// <param name="freq"> the term frequency. </param>
 /// <param name="docLen"> the document length. </param>
 /// <returns> the score. </returns>
 public abstract float Score(BasicStats stats, float freq, float docLen);
Пример #51
0
 /// <summary>
 /// Scales <paramref name="tf"/> by the ratio of the average field length to
 /// <paramref name="len"/>, raised to the power <c>z</c>.
 /// </summary>
 public override float Tfn(BasicStats stats, float tf, float len)
 {
     double scale = Math.Pow(stats.AvgFieldLength_Renamed / len, z);
     double normalized = tf * scale;
     return (float)normalized;
 }
Пример #52
0
 /// <summary>
 /// Initializes the scorer with its owning similarity, the statistics to score with
 /// and the norm values.
 /// </summary>
 internal BasicSimScorer(SimilarityBase outerInstance, BasicStats stats, NumericDocValues norms)
 {
     OuterInstance = outerInstance;
     Stats = stats;
     Norms = norms;
 }
Пример #53
0
 /// <summary>
 /// Returns a fixed explanation with value 1 and description "no normalization";
 /// none of the arguments are read.
 /// </summary>
 public override Explanation Explain(BasicStats stats, float tf, float len)
 {
     Explanation fixedExplanation = new Explanation(1, "no normalization");
     return fixedExplanation;
 }
Пример #54
0
 /// <summary>
 /// Computes <c>boost * (ln(1 + freq / (mu * collectionProbability)) + ln(mu / (docLen + mu)))</c>
 /// and returns it when it is greater than zero, otherwise 0.
 /// <paramref name="stats"/> is cast to <c>LMStats</c> to read the collection probability.
 /// </summary>
 public override float Score(BasicStats stats, float freq, float docLen)
 {
     float boost = stats.TotalBoost;
     double termWeight = Math.Log(1 + freq / (Mu_Renamed * ((LMStats)stats).CollectionProbability));
     double documentNorm = Math.Log(Mu_Renamed / (docLen + Mu_Renamed));
     float score = boost * (float)(termWeight + documentNorm);

     // Written as a comparison (not Math.Max) so that a NaN score also yields 0.
     if (score > 0.0f)
     {
         return score;
     }
     return 0.0f;
 }