Esempio n. 1
0
 /// <summary>
 /// Extends <paramref name="expl"/> with a "boost" detail (only when the total
 /// boost differs from 1), a "lambda" detail, and then whatever the base
 /// implementation adds.
 /// </summary>
 /// <param name="expl"> the explanation to extend with details. </param>
 /// <param name="stats"> the statistics the boost is read from. </param>
 /// <param name="doc"> the document id, forwarded to the base implementation. </param>
 /// <param name="freq"> the term frequency, forwarded to the base implementation. </param>
 /// <param name="docLen"> the document length, forwarded to the base implementation. </param>
 protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
 {
     float boost = stats.TotalBoost;
     bool boostIsNeutral = boost == 1.0f;

     // A boost of exactly 1 is not reported.
     if (!boostIsNeutral)
     {
         Explanation boostDetail = new Explanation(boost, "boost");
         expl.AddDetail(boostDetail);
     }

     Explanation lambdaDetail = new Explanation(lambda, "lambda");
     expl.AddDetail(lambdaDetail);

     base.Explain(expl, stats, doc, freq, docLen);
 }
Esempio n. 2
0
 /// <summary>
 /// Creates one <see cref="BasicStats"/> per entry of <paramref name="termStats"/>,
 /// fills each through <c>FillBasicStats</c>, and returns either the single
 /// stats object (exactly one term) or a <c>MultiSimilarity.MultiStats</c>
 /// wrapping all of them.
 /// </summary>
 /// <param name="queryBoost"> the boost handed to <c>NewStats</c> for every term. </param>
 /// <param name="collectionStats"> the collection statistics; its field is used for every term. </param>
 /// <param name="termStats"> the per-term statistics. </param>
 /// <returns> the weight object built from the statistics. </returns>
 public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     BasicStats[] perTerm = new BasicStats[termStats.Length];
     int slot = 0;

     foreach (TermStatistics termStat in termStats)
     {
         BasicStats current = NewStats(collectionStats.Field, queryBoost);
         perTerm[slot] = current;
         FillBasicStats(current, collectionStats, termStat);
         slot++;
     }

     // A single term needs no wrapper.
     if (perTerm.Length == 1)
     {
         return perTerm[0];
     }

     return new MultiSimilarity.MultiStats(perTerm);
 }
Esempio n. 3
0
        /// <summary>
        /// Builds the top-level explanation of a score. The returned explanation
        /// carries the value produced by <see cref="Score(BasicStats, float, float)"/>,
        /// a description of the form <em>score(name-of-similarity, doc=doc-id,
        /// freq=term-frequency), computed from:</em>, and <paramref name="freq"/> as
        /// its first detail. Any further details are contributed by
        /// <see cref="Explain(Explanation, BasicStats, int, float, float)"/>,
        /// which subclasses may override.
        /// </summary>
        /// <param name="stats"> the corpus level statistics. </param>
        /// <param name="doc"> the document id. </param>
        /// <param name="freq"> the term frequency and its explanation. </param>
        /// <param name="docLen"> the document length. </param>
        /// <returns> the explanation. </returns>
        public virtual Explanation Explain(BasicStats stats, int doc, Explanation freq, float docLen)
        {
            float termFreq = freq.Value;
            float score = Score(stats, termFreq, docLen);
            string similarityName = this.GetType().Name;

            Explanation explanation = new Explanation
            {
                Value = score,
                Description = "score(" + similarityName + ", doc=" + doc + ", freq=" + termFreq + "), computed from:"
            };

            // The frequency explanation always comes first; the overridable hook appends the rest.
            explanation.AddDetail(freq);
            Explain(explanation, stats, doc, termFreq, docLen);

            return explanation;
        }
Esempio n. 4
0
        /// <summary>
        /// Extends <paramref name="expl"/> with a "boost" detail (only when the total
        /// boost differs from 1), followed by the explanations produced by
        /// <c>m_normalization</c>, <c>m_lambda</c> and <c>m_distribution</c>, in that order.
        /// The distribution explanation is computed from the values of the other two.
        /// </summary>
        /// <param name="expl"> the explanation to extend with details. </param>
        /// <param name="stats"> the corpus level statistics. </param>
        /// <param name="doc"> the document id (not used here). </param>
        /// <param name="freq"> the term frequency. </param>
        /// <param name="docLen"> the document length. </param>
        protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
        {
            float boost = stats.TotalBoost;
            if (boost != 1.0f)
            {
                expl.AddDetail(new Explanation(boost, "boost"));
            }

            Explanation normalizationDetail = m_normalization.Explain(stats, freq, docLen);
            Explanation lambdaDetail = m_lambda.Explain(stats);
            expl.AddDetail(normalizationDetail);
            expl.AddDetail(lambdaDetail);

            // The distribution is explained in terms of the normalized frequency and lambda values above.
            Explanation distributionDetail = m_distribution.Explain(stats, normalizationDetail.Value, lambdaDetail.Value);
            expl.AddDetail(distributionDetail);
        }
Esempio n. 5
0
        /// <summary>
        /// Computes the score from the total term frequency and number of documents
        /// in <paramref name="stats"/> and the normalized term frequency
        /// <paramref name="tfn"/>.
        /// </summary>
        /// <param name="stats"> the corpus level statistics. </param>
        /// <param name="tfn"> the normalized term frequency. </param>
        /// <returns> the score. </returns>
        public override sealed float Score(BasicStats stats, float tfn)
        {
            // phi must stay below 1 even for tiny total term frequencies; otherwise its
            // complement could go negative and the logarithms would yield NaN. Adding tfn
            // (and 1) to the total term frequency unconditionally gives a 'normalized' F.
            double normalizedF = stats.TotalTermFreq + 1 + tfn;
            double phi = (double)tfn / normalizedF;
            double phiComplement = 1 - phi;
            double p = 1.0 / (stats.NumberOfDocuments + 1);

            double divergence = phi * SimilarityBase.Log2(phi / p) + phiComplement * SimilarityBase.Log2(phiComplement / (1 - p));
            double logCorrection = 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * tfn * phiComplement);

            return (float)(divergence * normalizedF + logCorrection);
        }
Esempio n. 6
0
        /// <summary>
        /// Extends <paramref name="expl"/> with a "boost" detail (only when the total
        /// boost differs from 1), followed by the explanations produced by
        /// <c>m_normalization</c>, <c>m_basicModel</c> and <c>m_afterEffect</c>.
        /// The latter two are explained for the value of the normalization explanation.
        /// </summary>
        /// <param name="expl"> the explanation to extend with details. </param>
        /// <param name="stats"> the corpus level statistics. </param>
        /// <param name="doc"> the document id (not used here). </param>
        /// <param name="freq"> the term frequency. </param>
        /// <param name="docLen"> the document length. </param>
        protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
        {
            float boost = stats.TotalBoost;
            if (boost != 1.0f)
            {
                expl.AddDetail(new Explanation(boost, "boost"));
            }

            Explanation normalizationDetail = m_normalization.Explain(stats, freq, docLen);
            expl.AddDetail(normalizationDetail);

            // Both remaining components are explained for the normalized term frequency.
            float normalizedFreq = normalizationDetail.Value;
            Explanation basicModelDetail = m_basicModel.Explain(stats, normalizedFreq);
            expl.AddDetail(basicModelDetail);
            Explanation afterEffectDetail = m_afterEffect.Explain(stats, normalizedFreq);
            expl.AddDetail(afterEffectDetail);
        }
Esempio n. 7
0
        /// <summary>
        /// Extends <paramref name="expl"/> with a "boost" detail (only when the total
        /// boost differs from 1), then "mu", "term weight" and "document norm" details,
        /// and finally whatever the base implementation adds.
        /// </summary>
        /// <param name="expl"> the explanation to extend with details. </param>
        /// <param name="stats"> the statistics; cast to <c>LMStats</c> to read the collection probability. </param>
        /// <param name="doc"> the document id, forwarded to the base implementation. </param>
        /// <param name="freq"> the term frequency. </param>
        /// <param name="docLen"> the document length. </param>
        protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
        {
            float boost = stats.TotalBoost;
            if (boost != 1.0f)
            {
                expl.AddDetail(new Explanation(boost, "boost"));
            }

            Explanation muDetail = new Explanation(mu, "mu");
            expl.AddDetail(muDetail);

            // log(1 + freq / (mu * collection probability))
            Explanation termWeightDetail = new Explanation
            {
                Value = (float)Math.Log(1 + freq / (mu * ((LMStats)stats).CollectionProbability)),
                Description = "term weight"
            };
            expl.AddDetail(termWeightDetail);

            // log(mu / (docLen + mu))
            float documentNorm = (float)Math.Log(mu / (docLen + mu));
            expl.AddDetail(new Explanation(documentNorm, "document norm"));

            base.Explain(expl, stats, doc, freq, docLen);
        }
Esempio n. 8
0
        /// <summary>
        /// Populates <paramref name="stats"/> with the number of documents, number of
        /// field tokens, average field length, document frequency and total term
        /// frequency derived from <paramref name="collectionStats"/> and
        /// <paramref name="termStats"/>. Subclasses can override this method to fill
        /// additional stats.
        /// </summary>
        /// <param name="stats"> the statistics object to populate. </param>
        /// <param name="collectionStats"> the collection-level statistics. </param>
        /// <param name="termStats"> the term-level statistics. </param>
        protected internal virtual void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats)
        {
            // #positions(field) must be >= #positions(term)
            Debug.Assert(collectionStats.SumTotalTermFreq == -1 || collectionStats.SumTotalTermFreq >= termStats.TotalTermFreq);

            long docCount = collectionStats.MaxDoc;
            long termDocFreq = termStats.DocFreq;

            // A total term frequency of -1 means the codec does not supply it: fall back to docFreq.
            long reportedTermFreq = termStats.TotalTermFreq;
            long termTotalFreq = reportedTermFreq == -1 ? termDocFreq : reportedTermFreq;

            // When the field-level sum is missing or non-positive (field does not exist, codec
            // does not supply the measure, or frequencies were omitted) substitute safe values;
            // negative values cause NaN/Inf for some scorers.
            long fieldSum = collectionStats.SumTotalTermFreq;
            bool hasFieldSum = fieldSum > 0;
            long fieldTokenCount = hasFieldSum ? fieldSum : termDocFreq;
            float averageLength = hasFieldSum ? (float)fieldSum / docCount : 1f;

            // TODO: add sumDocFreq for field (numberOfFieldPostings)
            stats.NumberOfDocuments   = docCount;
            stats.NumberOfFieldTokens = fieldTokenCount;
            stats.AvgFieldLength      = averageLength;
            stats.DocFreq             = termDocFreq;
            stats.TotalTermFreq       = termTotalFreq;
        }
Esempio n. 9
0
 /// <summary>
 /// Creates the scorer for <paramref name="stats"/>. A
 /// <c>MultiSimilarity.MultiStats</c> yields a <c>MultiSimilarity.MultiSimScorer</c>
 /// over one <c>BasicSimScorer</c> per sub-stats entry; anything else is cast to
 /// <see cref="BasicStats"/> and yields a single <c>BasicSimScorer</c>.
 /// </summary>
 /// <param name="stats"> the weight object to build a scorer for. </param>
 /// <param name="context"> the reader context the norm values are fetched from. </param>
 /// <returns> the scorer. </returns>
 public override SimScorer GetSimScorer(SimWeight stats, AtomicReaderContext context)
 {
     MultiSimilarity.MultiStats multiStats = stats as MultiSimilarity.MultiStats;

     if (multiStats == null)
     {
         // Single-term case.
         BasicStats single = (BasicStats)stats;
         return new BasicSimScorer(this, single, context.AtomicReader.GetNormValues(single.Field));
     }

     // a multi term query (e.g. phrase). return the summation,
     // scoring almost as if it were boolean query
     SimWeight[] parts = multiStats.subStats;
     SimScorer[] partScorers = new SimScorer[parts.Length];
     for (int index = 0; index < partScorers.Length; index++)
     {
         BasicStats part = (BasicStats)parts[index];
         partScorers[index] = new BasicSimScorer(this, part, context.AtomicReader.GetNormValues(part.Field));
     }

     return new MultiSimilarity.MultiSimScorer(partScorers);
 }
Esempio n. 10
0
        /// <summary>
        /// Computes the score as the product of the total boost, the score of
        /// <c>m_basicModel</c> and the score of <c>m_afterEffect</c>, both evaluated
        /// for the term frequency normalized by <c>m_normalization</c>.
        /// </summary>
        /// <param name="stats"> the corpus level statistics. </param>
        /// <param name="freq"> the term frequency. </param>
        /// <param name="docLen"> the document length. </param>
        /// <returns> the score. </returns>
        public override float Score(BasicStats stats, float freq, float docLen)
        {
            float normalizedFreq = m_normalization.Tfn(stats, freq, docLen);
            float basicModelScore = m_basicModel.Score(stats, normalizedFreq);
            float afterEffectScore = m_afterEffect.Score(stats, normalizedFreq);

            return stats.TotalBoost * basicModelScore * afterEffectScore;
        }
Esempio n. 11
0
 /// <summary>
 /// Extends <paramref name="expl"/> with a single "collection probability" detail
 /// whose value is computed by <c>m_collectionModel</c> from <paramref name="stats"/>.
 /// </summary>
 /// <param name="expl"> the explanation to extend with details. </param>
 /// <param name="stats"> the corpus level statistics. </param>
 /// <param name="doc"> the document id (not used here). </param>
 /// <param name="freq"> the term frequency (not used here). </param>
 /// <param name="docLen"> the document length (not used here). </param>
 protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
 {
     Explanation probabilityDetail = new Explanation(m_collectionModel.ComputeProbability(stats), "collection probability");
     expl.AddDetail(probabilityDetail);
 }
Esempio n. 12
0
 /// <summary>
 /// Computes <c>-log(lambda / (tfn + lambda))</c> using the natural logarithm.
 /// </summary>
 /// <param name="stats"> the corpus level statistics (not used here). </param>
 /// <param name="tfn"> the normalized term frequency. </param>
 /// <param name="lambda"> the lambda parameter. </param>
 /// <returns> the score. </returns>
 public override sealed float Score(BasicStats stats, float tfn, float lambda)
 {
     float lambdaShare = lambda / (tfn + lambda);
     double logShare = Math.Log(lambdaShare);

     return (float)(-logShare);
 }
Esempio n. 13
0
 /// <summary>
 /// Computes <c>(TotalTermFreq + 1) / (NumberOfFieldTokens + 1)</c> from
 /// <paramref name="stats"/>.
 /// </summary>
 /// <param name="stats"> the statistics supplying both counts. </param>
 /// <returns> the computed ratio. </returns>
 public virtual float ComputeProbability(BasicStats stats)
 {
     // Both counts are incremented by one before dividing.
     float termOccurrences = stats.TotalTermFreq + 1F;
     float fieldTokens = stats.NumberOfFieldTokens + 1F;

     return termOccurrences / fieldTokens;
 }
Esempio n. 14
0
 /// <summary>
 /// Creates a scorer that stores the given similarity, statistics and norms.
 /// </summary>
 /// <param name="outerInstance"> the similarity this scorer belongs to. </param>
 /// <param name="stats"> the statistics to store. </param>
 /// <param name="norms"> the norm values to store. </param>
 internal BasicSimScorer(SimilarityBase outerInstance, BasicStats stats, NumericDocValues norms)
 {
     // Plain field capture; nothing is validated or computed here.
     this.norms = norms;
     this.stats = stats;
     this.outerInstance = outerInstance;
 }
Esempio n. 15
0
 /// <summary>
 /// Extension point for explaining the score. Subclasses override this method to
 /// append clauses describing the details of their scoring formulae to
 /// <paramref name="expl"/>.
 /// <para>The default implementation does nothing.</para>
 /// </summary>
 /// <param name="expl"> the explanation to extend with details. </param>
 /// <param name="stats"> the corpus level statistics. </param>
 /// <param name="doc"> the document id. </param>
 /// <param name="freq"> the term frequency. </param>
 /// <param name="docLen"> the document length. </param>
 protected internal virtual void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
 {
     // Intentionally empty: overriding classes contribute their own details.
 }
Esempio n. 16
0
 /// <summary>
 /// Computes the score of a document from the given statistics, term frequency
 /// and document length.
 /// <para>Subclasses must apply their scoring formula in this method.</para>
 /// </summary>
 /// <param name="stats"> the corpus level statistics. </param>
 /// <param name="freq"> the term frequency. </param>
 /// <param name="docLen"> the document length. </param>
 /// <returns> the score. </returns>
 public abstract float Score(BasicStats stats, float freq, float docLen);
Esempio n. 17
0
 /// <summary>
 /// Computes <c>TotalBoost * log(1 + ((1 - lambda) * freq / docLen) / (lambda * collectionProbability))</c>
 /// using the natural logarithm.
 /// </summary>
 /// <param name="stats"> the statistics; cast to <c>LMStats</c> to read the collection probability. </param>
 /// <param name="freq"> the term frequency. </param>
 /// <param name="docLen"> the document length. </param>
 /// <returns> the score. </returns>
 public override float Score(BasicStats stats, float freq, float docLen)
 {
     var documentPart = (1 - lambda) * freq / docLen;
     var collectionPart = lambda * ((LMStats)stats).CollectionProbability;
     float logTerm = (float)Math.Log(1 + documentPart / collectionPart);

     return stats.TotalBoost * logTerm;
 }
Esempio n. 18
0
        /// <summary>
        /// Computes <c>TotalBoost * (log(1 + freq / (mu * collectionProbability)) + log(mu / (docLen + mu)))</c>
        /// using the natural logarithm, and returns 0 whenever that value is not
        /// greater than 0.
        /// </summary>
        /// <param name="stats"> the statistics; cast to <c>LMStats</c> to read the collection probability. </param>
        /// <param name="freq"> the term frequency. </param>
        /// <param name="docLen"> the document length. </param>
        /// <returns> the score, never negative. </returns>
        public override float Score(BasicStats stats, float freq, float docLen)
        {
            double termWeight = Math.Log(1 + freq / (mu * ((LMStats)stats).CollectionProbability));
            double documentNorm = Math.Log(mu / (docLen + mu));
            float boosted = stats.TotalBoost * (float)(termWeight + documentNorm);

            // Explicit comparison rather than Math.Max: anything that is not > 0 (including NaN) becomes 0.
            if (boosted > 0.0f)
            {
                return boosted;
            }

            return 0.0f;
        }
Esempio n. 19
0
        /// <summary>
        /// Computes the score from <paramref name="tfn"/> and
        /// <c>lambda = (TotalTermFreq + 1) / (NumberOfDocuments + 1)</c> as the sum of
        /// three terms: <c>tfn * log2(tfn / lambda)</c>,
        /// <c>(lambda + 1 / (12 * tfn) - tfn) * LOG2_E</c> and
        /// <c>0.5 * log2(2 * PI * tfn)</c>.
        /// </summary>
        /// <param name="stats"> the corpus level statistics. </param>
        /// <param name="tfn"> the normalized term frequency. </param>
        /// <returns> the score. </returns>
        public override sealed float Score(BasicStats stats, float tfn)
        {
            float lambda = (float)(stats.TotalTermFreq + 1) / (stats.NumberOfDocuments + 1);

            double logRatioTerm = tfn * SimilarityBase.Log2(tfn / lambda);
            double linearTerm = (lambda + 1 / (12 * tfn) - tfn) * LOG2_E;
            double piTerm = 0.5 * SimilarityBase.Log2(2 * Math.PI * tfn);

            return (float)(logRatioTerm + linearTerm + piTerm);
        }
Esempio n. 20
0
 /// <summary>
 /// Computes the normalized term frequency as
 /// <c>tf * (avgFieldLength / len) ^ z</c>.
 /// </summary>
 /// <param name="stats"> the statistics supplying the average field length. </param>
 /// <param name="tf"> the term frequency. </param>
 /// <param name="len"> the length the average field length is divided by. </param>
 /// <returns> the normalized term frequency. </returns>
 public override float Tfn(BasicStats stats, float tf, float len)
 {
     var lengthRatio = stats.m_avgFieldLength / len;
     double scale = Math.Pow(lengthRatio, z);

     return (float)(tf * scale);
 }
Esempio n. 21
0
 /// <summary>
 /// Computes the score as the total boost multiplied by the score of
 /// <c>m_distribution</c>, evaluated for the term frequency normalized by
 /// <c>m_normalization</c> and the lambda calculated by <c>m_lambda</c>.
 /// </summary>
 /// <param name="stats"> the corpus level statistics. </param>
 /// <param name="freq"> the term frequency. </param>
 /// <param name="docLen"> the document length. </param>
 /// <returns> the score. </returns>
 public override float Score(BasicStats stats, float freq, float docLen)
 {
     float normalizedFreq = m_normalization.Tfn(stats, freq, docLen);
     float lambdaValue = m_lambda.CalculateLambda(stats);
     float distributionScore = m_distribution.Score(stats, normalizedFreq, lambdaValue);

     return stats.TotalBoost * distributionScore;
 }