/// <summary>
/// Adds the smoothing details to the explanation: the query boost (only when
/// it differs from 1) and the <c>lambda</c> mixing parameter, then delegates
/// to the base class for any remaining details.
/// </summary>
protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
{
    float boost = stats.TotalBoost;
    if (boost != 1.0f)
    {
        expl.AddDetail(new Explanation(boost, "boost"));
    }
    expl.AddDetail(new Explanation(lambda, "lambda"));
    base.Explain(expl, stats, doc, freq, docLen);
}
/// <summary>
/// Builds one <see cref="BasicStats"/> per term (via <c>NewStats</c> +
/// <c>FillBasicStats</c>). A single-term query is scored from its stats
/// directly; a multi-term query (e.g. a phrase) wraps the per-term stats in a
/// <see cref="MultiSimilarity.MultiStats"/>.
/// </summary>
/// <param name="queryBoost"> the boost applied to the whole query. </param>
/// <param name="collectionStats"> collection-wide statistics for the field. </param>
/// <param name="termStats"> per-term statistics, one entry per query term. </param>
/// <returns> the combined weight object. </returns>
public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    BasicStats[] stats = new BasicStats[termStats.Length];
    for (int i = 0; i < termStats.Length; i++)
    {
        stats[i] = NewStats(collectionStats.Field, queryBoost);
        FillBasicStats(stats[i], collectionStats, termStats[i]);
    }
    // FIX: the original used "... as SimWeight", an unchecked soft cast that
    // would silently produce null if the cast ever failed. MultiStats is a
    // SimWeight, so cast directly; a failure now throws instead of returning null.
    return stats.Length == 1 ? stats[0] : (SimWeight)new MultiSimilarity.MultiStats(stats);
}
/// <summary>
/// Explains the score. The implementation here provides a basic explanation
/// in the format <em>Score(name-of-similarity, doc=doc-id,
/// freq=term-frequency), computed from:</em>, and attaches the score
/// (computed via the <see cref="Score(BasicStats, float, float)"/> method)
/// and the explanation for the term frequency. Subclasses content with this
/// format may add additional details in
/// <see cref="Explain(Explanation, BasicStats, int, float, float)"/>.
/// </summary>
/// <param name="stats"> the corpus level statistics. </param>
/// <param name="doc"> the document id. </param>
/// <param name="freq"> the term frequency and its explanation. </param>
/// <param name="docLen"> the document length. </param>
/// <returns> the explanation. </returns>
public virtual Explanation Explain(BasicStats stats, int doc, Explanation freq, float docLen)
{
    float score = Score(stats, freq.Value, docLen);
    Explanation explanation = new Explanation();
    explanation.Value = score;
    explanation.Description = "score(" + this.GetType().Name + ", doc=" + doc + ", freq=" + freq.Value + "), computed from:";
    explanation.AddDetail(freq);
    // Let subclasses append their formula-specific details.
    Explain(explanation, stats, doc, freq.Value, docLen);
    return explanation;
}
/// <summary>
/// Adds the information-based model's sub-explanations: the query boost
/// (only when it differs from 1), the normalized term frequency, the lambda
/// parameter, and the distribution's score built from the two.
/// </summary>
protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
{
    float boost = stats.TotalBoost;
    if (boost != 1.0f)
    {
        expl.AddDetail(new Explanation(boost, "boost"));
    }
    Explanation normalizationExpl = m_normalization.Explain(stats, freq, docLen);
    Explanation lambdaExpl = m_lambda.Explain(stats);
    expl.AddDetail(normalizationExpl);
    expl.AddDetail(lambdaExpl);
    expl.AddDetail(m_distribution.Explain(stats, normalizationExpl.Value, lambdaExpl.Value));
}
/// <summary>
/// Scores via the divergence <c>D</c> between the observed term distribution
/// and the expected one, plus a normalization term.
/// </summary>
public override sealed float Score(BasicStats stats, float tfn)
{
    // We have to ensure phi is always < 1 for tiny TTF values, otherwise
    // (1 - phi) can go negative, resulting in NaN. The cleanest way is to
    // unconditionally add tfn to totalTermFreq to create a 'normalized' F.
    double normalizedF = stats.TotalTermFreq + 1 + tfn;
    double phi = (double)tfn / normalizedF;
    double oneMinusPhi = 1 - phi;
    double priorP = 1.0 / (stats.NumberOfDocuments + 1);
    // KL-style divergence of the observed proportion from the prior.
    double divergence = phi * SimilarityBase.Log2(phi / priorP) + oneMinusPhi * SimilarityBase.Log2(oneMinusPhi / (1 - priorP));
    return (float)(divergence * normalizedF + 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * tfn * oneMinusPhi));
}
/// <summary>
/// Adds the DFR sub-explanations: the query boost (only when it differs
/// from 1), the term-frequency normalization, the basic model, and the
/// after-effect — the latter two evaluated at the normalized frequency.
/// </summary>
protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
{
    float boost = stats.TotalBoost;
    if (boost != 1.0f)
    {
        expl.AddDetail(new Explanation(boost, "boost"));
    }
    Explanation normalizationExpl = m_normalization.Explain(stats, freq, docLen);
    float tfn = normalizationExpl.Value;
    expl.AddDetail(normalizationExpl);
    expl.AddDetail(m_basicModel.Explain(stats, tfn));
    expl.AddDetail(m_afterEffect.Explain(stats, tfn));
}
/// <summary>
/// Adds the Dirichlet-smoothing details: the query boost (only when it
/// differs from 1), the <c>mu</c> parameter, the term weight, and the
/// document norm — mirroring the two terms of the scoring formula.
/// </summary>
protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
{
    float boost = stats.TotalBoost;
    if (boost != 1.0f)
    {
        expl.AddDetail(new Explanation(boost, "boost"));
    }
    expl.AddDetail(new Explanation(mu, "mu"));
    float weight = (float)Math.Log(1 + freq / (mu * ((LMStats)stats).CollectionProbability));
    Explanation weightExpl = new Explanation();
    weightExpl.Value = weight;
    weightExpl.Description = "term weight";
    expl.AddDetail(weightExpl);
    float documentNorm = (float)Math.Log(mu / (docLen + mu));
    expl.AddDetail(new Explanation(documentNorm, "document norm"));
    base.Explain(expl, stats, doc, freq, docLen);
}
/// <summary>
/// Fills all member fields defined in <see cref="BasicStats"/> in <paramref name="stats"/>.
/// Subclasses can override this method to fill additional stats.
/// </summary>
protected internal virtual void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats)
{
    // #positions(field) must be >= #positions(term), unless the codec does
    // not report SumTotalTermFreq at all (-1).
    Debug.Assert(collectionStats.SumTotalTermFreq == -1 || collectionStats.SumTotalTermFreq >= termStats.TotalTermFreq);

    long numberOfDocuments = collectionStats.MaxDoc;
    long docFreq = termStats.DocFreq;
    long totalTermFreq = termStats.TotalTermFreq;
    if (totalTermFreq == -1)
    {
        // codec does not supply totalTermFreq: substitute docFreq
        totalTermFreq = docFreq;
    }

    long sumTotalTermFreq = collectionStats.SumTotalTermFreq;
    long numberOfFieldTokens;
    float avgFieldLength;
    if (sumTotalTermFreq > 0)
    {
        numberOfFieldTokens = sumTotalTermFreq;
        avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
    }
    else
    {
        // Field does not exist, or the codec/field omitted frequencies.
        // Substitute harmless values — negative values cause NaN/Inf for
        // some scorers.
        numberOfFieldTokens = docFreq;
        avgFieldLength = 1;
    }

    // TODO: add sumDocFreq for field (numberOfFieldPostings)
    stats.NumberOfDocuments = numberOfDocuments;
    stats.NumberOfFieldTokens = numberOfFieldTokens;
    stats.AvgFieldLength = avgFieldLength;
    stats.DocFreq = docFreq;
    stats.TotalTermFreq = totalTermFreq;
}
/// <summary>
/// Creates the scorer for the given weight: a single
/// <see cref="BasicSimScorer"/> for one-term stats, or a
/// <see cref="MultiSimilarity.MultiSimScorer"/> summing one sub-scorer per
/// term for multi-term stats.
/// </summary>
public override SimScorer GetSimScorer(SimWeight stats, AtomicReaderContext context)
{
    if (!(stats is MultiSimilarity.MultiStats))
    {
        // Single-term query: score it directly from its stats.
        BasicStats single = (BasicStats)stats;
        return new BasicSimScorer(this, single, context.AtomicReader.GetNormValues(single.Field));
    }

    // A multi-term query (e.g. phrase): return the summation, scoring
    // almost as if it were a boolean query.
    SimWeight[] subStats = ((MultiSimilarity.MultiStats)stats).subStats;
    SimScorer[] subScorers = new SimScorer[subStats.Length];
    for (int i = 0; i < subScorers.Length; i++)
    {
        BasicStats sub = (BasicStats)subStats[i];
        subScorers[i] = new BasicSimScorer(this, sub, context.AtomicReader.GetNormValues(sub.Field));
    }
    return new MultiSimilarity.MultiSimScorer(subScorers);
}
/// <summary>
/// DFR score: boost × basic-model score × after-effect score, all evaluated
/// at the normalized term frequency.
/// </summary>
public override float Score(BasicStats stats, float freq, float docLen)
{
    float tfn = m_normalization.Tfn(stats, freq, docLen);
    float basicModelScore = m_basicModel.Score(stats, tfn);
    float afterEffectScore = m_afterEffect.Score(stats, tfn);
    return stats.TotalBoost * basicModelScore * afterEffectScore;
}
/// <summary>
/// Adds a single detail: the collection-level probability computed by the
/// collection model.
/// </summary>
protected internal override void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen)
{
    float collectionProbability = m_collectionModel.ComputeProbability(stats);
    expl.AddDetail(new Explanation(collectionProbability, "collection probability"));
}
/// <summary>
/// Scores as <c>-log(lambda / (tfn + lambda))</c>.
/// </summary>
public override sealed float Score(BasicStats stats, float tfn, float lambda)
{
    // Float division first (as in the original expression), then widen to
    // double for the logarithm.
    float ratio = lambda / (tfn + lambda);
    return (float)-Math.Log(ratio);
}
/// <summary>
/// Computes the collection probability as
/// <c>(TotalTermFreq + 1) / (NumberOfFieldTokens + 1)</c> — the add-one
/// keeps the result finite and non-zero for empty statistics.
/// </summary>
public virtual float ComputeProbability(BasicStats stats)
{
    float numerator = stats.TotalTermFreq + 1F;
    float denominator = stats.NumberOfFieldTokens + 1F;
    return numerator / denominator;
}
/// <summary>
/// Sole constructor: stores the owning similarity, the per-term statistics,
/// and the field's norm values.
/// </summary>
internal BasicSimScorer(SimilarityBase outerInstance, BasicStats stats, NumericDocValues norms)
{
    this.outerInstance = outerInstance;
    this.stats = stats;
    this.norms = norms;
}
/// <summary>
/// Subclasses should implement this method to explain the score. <paramref name="expl"/>
/// already contains the score, the name of the class and the doc id, as well
/// as the term frequency and its explanation; subclasses can add additional
/// clauses to explain details of their scoring formulae.
/// <para>The default implementation does nothing.</para>
/// </summary>
/// <param name="expl"> the explanation to extend with details. </param>
/// <param name="stats"> the corpus level statistics. </param>
/// <param name="doc"> the document id. </param>
/// <param name="freq"> the term frequency. </param>
/// <param name="docLen"> the document length. </param>
protected internal virtual void Explain(Explanation expl, BasicStats stats, int doc, float freq, float docLen) { }
/// <summary>
/// Scores the document <c>doc</c>.
/// <para>Subclasses must apply their scoring formula in this class.</para>
/// </summary>
/// <param name="stats"> the corpus level statistics. </param>
/// <param name="freq"> the term frequency. </param>
/// <param name="docLen"> the document length. </param>
/// <returns> the score. </returns>
public abstract float Score(BasicStats stats, float freq, float docLen);
/// <summary>
/// Jelinek-Mercer score: mixes the document model (weighted by
/// <c>1 - lambda</c>) with the collection model (weighted by <c>lambda</c>)
/// inside a log, scaled by the query boost.
/// </summary>
public override float Score(BasicStats stats, float freq, float docLen)
{
    float documentTerm = (1 - lambda) * freq / docLen;
    float collectionTerm = lambda * ((LMStats)stats).CollectionProbability;
    return stats.TotalBoost * (float)Math.Log(1 + documentTerm / collectionTerm);
}
/// <summary>
/// Dirichlet-smoothed score: the sum of the term weight and the document
/// norm (both in log space), scaled by the query boost. Non-positive
/// results are clamped to zero.
/// </summary>
public override float Score(BasicStats stats, float freq, float docLen)
{
    double termWeight = Math.Log(1 + freq / (mu * ((LMStats)stats).CollectionProbability));
    double documentNorm = Math.Log(mu / (docLen + mu));
    float score = stats.TotalBoost * (float)(termWeight + documentNorm);
    // The ">" comparison (rather than Math.Max) also maps NaN to 0,
    // matching the original ternary.
    if (score > 0.0f)
    {
        return score;
    }
    return 0.0f;
}
/// <summary>
/// Scores via an approximation of the term's information content under a
/// Poisson-style model with mean <c>lambda = (F + 1) / (N + 1)</c>.
/// </summary>
public override sealed float Score(BasicStats stats, float tfn)
{
    // Add-one smoothed mean frequency per document.
    float lambda = (float)(stats.TotalTermFreq + 1) / (stats.NumberOfDocuments + 1);
    double score = tfn * SimilarityBase.Log2(tfn / lambda)
        + (lambda + 1 / (12 * tfn) - tfn) * LOG2_E
        + 0.5 * SimilarityBase.Log2(2 * Math.PI * tfn);
    return (float)score;
}
/// <summary>
/// Normalizes the raw term frequency by the ratio of the average field
/// length to this document's length, raised to the power <c>z</c>.
/// </summary>
public override float Tfn(BasicStats stats, float tf, float len)
{
    double lengthFactor = Math.Pow(stats.m_avgFieldLength / len, z);
    return (float)(tf * lengthFactor);
}
/// <summary>
/// Information-based score: the distribution evaluated at the normalized
/// term frequency and the computed lambda, scaled by the query boost.
/// </summary>
public override float Score(BasicStats stats, float freq, float docLen)
{
    float tfn = m_normalization.Tfn(stats, freq, docLen);
    float lambda = m_lambda.CalculateLambda(stats);
    return stats.TotalBoost * m_distribution.Score(stats, tfn, lambda);
}