/// <summary>
/// Builds a <see cref="PerFieldSimWeight"/> that stores the delegate returned by
/// <c>Get</c> for the field of <paramref name="collectionStats"/>, together with the
/// weight that delegate computes for the same arguments.
/// </summary>
/// <param name="queryBoost"> The query-time boost, forwarded unchanged to the delegate. </param>
/// <param name="collectionStats"> Collection-level statistics; its field selects the delegate. </param>
/// <param name="termStats"> Term-level statistics, forwarded unchanged to the delegate. </param>
/// <returns> The populated <see cref="PerFieldSimWeight"/>. </returns>
public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    PerFieldSimWeight weight = new PerFieldSimWeight();
    weight.@delegate = Get(collectionStats.Field);
    // The weight has to be produced by the very delegate that was just stored.
    weight.delegateWeight = weight.@delegate.ComputeWeight(queryBoost, collectionStats, termStats);
    return weight;
}
/// <summary>
/// Fills the usual statistics through the base implementation and then stores the
/// collection probability of the current term on the <see cref="LMStats"/> instance.
/// </summary>
/// <param name="stats"> The statistics object to fill; it is cast to <see cref="LMStats"/>. </param>
/// <param name="collectionStats"> Collection-level statistics passed on to the base implementation. </param>
/// <param name="termStats"> Term-level statistics passed on to the base implementation. </param>
protected internal override void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats)
{
    base.FillBasicStats(stats, collectionStats, termStats);

    // The cast target is evaluated before the probability is computed.
    ((LMStats)stats).CollectionProbability = m_collectionModel.ComputeProbability(stats);
}
/// <summary>
/// Handles the window's Loaded event: creates and starts a task that computes the
/// collection statistics and, when a result is available, queues a UI update on the dispatcher.
/// </summary>
/// <param name="sender"> The event source (not used). </param>
/// <param name="e"> The event arguments (not used). </param>
private void MusicDatabaseWindow_Loaded(object sender, System.Windows.RoutedEventArgs e)
{
    Task worker = new Task(() =>
    {
        try
        {
            this.collectionStatistics = this.generator.ComputeStatistics();
            if (this.collectionStatistics == null)
            {
                // Nothing to show; the finally block below still runs.
                return;
            }

            this.Dispatcher.BeginInvokeAction(this.UpdateUI);
        }
        catch (Exception ex)
        {
            Utility.WriteToErrorLog(ex.ToString());
            Dialogs.Error("Error calculating statistics: " + ex.Message);
        }
        finally
        {
            // Whatever happened, the window may be closed again and the busy indicator is cleared.
            this.canClose = true;
            this.Dispatcher.BeginInvokeAction(() => this.busyIndicator.IsBusy = false);

            // A close was requested while the computation was running; honour it now.
            if (this.shouldCancel)
            {
                this.Dispatcher.BeginInvokeAction(this.Close);
            }
        }
    });

    this.task = worker;
    worker.Start();
}
/// <summary>
/// Builds a <see cref="PerFieldSimWeight"/> that stores the delegate returned by
/// <c>Get</c> for the field of <paramref name="collectionStats"/>, together with the
/// weight that delegate computes for the same arguments.
/// </summary>
/// <param name="queryBoost"> The query-time boost, forwarded unchanged to the delegate. </param>
/// <param name="collectionStats"> Collection-level statistics; its field selects the delegate. </param>
/// <param name="termStats"> Term-level statistics, forwarded unchanged to the delegate. </param>
/// <returns> The populated <see cref="PerFieldSimWeight"/>. </returns>
public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    PerFieldSimWeight weight = new PerFieldSimWeight();
    weight.@delegate = Get(collectionStats.Field());
    // The weight has to be produced by the very delegate that was just stored.
    weight.DelegateWeight = weight.@delegate.ComputeWeight(queryBoost, collectionStats, termStats);
    return weight;
}
/// <summary>
/// Computes the idf score factor for a single term and wraps it in an explanation.
///
/// <para/>
/// The default implementation evaluates:
///
/// <code>
/// Idf(docFreq, searcher.MaxDoc);
/// </code>
///
/// <see cref="CollectionStatistics.MaxDoc"/> is used rather than
/// <see cref="Lucene.Net.Index.IndexReader.NumDocs"/> because
/// <see cref="TermStatistics.DocFreq"/> is used as well: when the latter is inaccurate,
/// so is <see cref="CollectionStatistics.MaxDoc"/>, and in the same direction.
/// <see cref="CollectionStatistics.MaxDoc"/> is also more efficient to compute.
/// </summary>
/// <param name="collectionStats"> Collection-level statistics </param>
/// <param name="termStats"> Term-level statistics for the term </param>
/// <returns> An explanation holding the idf score factor and a description
///           that names the document frequency and maximum document count used. </returns>
public virtual Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics termStats)
{
    long docFreq = termStats.DocFreq;
    long maxDocs = collectionStats.MaxDoc;
    float factor = Idf(docFreq, maxDocs);
    string description = "idf(docFreq=" + docFreq + ", maxDocs=" + maxDocs + ")";
    return new Explanation(factor, description);
}
/// <summary>
/// Asks every entry of <c>Sims</c> to compute its weight for the given arguments and
/// bundles the results, in the same order, into a <see cref="MultiStats"/>.
/// </summary>
/// <param name="queryBoost"> The query-time boost, passed to every entry. </param>
/// <param name="collectionStats"> Collection-level statistics, passed to every entry. </param>
/// <param name="termStats"> Term-level statistics, passed to every entry. </param>
/// <returns> A <see cref="MultiStats"/> wrapping one weight per entry of <c>Sims</c>. </returns>
public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    int count = Sims.Length;
    SimWeight[] weights = new SimWeight[count];
    for (int index = 0; index < count; index++)
    {
        weights[index] = Sims[index].ComputeWeight(queryBoost, collectionStats, termStats);
    }

    return new MultiStats(weights);
}
/// <summary>
/// Creates a <see cref="DocumentStore"/> inside a <c>using</c> block and sends a
/// <see cref="GetCollectionStatisticsOperation"/> through its maintenance executor.
/// The returned statistics are only held in a local variable and not used further.
/// </summary>
public CollectionStats()
{
    // The store is disposed when the using block ends.
    using (var store = new DocumentStore())
    {
        // NOTE(review): the region below looks like a snippet marker consumed by external
        // tooling - confirm before changing anything between the markers.
        #region stats_3
        CollectionStatistics stats = store.Maintenance.Send(new GetCollectionStatisticsOperation());
        #endregion
    }
}
/// <summary>
/// Computes one weight per wrapped similarity in <c>m_sims</c>, keeping their order,
/// and returns them combined in a <see cref="MultiStats"/>.
/// </summary>
/// <param name="queryBoost"> The query-time boost, passed to every wrapped entry. </param>
/// <param name="collectionStats"> Collection-level statistics, passed to every wrapped entry. </param>
/// <param name="termStats"> Term-level statistics, passed to every wrapped entry. </param>
/// <returns> A <see cref="MultiStats"/> holding the individual weights. </returns>
public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    var sims = m_sims;
    SimWeight[] weights = new SimWeight[sims.Length];

    int slot = 0;
    foreach (var sim in sims)
    {
        weights[slot] = sim.ComputeWeight(queryBoost, collectionStats, termStats);
        slot++;
    }

    return new MultiStats(weights);
}
/// <summary>
/// Creates and fills one <see cref="BasicStats"/> per entry of <paramref name="termStats"/>.
/// A single entry is returned as-is; any other count is wrapped in a
/// <see cref="MultiSimilarity.MultiStats"/>.
/// </summary>
/// <param name="queryBoost"> The query-time boost handed to <c>NewStats</c>. </param>
/// <param name="collectionStats"> Collection-level statistics; supplies the field and is used to fill each stats object. </param>
/// <param name="termStats"> Term-level statistics, one stats object is produced per element. </param>
/// <returns> The single <see cref="BasicStats"/>, or a <see cref="MultiSimilarity.MultiStats"/> over all of them. </returns>
public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    BasicStats[] perTerm = new BasicStats[termStats.Length];

    int position = 0;
    foreach (TermStatistics termStat in termStats)
    {
        BasicStats current = NewStats(collectionStats.Field(), queryBoost);
        FillBasicStats(current, collectionStats, termStat);
        perTerm[position] = current;
        position++;
    }

    if (perTerm.Length == 1)
    {
        return perTerm[0];
    }

    return new MultiSimilarity.MultiStats(perTerm);
}
/// <summary>
/// Produces a filled <see cref="BasicStats"/> for every element of
/// <paramref name="termStats"/> and returns either that one object (exactly one term)
/// or a <see cref="MultiSimilarity.MultiStats"/> over the whole array.
/// </summary>
/// <param name="queryBoost"> The query-time boost handed to <c>NewStats</c>. </param>
/// <param name="collectionStats"> Collection-level statistics; supplies the field and is used to fill each stats object. </param>
/// <param name="termStats"> Term-level statistics, one stats object is produced per element. </param>
/// <returns> The resulting weight object. </returns>
public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    int termCount = termStats.Length;
    BasicStats[] filled = new BasicStats[termCount];
    for (int t = 0; t < termCount; t++)
    {
        filled[t] = NewStats(collectionStats.Field(), queryBoost);
        FillBasicStats(filled[t], collectionStats, termStats[t]);
    }

    SimWeight result;
    if (filled.Length == 1)
    {
        result = filled[0];
    }
    else
    {
        result = new MultiSimilarity.MultiStats(filled) as SimWeight;
    }

    return result;
}
/// <summary>
/// Reads the total term frequency of the text field from the searcher, stores it in
/// <c>NumberOfTokens</c>, passes the stored text of every document to
/// <c>LuceneService.GetTokenDataForDoc</c>, and records the token count in
/// <c>AllFields</c> under the key "Tokens".
/// </summary>
private void SetTokenNumber()
{
    CollectionStatistics stat = LuceneService.Searcher.CollectionStatistics(ProjectInfo.TextFieldKey);

    // SumTotalTermFreq is -1 when the statistic is unavailable; casting that straight to
    // ulong would yield 18446744073709551615 (or throw in a checked context).
    long totalTokens = stat.SumTotalTermFreq;
    this.NumberOfTokens = totalTokens > 0 ? (ulong)totalTokens : 0UL;

    for (int i = 0; i < LuceneService.DirReader.MaxDoc; i++)
    {
        // GetField returns null for a document that has no such field; skip it instead of
        // failing with a NullReferenceException.
        var textField = LuceneService.DirReader.Document(i).GetField(ProjectInfo.TextFieldKey);
        if (textField == null)
        {
            continue;
        }

        LuceneService.GetTokenDataForDoc(textField.GetStringValue());
    }

    AllFields.Add("Tokens", this.NumberOfTokens);
}
/// <summary>
/// Compares the collection names in <paramref name="expected"/> (leaving out those that
/// <c>CollectionName.IsHiLoCollection</c> identifies) with the <c>table_name</c> values
/// found in <paramref name="actual"/>.
/// </summary>
/// <param name="expected"> Statistics whose collection keys are the expected names. </param>
/// <param name="actual"> Table whose rows carry a <c>table_name</c> string column. </param>
public static void AssertDatabaseCollections(CollectionStatistics expected, DataTable actual)
{
    var expectedCollectionNames = expected.Collections.Keys
        .Where(name => CollectionName.IsHiLoCollection(name) == false)
        .ToList();

    var actualCollectionNames =
        (from row in actual.AsEnumerable()
         select row.Field<string>("table_name")).ToList();

    AssertCollectionsHaveTheSameElements(expectedCollectionNames, actualCollectionNames);
}
/// <summary>
/// Returns the average field length. The default implementation divides
/// <c>sumTotalTermFreq</c> by <c>maxDoc</c>, and falls back to <c>1</c> when the index
/// does not store sumTotalTermFreq (Lucene 3.x indexes or any field that omits
/// frequency information).
/// </summary>
/// <param name="collectionStats"> Collection-level statistics to read the values from. </param>
/// <returns> The average as a <see cref="float"/>, or <c>1</c> when the sum is not positive. </returns>
protected internal virtual float AvgFieldLength(CollectionStatistics collectionStats)
{
    long totalTermFreq = collectionStats.SumTotalTermFreq;
    if (totalTermFreq > 0)
    {
        // Divide in double precision and narrow afterwards.
        return (float)(totalTermFreq / (double)collectionStats.MaxDoc);
    }

    // field does not exist, or stat is unsupported
    return 1f;
}
/// <summary>
/// Builds the <see cref="BM25Stats"/> for a query: the idf explanation (single-term or
/// multi-term overload, depending on the number of terms), the average field length, and
/// a 256-entry cache with the frequency-independent part of the BM25 formula for every
/// byte norm value.
/// </summary>
/// <param name="queryBoost"> The query-time boost stored in the result. </param>
/// <param name="collectionStats"> Collection-level statistics; supplies the field, idf inputs and average length. </param>
/// <param name="termStats"> Term-level statistics used for the idf explanation. </param>
/// <returns> The populated <see cref="BM25Stats"/>. </returns>
public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    Explanation idfExplanation;
    if (termStats.Length == 1)
    {
        idfExplanation = IdfExplain(collectionStats, termStats[0]);
    }
    else
    {
        idfExplanation = IdfExplain(collectionStats, termStats);
    }

    float averageLength = AvgFieldLength(collectionStats);

    // compute freq-independent part of bm25 equation across all norm values
    float[] normCache = new float[256];
    for (int norm = 0; norm < normCache.Length; norm++)
    {
        normCache[norm] = k1 * ((1 - b) + b * DecodeNormValue((byte)norm) / averageLength);
    }

    return new BM25Stats(collectionStats.Field, idfExplanation, queryBoost, averageLength, normCache);
}
/// <summary>
/// Sends a <see cref="GetCollectionStatisticsOperation"/> through <c>Store.Maintenance</c>
/// and returns the names of the collections it reports.
/// </summary>
/// <param name="Database"> Database name; only used in the log message written on failure. </param>
/// <returns> The collection names, or <c>null</c> when any exception was caught and logged. </returns>
public List<string> GetCollection(string Database)
{
    try
    {
        CollectionStatistics collectionStats = Store.Maintenance.Send(new GetCollectionStatisticsOperation());
        var collectionNames = collectionStats.Collections.Keys.ToList();
        return collectionNames;
    }
    catch (Exception ex)
    {
        string failureContext = "Could not get Collection from Database in RavenDB " + Database;
        DME_Editor.AddLogMessage(ex.Message, failureContext, DateTime.Now, -1, ConnProp.Url, Errors.Failed);
        return null;
    }
}
/// <summary>
/// Computes the idf score factor for a phrase.
///
/// <para/>
/// The default implementation adds up the idf factor of every term in the phrase and
/// attaches one detail explanation per term.
/// </summary>
/// <param name="collectionStats"> Collection-level statistics </param>
/// <param name="termStats"> Term-level statistics for the terms in the phrase </param>
/// <returns> An explanation whose value is the summed idf factor and whose details
///           explain each term. </returns>
public virtual Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats)
{
    long maxDocs = collectionStats.MaxDoc;
    float total = 0.0f;

    Explanation summary = new Explanation();
    summary.Description = "idf(), sum of:";

    for (int i = 0; i < termStats.Length; i++)
    {
        long docFreq = termStats[i].DocFreq;
        float termIdf = Idf(docFreq, maxDocs);
        string detailText = "idf(docFreq=" + docFreq + ", maxDocs=" + maxDocs + ")";
        summary.AddDetail(new Explanation(termIdf, detailText));
        total += termIdf;
    }

    summary.Value = total;
    return summary;
}
/// <summary>
/// Fills all member fields defined in <see cref="BasicStats"/> on <paramref name="stats"/>.
/// Subclasses can override this method to fill additional stats.
/// </summary>
/// <param name="stats"> The statistics object that receives the values. </param>
/// <param name="collectionStats"> Source of the document count and the field's total term frequency. </param>
/// <param name="termStats"> Source of the term's document frequency and total term frequency. </param>
protected internal virtual void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats)
{
    // #positions(field) must be >= #positions(term)
    Debug.Assert(collectionStats.SumTotalTermFreq() == -1 || collectionStats.SumTotalTermFreq() >= termStats.TotalTermFreq());

    long documentCount = collectionStats.MaxDoc;
    long termDocFreq = termStats.DocFreq();
    long termTotalFreq = termStats.TotalTermFreq();
    if (termTotalFreq == -1)
    {
        // codec does not supply totalTermFreq: substitute docFreq
        termTotalFreq = termDocFreq;
    }

    // A non-positive sum means the field does not exist, the codec does not supply the
    // measure, or frequencies were omitted for the field. Something has to be provided in
    // that case, because negative values cause NaN/Inf for some scorers.
    long fieldSum = collectionStats.SumTotalTermFreq();
    bool hasFieldTokens = fieldSum > 0;
    long fieldTokenCount = hasFieldTokens ? fieldSum : termDocFreq;
    float averageLength = hasFieldTokens ? (float)fieldTokenCount / documentCount : 1;

    // TODO: add sumDocFreq for field (numberOfFieldPostings)
    stats.NumberOfDocuments = documentCount;
    stats.NumberOfFieldTokens = fieldTokenCount;
    stats.AvgFieldLength = averageLength;
    stats.DocFreq = termDocFreq;
    stats.TotalTermFreq = termTotalFreq;
}
/// <summary>
/// Creates the wrapper around <paramref name="collection"/> and starts with two fresh,
/// independent <see cref="CollectionStatistics"/> instances.
/// </summary>
/// <param name="collection"> The database handle stored by this instance. </param>
public LMDBCollection(LightningDatabase collection)
{
    _stats = new CollectionStatistics();
    _tempStats = new CollectionStatistics();
    _collection = collection;
}
/// <summary>
/// idf used for phrase queries: always yields the fixed value <c>1.0</c> with the
/// description "Inexplicable", regardless of the statistics passed in.
/// </summary>
/// <param name="collectionStats"> Collection-level statistics (ignored). </param>
/// <param name="termStats"> Term-level statistics (ignored). </param>
/// <returns> A new explanation with value <c>1.0</c>. </returns>
public override Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats)
{
    Explanation constantIdf = new Explanation(1.0f, "Inexplicable");
    return constantIdf;
}
/// <summary>
/// Not supported by this implementation; every call throws.
/// </summary>
/// <param name="queryBoost"> Ignored. </param>
/// <param name="collectionStats"> Ignored. </param>
/// <param name="termStats"> Ignored. </param>
/// <returns> Never returns. </returns>
/// <exception cref="System.NotSupportedException"> Always thrown. </exception>
public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    System.NotSupportedException unsupported = new System.NotSupportedException();
    throw unsupported;
}
/// <summary>
/// Reads the next object from <paramref name="reader"/> and stores it in <c>_stats</c>.
/// When the object is not a <see cref="CollectionStatistics"/>, <c>_stats</c> becomes <c>null</c>.
/// </summary>
/// <param name="reader"> The compact reader to read the object from. </param>
public void Deserialize(Common.Serialization.IO.CompactReader reader)
{
    object deserialized = reader.ReadObject();
    _stats = deserialized as CollectionStatistics;
}
/// <summary>
/// Builds the <see cref="IDFStats"/> for a query from the idf explanation of its terms.
/// A single term uses the single-term <c>IdfExplain</c> overload; any other count uses
/// the array overload.
/// </summary>
/// <param name="queryBoost"> The query-time boost stored in the result. </param>
/// <param name="collectionStats"> Collection-level statistics; supplies the field and the idf inputs. </param>
/// <param name="termStats"> Term-level statistics used for the idf explanation. </param>
/// <returns> The populated <see cref="IDFStats"/>. </returns>
public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    Explanation idfExplanation;
    if (termStats.Length == 1)
    {
        idfExplanation = IdfExplain(collectionStats, termStats[0]);
    }
    else
    {
        idfExplanation = IdfExplain(collectionStats, termStats);
    }

    return new IDFStats(collectionStats.Field, idfExplanation, queryBoost);
}
/// <summary>
/// Creates the <see cref="IDFStats"/> for a query. The idf explanation comes from the
/// single-term <c>IdfExplain</c> overload when exactly one term is given and from the
/// array overload otherwise.
/// </summary>
/// <param name="queryBoost"> The query-time boost stored in the result. </param>
/// <param name="collectionStats"> Collection-level statistics; supplies the field and the idf inputs. </param>
/// <param name="termStats"> Term-level statistics used for the idf explanation. </param>
/// <returns> The populated <see cref="IDFStats"/>. </returns>
public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    bool singleTerm = termStats.Length == 1;

    Explanation idfExplanation = singleTerm
        ? IdfExplain(collectionStats, termStats[0])
        : IdfExplain(collectionStats, termStats);

    IDFStats weight = new IDFStats(collectionStats.Field(), idfExplanation, queryBoost);
    return weight;
}
/// <summary>
/// Compute any collection-level weight (e.g. IDF, average document length, etc.) needed for scoring a query.
/// </summary>
/// <param name="queryBoost"> The query-time boost. </param>
/// <param name="collectionStats"> Collection-level statistics, such as the number of tokens in the collection. </param>
/// <param name="termStats"> Term-level statistics, such as the document frequency of a term across the collection.
///                          Declared as <c>params</c>, so zero or more entries may be passed. </param>
/// <returns> A <see cref="SimWeight"/> object with the information this similarity needs to score a query. </returns>
public abstract SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats);
/// <summary>
/// Lets the base implementation fill the usual statistics, then asks the collection
/// model for the collection probability of the current term and stores it on the
/// <see cref="LMStats"/> instance.
/// </summary>
/// <param name="stats"> The statistics object to fill; it is cast to <see cref="LMStats"/>. </param>
/// <param name="collectionStats"> Collection-level statistics passed on to the base implementation. </param>
/// <param name="termStats"> Term-level statistics passed on to the base implementation. </param>
protected internal override void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats)
{
    base.FillBasicStats(stats, collectionStats, termStats);

    LMStats languageModelStats = (LMStats)stats;
    var probability = collectionModel.ComputeProbability(stats);
    languageModelStats.CollectionProbability = probability;
}
/// <summary>
/// Not supported by this implementation; every call throws the exception produced by
/// <c>UnsupportedOperationException.Create()</c>.
/// </summary>
/// <param name="queryBoost"> Ignored. </param>
/// <param name="collectionStats"> Ignored. </param>
/// <param name="termStats"> Ignored. </param>
/// <returns> Never returns. </returns>
public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
    => throw UnsupportedOperationException.Create();
/// <summary>
/// Returns the average field length. The default implementation computes it as
/// <c>sumTotalTermFreq / maxDoc</c>, or returns <c>1</c> if the index does not store
/// sumTotalTermFreq (Lucene 3.x indexes or any field that omits frequency information).
/// </summary>
/// <param name="collectionStats"> Collection-level statistics to read the values from. </param>
/// <returns> The average as a <see cref="float"/>, or <c>1</c> when the sum is not positive. </returns>
protected internal virtual float AvgFieldLength(CollectionStatistics collectionStats)
{
    long totalTermFreq = collectionStats.SumTotalTermFreq();

    // A non-positive sum means the field does not exist, or the stat is unsupported.
    return totalTermFreq <= 0
        ? 1f
        : (float)(totalTermFreq / (double)collectionStats.MaxDoc());
}
/// <summary>
/// Forwards the statistics update for the given collection to the underlying session,
/// passing all arguments through unchanged.
/// </summary>
/// <param name="cluster"> Cluster identifier passed to the session. </param>
/// <param name="database"> Database identifier passed to the session. </param>
/// <param name="collection"> Collection identifier passed to the session. </param>
/// <param name="statistics"> The statistics passed to the session. </param>
public void UpdateCollectionStatistics(string cluster, string database, string collection, CollectionStatistics statistics)
    => _session.UpdateCollectionStatistics(cluster, database, collection, statistics);
/// <summary>
/// Compute any collection-level weight (e.g. IDF, average document length, etc.) needed for scoring a query.
/// </summary>
/// <param name="queryBoost"> The query-time boost. </param>
/// <param name="collectionStats"> Collection-level statistics, such as the number of tokens in the collection. </param>
/// <param name="termStats"> Term-level statistics, such as the document frequency of a term across the collection.
///                          Declared as <c>params</c>, so zero or more entries may be passed. </param>
/// <returns> <see cref="SimWeight"/> object with the information this <see cref="Similarity"/> needs to score a query. </returns>
public abstract SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats);
/// <summary>
/// Must not be called on this implementation; every call throws.
/// </summary>
/// <param name="queryBoost"> Ignored. </param>
/// <param name="collectionStats"> Ignored. </param>
/// <param name="termStats"> Ignored. </param>
/// <returns> Never returns. </returns>
/// <exception cref="InvalidOperationException"> Always thrown. </exception>
public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    InvalidOperationException invalidCall = new InvalidOperationException();
    throw invalidCall;
}
/// <summary>
/// idf used for phrase queries: ignores both arguments and returns a new explanation
/// with the value <c>1.0</c> and the description "Inexplicable".
/// </summary>
/// <param name="collectionStats"> Collection-level statistics (ignored). </param>
/// <param name="termStats"> Term-level statistics (ignored). </param>
/// <returns> A new explanation with value <c>1.0</c>. </returns>
public override Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats)
{
    const float ConstantIdf = 1.0f;
    return new Explanation(ConstantIdf, "Inexplicable");
}
/// <summary>
/// Computes a score factor for a phrase.
///
/// <para/>
/// The default implementation sums the idf factor for each term in the phrase and
/// records one detail explanation per term.
/// </summary>
/// <param name="collectionStats"> Collection-level statistics </param>
/// <param name="termStats"> Term-level statistics for the terms in the phrase </param>
/// <returns> An explanation that carries the idf score factor for the phrase as its
///           value and an explanation for each term as its details. </returns>
public virtual Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats)
{
    long maxDocs = collectionStats.MaxDoc();
    float sum = 0.0f;
    Explanation phrase = new Explanation { Description = "idf(), sum of:" };

    foreach (TermStatistics term in termStats)
    {
        long docFreq = term.DocFreq();
        float termIdf = Idf(docFreq, maxDocs);
        Explanation detail = new Explanation(termIdf, "idf(docFreq=" + docFreq + ", maxDocs=" + maxDocs + ")");
        phrase.AddDetail(detail);
        sum += termIdf;
    }

    phrase.Value = sum;
    return phrase;
}
/// <summary>
/// Computes a score factor for a simple term and returns an explanation
/// for that score factor.
///
/// <para/>
/// The default implementation uses:
///
/// <code>
/// Idf(docFreq, searcher.MaxDoc());
/// </code>
///
/// Note that <see cref="CollectionStatistics.MaxDoc()"/> is used instead of
/// <see cref="Lucene.Net.Index.IndexReader.NumDocs()"/> because
/// <see cref="TermStatistics.DocFreq()"/> is used as well, and when the latter
/// is inaccurate, so is <see cref="CollectionStatistics.MaxDoc()"/>, and in the same direction.
/// In addition, <see cref="CollectionStatistics.MaxDoc()"/> is more efficient to compute.
/// </summary>
/// <param name="collectionStats"> Collection-level statistics </param>
/// <param name="termStats"> Term-level statistics for the term </param>
/// <returns> An explanation that includes both the idf score factor
///           and a description of the inputs used for the term. </returns>
public virtual Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics termStats)
{
    long docFreq = termStats.DocFreq();
    long maxDocs = collectionStats.MaxDoc();

    float factor = Idf(docFreq, maxDocs);
    Explanation explanation = new Explanation(factor, "idf(docFreq=" + docFreq + ", maxDocs=" + maxDocs + ")");
    return explanation;
}
/// <summary>
/// Delegates to the wrapped instance <c>@in</c>, passing all arguments through unchanged.
/// </summary>
/// <param name="queryBoost"> The query-time boost. </param>
/// <param name="collectionStats"> Collection-level statistics. </param>
/// <param name="termStats"> Term-level statistics. </param>
/// <returns> Whatever the wrapped instance returns. </returns>
public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    SimWeight delegated = @in.ComputeWeight(queryBoost, collectionStats, termStats);
    return delegated;
}
/// <summary>
/// Builds the <see cref="BM25Stats"/> for a query: the idf explanation (single-term or
/// array overload, depending on the number of terms), the average field length, and a
/// 256-entry cache holding the frequency-independent part of the BM25 formula for each
/// norm value.
/// </summary>
/// <param name="queryBoost"> The query-time boost stored in the result. </param>
/// <param name="collectionStats"> Collection-level statistics; supplies the field, idf inputs and average length. </param>
/// <param name="termStats"> Term-level statistics used for the idf explanation. </param>
/// <returns> The populated <see cref="BM25Stats"/>. </returns>
public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    bool singleTerm = termStats.Length == 1;
    Explanation idfExplanation = singleTerm
        ? IdfExplain(collectionStats, termStats[0])
        : IdfExplain(collectionStats, termStats);

    float averageLength = AvgFieldLength(collectionStats);

    // compute freq-independent part of bm25 equation across all norm values
    float[] normCache = new float[256];
    int norm = 0;
    while (norm < normCache.Length)
    {
        // The index is narrowed to sbyte exactly as before, one entry per 8-bit pattern.
        normCache[norm] = K1_Renamed * ((1 - b) + b * DecodeNormValue((sbyte)norm) / averageLength);
        norm++;
    }

    return new BM25Stats(collectionStats.Field(), idfExplanation, queryBoost, averageLength, normCache);
}