/// <summary>
/// Looks up the similarity returned by <c>Get</c> for the field named in
/// <paramref name="collectionStats"/>, asks it to compute its weight, and returns
/// both wrapped in a <see cref="PerFieldSimWeight"/>.
/// </summary>
/// <param name="queryBoost"> Query-time boost, forwarded unchanged to the delegate. </param>
/// <param name="collectionStats"> Collection-level statistics; its <c>Field</c> selects the delegate. </param>
/// <param name="termStats"> Term-level statistics, forwarded unchanged to the delegate. </param>
/// <returns> A <see cref="PerFieldSimWeight"/> holding the delegate and the weight it computed. </returns>
public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
    PerFieldSimWeight weight = new PerFieldSimWeight();
    weight.@delegate = Get(collectionStats.Field);
    // The delegate chosen above computes the actual weight; it is stored next to
    // the delegate on the wrapper.
    weight.delegateWeight = weight.@delegate.ComputeWeight(queryBoost, collectionStats, termStats);
    return weight;
}
Esempio n. 2
0
        /// <summary>
        /// Fills the usual statistics through the base implementation, then stores the
        /// probability reported by the collection model on the <see cref="LMStats"/> instance.
        /// </summary>
        /// <param name="stats"> Statistics object to fill; it is cast to <see cref="LMStats"/>. </param>
        /// <param name="collectionStats"> Collection-level statistics, passed to the base implementation. </param>
        /// <param name="termStats"> Term-level statistics, passed to the base implementation. </param>
        protected internal override void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats)
        {
            base.FillBasicStats(stats, collectionStats, termStats);

            // The cast fails at runtime when a non-LMStats instance is supplied.
            ((LMStats)stats).CollectionProbability = m_collectionModel.ComputeProbability(stats);
        }
        /// <summary>
        /// Loaded-event handler: creates and starts a <see cref="Task"/> that computes the
        /// collection statistics and then schedules the follow-up UI work through the dispatcher.
        /// </summary>
        /// <param name="sender"> Event source (not used). </param>
        /// <param name="e"> Event arguments (not used). </param>
        private void MusicDatabaseWindow_Loaded(object sender, System.Windows.RoutedEventArgs e)
        {
            this.task = new Task(() => {
                try
                {
                    this.collectionStatistics = this.generator.ComputeStatistics();

                    // Only refresh the UI when the generator produced a result.
                    if (this.collectionStatistics != null)
                    {
                        // NOTE(review): BeginInvokeAction is presumably an extension that queues the
                        // action on the UI thread - confirm.
                        this.Dispatcher.BeginInvokeAction(this.UpdateUI);
                    }
                }
                catch (Exception ex)
                {
                    // Log the full exception text, then show only the message to the user.
                    // NOTE(review): Dialogs.Error is called from inside the task body, not through
                    // the dispatcher - confirm it is safe to call from this thread.
                    Utility.WriteToErrorLog(ex.ToString());
                    Dialogs.Error("Error calculating statistics: " + ex.Message);
                }
                finally
                {
                    // Runs on success and on failure: allow closing and clear the busy indicator.
                    this.canClose = true;
                    this.Dispatcher.BeginInvokeAction(() => this.busyIndicator.IsBusy = false);
                    // If shouldCancel is set at this point, queue closing the window.
                    if (this.shouldCancel)
                    {
                        this.Dispatcher.BeginInvokeAction(this.Close);
                    }
                }
            });
            this.task.Start();
        }
 /// <summary>
 /// Looks up the similarity returned by <c>Get</c> for the field named in
 /// <paramref name="collectionStats"/>, asks it to compute its weight, and returns
 /// both wrapped in a <see cref="PerFieldSimWeight"/>.
 /// </summary>
 /// <param name="queryBoost"> Query-time boost, forwarded unchanged to the delegate. </param>
 /// <param name="collectionStats"> Collection-level statistics; its <c>Field()</c> selects the delegate. </param>
 /// <param name="termStats"> Term-level statistics, forwarded unchanged to the delegate. </param>
 /// <returns> A <see cref="PerFieldSimWeight"/> holding the delegate and the weight it computed. </returns>
 public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     PerFieldSimWeight weight = new PerFieldSimWeight();
     weight.@delegate = Get(collectionStats.Field());
     // The delegate chosen above computes the actual weight; it is stored next to
     // the delegate on the wrapper.
     weight.DelegateWeight = weight.@delegate.ComputeWeight(queryBoost, collectionStats, termStats);
     return weight;
 }
Esempio n. 5
0
        /// <summary>
        /// Computes the idf score factor for a single term and wraps it in an
        /// <see cref="Explanation"/> that records the inputs used.
        ///
        /// <para/>
        /// The factor is obtained as:
        ///
        /// <code>
        /// Idf(termStats.DocFreq, collectionStats.MaxDoc);
        /// </code>
        ///
        /// <see cref="CollectionStatistics.MaxDoc"/> is used rather than
        /// <see cref="Lucene.Net.Index.IndexReader.NumDocs"/> because it is paired with
        /// <see cref="TermStatistics.DocFreq"/>: when the latter is inaccurate, so is
        /// <see cref="CollectionStatistics.MaxDoc"/>, and in the same direction. It is also
        /// more efficient to compute.
        /// </summary>
        /// <param name="collectionStats"> Collection-level statistics </param>
        /// <param name="termStats"> Term-level statistics for the term </param>
        /// <returns> An <see cref="Explanation"/> whose value is the idf factor and whose
        ///           description lists the docFreq and maxDocs used. </returns>
        public virtual Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics termStats)
        {
            long docFreq = termStats.DocFreq;
            long maxDoc = collectionStats.MaxDoc;
            float score = Idf(docFreq, maxDoc);
            string description = "idf(docFreq=" + docFreq + ", maxDocs=" + maxDoc + ")";

            return new Explanation(score, description);
        }
Esempio n. 6
0
 /// <summary>
 /// Asks every similarity in <c>Sims</c> to compute its weight for the same inputs and
 /// bundles the results, in order, into a <see cref="MultiStats"/>.
 /// </summary>
 /// <param name="queryBoost"> Query-time boost, passed to every sub-similarity. </param>
 /// <param name="collectionStats"> Collection-level statistics, passed to every sub-similarity. </param>
 /// <param name="termStats"> Term-level statistics, passed to every sub-similarity. </param>
 /// <returns> A <see cref="MultiStats"/> wrapping one weight per sub-similarity. </returns>
 public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     int count = Sims.Length;
     SimWeight[] weights = new SimWeight[count];

     for (int index = 0; index < count; index++)
     {
         weights[index] = Sims[index].ComputeWeight(queryBoost, collectionStats, termStats);
     }

     return new MultiStats(weights);
 }
Esempio n. 7
0
 // Sample constructor: creates a DocumentStore, sends a GetCollectionStatisticsOperation
 // through its Maintenance API and stores the result in a local that is not read afterwards.
 // NOTE(review): the #region markers presumably delimit a snippet extracted for
 // documentation - confirm before changing anything inside them.
 public CollectionStats()
 {
     // The store is disposed when the using block exits.
     using (var store = new DocumentStore())
     {
         #region stats_3
         CollectionStatistics stats = store.Maintenance.Send(new GetCollectionStatisticsOperation());
         #endregion
     }
 }
Esempio n. 8
0
 /// <summary>
 /// Computes one weight per similarity in <c>m_sims</c>, all from the same inputs, and
 /// returns them combined in a <see cref="MultiStats"/>.
 /// </summary>
 /// <param name="queryBoost"> Query-time boost, passed to every sub-similarity. </param>
 /// <param name="collectionStats"> Collection-level statistics, passed to every sub-similarity. </param>
 /// <param name="termStats"> Term-level statistics, passed to every sub-similarity. </param>
 /// <returns> A <see cref="MultiStats"/> wrapping the per-similarity weights in order. </returns>
 public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     SimWeight[] perSimilarity = new SimWeight[m_sims.Length];

     int slot = 0;
     while (slot < perSimilarity.Length)
     {
         perSimilarity[slot] = m_sims[slot].ComputeWeight(queryBoost, collectionStats, termStats);
         slot++;
     }

     MultiStats combined = new MultiStats(perSimilarity);
     return combined;
 }
Esempio n. 9
0
 /// <summary>
 /// Creates and fills one <see cref="BasicStats"/> per entry of <paramref name="termStats"/>.
 /// A single term yields that stats object directly; any other count yields a
 /// <see cref="MultiSimilarity.MultiStats"/> wrapping all of them.
 /// </summary>
 /// <param name="queryBoost"> Query-time boost, passed to <c>NewStats</c>. </param>
 /// <param name="collectionStats"> Collection-level statistics; supplies the field name and fills each stats object. </param>
 /// <param name="termStats"> Term-level statistics, one stats object is produced per element. </param>
 /// <returns> The single stats object, or a wrapper around all of them. </returns>
 public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     int termCount = termStats.Length;
     BasicStats[] perTerm = new BasicStats[termCount];

     for (int t = 0; t < termCount; t++)
     {
         BasicStats current = NewStats(collectionStats.Field(), queryBoost);
         perTerm[t] = current;
         FillBasicStats(current, collectionStats, termStats[t]);
     }

     if (perTerm.Length == 1)
     {
         return perTerm[0];
     }

     return new MultiSimilarity.MultiStats(perTerm) as SimWeight;
 }
Esempio n. 10
0
 /// <summary>
 /// Builds a filled <see cref="BasicStats"/> for every element of <paramref name="termStats"/>
 /// and returns either the only one (exactly one term) or all of them wrapped in a
 /// <see cref="MultiSimilarity.MultiStats"/>.
 /// </summary>
 /// <param name="queryBoost"> Query-time boost, passed to <c>NewStats</c>. </param>
 /// <param name="collectionStats"> Collection-level statistics; supplies the field name and fills each stats object. </param>
 /// <param name="termStats"> Term-level statistics, one stats object is produced per element. </param>
 /// <returns> The single stats object, or a wrapper around all of them. </returns>
 public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     BasicStats[] filled = new BasicStats[termStats.Length];

     int position = 0;
     while (position < termStats.Length)
     {
         filled[position] = NewStats(collectionStats.Field(), queryBoost);
         FillBasicStats(filled[position], collectionStats, termStats[position]);
         position++;
     }

     bool singleTerm = filled.Length == 1;
     if (singleTerm)
     {
         return filled[0];
     }
     else
     {
         return new MultiSimilarity.MultiStats(filled) as SimWeight;
     }
 }
        /// <summary>
        /// Stores the text field's <c>SumTotalTermFreq</c> in <c>NumberOfTokens</c>, calls
        /// <c>LuceneService.GetTokenDataForDoc</c> with the text field value of every document
        /// id below <c>MaxDoc</c>, and finally registers the token count in <c>AllFields</c>
        /// under the key "Tokens".
        /// </summary>
        private void SetTokenNumber()
        {
            CollectionStatistics fieldStats = LuceneService.Searcher.CollectionStatistics(ProjectInfo.TextFieldKey);
            this.NumberOfTokens = (ulong)fieldStats.SumTotalTermFreq;

            int docId = 0;
            while (docId < LuceneService.DirReader.MaxDoc)
            {
                var document = LuceneService.DirReader.Document(docId);
                var text = document.GetField(ProjectInfo.TextFieldKey).GetStringValue();

                // Any value returned by this call is not used here.
                LuceneService.GetTokenDataForDoc(text);
                docId++;
            }

            AllFields.Add("Tokens", this.NumberOfTokens);
        }
Esempio n. 12
0
        /// <summary>
        /// Compares the collection names in <paramref name="expected"/> (excluding those that
        /// <c>CollectionName.IsHiLoCollection</c> reports as HiLo collections) with the
        /// "table_name" column values of <paramref name="actual"/>.
        /// </summary>
        /// <param name="expected"> Collection statistics whose collection keys are the expected names. </param>
        /// <param name="actual"> Table whose rows carry a string "table_name" column. </param>
        public static void AssertDatabaseCollections(CollectionStatistics expected, DataTable actual)
        {
            var nonHiLoCollections = expected.Collections.Keys
                                     .Where(name => CollectionName.IsHiLoCollection(name) == false)
                                     .ToList();

            var tableNames = actual.AsEnumerable()
                             .Select(row => row.Field <string>("table_name"))
                             .ToList();

            AssertCollectionsHaveTheSameElements(nonHiLoCollections, tableNames);
        }
Esempio n. 13
0
        /// <summary>
        /// Returns the average field length, computed as <c>sumTotalTermFreq / maxDoc</c>.
        /// When sumTotalTermFreq is zero or negative (the index does not store it, e.g.
        /// Lucene 3.x indexes or fields that omit frequency information) the result is <c>1</c>.
        /// </summary>
        /// <param name="collectionStats"> Collection-level statistics providing both values. </param>
        /// <returns> The average length, or <c>1</c> when it cannot be computed. </returns>
        protected internal virtual float AvgFieldLength(CollectionStatistics collectionStats)
        {
            long totalTermFreq = collectionStats.SumTotalTermFreq;

            if (totalTermFreq > 0)
            {
                // Divide in double precision, then narrow the result to float.
                double average = totalTermFreq / (double)collectionStats.MaxDoc;
                return (float)average;
            }

            // field does not exist, or stat is unsupported
            return 1f;
        }
Esempio n. 14
0
        /// <summary>
        /// Builds the <see cref="BM25Stats"/> for a query: the idf explanation (single-term or
        /// multi-term overload, depending on the number of terms), the average field length,
        /// and a 256-entry cache indexed by byte norm value.
        /// </summary>
        /// <param name="queryBoost"> Query-time boost, stored on the returned stats. </param>
        /// <param name="collectionStats"> Collection-level statistics. </param>
        /// <param name="termStats"> Term-level statistics for the query terms. </param>
        /// <returns> A <see cref="BM25Stats"/> carrying field, idf, boost, average length and cache. </returns>
        public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
        {
            Explanation idf;
            if (termStats.Length == 1)
            {
                idf = IdfExplain(collectionStats, termStats[0]);
            }
            else
            {
                idf = IdfExplain(collectionStats, termStats);
            }

            float avgdl = AvgFieldLength(collectionStats);

            // Precompute the frequency-independent part of the BM25 equation for every
            // possible byte norm value.
            float[] normCache = new float[256];
            for (int norm = 0; norm < normCache.Length; norm++)
            {
                normCache[norm] = k1 * ((1 - b) + b * DecodeNormValue((byte)norm) / avgdl);
            }

            return new BM25Stats(collectionStats.Field, idf, queryBoost, avgdl, normCache);
        }
Esempio n. 15
0
        /// <summary>
        /// Requests the collection statistics through <c>Store.Maintenance</c> and returns
        /// the collection names found in them.
        /// </summary>
        /// <param name="Database"> Database name; only used in the log message written on failure. </param>
        /// <returns> The collection names, or <c>null</c> when any exception was caught and logged. </returns>
        public List <string> GetCollection(string Database)
        {
            try
            {
                CollectionStatistics statistics = Store.Maintenance.Send(new GetCollectionStatisticsOperation());
                List <string> names = statistics.Collections.Keys.ToList();

                return names;
            }
            catch (Exception ex)
            {
                // Failures are logged rather than propagated; the caller receives null.
                DME_Editor.AddLogMessage(ex.Message, "Could not get Collection from Database in RavenDB " + Database, DateTime.Now, -1, ConnProp.Url, Errors.Failed);
                return null;
            }
        }
Esempio n. 16
0
        /// <summary>
        /// Computes the idf score factor for a phrase.
        ///
        /// <para/>
        /// This implementation adds up the idf factor of every term in the phrase and
        /// attaches one detail explanation per term.
        /// </summary>
        /// <param name="collectionStats"> Collection-level statistics </param>
        /// <param name="termStats"> Term-level statistics for the terms in the phrase </param>
        /// <returns> An <see cref="Explanation"/> whose value is the summed idf and whose
        ///         details explain each term. </returns>
        public virtual Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats)
        {
            long maxDoc = collectionStats.MaxDoc;
            float total = 0.0f;
            Explanation result = new Explanation();
            result.Description = "idf(), sum of:";

            for (int i = 0; i < termStats.Length; i++)
            {
                long docFreq = termStats[i].DocFreq;
                float termIdf = Idf(docFreq, maxDoc);
                string detail = "idf(docFreq=" + docFreq + ", maxDocs=" + maxDoc + ")";

                result.AddDetail(new Explanation(termIdf, detail));
                total += termIdf;
            }

            result.Value = total;
            return result;
        }
Esempio n. 17
0
        /// <summary>
        /// Fills all member fields defined in <see cref="BasicStats"/> on <paramref name="stats"/>.
        /// Subclasses can override this method to fill additional stats.
        /// </summary>
        /// <param name="stats"> Statistics object that receives the values. </param>
        /// <param name="collectionStats"> Source of MaxDoc and SumTotalTermFreq. </param>
        /// <param name="termStats"> Source of DocFreq and TotalTermFreq. </param>
        protected internal virtual void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats)
        {
            // #positions(field) must be >= #positions(term)
            Debug.Assert(collectionStats.SumTotalTermFreq() == -1 || collectionStats.SumTotalTermFreq() >= termStats.TotalTermFreq());

            long docCount = collectionStats.MaxDoc;
            long termDocFreq = termStats.DocFreq();

            // codec does not supply totalTermFreq: substitute docFreq
            long termTotalFreq = termStats.TotalTermFreq();
            if (termTotalFreq == -1)
            {
                termTotalFreq = termDocFreq;
            }

            long fieldTotalFreq = collectionStats.SumTotalTermFreq();
            bool hasFieldTotals = fieldTotalFreq > 0;

            // When the field does not exist, the codec does not supply these measures, or
            // frequencies were omitted for the field, fall back to docFreq and an average
            // length of 1: negative values cause NaN/Inf for some scorers.
            long fieldTokens = hasFieldTotals ? fieldTotalFreq : termDocFreq;
            float avgLength = hasFieldTotals ? (float)fieldTokens / docCount : 1f;

            // TODO: add sumDocFreq for field (numberOfFieldPostings)
            stats.NumberOfDocuments   = docCount;
            stats.NumberOfFieldTokens = fieldTokens;
            stats.AvgFieldLength      = avgLength;
            stats.DocFreq             = termDocFreq;
            stats.TotalTermFreq       = termTotalFreq;
        }
Esempio n. 18
0
 /// <summary>
 /// Wraps the given database handle and starts with two fresh, separate
 /// <see cref="CollectionStatistics"/> instances for <c>_stats</c> and <c>_tempStats</c>.
 /// </summary>
 /// <param name="collection"> Database handle stored in <c>_collection</c>. </param>
 public LMDBCollection(LightningDatabase collection)
 {
     _collection = collection;

     // Each field gets its own statistics object.
     _stats = new CollectionStatistics();
     _tempStats = new CollectionStatistics();
 }
 /// <summary>
 /// Idf used for phrase queries: always a constant explanation with value 1.0,
 /// regardless of the statistics passed in.
 /// </summary>
 /// <param name="collectionStats"> Not used. </param>
 /// <param name="termStats"> Not used. </param>
 /// <returns> An <see cref="Explanation"/> with value 1.0 and description "Inexplicable". </returns>
 public override Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats)
 {
     Explanation constantIdf = new Explanation(1.0f, "Inexplicable");
     return constantIdf;
 }
Esempio n. 20
0
 /// <summary>
 /// Not supported by this similarity: every call throws.
 /// </summary>
 /// <param name="queryBoost"> Not used. </param>
 /// <param name="collectionStats"> Not used. </param>
 /// <param name="termStats"> Not used. </param>
 /// <exception cref="System.NotSupportedException"> Always thrown. </exception>
 public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     // Weight computation is deliberately unavailable on this type.
     throw new System.NotSupportedException();
 }
Esempio n. 21
0
 /// <summary>
 /// Reads the next object from <paramref name="reader"/> and stores it in <c>_stats</c>.
 /// </summary>
 /// <param name="reader"> Reader positioned at the serialized statistics object. </param>
 public void Deserialize(Common.Serialization.IO.CompactReader reader)
 {
     var payload = reader.ReadObject();

     // "as" leaves _stats null when the object read is not a CollectionStatistics.
     _stats = payload as CollectionStatistics;
 }
Esempio n. 22
0
        /// <summary>
        /// Builds the <see cref="IDFStats"/> for a query, using the single-term idf explanation
        /// when exactly one term is given and the multi-term overload otherwise.
        /// </summary>
        /// <param name="queryBoost"> Query-time boost, stored on the returned stats. </param>
        /// <param name="collectionStats"> Collection-level statistics; also supplies the field. </param>
        /// <param name="termStats"> Term-level statistics for the query terms. </param>
        /// <returns> An <see cref="IDFStats"/> built from the field, the idf explanation and the boost. </returns>
        public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
        {
            Explanation idf;
            if (termStats.Length == 1)
            {
                idf = IdfExplain(collectionStats, termStats[0]);
            }
            else
            {
                idf = IdfExplain(collectionStats, termStats);
            }

            return new IDFStats(collectionStats.Field, idf, queryBoost);
        }
Esempio n. 23
0
        /// <summary>
        /// Fills all member fields defined in <see cref="BasicStats"/> on <paramref name="stats"/>.
        /// Subclasses can override this method to fill additional stats.
        /// </summary>
        /// <param name="stats"> Statistics object that receives the values. </param>
        /// <param name="collectionStats"> Source of MaxDoc and SumTotalTermFreq. </param>
        /// <param name="termStats"> Source of DocFreq and TotalTermFreq. </param>
        protected internal virtual void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats)
        {
            // #positions(field) must be >= #positions(term)
            Debug.Assert(collectionStats.SumTotalTermFreq() == -1 || collectionStats.SumTotalTermFreq() >= termStats.TotalTermFreq());

            long documents = collectionStats.MaxDoc;
            long df = termStats.DocFreq();
            long ttf = termStats.TotalTermFreq();

            // codec does not supply totalTermFreq: substitute docFreq
            if (ttf == -1)
            {
                ttf = df;
            }

            long fieldSum = collectionStats.SumTotalTermFreq();

            // Defaults cover the case where the field does not exist, the codec does not
            // supply these measures, or frequencies were omitted for the field; negative
            // values cause NaN/Inf for some scorers.
            long tokens = df;
            float averageLength = 1;

            if (fieldSum > 0)
            {
                tokens = fieldSum;
                averageLength = (float)tokens / documents;
            }

            // TODO: add sumDocFreq for field (numberOfFieldPostings)
            stats.NumberOfDocuments = documents;
            stats.NumberOfFieldTokens = tokens;
            stats.AvgFieldLength = averageLength;
            stats.DocFreq = df;
            stats.TotalTermFreq = ttf;
        }
Esempio n. 24
0
 /// <summary>
 /// Builds the <see cref="IDFStats"/> for a query, using the single-term idf explanation
 /// when exactly one term is given and the multi-term overload otherwise.
 /// </summary>
 /// <param name="queryBoost"> Query-time boost, stored on the returned stats. </param>
 /// <param name="collectionStats"> Collection-level statistics; also supplies the field. </param>
 /// <param name="termStats"> Term-level statistics for the query terms. </param>
 /// <returns> An <see cref="IDFStats"/> built from the field, the idf explanation and the boost. </returns>
 public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     Explanation idf;
     if (termStats.Length == 1)
     {
         idf = IdfExplain(collectionStats, termStats[0]);
     }
     else
     {
         idf = IdfExplain(collectionStats, termStats);
     }

     return new IDFStats(collectionStats.Field(), idf, queryBoost);
 }
Esempio n. 25
0
 /// <summary>
 /// Computes any collection-level weight (e.g. IDF, average document length, etc.) needed for scoring a query.
 /// Implementations are supplied by subclasses.
 /// </summary>
 /// <param name="queryBoost"> The query-time boost. </param>
 /// <param name="collectionStats"> Collection-level statistics, such as the number of tokens in the collection. </param>
 /// <param name="termStats"> Term-level statistics, such as the document frequency of a term across the collection. </param>
 /// <returns> A <see cref="SimWeight"/> object with the information this similarity needs to score a query. </returns>
 public abstract SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats);
Esempio n. 26
0
 /// <summary>
 /// Fills the usual statistics through the base implementation, then stores the
 /// probability reported by the collection model on the <see cref="LMStats"/> instance.
 /// </summary>
 /// <param name="stats"> Statistics object to fill; it is cast to <see cref="LMStats"/>. </param>
 /// <param name="collectionStats"> Collection-level statistics, passed to the base implementation. </param>
 /// <param name="termStats"> Term-level statistics, passed to the base implementation. </param>
 protected internal override void FillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats)
 {
     base.FillBasicStats(stats, collectionStats, termStats);

     // The cast fails at runtime when a non-LMStats instance is supplied.
     ((LMStats)stats).CollectionProbability = collectionModel.ComputeProbability(stats);
 }
Esempio n. 27
0
 /// <summary>
 /// Not supported by this similarity: every call throws the exception produced by
 /// <c>UnsupportedOperationException.Create()</c>.
 /// </summary>
 /// <param name="queryBoost"> Not used. </param>
 /// <param name="collectionStats"> Not used. </param>
 /// <param name="termStats"> Not used. </param>
 public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     // Weight computation is deliberately unavailable on this type.
     throw UnsupportedOperationException.Create();
 }
Esempio n. 28
0
 /// <summary>
 /// Returns the average field length, computed as <c>sumTotalTermFreq / maxDoc</c>.
 /// When sumTotalTermFreq is zero or negative (the index does not store it, e.g.
 /// Lucene 3.x indexes or fields that omit frequency information) the result is <c>1</c>.
 /// </summary>
 /// <param name="collectionStats"> Collection-level statistics providing both values. </param>
 /// <returns> The average length, or <c>1</c> when it cannot be computed. </returns>
 protected internal virtual float AvgFieldLength(CollectionStatistics collectionStats)
 {
     long totalTermFreq = collectionStats.SumTotalTermFreq();

     if (totalTermFreq > 0)
     {
         // Divide in double precision, then narrow the result to float.
         double average = totalTermFreq / (double)collectionStats.MaxDoc();
         return (float)average;
     }

     // field does not exist, or stat is unsupported
     return 1f;
 }
Esempio n. 29
0
 /// <summary>
 /// Forwards the statistics update for the given collection to <c>_session</c>;
 /// all four arguments are passed through unchanged.
 /// </summary>
 /// <param name="cluster"> Cluster name. </param>
 /// <param name="database"> Database name. </param>
 /// <param name="collection"> Collection name. </param>
 /// <param name="statistics"> Statistics to hand to the session. </param>
 public void UpdateCollectionStatistics(string cluster, string database, string collection, CollectionStatistics statistics)
 {
     // Pure delegation: no validation or transformation happens here.
     _session.UpdateCollectionStatistics(cluster, database, collection, statistics);
 }
Esempio n. 30
0
 /// <summary>
 /// Computes any collection-level weight (e.g. IDF, average document length, etc.) needed for scoring a query.
 /// Implementations are supplied by subclasses.
 /// </summary>
 /// <param name="queryBoost"> The query-time boost. </param>
 /// <param name="collectionStats"> Collection-level statistics, such as the number of tokens in the collection. </param>
 /// <param name="termStats"> Term-level statistics, such as the document frequency of a term across the collection. </param>
 /// <returns> A <see cref="SimWeight"/> object with the information this <see cref="Similarity"/> needs to score a query. </returns>
 public abstract SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats);
Esempio n. 31
0
 /// <summary>
 /// Not available on this similarity: every call throws.
 /// </summary>
 /// <param name="queryBoost"> Not used. </param>
 /// <param name="collectionStats"> Not used. </param>
 /// <param name="termStats"> Not used. </param>
 /// <exception cref="InvalidOperationException"> Always thrown. </exception>
 public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     // Weight computation is deliberately unavailable on this type.
     throw new InvalidOperationException();
 }
 /// <summary>
 /// Idf used for phrase queries: always a constant explanation with value 1.0,
 /// regardless of the statistics passed in.
 /// </summary>
 /// <param name="collectionStats"> Not used. </param>
 /// <param name="termStats"> Not used. </param>
 /// <returns> An <see cref="Explanation"/> with value 1.0 and description "Inexplicable". </returns>
 public override Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats)
 {
     Explanation constantIdf = new Explanation(1.0f, "Inexplicable");
     return constantIdf;
 }
Esempio n. 33
0
 /// <summary>
 /// Not supported by this similarity: every call throws.
 /// </summary>
 /// <param name="queryBoost"> Not used. </param>
 /// <param name="collectionStats"> Not used. </param>
 /// <param name="termStats"> Not used. </param>
 /// <exception cref="System.NotSupportedException"> Always thrown. </exception>
 public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     // Weight computation is deliberately unavailable on this type.
     throw new System.NotSupportedException();
 }
Esempio n. 34
0
 /// <summary>
 /// Computes the idf score factor for a phrase.
 ///
 /// <para/>
 /// This implementation adds up the idf factor of every term in the phrase and
 /// attaches one detail explanation per term.
 /// </summary>
 /// <param name="collectionStats"> Collection-level statistics </param>
 /// <param name="termStats"> Term-level statistics for the terms in the phrase </param>
 /// <returns> An <see cref="Explanation"/> whose value is the summed idf and whose
 ///         details explain each term. </returns>
 public virtual Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats)
 {
     long maxDoc = collectionStats.MaxDoc();
     float total = 0.0f;
     Explanation result = new Explanation();
     result.Description = "idf(), sum of:";

     for (int i = 0; i < termStats.Length; i++)
     {
         long docFreq = termStats[i].DocFreq();
         float termIdf = Idf(docFreq, maxDoc);
         string detail = "idf(docFreq=" + docFreq + ", maxDocs=" + maxDoc + ")";

         result.AddDetail(new Explanation(termIdf, detail));
         total += termIdf;
     }

     result.Value = total;
     return result;
 }
Esempio n. 35
0
 /// <summary>
 /// Computes the idf score factor for a single term and wraps it in an
 /// <see cref="Explanation"/> that records the inputs used.
 ///
 /// <para/>
 /// The factor is obtained as:
 ///
 /// <code>
 /// Idf(termStats.DocFreq(), collectionStats.MaxDoc());
 /// </code>
 ///
 /// The collection's MaxDoc is used rather than the index reader's NumDocs because it is
 /// paired with the term's DocFreq: when the latter is inaccurate, so is MaxDoc, and in
 /// the same direction. MaxDoc is also more efficient to compute.
 /// </summary>
 /// <param name="collectionStats"> Collection-level statistics </param>
 /// <param name="termStats"> Term-level statistics for the term </param>
 /// <returns> An <see cref="Explanation"/> whose value is the idf factor and whose
 ///           description lists the docFreq and maxDocs used. </returns>
 public virtual Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics termStats)
 {
     long docFreq = termStats.DocFreq();
     long maxDoc = collectionStats.MaxDoc();
     float score = Idf(docFreq, maxDoc);
     string description = "idf(docFreq=" + docFreq + ", maxDocs=" + maxDoc + ")";

     return new Explanation(score, description);
 }
Esempio n. 36
0
 /// <summary>
 /// Delegates weight computation to the wrapped similarity <c>@in</c>, passing every
 /// argument through unchanged.
 /// </summary>
 /// <param name="queryBoost"> Query-time boost. </param>
 /// <param name="collectionStats"> Collection-level statistics. </param>
 /// <param name="termStats"> Term-level statistics. </param>
 /// <returns> Whatever the wrapped similarity returns. </returns>
 public override SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
 {
     SimWeight delegated = @in.ComputeWeight(queryBoost, collectionStats, termStats);
     return delegated;
 }
Esempio n. 37
0
        /// <summary>
        /// Builds the <see cref="BM25Stats"/> for a query: the idf explanation (single-term or
        /// multi-term overload, depending on the number of terms), the average field length,
        /// and a 256-entry cache indexed by norm value.
        /// </summary>
        /// <param name="queryBoost"> Query-time boost, stored on the returned stats. </param>
        /// <param name="collectionStats"> Collection-level statistics. </param>
        /// <param name="termStats"> Term-level statistics for the query terms. </param>
        /// <returns> A <see cref="BM25Stats"/> carrying field, idf, boost, average length and cache. </returns>
        public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
        {
            Explanation idf;
            if (termStats.Length == 1)
            {
                idf = IdfExplain(collectionStats, termStats[0]);
            }
            else
            {
                idf = IdfExplain(collectionStats, termStats);
            }

            float avgdl = AvgFieldLength(collectionStats);

            // Precompute the frequency-independent part of the BM25 equation for every
            // cache slot; the slot index is narrowed to sbyte before decoding.
            float[] normCache = new float[256];
            for (int norm = 0; norm < normCache.Length; norm++)
            {
                normCache[norm] = K1_Renamed * ((1 - b) + b * DecodeNormValue((sbyte)norm) / avgdl);
            }

            return new BM25Stats(collectionStats.Field(), idf, queryBoost, avgdl, normCache);
        }