/// <summary>
/// Empties <paramref name="queue"/> into a new list, appending elements in the
/// order in which <c>Pop()</c> hands them back.
/// </summary>
/// <param name="queue"> the queue to drain; it holds no elements once this method returns </param>
/// <returns> every element that was in the queue, in pop order </returns>
private static List<TermAndFreq> QueueToList(Util.PriorityQueue<TermAndFreq> queue)
        {
            var drained = new List<TermAndFreq>();

            // Keep popping until the queue reports that nothing is left.
            while (queue.Count > 0)
            {
                TermAndFreq head = queue.Pop();
                drained.Add(head);
            }

            return drained;
        }
示例#2
0
        /// <summary>
        /// Shortcut for fetching the most interesting words of a document as plain strings.
        /// Callers that need more than the words themselves can use
        /// <see cref="RetrieveTerms(TextReader, string)"/> directly.
        /// </summary>
        /// <param name="r"> the source document </param>
        /// <param name="fieldName"> field passed to analyzer to use when analyzing the content </param>
        /// <returns> the most interesting words in the document, at most <see cref="MaxQueryTerms"/> of them </returns>
        /// <seealso cref="RetrieveTerms(TextReader, string)"/>
        /// <seealso cref="MaxQueryTerms"/>
        public string[] RetrieveInterestingTerms(TextReader r, string fieldName)
        {
            var words = new List<string>(MaxQueryTerms);
            Util.PriorityQueue<object[]> ranked = RetrieveTerms(r, fieldName);

            // RetrieveTerms returns every word; the caller only wants the top few,
            // so stop once this budget is used up.
            int remaining = MaxQueryTerms;

            while (true)
            {
                object[] entry = ranked.Pop();

                // Stop when the queue has nothing more to give or the budget is spent.
                if (entry == null || remaining-- <= 0)
                {
                    break;
                }

                // Slot 0 of each entry is the interesting word.
                words.Add(entry[0].ToString());
            }

            return words.ToArray();
        }
示例#3
0
        /// <summary>
        /// Builds the "more like this" query out of a <see cref="T:Util.PriorityQueue{object[]}"/>.
        /// Each popped entry becomes one optional (<c>SHOULD</c>) term clause.
        /// </summary>
        /// <param name="q">
        /// queue of entries; slot 0 and slot 1 are cast to strings and used to build the term
        /// (slot 1 is passed as the first <c>Term</c> argument, slot 0 as the second), and
        /// slot 2 is unboxed as a <c>float</c> score when boosting is enabled
        /// </param>
        /// <returns> a boolean query holding the clauses that could be added </returns>
        private Query CreateQuery(Util.PriorityQueue <object[]> q)
        {
            var result = new BooleanQuery();
            int added = 0;
            float topScore = 0;

            for (object[] entry = q.Pop(); entry != null; entry = q.Pop())
            {
                var first = (string)entry[1];
                var second = (string)entry[0];
                var clause = new TermQuery(new Term(first, second));

                if (ApplyBoost)
                {
                    float score = (float)entry[2];

                    // The score of the first entry popped is the reference that all
                    // later scores are divided by.
                    if (added == 0)
                    {
                        topScore = score;
                    }

                    clause.Boost = boostFactor * score / topScore;
                }

                try
                {
                    result.Add(clause, Occur.SHOULD);
                }
                catch (BooleanQuery.TooManyClausesException)
                {
                    // The query cannot take any more clauses; return what was collected so far.
                    break;
                }

                added++;

                // A non-positive MaxQueryTerms disables the cap.
                if (MaxQueryTerms > 0 && added >= MaxQueryTerms)
                {
                    break;
                }
            }

            return result;
        }