/// <summary>
/// Empties <paramref name="queue"/> into a new list by popping elements one at a time.
/// </summary>
/// <param name="queue"> the queue to drain; elements are popped until its <c>Count</c> is no longer positive </param>
/// <returns> the popped elements, in the order the queue popped them </returns>
private static List<TermAndFreq> QueueToList(Util.PriorityQueue<TermAndFreq> queue)
{
    var drained = new List<TermAndFreq>();

    // Re-read Count after every Pop so the loop ends as soon as the queue is empty.
    for (int remaining = queue.Count; remaining > 0; remaining = queue.Count)
    {
        TermAndFreq next = queue.Pop();
        drained.Add(next);
    }

    return drained;
}
/// <summary>
/// Shortcut for callers that only want the most interesting words of a document.
/// Callers needing more control should use <see cref="RetrieveTerms(TextReader, string)"/> directly.
/// </summary>
/// <param name="r"> the source document </param>
/// <param name="fieldName"> field passed to analyzer to use when analyzing the content </param>
/// <returns> at most <see cref="MaxQueryTerms"/> words, in the order the term queue pops them </returns>
/// <seealso cref="RetrieveTerms(TextReader, string)"/>
/// <seealso cref="MaxQueryTerms"/>
public string[] RetrieveInterestingTerms(TextReader r, string fieldName)
{
    var words = new List<string>(MaxQueryTerms);
    Util.PriorityQueue<object[]> scoredTerms = RetrieveTerms(r, fieldName);
    int remaining = MaxQueryTerms;

    // RetrieveTerms hands back every word it found; only the leading
    // MaxQueryTerms entries are of interest to the caller.
    while (true)
    {
        object[] entry = scoredTerms.Pop();

        // Stop when the queue is exhausted (Pop yields null) or the budget is spent.
        // The null check comes first so the budget is only decremented for real entries.
        if (entry == null || remaining-- <= 0)
        {
            break;
        }

        // Slot 0 of each entry holds the interesting word itself.
        words.Add(entry[0].ToString());
    }

    return words.ToArray();
}
/// <summary>
/// Builds the "more like this" query from a <see cref="T:Util.PriorityQueue{object[]}"/>.
/// Each popped entry becomes one optional (<c>SHOULD</c>) <see cref="TermQuery"/> clause.
/// </summary>
/// <param name="q"> queue of term entries; entries are popped until the queue yields <c>null</c>,
/// the clause limit of the boolean query is hit, or <see cref="MaxQueryTerms"/> clauses were added </param>
/// <returns> the assembled boolean query (it has no clauses when the queue yields nothing) </returns>
private Query CreateQuery(Util.PriorityQueue<object[]> q)
{
    var result = new BooleanQuery();
    int added = 0;
    float topScore = 0;

    for (object[] entry = q.Pop(); entry != null; entry = q.Pop())
    {
        // Entry layout as used here: [0] = word, [1] = field name, [2] = score (boxed float).
        var field = (string)entry[1];
        var word = (string)entry[0];
        var clause = new TermQuery(new Term(field, word));

        if (ApplyBoost)
        {
            float score = (float)entry[2];

            // The first popped entry supplies the score every boost is normalised against.
            if (added == 0)
            {
                topScore = score;
            }

            clause.Boost = boostFactor * score / topScore;
        }

        try
        {
            result.Add(clause, Occur.SHOULD);
        }
        catch (BooleanQuery.TooManyClausesException)
        {
            // The boolean query refuses further clauses; return what was collected so far.
            break;
        }

        added++;

        // A non-positive MaxQueryTerms disables this cap.
        if (MaxQueryTerms > 0 && added >= MaxQueryTerms)
        {
            break;
        }
    }

    return result;
}