/// <summary>
/// Ordering predicate for the queue: reports <paramref name="a"/> as "less than"
/// <paramref name="b"/> when a's score is the greater of the two.
/// </summary>
/// <param name="a">first element; must be a PQRecord</param>
/// <param name="b">second element; must be a PQRecord</param>
/// <returns>true when the score of <paramref name="a"/> is strictly greater than that of <paramref name="b"/></returns>
public override bool LessThan(Object a, Object b)
{
    // Cast both operands up front so a bad argument fails before any score is read.
    PQRecord left = (PQRecord)a;
    PQRecord right = (PQRecord)b;

    // NOTE(review): the comparison is inverted on purpose, presumably so that
    // higher-scoring records are popped first - confirm against the base queue.
    return left.score > right.score;
}
/// <summary>
/// Create a PriorityQueue of scored terms from a word-to-term-frequency map.
/// </summary>
/// <param name="words">
/// a map keyed on the word (String) with Int objects as the values; each value
/// holds the term frequency of the word in the source document
/// </param>
/// <returns>a queue holding one PQRecord for every word that passed the frequency filters</returns>
private PriorityQueue CreateQueue(IDictionary words)
{
    // All words in the doc and their frequencies have already been collected.
    int totalDocs = ir.NumDocs();
    FreqQ queue = new FreqQ(words.Count); // orders words by score

    foreach (String term in words.Keys)
    {
        // Term frequency in the source doc.
        int termFreq = ((Int)words[term]).x;
        if (minTermFreq > 0 && termFreq < minTermFreq)
        {
            // Filter out words that don't occur enough times in the source.
            continue;
        }

        // Go through all the fields and find the largest document frequency.
        String bestField = fieldNames[0];
        int bestDocFreq = 0;
        foreach (String field in fieldNames)
        {
            int fieldDocFreq = ir.DocFreq(new Term(field, term));
            if (fieldDocFreq > bestDocFreq)
            {
                bestField = field;
                bestDocFreq = fieldDocFreq;
            }
        }

        if (minDocFreq > 0 && bestDocFreq < minDocFreq)
        {
            // Filter out words that don't occur in enough docs.
            continue;
        }

        if (bestDocFreq == 0)
        {
            // Index update problem?
            continue;
        }

        float inverseDocFreq = similarity.Idf(bestDocFreq, totalDocs);
        float termScore = termFreq * inverseDocFreq;

        // Only the first 3 entries are really needed; the others are for troubleshooting.
        queue.Insert(new PQRecord(term, bestField, termScore, inverseDocFreq, bestDocFreq, termFreq));
    }

    return queue;
}
/// <summary>
/// Convenience routine to make it easy to return the most interesting words in a document.
/// More advanced users will call <c>RetrieveTerms</c> directly.
/// </summary>
/// <param name="r">the source document</param>
/// <returns>
/// the most interesting words in the document, at most <c>maxQueryTerms</c> of them,
/// in the order they are popped from the queue returned by <c>RetrieveTerms</c>
/// </returns>
public String[] RetrieveInterestingTerms(StreamReader r)
{
    int limit = maxQueryTerms;
    ArrayList al = new ArrayList(limit);
    Lucene.Net.Util.PriorityQueue pq = RetrieveTerms(r);

    // RetrieveTerms returns all words, which is probably not useful to our caller;
    // we just want the top ones. The limit is checked before popping so no element
    // is removed from the queue only to be thrown away.
    for (int taken = 0; taken < limit; taken++)
    {
        Object cur = pq.Pop();
        if (cur == null)
        {
            break; // queue exhausted before the limit was reached
        }

        PQRecord ar = (PQRecord)cur;
        al.Add(ar.word); // the word field is the interesting term
    }

    return (String[])al.ToArray(typeof(String));
}
/// <summary>
/// Create the "more like this" query from a PriorityQueue of PQRecord entries.
/// </summary>
/// <param name="q">queue of PQRecord entries; it is drained by popping as clauses are built</param>
/// <returns>
/// a BooleanQuery with one SHOULD TermQuery clause per popped record, stopping early
/// when maxQueryTerms (if positive) is reached or the query refuses further clauses
/// </returns>
private Query CreateQuery(Lucene.Net.Search.PriorityQueue q)
{
    Lucene.Net.Search.BooleanQuery result = new Lucene.Net.Search.BooleanQuery();
    int clauseCount = 0;
    float topScore = 0;

    for (Object popped = q.Pop(); popped != null; popped = q.Pop())
    {
        PQRecord record = (PQRecord)popped;
        Lucene.Net.Search.TermQuery clause =
            new Lucene.Net.Search.TermQuery(new Term(record.topField, record.word));

        if (boost)
        {
            // The first record popped supplies the reference score; every clause
            // is boosted by its own score relative to that one.
            if (clauseCount == 0)
            {
                topScore = record.score;
            }

            clause.SetBoost(record.score / topScore);
        }

        try
        {
            result.Add(clause, Lucene.Net.Search.BooleanClause.Occur.SHOULD);
        }
        catch (Lucene.Net.Search.BooleanQuery.TooManyClauses)
        {
            // The query cannot take any more clauses; return what was built so far.
            break;
        }

        clauseCount++;
        if (maxQueryTerms > 0 && clauseCount >= maxQueryTerms)
        {
            break;
        }
    }

    return result;
}