public override Query Rewrite(IndexReader reader, MultiTermQuery query, IState state) { // Get the enum and start visiting terms. If we // exhaust the enum before hitting either of the // cutoffs, we use ConstantBooleanQueryRewrite; else, // ConstantFilterRewrite: ICollection <Term> pendingTerms = new List <Term>(); int docCountCutoff = (int)((docCountPercent / 100.0) * reader.MaxDoc); int termCountLimit = System.Math.Min(BooleanQuery.MaxClauseCount, termCountCutoff); int docVisitCount = 0; FilteredTermEnum enumerator = query.GetEnum(reader, state); try { while (true) { Term t = enumerator.Term; if (t != null) { pendingTerms.Add(t); // Loading the TermInfo from the terms dict here // should not be costly, because 1) the // query/filter will load the TermInfo when it // runs, and 2) the terms dict has a cache: docVisitCount += reader.DocFreq(t, state); } if (pendingTerms.Count >= termCountLimit || docVisitCount >= docCountCutoff) { // Too many terms -- make a filter. Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter <MultiTermQuery>(query)); result.Boost = query.Boost; return(result); } else if (!enumerator.Next(state)) { // Enumeration is done, and we hit a small // enough number of terms & docs -- just make a // BooleanQuery, now BooleanQuery bq = new BooleanQuery(true); foreach (Term term in pendingTerms) { TermQuery tq = new TermQuery(term); bq.Add(tq, Occur.SHOULD); } // Strip scores Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); result.Boost = query.Boost; query.IncTotalNumberOfTerms(pendingTerms.Count); return(result); } } } finally { enumerator.Close(); } }
public virtual void Generate(MultiTermQuery query, IndexReader reader, TermEnum enumerator) { int[] docs = new int[32]; int[] freqs = new int[32]; TermDocs termDocs = reader.TermDocs(); try { int termCount = 0; do { Term term = enumerator.Term(); if (term == null) { break; } termCount++; termDocs.Seek(term); while (true) { int count = termDocs.Read(docs, freqs); if (count != 0) { for (int i = 0; i < count; i++) { HandleDoc(docs[i]); } } else { break; } } }while (enumerator.Next()); query.IncTotalNumberOfTerms(termCount); // {{Aroush-2.9}} is the use of 'temp' as is right? } finally { termDocs.Close(); } }
public override Query Rewrite(IndexReader reader, MultiTermQuery query) { // Get the enum and start visiting terms. If we // exhaust the enum before hitting either of the // cutoffs, we use ConstantBooleanQueryRewrite; else, // ConstantFilterRewrite: System.Collections.ArrayList pendingTerms = new System.Collections.ArrayList(); int docCountCutoff = (int) ((docCountPercent / 100.0) * reader.MaxDoc()); int termCountLimit = System.Math.Min(BooleanQuery.GetMaxClauseCount(), termCountCutoff); int docVisitCount = 0; FilteredTermEnum enumerator = query.GetEnum(reader); try { while (true) { Term t = enumerator.Term(); if (t != null) { pendingTerms.Add(t); // Loading the TermInfo from the terms dict here // should not be costly, because 1) the // query/filter will load the TermInfo when it // runs, and 2) the terms dict has a cache: docVisitCount += reader.DocFreq(t); } if (pendingTerms.Count >= termCountLimit || docVisitCount >= docCountCutoff) { // Too many terms -- make a filter. Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); result.SetBoost(query.GetBoost()); return result; } else if (!enumerator.Next()) { // Enumeration is done, and we hit a small // enough number of terms & docs -- just make a // BooleanQuery, now System.Collections.IEnumerator it = pendingTerms.GetEnumerator(); BooleanQuery bq = new BooleanQuery(true); while (it.MoveNext()) { TermQuery tq = new TermQuery((Term) it.Current); bq.Add(tq, BooleanClause.Occur.SHOULD); } // Strip scores Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); result.SetBoost(query.GetBoost()); query.IncTotalNumberOfTerms(pendingTerms.Count); return result; } } } finally { enumerator.Close(); } }
public override Query Rewrite(IndexReader reader, MultiTermQuery query) { FilteredTermEnum enumerator = query.GetEnum(reader); BooleanQuery result = new BooleanQuery(true); int count = 0; try { do { Term t = enumerator.Term(); if (t != null) { TermQuery tq = new TermQuery(t); // found a match tq.SetBoost(query.GetBoost() * enumerator.Difference()); // set the boost result.Add(tq, BooleanClause.Occur.SHOULD); // add to query count++; } } while (enumerator.Next()); } finally { enumerator.Close(); } query.IncTotalNumberOfTerms(count); return result; }
public virtual void Generate(MultiTermQuery query, IndexReader reader, TermEnum enumerator) { int[] docs = new int[32]; int[] freqs = new int[32]; TermDocs termDocs = reader.TermDocs(); try { int termCount = 0; do { Term term = enumerator.Term(); if (term == null) break; termCount++; termDocs.Seek(term); while (true) { int count = termDocs.Read(docs, freqs); if (count != 0) { for (int i = 0; i < count; i++) { HandleDoc(docs[i]); } } else { break; } } } while (enumerator.Next()); query.IncTotalNumberOfTerms(termCount); // {{Aroush-2.9}} is the use of 'temp' as is right? } finally { termDocs.Close(); } }