/// <summary>
/// Creates the low-level <see cref="TermQuery"/> for a single term.
/// <para>Subclasses may override this hook to customize the generated queries.</para>
/// </summary>
/// <param name="term">The term to query for.</param>
/// <param name="context">
/// The <see cref="TermContext"/> used to create the low-level term query, or
/// <c>null</c> to build the query from the term alone.
/// </param>
/// <returns>A new <see cref="TermQuery"/> instance.</returns>
protected virtual Query NewTermQuery(Term term, TermContext context)
{
    if (context == null)
    {
        return new TermQuery(term);
    }

    return new TermQuery(term, context);
}
/// <summary>
/// Looks up every query term in every leaf reader and records the per-leaf term
/// state and statistics in <paramref name="contextArray"/>.
/// </summary>
/// <param name="reader">Reader whose <c>Context</c> is passed to each newly created <see cref="TermContext"/>.</param>
/// <param name="leaves">The leaf reader contexts to search.</param>
/// <param name="contextArray">
/// Output array indexed like <paramref name="queryTerms"/>. A slot that is still <c>null</c>
/// receives a new <see cref="TermContext"/> the first time its term is found; a slot that is
/// already populated has the additional leaf registered on it. Slots of terms that are never
/// found are left untouched.
/// </param>
/// <param name="queryTerms">The terms to look up.</param>
public virtual void CollectTermContext(IndexReader reader, IList<AtomicReaderContext> leaves, TermContext[] contextArray, Term[] queryTerms)
{
    // Handed back to Terms.Iterator on every lookup.
    TermsEnum reusableEnum = null;

    foreach (AtomicReaderContext leaf in leaves)
    {
        Fields leafFields = leaf.AtomicReader.Fields;
        if (leafFields == null)
        {
            // This leaf has no fields at all.
            continue;
        }

        for (int termIndex = 0; termIndex < queryTerms.Length; termIndex++)
        {
            Term queryTerm = queryTerms[termIndex];
            TermContext existing = contextArray[termIndex];

            Terms fieldTerms = leafFields.Terms(queryTerm.Field);
            if (fieldTerms == null)
            {
                // The term's field is absent from this leaf.
                continue;
            }

            reusableEnum = fieldTerms.Iterator(reusableEnum);
            Debug.Assert(reusableEnum != null);

            // Skip the empty enum and terms that this leaf does not contain.
            if (reusableEnum == TermsEnum.EMPTY || !reusableEnum.SeekExact(queryTerm.Bytes))
            {
                continue;
            }

            if (existing == null)
            {
                contextArray[termIndex] = new TermContext(reader.Context, reusableEnum.TermState(), leaf.Ord, reusableEnum.DocFreq(), reusableEnum.TotalTermFreq());
            }
            else
            {
                existing.Register(reusableEnum.TermState(), leaf.Ord, reusableEnum.DocFreq(), reusableEnum.TotalTermFreq());
            }
        }
    }
}
/// <summary>
/// Assembles the final query from the collected term statistics by sorting each term
/// into a low-frequency or a high-frequency boolean query.
/// </summary>
/// <param name="maxDoc">Multiplied by <c>maxTermFrequency</c> to obtain a document-frequency cutoff.</param>
/// <param name="contextArray">
/// Per-term contexts, indexed like <paramref name="queryTerms"/>. A <c>null</c> entry
/// places the term in the low-frequency group.
/// </param>
/// <param name="queryTerms">The terms to build clauses for.</param>
/// <returns>
/// The high-frequency query when no low-frequency clause exists, the low-frequency query
/// when no high-frequency clause exists, and otherwise a new <see cref="BooleanQuery"/>
/// holding the high-frequency group as SHOULD and the low-frequency group as MUST.
/// The returned query always carries this query's <c>Boost</c>.
/// </returns>
protected internal virtual Query BuildQuery(int maxDoc, TermContext[] contextArray, Term[] queryTerms)
{
    var lowFreq = new BooleanQuery(disableCoord) { Boost = lowFreqBoost };
    var highFreq = new BooleanQuery(disableCoord) { Boost = highFreqBoost };

    for (int i = 0; i < queryTerms.Length; i++)
    {
        TermContext termContext = contextArray[i];

        // A term without a context is always treated as low frequency.
        bool isHighFreq = termContext != null
            && ((maxTermFrequency >= 1f && termContext.DocFreq > maxTermFrequency)
                || (termContext.DocFreq > (int)Math.Ceiling(maxTermFrequency * (float)maxDoc)));

        Query termQuery = NewTermQuery(queryTerms[i], termContext);
        if (isHighFreq)
        {
            highFreq.Add(termQuery, highFreqOccur);
        }
        else
        {
            lowFreq.Add(termQuery, lowFreqOccur);
        }
    }

    int lowCount = lowFreq.Clauses.Length;
    int highCount = highFreq.Clauses.Length;

    // A minimum-should-match only applies to groups whose clauses are optional.
    if (lowCount > 0 && lowFreqOccur == BooleanClause.Occur.SHOULD)
    {
        lowFreq.MinimumNumberShouldMatch = CalcLowFreqMinimumNumberShouldMatch(lowCount);
    }

    if (highCount > 0 && highFreqOccur == BooleanClause.Occur.SHOULD)
    {
        highFreq.MinimumNumberShouldMatch = CalcHighFreqMinimumNumberShouldMatch(highCount);
    }

    if (lowCount == 0)
    {
        // With no low-frequency terms, the high-frequency terms are rewritten
        // into a conjunction to prevent slow queries.
        if (highFreq.MinimumNumberShouldMatch == 0 && highFreqOccur != BooleanClause.Occur.MUST)
        {
            foreach (BooleanClause clause in highFreq)
            {
                clause.Occur_ = BooleanClause.Occur.MUST;
            }
        }

        highFreq.Boost = Boost;
        return highFreq;
    }

    if (highCount == 0)
    {
        // Only low-frequency terms are present.
        lowFreq.Boost = Boost;
        return lowFreq;
    }

    var combined = new BooleanQuery(true);
    combined.Add(highFreq, BooleanClause.Occur.SHOULD);
    combined.Add(lowFreq, BooleanClause.Occur.MUST);
    combined.Boost = Boost;
    return combined;
}
/// <summary>
/// Rewrites this query for the given reader.
/// </summary>
/// <param name="reader">Reader supplying the leaves and <c>MaxDoc</c> used to collect term statistics.</param>
/// <returns>
/// An empty <see cref="BooleanQuery"/> when there are no terms; a single term query carrying
/// this query's <c>Boost</c> when there is exactly one term; otherwise the query produced by
/// <c>BuildQuery</c> from the collected term contexts.
/// </returns>
public override Query Rewrite(IndexReader reader)
{
    int termCount = this.terms.Count;

    if (termCount == 0)
    {
        return new BooleanQuery();
    }

    if (termCount == 1)
    {
        // A single term needs no frequency analysis.
        Query single = NewTermQuery(this.terms[0], null);
        single.Boost = Boost;
        return single;
    }

    var leaves = reader.Leaves;
    int maxDoc = reader.MaxDoc;
    var contextArray = new TermContext[termCount];
    var queryTerms = this.terms.ToArray();

    CollectTermContext(reader, leaves, contextArray, queryTerms);
    return BuildQuery(maxDoc, contextArray, queryTerms);
}
/// <summary>
/// Creates a <see cref="TermContext"/> from a top-level <see cref="IndexReaderContext"/> and the
/// given <see cref="Term"/>. The term is looked up in each of the context's leaf readers, and
/// every leaf that contains it is registered in the returned <see cref="TermContext"/> under
/// that leaf's ordinal.
/// <para>
/// Note: the given context must be a top-level context.
/// </para>
/// </summary>
/// <param name="context">The top-level reader context whose leaves are searched.</param>
/// <param name="term">The term to look up.</param>
/// <returns>A <see cref="TermContext"/> holding one registration per leaf that contains the term.</returns>
public static TermContext Build(IndexReaderContext context, Term term)
{
    Debug.Assert(context != null && context.IsTopLevel);

    string field = term.Field;
    BytesRef bytes = term.Bytes;
    TermContext perReaderTermState = new TermContext(context);

    foreach (AtomicReaderContext leaf in context.Leaves)
    {
        Fields leafFields = leaf.AtomicReader.Fields;
        if (leafFields == null)
        {
            continue;
        }

        Terms fieldTerms = leafFields.Terms(field);
        if (fieldTerms == null)
        {
            continue;
        }

        TermsEnum termsEnum = fieldTerms.Iterator(null);
        if (!termsEnum.SeekExact(bytes))
        {
            continue;
        }

        // The term exists in this leaf: record its state and statistics.
        perReaderTermState.Register(termsEnum.TermState(), leaf.Ord, termsEnum.DocFreq(), termsEnum.TotalTermFreq());
    }

    return perReaderTermState;
}
/// <summary>
/// Visits each leaf reader and, for every query term found there, stores the term's
/// state and statistics in the matching slot of <paramref name="contextArray"/>.
/// </summary>
/// <param name="reader">Reader whose <c>Context</c> is passed to each newly created <see cref="TermContext"/>.</param>
/// <param name="leaves">The leaf reader contexts to search.</param>
/// <param name="contextArray">
/// Output array indexed like <paramref name="queryTerms"/>. A <c>null</c> slot is filled with a
/// new <see cref="TermContext"/> when its term is first found; a populated slot has the
/// additional leaf registered on it.
/// </param>
/// <param name="queryTerms">The terms to look up.</param>
public virtual void CollectTermContext(IndexReader reader, IList<AtomicReaderContext> leaves, TermContext[] contextArray, Term[] queryTerms)
{
    // Passed back into Terms.Iterator on each lookup.
    TermsEnum sharedEnum = null;

    foreach (AtomicReaderContext leafContext in leaves)
    {
        Fields leafFields = leafContext.AtomicReader.Fields;
        if (leafFields == null)
        {
            // Nothing to search in a leaf without fields.
            continue;
        }

        for (int slot = 0; slot < queryTerms.Length; slot++)
        {
            Term current = queryTerms[slot];
            TermContext collected = contextArray[slot];

            // NOTE(review): Field() and Bytes() are invoked as methods here; confirm
            // this matches the Term API this file compiles against.
            Terms fieldTerms = leafFields.Terms(current.Field());
            if (fieldTerms == null)
            {
                // The field does not exist in this leaf.
                continue;
            }

            sharedEnum = fieldTerms.Iterator(sharedEnum);
            Debug.Assert(sharedEnum != null);

            if (sharedEnum == TermsEnum.EMPTY)
            {
                continue;
            }

            if (!sharedEnum.SeekExact(current.Bytes()))
            {
                // The term is not present in this leaf.
                continue;
            }

            if (collected != null)
            {
                collected.Register(sharedEnum.TermState(), leafContext.Ord, sharedEnum.DocFreq(), sharedEnum.TotalTermFreq());
            }
            else
            {
                contextArray[slot] = new TermContext(reader.Context, sharedEnum.TermState(), leafContext.Ord, sharedEnum.DocFreq(), sharedEnum.TotalTermFreq());
            }
        }
    }
}