Maintains an IndexReader TermState view over IndexReader instances containing a single term. The TermContext doesn't track whether the given TermState objects are valid, nor whether the TermState instances refer to the same terms in the associated readers. @lucene.experimental
Пример #1
0
 /// <summary>
 /// Factory hook that creates the <see cref="TermQuery"/> for a single term.
 /// <para>Subclasses may override this to customize the generated queries.</para>
 /// </summary>
 /// <param name="term"> the term to query for </param>
 /// <param name="context"> the <see cref="TermContext"/> to hand to the term query; may be <c>null</c>,
 /// in which case the query is created from the term alone </param>
 /// <returns> a new <see cref="TermQuery"/> instance </returns>
 protected virtual Query NewTermQuery(Term term, TermContext context)
 {
     if (context == null)
     {
         // No pre-collected term state available: build from the term only.
         return new TermQuery(term);
     }

     return new TermQuery(term, context);
 }
Пример #2
0
        /// <summary>
        /// Walks every leaf in <paramref name="leaves"/> and, for each entry of
        /// <paramref name="queryTerms"/> found in that leaf, records the term's state and
        /// statistics in the slot of <paramref name="contextArray"/> with the same index.
        /// A slot that is still <c>null</c> gets a new <see cref="TermContext"/>; an already
        /// populated slot has the leaf registered on it.
        /// </summary>
        /// <param name="reader"> reader whose context is used when a new <see cref="TermContext"/> is created </param>
        /// <param name="leaves"> the leaf contexts to inspect </param>
        /// <param name="contextArray"> output array, indexed in parallel with <paramref name="queryTerms"/> </param>
        /// <param name="queryTerms"> the terms to look up </param>
        public virtual void CollectTermContext(IndexReader reader, IList<AtomicReaderContext> leaves, TermContext[] contextArray, Term[] queryTerms)
        {
            // The enum is handed back to Terms.Iterator on every lookup so it can be reused.
            TermsEnum reusableEnum = null;
            foreach (AtomicReaderContext leaf in leaves)
            {
                Fields leafFields = leaf.AtomicReader.Fields;
                if (leafFields != null)
                {
                    for (int slot = 0; slot < queryTerms.Length; slot++)
                    {
                        Term queryTerm = queryTerms[slot];
                        TermContext existing = contextArray[slot];
                        Terms fieldTerms = leafFields.Terms(queryTerm.Field);
                        if (fieldTerms != null)
                        {
                            reusableEnum = fieldTerms.Iterator(reusableEnum);
                            Debug.Assert(reusableEnum != null);

                            // Skip the empty enum; otherwise only act when the exact term exists in this leaf.
                            if (reusableEnum != TermsEnum.EMPTY && reusableEnum.SeekExact(queryTerm.Bytes))
                            {
                                if (existing != null)
                                {
                                    existing.Register(reusableEnum.TermState(), leaf.Ord, reusableEnum.DocFreq(), reusableEnum.TotalTermFreq());
                                }
                                else
                                {
                                    contextArray[slot] = new TermContext(reader.Context, reusableEnum.TermState(), leaf.Ord, reusableEnum.DocFreq(), reusableEnum.TotalTermFreq());
                                }
                            }
                        }
                        // else: the field does not exist in this leaf
                    }
                }
                // else: this leaf reader has no fields
            }
        }
Пример #3
0
        /// <summary>
        /// Assembles the final query from the terms and their collected statistics.
        /// <para>
        /// Each term is placed into either a low-frequency or a high-frequency
        /// <see cref="BooleanQuery"/>. A term without a <see cref="TermContext"/> is treated as
        /// low-frequency. A term is high-frequency when its document frequency exceeds
        /// <c>maxTermFrequency</c> taken as an absolute count (only when <c>maxTermFrequency &gt;= 1</c>)
        /// or exceeds <c>ceil(maxTermFrequency * maxDoc)</c>.
        /// </para>
        /// <para>
        /// If one of the two groups is empty, the other is returned directly (carrying this
        /// query's boost); otherwise both are combined, with the low-frequency group required
        /// and the high-frequency group optional.
        /// </para>
        /// </summary>
        /// <param name="maxDoc"> the document count used to scale a relative <c>maxTermFrequency</c> </param>
        /// <param name="contextArray"> per-term contexts, indexed in parallel with <paramref name="queryTerms"/>; entries may be <c>null</c> </param>
        /// <param name="queryTerms"> the terms of this query </param>
        /// <returns> the query to execute </returns>
        protected internal virtual Query BuildQuery(int maxDoc, TermContext[] contextArray, Term[] queryTerms)
        {
            var lowFreq = new BooleanQuery(disableCoord);
            var highFreq = new BooleanQuery(disableCoord) { Boost = highFreqBoost };
            lowFreq.Boost = lowFreqBoost;

            // Keep the cutoff as a double. Casting to int is unchecked in C#: once
            // maxTermFrequency * maxDoc exceeds int.MaxValue the cast yields an unspecified
            // value (commonly int.MinValue), which would classify every term as
            // high-frequency. Comparing against the double is identical for in-range values.
            double relativeCutoff = Math.Ceiling(maxTermFrequency * (float)maxDoc);

            for (int i = 0; i < queryTerms.Length; i++)
            {
                TermContext termContext = contextArray[i];
                if (termContext == null)
                {
                    lowFreq.Add(NewTermQuery(queryTerms[i], null), lowFreqOccur);
                    continue;
                }

                int docFreq = termContext.DocFreq;
                bool isHighFreq = (maxTermFrequency >= 1f && docFreq > maxTermFrequency) || docFreq > relativeCutoff;
                if (isHighFreq)
                {
                    highFreq.Add(NewTermQuery(queryTerms[i], termContext), highFreqOccur);
                }
                else
                {
                    lowFreq.Add(NewTermQuery(queryTerms[i], termContext), lowFreqOccur);
                }
            }

            int numLowFreqClauses = lowFreq.Clauses.Length;
            int numHighFreqClauses = highFreq.Clauses.Length;
            if (lowFreqOccur == BooleanClause.Occur.SHOULD && numLowFreqClauses > 0)
            {
                lowFreq.MinimumNumberShouldMatch = CalcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
            }
            if (highFreqOccur == BooleanClause.Occur.SHOULD && numHighFreqClauses > 0)
            {
                highFreq.MinimumNumberShouldMatch = CalcHighFreqMinimumNumberShouldMatch(numHighFreqClauses);
            }

            if (numLowFreqClauses == 0)
            {
                /*
                 * if lowFreq is empty we rewrite the high freq terms in a conjunction to
                 * prevent slow queries.
                 */
                if (highFreq.MinimumNumberShouldMatch == 0 && highFreqOccur != BooleanClause.Occur.MUST)
                {
                    foreach (BooleanClause booleanClause in highFreq)
                    {
                        booleanClause.Occur_ = BooleanClause.Occur.MUST;
                    }
                }
                highFreq.Boost = Boost;
                return highFreq;
            }

            if (numHighFreqClauses == 0)
            {
                // only do low freq terms - we don't have high freq terms
                lowFreq.Boost = Boost;
                return lowFreq;
            }

            // Both groups are present: low-frequency terms must match, high-frequency terms are optional.
            var query = new BooleanQuery(true);
            query.Add(highFreq, BooleanClause.Occur.SHOULD);
            query.Add(lowFreq, BooleanClause.Occur.MUST);
            query.Boost = Boost;
            return query;
        }
Пример #4
0
 /// <summary>
 /// Rewrites this query against <paramref name="reader"/>.
 /// <para>
 /// With no terms an empty <see cref="BooleanQuery"/> is returned; with exactly one term a
 /// plain term query carrying this query's boost is returned. Otherwise the term contexts
 /// are collected across the reader's leaves and passed on to <c>BuildQuery</c>.
 /// </para>
 /// </summary>
 /// <param name="reader"> the reader to rewrite against </param>
 /// <returns> the rewritten query </returns>
 public override Query Rewrite(IndexReader reader)
 {
     switch (this.terms.Count)
     {
         case 0:
         {
             return new BooleanQuery();
         }
         case 1:
         {
             Query singleTermQuery = NewTermQuery(this.terms[0], null);
             singleTermQuery.Boost = Boost;
             return singleTermQuery;
         }
     }

     var leafContexts = reader.Leaves;
     int docCount = reader.MaxDoc;
     var termContexts = new TermContext[terms.Count];
     var termArray = this.terms.ToArray();
     CollectTermContext(reader, leafContexts, termContexts, termArray);
     return BuildQuery(docCount, termContexts, termArray);
 }
Пример #5
0
 /// <summary>
 /// Creates a <see cref="TermContext"/> from a top-level <see cref="IndexReaderContext"/> and the
 /// given <see cref="Term"/>. The term is looked up in each of the context's leaf readers, and
 /// every leaf that contains it is registered in the returned <see cref="TermContext"/> under
 /// the leaf's ordinal.
 /// <para>
 /// Note: the given context must be a top-level context.
 /// </para>
 /// </summary>
 /// <param name="context"> the top-level reader context whose leaves are searched </param>
 /// <param name="term"> the term to look up </param>
 /// <returns> the populated <see cref="TermContext"/> </returns>
 public static TermContext Build(IndexReaderContext context, Term term)
 {
     Debug.Assert(context != null && context.IsTopLevel);
     string fieldName = term.Field;
     BytesRef termBytes = term.Bytes;
     TermContext result = new TermContext(context);
     foreach (AtomicReaderContext leaf in context.Leaves)
     {
         Fields leafFields = leaf.AtomicReader.Fields;
         if (leafFields == null)
         {
             // this leaf has no fields
             continue;
         }

         Terms fieldTerms = leafFields.Terms(fieldName);
         if (fieldTerms == null)
         {
             // the field does not exist in this leaf
             continue;
         }

         TermsEnum leafEnum = fieldTerms.Iterator(null);
         if (!leafEnum.SeekExact(termBytes))
         {
             // the term does not occur in this leaf
             continue;
         }

         result.Register(leafEnum.TermState(), leaf.Ord, leafEnum.DocFreq(), leafEnum.TotalTermFreq());
     }
     return result;
 }
Пример #6
0
        /// <summary>
        /// Looks up every term of <paramref name="queryTerms"/> in every leaf of
        /// <paramref name="leaves"/>. When a term is found, its term state and statistics are
        /// stored at the matching index of <paramref name="contextArray"/>: a new
        /// <see cref="TermContext"/> is created if the entry is <c>null</c>, otherwise the leaf
        /// is registered on the existing entry.
        /// </summary>
        /// <param name="reader"> reader whose context is used when a new <see cref="TermContext"/> is created </param>
        /// <param name="leaves"> the leaf contexts to inspect </param>
        /// <param name="contextArray"> output array, indexed in parallel with <paramref name="queryTerms"/> </param>
        /// <param name="queryTerms"> the terms to look up </param>
        public virtual void CollectTermContext(IndexReader reader, IList<AtomicReaderContext> leaves, TermContext[] contextArray, Term[] queryTerms)
        {
            // Passed back into Terms.Iterator on each lookup so the enum can be reused.
            TermsEnum sharedEnum = null;
            foreach (AtomicReaderContext leafContext in leaves)
            {
                Fields leafFields = leafContext.AtomicReader.Fields;
                if (leafFields == null)
                {
                    // nothing to search in a leaf without fields
                    continue;
                }

                for (int idx = 0; idx < queryTerms.Length; idx++)
                {
                    Term current = queryTerms[idx];
                    TermContext collected = contextArray[idx];
                    Terms fieldTerms = leafFields.Terms(current.Field());
                    if (fieldTerms == null)
                    {
                        // the term's field is absent from this leaf
                        continue;
                    }

                    sharedEnum = fieldTerms.Iterator(sharedEnum);
                    Debug.Assert(sharedEnum != null);

                    // Short-circuit keeps SeekExact from being called on the empty enum.
                    if (sharedEnum == TermsEnum.EMPTY || !sharedEnum.SeekExact(current.Bytes()))
                    {
                        continue;
                    }

                    if (collected == null)
                    {
                        contextArray[idx] = new TermContext(reader.Context, sharedEnum.TermState(), leafContext.Ord, sharedEnum.DocFreq(), sharedEnum.TotalTermFreq());
                    }
                    else
                    {
                        collected.Register(sharedEnum.TermState(), leafContext.Ord, sharedEnum.DocFreq(), sharedEnum.TotalTermFreq());
                    }
                }
            }
        }