Exemple #1
0
            /// <summary>
            /// Adds one term to <c>PendingTerms</c>, accumulates its document frequency and
            /// stores or updates the matching <c>TermContext</c>.
            /// </summary>
            /// <param name="bytes">Term bytes handed to <c>PendingTerms.Add</c>.</param>
            /// <returns>
            /// <c>false</c> (after setting <c>HasCutOff</c>) once the pending term count has
            /// reached <c>TermCountLimit</c> or the accumulated document frequency has reached
            /// <c>DocCountCutoff</c>; <c>true</c> otherwise.
            /// </returns>
            public override bool Collect(BytesRef bytes)
            {
                int slot = PendingTerms.Add(bytes);
                DocVisitCount += TermsEnum.DocFreq();

                bool limitReached = PendingTerms.Size() >= TermCountLimit || DocVisitCount >= DocCountCutoff;
                if (limitReached)
                {
                    HasCutOff = true;
                    return false;
                }

                TermState currentState = TermsEnum.TermState();
                Debug.Assert(currentState != null);

                if (slot >= 0)
                {
                    // Non-negative slot: store a fresh TermContext there.
                    Array.TermState[slot] = new TermContext(TopReaderContext, currentState, ReaderContext.Ord, TermsEnum.DocFreq(), TermsEnum.TotalTermFreq());
                    return true;
                }

                // A negative slot is decoded as -(index) - 1; this reader's statistics are
                // registered on the TermContext already stored at that index.
                int existingSlot = -slot - 1;
                Array.TermState[existingSlot].Register(currentState, ReaderContext.Ord, TermsEnum.DocFreq(), TermsEnum.TotalTermFreq());
                return true;
            }
        /// <summary>
        /// Visits every leaf in <paramref name="leaves"/> and, for each query term found
        /// there, creates or updates the <c>TermContext</c> at the same index of
        /// <paramref name="contextArray"/>.
        /// </summary>
        /// <param name="reader">Reader whose <c>Context</c> is passed to newly created <c>TermContext</c> instances.</param>
        /// <param name="leaves">Leaf contexts to search.</param>
        /// <param name="contextArray">Per-term contexts, indexed in parallel with <paramref name="queryTerms"/>; null entries are filled on first match.</param>
        /// <param name="queryTerms">Terms to look up in each leaf.</param>
        public virtual void CollectTermContext(IndexReader reader, IList<AtomicReaderContext> leaves, TermContext[] contextArray, Term[] queryTerms)
        {
            // One enumerator reference is passed back into Iterator() on every lookup.
            TermsEnum reusableEnum = null;

            foreach (AtomicReaderContext leaf in leaves)
            {
                Fields leafFields = leaf.AtomicReader.Fields;
                if (leafFields == null)
                {
                    // This leaf has no fields.
                    continue;
                }

                for (int idx = 0; idx < queryTerms.Length; idx++)
                {
                    Term queryTerm = queryTerms[idx];
                    TermContext existing = contextArray[idx];

                    Terms fieldTerms = leafFields.Terms(queryTerm.Field());
                    if (fieldTerms == null)
                    {
                        // The term's field is absent from this leaf.
                        continue;
                    }

                    reusableEnum = fieldTerms.Iterator(reusableEnum);
                    Debug.Assert(reusableEnum != null);

                    // Skip the empty enumerator and terms that cannot be found in this leaf.
                    if (reusableEnum == TermsEnum.EMPTY || !reusableEnum.SeekExact(queryTerm.Bytes()))
                    {
                        continue;
                    }

                    if (existing != null)
                    {
                        existing.Register(reusableEnum.TermState(), leaf.Ord, reusableEnum.DocFreq(), reusableEnum.TotalTermFreq());
                    }
                    else
                    {
                        contextArray[idx] = new TermContext(reader.Context, reusableEnum.TermState(), leaf.Ord, reusableEnum.DocFreq(), reusableEnum.TotalTermFreq());
                    }
                }
            }
        }
Exemple #3
0
        /// <summary>
        /// Walks all <paramref name="leaves"/> and, for each query term located in a leaf,
        /// either creates the <c>TermContext</c> at the matching index of
        /// <paramref name="contextArray"/> or registers the leaf's statistics on the existing one.
        /// </summary>
        /// <param name="reader">Reader whose <c>Context</c> is given to newly created <c>TermContext</c> instances.</param>
        /// <param name="leaves">Leaf contexts to inspect.</param>
        /// <param name="contextArray">Per-term contexts, parallel to <paramref name="queryTerms"/>; null slots are filled on first match.</param>
        /// <param name="queryTerms">Terms to seek in every leaf.</param>
        public virtual void CollectTermContext(IndexReader reader, IList<AtomicReaderContext> leaves, TermContext[] contextArray, Term[] queryTerms)
        {
            // The same enumerator reference is handed back to Iterator() for each lookup.
            TermsEnum sharedEnum = null;

            foreach (AtomicReaderContext leaf in leaves)
            {
                Fields leafFields = leaf.AtomicReader.Fields;
                if (leafFields == null)
                {
                    // Nothing to look up: this leaf has no fields.
                    continue;
                }

                for (int termIndex = 0; termIndex < queryTerms.Length; termIndex++)
                {
                    Term wanted = queryTerms[termIndex];
                    TermContext known = contextArray[termIndex];

                    Terms fieldTerms = leafFields.Terms(wanted.Field);
                    if (fieldTerms == null)
                    {
                        // The field does not exist in this leaf.
                        continue;
                    }

                    sharedEnum = fieldTerms.Iterator(sharedEnum);
                    Debug.Assert(sharedEnum != null);

                    bool found = sharedEnum != TermsEnum.EMPTY && sharedEnum.SeekExact(wanted.Bytes);
                    if (!found)
                    {
                        continue;
                    }

                    if (known != null)
                    {
                        known.Register(sharedEnum.TermState(), leaf.Ord, sharedEnum.DocFreq(), sharedEnum.TotalTermFreq());
                    }
                    else
                    {
                        contextArray[termIndex] = new TermContext(reader.Context, sharedEnum.TermState(), leaf.Ord, sharedEnum.DocFreq(), sharedEnum.TotalTermFreq());
                    }
                }
            }
        }
Exemple #4
0
            /// <summary>
            /// Adds one term to <c>Terms</c> and stores or updates its boost and
            /// <c>TermContext</c> in <c>Array</c>.
            /// </summary>
            /// <param name="bytes">Term bytes handed to <c>Terms.Add</c>.</param>
            /// <returns>Always <c>true</c>.</returns>
            public override bool Collect(BytesRef bytes)
            {
                int slot = Terms.Add(bytes);
                TermState currentState = TermsEnum.TermState();
                Debug.Assert(currentState != null);

                if (slot >= 0)
                {
                    // New entry: fill in boost and term state, then hand the new term
                    // count to the outer instance's clause-count check.
                    Array.Boost[slot] = BoostAtt.Boost;
                    Array.TermState[slot] = new TermContext(TopReaderContext, currentState, ReaderContext.Ord, TermsEnum.DocFreq(), TermsEnum.TotalTermFreq());
                    OuterInstance.CheckMaxClauseCount(Terms.Size());
                    return true;
                }

                // Duplicate term: the negative slot decodes to the existing index, whose
                // TermContext receives this reader's statistics.
                int existingSlot = -slot - 1;
                Array.TermState[existingSlot].Register(currentState, ReaderContext.Ord, TermsEnum.DocFreq(), TermsEnum.TotalTermFreq());
                Debug.Assert(Array.Boost[existingSlot] == BoostAtt.Boost, "boost should be equal in all segment TermsEnums");
                return true;
            }
Exemple #5
0
        /// <summary>
        /// Builds the <c>Spans</c> for this query's term within the given leaf.
        /// </summary>
        /// <param name="context">Leaf whose reader is searched.</param>
        /// <param name="acceptDocs">Passed through to <c>DocsAndPositions</c>.</param>
        /// <param name="termContexts">Lookup of previously collected term contexts; may lack an entry for this term.</param>
        /// <returns>
        /// <c>TermSpans.EMPTY_TERM_SPANS</c> when no term state can be obtained for this leaf;
        /// otherwise a <c>TermSpans</c> over the term's postings.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The term state was found but <c>DocsAndPositions</c> returned null.
        /// </exception>
        public override Spans GetSpans(AtomicReaderContext context, Bits acceptDocs, IDictionary<Term, TermContext> termContexts)
        {
            TermContext cached;
            termContexts.TryGetValue(term, out cached);

            TermState state = null;
            if (cached != null)
            {
                state = cached.Get(context.Ord);
            }
            else
            {
                // No precomputed context: this happens with span-not queries, which do not
                // include the NOT side in extractTerms(), so seek to the term in this
                // segment directly.
                Fields leafFields = context.AtomicReader.Fields;
                Terms fieldTerms = leafFields == null ? null : leafFields.Terms(term.Field());
                if (fieldTerms != null)
                {
                    TermsEnum seeker = fieldTerms.Iterator(null);
                    if (seeker.SeekExact(term.Bytes()))
                    {
                        state = seeker.TermState();
                    }
                }
            }

            if (state == null)
            {
                // The term is not present in this reader.
                return TermSpans.EMPTY_TERM_SPANS;
            }

            TermsEnum postingsEnum = context.AtomicReader.Terms(term.Field()).Iterator(null);
            postingsEnum.SeekExact(term.Bytes(), state);

            DocsAndPositionsEnum postings = postingsEnum.DocsAndPositions(acceptDocs, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
            if (postings == null)
            {
                // The term exists but has no positions.
                throw new InvalidOperationException("field \"" + term.Field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.Text() + ")");
            }

            return new TermSpans(postings, term);
        }
Exemple #6
0
            /// <summary>
            /// Offers one term to the bounded queue <c>StQueue</c>, keeping at most
            /// <c>MaxSize</c> entries and tracking queued terms in <c>visitedTerms</c>.
            /// </summary>
            /// <param name="bytes">Bytes of the term to collect.</param>
            /// <returns>Always <c>true</c>.</returns>
            public override bool Collect(BytesRef bytes)
            {
                float currentBoost = boostAtt.Boost;

                // Within a single segment, terms must always be collected in order.
                Debug.Assert(CompareToLastTerm(bytes));

                // Ignore uncompetitive hits once the queue is full.
                if (StQueue.Size() == MaxSize)
                {
                    ScoreTerm queueTop = StQueue.Top();
                    bool uncompetitive = currentBoost < queueTop.Boost
                        || (currentBoost == queueTop.Boost && termComp.Compare(bytes, queueTop.Bytes) > 0);
                    if (uncompetitive)
                    {
                        return true;
                    }
                }

                TermState currentState = termsEnum.TermState();
                Debug.Assert(currentState != null);

                ScoreTerm alreadyQueued;
                if (visitedTerms.TryGetValue(bytes, out alreadyQueued))
                {
                    // The term is already in the queue: only register this reader's statistics on it.
                    Debug.Assert(alreadyQueued.Boost == currentBoost, "boost should be equal in all segment TermsEnums");
                    alreadyQueued.TermState.Register(currentState, ReaderContext.Ord, termsEnum.DocFreq(), termsEnum.TotalTermFreq());
                    return true;
                }

                // New queue entry: the term bytes are copied into the spare ScoreTerm,
                // otherwise they may get overwritten.
                st.Bytes.CopyBytes(bytes);
                st.Boost = currentBoost;
                visitedTerms[st.Bytes] = st;
                Debug.Assert(st.TermState.DocFreq == 0);
                st.TermState.Register(currentState, ReaderContext.Ord, termsEnum.DocFreq(), termsEnum.TotalTermFreq());
                StQueue.Add(st);

                if (StQueue.Size() > MaxSize)
                {
                    // Over capacity: the popped entry becomes the next spare after its
                    // bookkeeping is removed and its term state is reset.
                    st = StQueue.Pop();
                    visitedTerms.Remove(st.Bytes);
                    st.TermState.Clear();
                }
                else
                {
                    st = new ScoreTerm(termComp, new TermContext(TopReaderContext));
                }
                Debug.Assert(StQueue.Size() <= MaxSize, "the PQ size must be limited to maxSize");

                // Publish the current queue top through maxBoostAtt to help FuzzyTermsEnum optimize.
                if (StQueue.Size() == MaxSize)
                {
                    ScoreTerm threshold = StQueue.Top();
                    maxBoostAtt.MaxNonCompetitiveBoost = threshold.Boost;
                    maxBoostAtt.CompetitiveTerm = threshold.Bytes;
                }

                return true;
            }