コード例 #1
0
        /// <summary>
        /// Produces the <see cref="Spans"/> for <c>m_term</c> inside the segment described by
        /// <paramref name="context"/>.
        /// </summary>
        /// <param name="context">Segment context whose reader is searched; its <c>Ord</c> selects the cached term state.</param>
        /// <param name="acceptDocs">Forwarded unchanged to <c>DocsAndPositions</c>.</param>
        /// <param name="termContexts">Map consulted first for a non-null <c>TermContext</c> registered under <c>m_term</c>.</param>
        /// <returns>
        /// <c>TermSpans.EMPTY_TERM_SPANS</c> when no term state can be resolved for this segment;
        /// otherwise a new <c>TermSpans</c> wrapping the term's postings.
        /// </returns>
        /// <remarks>
        /// Throws the exception built by <c>IllegalStateException.Create</c> when a term state was resolved
        /// but <c>DocsAndPositions</c> returns <c>null</c>.
        /// </remarks>
        public override Spans GetSpans(AtomicReaderContext context, IBits acceptDocs, IDictionary <Term, TermContext> termContexts)
        {
            TermState resolvedState = null;

            if (termContexts.TryGetValue(m_term, out TermContext cachedContext) && cachedContext != null)
            {
                resolvedState = cachedContext.Get(context.Ord);
            }
            else
            {
                // No usable entry in the supplied map. Per the original author's note this occurs with
                // span-not queries, whose NOT side is not included in extractTerms(); fall back to
                // seeking the term directly in this segment. Any missing piece leaves the state null.
                Terms fieldTerms = context.AtomicReader.Fields?.GetTerms(m_term.Field);
                if (fieldTerms != null)
                {
                    TermsEnum probe = fieldTerms.GetEnumerator();
                    if (probe.SeekExact(m_term.Bytes))
                    {
                        resolvedState = probe.GetTermState();
                    }
                }
            }

            // A null state means the term is not present in this reader.
            if (resolvedState == null)
            {
                return TermSpans.EMPTY_TERM_SPANS;
            }

            // Position a fresh enumerator on the term using the resolved state.
            TermsEnum positioned = context.AtomicReader.GetTerms(m_term.Field).GetEnumerator();
            positioned.SeekExact(m_term.Bytes, resolvedState);

            DocsAndPositionsEnum postings = positioned.DocsAndPositions(acceptDocs, null, DocsAndPositionsFlags.PAYLOADS);
            if (postings == null)
            {
                // The term exists, but no positions are available for it.
                throw IllegalStateException.Create("field \"" + m_term.Field + "\" was indexed without position data; cannot run SpanTermQuery (term=" + m_term.Text + ")");
            }

            return new TermSpans(postings, m_term);
        }
コード例 #2
0
            /// <summary>
            /// Records the term in <paramref name="bytes"/> together with the current segment's term state,
            /// document frequency and total term frequency.
            /// </summary>
            /// <param name="bytes">The term bytes to add to <c>terms</c>.</param>
            /// <returns>Always <c>true</c>.</returns>
            /// <remarks>
            /// A negative result from <c>terms.Add</c> is treated as "term already present" and decoded as
            /// <c>(-e) - 1</c> to find the existing slot; a non-negative result is the slot of a new entry.
            /// </remarks>
            public override bool Collect(BytesRef bytes)
            {
                int       e     = terms.Add(bytes);
                TermState state = termsEnum.GetTermState();

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(state != null);
                }
                if (e < 0)
                {
                    // duplicate term: update docFreq
                    int pos = (-e) - 1;
                    array.termState[pos].Register(state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
                    // Compare the sortable bit patterns rather than using the float equality operator:
                    // on x86 in .NET Framework with optimizations enabled, one operand may be held at
                    // extended precision and make an == comparison of equal boosts fail.
                    // The type is fully qualified so this does not depend on the file's using directives.
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(Lucene.Net.Util.NumericUtils.SingleToSortableInt32(array.boost[pos]) == Lucene.Net.Util.NumericUtils.SingleToSortableInt32(boostAtt.Boost), "boost should be equal in all segment TermsEnums");
                    }
                }
                else
                {
                    // new entry: we populate the entry initially
                    array.boost[e]     = boostAtt.Boost;
                    array.termState[e] = new TermContext(m_topReaderContext, state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
                    outerInstance.CheckMaxClauseCount(terms.Count);
                }
                return(true);
            }
コード例 #3
0
            /// <summary>
            /// Records the term in <paramref name="bytes"/> together with the current segment's term state,
            /// document frequency and total term frequency.
            /// </summary>
            /// <param name="bytes">The term bytes to add to <c>terms</c>.</param>
            /// <returns>Always <c>true</c>.</returns>
            public override bool Collect(BytesRef bytes)
            {
                int slot = terms.Add(bytes);
                TermState segmentState = termsEnum.GetTermState();

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(segmentState != null);
                }

                if (slot >= 0)
                {
                    // First sighting of this term: fill in its boost and a fresh TermContext,
                    // then let the outer rewrite check the clause limit.
                    array.boost[slot] = boostAtt.Boost;
                    array.termState[slot] = new TermContext(m_topReaderContext, segmentState, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
                    outerInstance.CheckMaxClauseCount(terms.Count);
                    return true;
                }

                // Negative result: the term was already present; decode the existing slot and
                // register this segment's statistics on it.
                int existing = -slot - 1;
                array.termState[existing].Register(segmentState, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);

                // LUCENENET specific - compare bits rather than using equality operators to prevent
                // this comparison from failing in x86 in .NET Framework with optimizations enabled
                if (Debugging.AssertsEnabled)
                {
                    int storedBoostBits = NumericUtils.SingleToSortableInt32(array.boost[existing]);
                    int currentBoostBits = NumericUtils.SingleToSortableInt32(boostAtt.Boost);
                    Debugging.Assert(storedBoostBits == currentBoostBits, "boost should be equal in all segment TermsEnums");
                }

                return true;
            }
コード例 #4
0
        /// <summary>
        /// Visits every leaf in <paramref name="leaves"/> and, for each query term found there,
        /// creates or updates the matching slot of <paramref name="contextArray"/>.
        /// </summary>
        /// <param name="reader">Reader whose <c>Context</c> is passed to each newly created <c>TermContext</c>.</param>
        /// <param name="leaves">Segment contexts to scan.</param>
        /// <param name="contextArray">Output array indexed in parallel with <paramref name="queryTerms"/>; null slots are filled on first match.</param>
        /// <param name="queryTerms">Terms to look up in every leaf.</param>
        public virtual void CollectTermContext(IndexReader reader, IList <AtomicReaderContext> leaves, TermContext[] contextArray, Term[] queryTerms)
        {
            // Handed back to GetIterator on every call so it can be reused across fields and leaves.
            TermsEnum reusableEnum = null;

            foreach (AtomicReaderContext leaf in leaves)
            {
                Fields leafFields = leaf.AtomicReader.Fields;
                if (leafFields == null)
                {
                    // This reader has no fields at all.
                    continue;
                }

                for (int idx = 0; idx < queryTerms.Length; idx++)
                {
                    Term wanted = queryTerms[idx];
                    TermContext existing = contextArray[idx];

                    Terms fieldTerms = leafFields.GetTerms(wanted.Field);
                    if (fieldTerms == null)
                    {
                        // The field is absent from this leaf.
                        continue;
                    }

                    reusableEnum = fieldTerms.GetIterator(reusableEnum);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(reusableEnum != null);
                    }

                    // Nothing to record when the enumerator is the shared empty instance
                    // or the term cannot be found in this leaf.
                    if (reusableEnum == TermsEnum.EMPTY || !reusableEnum.SeekExact(wanted.Bytes))
                    {
                        continue;
                    }

                    if (existing != null)
                    {
                        existing.Register(reusableEnum.GetTermState(), leaf.Ord, reusableEnum.DocFreq, reusableEnum.TotalTermFreq);
                    }
                    else
                    {
                        contextArray[idx] = new TermContext(reader.Context, reusableEnum.GetTermState(), leaf.Ord, reusableEnum.DocFreq, reusableEnum.TotalTermFreq);
                    }
                }
            }
        }
コード例 #5
0
ファイル: TopTermsRewrite.cs プロジェクト: segovia/lucenenet
            /// <summary>
            /// Offers the term in <paramref name="bytes"/>, with the boost currently reported by
            /// <c>boostAtt</c>, to the size-limited queue of score terms.
            /// </summary>
            /// <param name="bytes">The term bytes being collected; copied before being stored.</param>
            /// <returns>Always <c>true</c>.</returns>
            /// <remarks>
            /// Boosts are compared through <c>NumericUtils.SingleToSortableInt32</c> instead of the float
            /// <c>&lt;</c> / <c>==</c> operators. On x86 in .NET Framework with optimizations enabled, a
            /// float held in a register may carry extra precision, which can make direct comparisons of
            /// otherwise equal boosts give the wrong answer.
            /// NOTE(review): the sortable-bits ordering is assumed to match float ordering for the
            /// ordinary boost values seen here; confirm if NaN or negative-zero boosts are possible.
            /// </remarks>
            public override bool Collect(BytesRef bytes)
            {
                float boost = boostAtt.Boost;

                // make sure within a single seg we always collect
                // terms in order
                Debug.Assert(CompareToLastTerm(bytes));

                //System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
                // ignore uncompetitive hits
                if (stQueue.Count == maxSize)
                {
                    ScoreTerm t = stQueue.Peek();
                    // Fully qualified so the fix does not depend on the file's using directives.
                    int boostBits = Lucene.Net.Util.NumericUtils.SingleToSortableInt32(boost);
                    int floorBits = Lucene.Net.Util.NumericUtils.SingleToSortableInt32(t.Boost);
                    if (boostBits < floorBits)
                    {
                        return(true);
                    }
                    if (boostBits == floorBits && termComp.Compare(bytes, t.Bytes) > 0)
                    {
                        return(true);
                    }
                }
                TermState state = termsEnum.GetTermState();

                Debug.Assert(state != null);
                if (visitedTerms.TryGetValue(bytes, out ScoreTerm t2))
                {
                    // if the term is already in the PQ, only update docFreq of term in PQ
                    Debug.Assert(Lucene.Net.Util.NumericUtils.SingleToSortableInt32(t2.Boost) == Lucene.Net.Util.NumericUtils.SingleToSortableInt32(boost), "boost should be equal in all segment TermsEnums");
                    t2.TermState.Register(state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
                }
                else
                {
                    // add new entry in PQ, we must clone the term, else it may get overwritten!
                    st.Bytes.CopyBytes(bytes);
                    st.Boost = boost;
                    visitedTerms[st.Bytes] = st;
                    Debug.Assert(st.TermState.DocFreq == 0);
                    st.TermState.Register(state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
                    stQueue.Add(st);
                    // possibly drop entries from queue
                    if (stQueue.Count > maxSize)
                    {
                        // Recycle the evicted entry as the scratch ScoreTerm for the next new term.
                        st = stQueue.Dequeue();
                        visitedTerms.Remove(st.Bytes);
                        st.TermState.Clear(); // reset the termstate!
                    }
                    else
                    {
                        st = new ScoreTerm(termComp, new TermContext(m_topReaderContext));
                    }
                    Debug.Assert(stQueue.Count <= maxSize, "the PQ size must be limited to maxSize");
                    // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
                    if (stQueue.Count == maxSize)
                    {
                        t2 = stQueue.Peek();
                        maxBoostAtt.MaxNonCompetitiveBoost = t2.Boost;
                        maxBoostAtt.CompetitiveTerm        = t2.Bytes;
                    }
                }

                return(true);
            }
コード例 #6
0
            /// <summary>
            /// Offers the term in <paramref name="bytes"/>, with the boost currently reported by
            /// <c>boostAtt</c>, to the size-limited queue of score terms.
            /// </summary>
            /// <param name="bytes">The term bytes being collected; copied before being stored.</param>
            /// <returns>Always <c>true</c>.</returns>
            public override bool Collect(BytesRef bytes)
            {
                float boost = boostAtt.Boost;

                // Within a single segment terms must always be collected in order.
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(CompareToLastTerm(bytes));
                }

                // Skip hits that cannot compete once the queue is full.
                if (stQueue.Count == maxSize)
                {
                    ScoreTerm weakest = stQueue.Peek();

                    // LUCENENET specific - compare bits rather than using the float operators to prevent
                    // these comparisons from failing in x86 in .NET Framework with optimizations enabled
                    int candidateBits = NumericUtils.SingleToSortableInt32(boost);
                    int weakestBits = NumericUtils.SingleToSortableInt32(weakest.Boost);

                    if (candidateBits < weakestBits
                        || (candidateBits == weakestBits && termComp.Compare(bytes, weakest.Bytes) > 0))
                    {
                        return true;
                    }
                }

                TermState segmentState = termsEnum.GetTermState();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(segmentState != null);
                }

                if (visitedTerms.TryGetValue(bytes, out ScoreTerm known))
                {
                    // The term is already queued: only fold this segment's statistics into it.
                    // LUCENENET specific - bitwise comparison, same reason as above.
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(NumericUtils.SingleToSortableInt32(known.Boost) == NumericUtils.SingleToSortableInt32(boost), "boost should be equal in all segment TermsEnums");
                    }
                    known.TermState.Register(segmentState, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
                    return true;
                }

                // New term: the bytes must be copied into the scratch entry, otherwise they may be overwritten.
                st.Bytes.CopyBytes(bytes);
                st.Boost = boost;
                visitedTerms[st.Bytes] = st;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(st.TermState.DocFreq == 0);
                }
                st.TermState.Register(segmentState, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
                stQueue.Add(st);

                if (stQueue.Count > maxSize)
                {
                    // Over capacity: evict one entry and recycle it as the next scratch entry.
                    st = stQueue.Dequeue();
                    visitedTerms.Remove(st.Bytes);
                    st.TermState.Clear(); // reset the term state
                }
                else
                {
                    st = new ScoreTerm(termComp, new TermContext(m_topReaderContext));
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(stQueue.Count <= maxSize, "the PQ size must be limited to maxSize");
                }

                // Publish the current floor through maxBoostAtt to help FuzzyTermsEnum optimize.
                if (stQueue.Count == maxSize)
                {
                    ScoreTerm floor = stQueue.Peek();
                    maxBoostAtt.MaxNonCompetitiveBoost = floor.Boost;
                    maxBoostAtt.CompetitiveTerm = floor.Bytes;
                }

                return true;
            }