/// <summary>
/// Builds the <see cref="Spans"/> for this query's term within a single segment.
/// </summary>
/// <param name="context">The segment (atomic reader context) to read postings from.</param>
/// <param name="acceptDocs">Bits passed through to the postings enumeration to filter documents.</param>
/// <param name="termContexts">Previously collected per-term contexts; may lack an entry for this term.</param>
/// <returns>
/// <see cref="TermSpans.EMPTY_TERM_SPANS"/> when no term state can be resolved for this segment;
/// otherwise a <see cref="TermSpans"/> over the term's positional postings.
/// </returns>
/// <remarks>
/// Throws the exception produced by <c>IllegalStateException.Create</c> when the term exists
/// but no positional postings enumeration is available for it.
/// </remarks>
public override Spans GetSpans(AtomicReaderContext context, IBits acceptDocs, IDictionary<Term, TermContext> termContexts)
{
    // Stays null unless one of the two lookups below finds the term in this segment.
    TermState state = null;

    if (termContexts.TryGetValue(m_term, out TermContext termContext) && termContext != null)
    {
        state = termContext.Get(context.Ord);
    }
    else
    {
        // this happens with span-not query, as it doesn't include the NOT side in extractTerms()
        // so we seek to the term now in this segment..., this sucks because its ugly mostly!
        Fields segmentFields = context.AtomicReader.Fields;
        if (segmentFields != null)
        {
            Terms fieldTerms = segmentFields.GetTerms(m_term.Field);
            if (fieldTerms != null)
            {
                TermsEnum lookupEnum = fieldTerms.GetEnumerator();
                if (lookupEnum.SeekExact(m_term.Bytes))
                {
                    state = lookupEnum.GetTermState();
                }
            }
        }
    }

    if (state == null)
    {
        // term is not present in that reader
        return TermSpans.EMPTY_TERM_SPANS;
    }

    // Position a fresh enum on the term using the resolved state, then pull positions + payloads.
    TermsEnum positionedEnum = context.AtomicReader.GetTerms(m_term.Field).GetEnumerator();
    positionedEnum.SeekExact(m_term.Bytes, state);

    DocsAndPositionsEnum postings = positionedEnum.DocsAndPositions(acceptDocs, null, DocsAndPositionsFlags.PAYLOADS);
    if (postings == null)
    {
        // term does exist, but has no positions
        throw IllegalStateException.Create("field \"" + m_term.Field + "\" was indexed without position data; cannot run SpanTermQuery (term=" + m_term.Text + ")");
    }

    return new TermSpans(postings, m_term);
}
/// <summary>
/// Collects the term the current segment's <c>termsEnum</c> is positioned on.
/// <para/>
/// The term bytes are added to <c>terms</c>. A negative result from <c>Add</c> means the term
/// was already collected (at position <c>(-e) - 1</c>); in that case only the per-segment
/// statistics are registered on the existing <see cref="TermContext"/>. Otherwise a new entry is
/// populated with the current boost and a fresh <see cref="TermContext"/>, and the clause-count
/// limit is checked.
/// </summary>
/// <param name="bytes">The term bytes being collected.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool Collect(BytesRef bytes)
{
    int e = terms.Add(bytes);
    TermState state = termsEnum.GetTermState();
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(state != null);
    }

    if (e < 0)
    {
        // duplicate term: update docFreq
        int pos = (-e) - 1;
        array.termState[pos].Register(state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);

        // Compare the sortable bit patterns instead of using the float equality operator:
        // a direct float == can fail spuriously on x86 in .NET Framework with optimizations
        // enabled, even when both boosts hold the same 32-bit value.
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(
                NumericUtils.SingleToSortableInt32(array.boost[pos]) == NumericUtils.SingleToSortableInt32(boostAtt.Boost),
                "boost should be equal in all segment TermsEnums");
        }
    }
    else
    {
        // new entry: we populate the entry initially
        array.boost[e] = boostAtt.Boost;
        array.termState[e] = new TermContext(m_topReaderContext, state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
        outerInstance.CheckMaxClauseCount(terms.Count);
    }

    return true;
}
/// <summary>
/// Records the term the segment's <c>termsEnum</c> currently points at.
/// <para/>
/// A first-time term gets its boost stored and a new <see cref="TermContext"/> created, followed
/// by a max-clause-count check. A term that was already collected only has this segment's
/// statistics registered on its existing <see cref="TermContext"/>.
/// </summary>
/// <param name="bytes">The term bytes being collected.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool Collect(BytesRef bytes)
{
    int slot = terms.Add(bytes);
    TermState segmentState = termsEnum.GetTermState();
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(segmentState != null);
    }

    if (slot >= 0)
    {
        // First sighting of this term: fill in its boost and term context.
        array.boost[slot] = boostAtt.Boost;
        array.termState[slot] = new TermContext(m_topReaderContext, segmentState, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
        outerInstance.CheckMaxClauseCount(terms.Count);
        return true;
    }

    // Already collected: a negative slot encodes the existing position as -(position) - 1.
    int existing = -slot - 1;
    array.termState[existing].Register(segmentState, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);

    // LUCENENET specific - compare bits rather than using equality operators to prevent these
    // comparisons from failing in x86 in .NET Framework with optimizations enabled
    if (Debugging.AssertsEnabled)
    {
        int storedBits = NumericUtils.SingleToSortableInt32(array.boost[existing]);
        int incomingBits = NumericUtils.SingleToSortableInt32(boostAtt.Boost);
        Debugging.Assert(storedBits == incomingBits, "boost should be equal in all segment TermsEnums");
    }

    return true;
}
/// <summary>
/// Walks every leaf and every query term, seeking each term in its field and recording the
/// per-segment term state and statistics into <paramref name="contextArray"/>.
/// </summary>
/// <param name="reader">Reader whose <c>Context</c> is used when a new <see cref="TermContext"/> is created.</param>
/// <param name="leaves">The segment contexts to visit.</param>
/// <param name="contextArray">
/// Output array, indexed in parallel with <paramref name="queryTerms"/>; null slots are filled
/// on the first segment where the term is found, and existing entries are registered into.
/// </param>
/// <param name="queryTerms">The terms to look up in each segment.</param>
public virtual void CollectTermContext(IndexReader reader, IList<AtomicReaderContext> leaves, TermContext[] contextArray, Term[] queryTerms)
{
    // Passed back into GetIterator on each lookup so it can be reused across terms and segments.
    TermsEnum termsEnum = null;

    foreach (AtomicReaderContext leaf in leaves)
    {
        Fields leafFields = leaf.AtomicReader.Fields;
        if (leafFields == null)
        {
            // reader has no fields
            continue;
        }

        for (int idx = 0; idx < queryTerms.Length; idx++)
        {
            Term queryTerm = queryTerms[idx];
            TermContext collected = contextArray[idx];

            Terms fieldTerms = leafFields.GetTerms(queryTerm.Field);
            if (fieldTerms == null)
            {
                // field does not exist
                continue;
            }

            termsEnum = fieldTerms.GetIterator(termsEnum);
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(termsEnum != null);
            }

            if (termsEnum == TermsEnum.EMPTY)
            {
                continue;
            }

            if (!termsEnum.SeekExact(queryTerm.Bytes))
            {
                // term is absent from this segment
                continue;
            }

            if (collected != null)
            {
                collected.Register(termsEnum.GetTermState(), leaf.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
            }
            else
            {
                contextArray[idx] = new TermContext(reader.Context, termsEnum.GetTermState(), leaf.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
            }
        }
    }
}
/// <summary>
/// Offers the current term to the size-bounded queue of top-scoring terms.
/// <para/>
/// When the queue is full, a term whose boost is lower than the queue head's (or equal, with a
/// term that compares greater) is ignored. A term already tracked in <c>visitedTerms</c> only has
/// this segment's statistics registered. Otherwise the reusable <c>st</c> entry is filled, added
/// to the queue, and the queue is trimmed back to <c>maxSize</c> if needed.
/// </summary>
/// <param name="bytes">The term bytes being collected; copied before being retained.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool Collect(BytesRef bytes)
{
    float boost = boostAtt.Boost;

    // make sure within a single seg we always collect
    // terms in order
    Debug.Assert(CompareToLastTerm(bytes));

    // Boosts are compared through their sortable int bit patterns rather than with the float
    // operators: direct float comparisons can give wrong answers on x86 in .NET Framework with
    // optimizations enabled, and here that would change which terms are kept.
    int boostBits = NumericUtils.SingleToSortableInt32(boost);

    // ignore uncompetitive hits
    if (stQueue.Count == maxSize)
    {
        ScoreTerm t = stQueue.Peek();
        int headBits = NumericUtils.SingleToSortableInt32(t.Boost);
        if (boostBits < headBits)
        {
            return true;
        }
        if (boostBits == headBits && termComp.Compare(bytes, t.Bytes) > 0)
        {
            return true;
        }
    }

    TermState state = termsEnum.GetTermState();
    Debug.Assert(state != null);

    if (visitedTerms.TryGetValue(bytes, out ScoreTerm t2))
    {
        // if the term is already in the PQ, only update docFreq of term in PQ
        Debug.Assert(NumericUtils.SingleToSortableInt32(t2.Boost) == boostBits, "boost should be equal in all segment TermsEnums");
        t2.TermState.Register(state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
    }
    else
    {
        // add new entry in PQ, we must clone the term, else it may get overwritten!
        st.Bytes.CopyBytes(bytes);
        st.Boost = boost;
        visitedTerms[st.Bytes] = st;
        Debug.Assert(st.TermState.DocFreq == 0);
        st.TermState.Register(state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
        stQueue.Add(st);

        // possibly drop entries from queue
        if (stQueue.Count > maxSize)
        {
            // Recycle the evicted entry as the next scratch ScoreTerm.
            st = stQueue.Dequeue();
            visitedTerms.Remove(st.Bytes);
            st.TermState.Clear(); // reset the termstate!
        }
        else
        {
            st = new ScoreTerm(termComp, new TermContext(m_topReaderContext));
        }
        Debug.Assert(stQueue.Count <= maxSize, "the PQ size must be limited to maxSize");

        // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
        if (stQueue.Count == maxSize)
        {
            t2 = stQueue.Peek();
            maxBoostAtt.MaxNonCompetitiveBoost = t2.Boost;
            maxBoostAtt.CompetitiveTerm = t2.Bytes;
        }
    }

    return true;
}
/// <summary>
/// Offers the term currently positioned in <c>termsEnum</c> to the bounded top-terms queue.
/// <para/>
/// Terms that cannot beat the queue head when the queue is full are skipped. A term already
/// present in <c>visitedTerms</c> only gets this segment's statistics registered. A new term is
/// copied into the scratch <c>st</c> entry, queued, and the queue is trimmed to <c>maxSize</c>.
/// </summary>
/// <param name="bytes">The term bytes being collected; copied before being retained.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool Collect(BytesRef bytes)
{
    float boost = boostAtt.Boost;

    // Within a single segment, terms must arrive in order.
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(CompareToLastTerm(bytes));
    }

    // Skip hits that cannot displace the current queue head.
    if (stQueue.Count == maxSize)
    {
        ScoreTerm head = stQueue.Peek();

        // LUCENENET specific - compare bits rather than using equality operators to prevent these
        // comparisons from failing in x86 in .NET Framework with optimizations enabled
        int boostBits = NumericUtils.SingleToSortableInt32(boost);
        int headBits = NumericUtils.SingleToSortableInt32(head.Boost);

        if (boostBits < headBits || (boostBits == headBits && termComp.Compare(bytes, head.Bytes) > 0))
        {
            return true;
        }
    }

    TermState state = termsEnum.GetTermState();
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(state != null);
    }

    if (visitedTerms.TryGetValue(bytes, out ScoreTerm known))
    {
        // Term is already queued: only fold in this segment's statistics.
        // LUCENENET specific - compare bits rather than using equality operators to prevent these
        // comparisons from failing in x86 in .NET Framework with optimizations enabled
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(NumericUtils.SingleToSortableInt32(known.Boost) == NumericUtils.SingleToSortableInt32(boost), "boost should be equal in all segment TermsEnums");
        }
        known.TermState.Register(state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
        return true;
    }

    // New term: the bytes are copied into the scratch entry, since the caller's instance may be overwritten.
    st.Bytes.CopyBytes(bytes);
    st.Boost = boost;
    visitedTerms[st.Bytes] = st;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(st.TermState.DocFreq == 0);
    }
    st.TermState.Register(state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
    stQueue.Add(st);

    if (stQueue.Count > maxSize)
    {
        // Over capacity: evict the head and recycle it as the next scratch entry.
        st = stQueue.Dequeue();
        visitedTerms.Remove(st.Bytes);
        st.TermState.Clear(); // reset the termstate!
    }
    else
    {
        st = new ScoreTerm(termComp, new TermContext(m_topReaderContext));
    }

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(stQueue.Count <= maxSize, "the PQ size must be limited to maxSize");
    }

    // Publish the current floor so FuzzyTermsEnum can optimize.
    if (stQueue.Count == maxSize)
    {
        ScoreTerm floor = stQueue.Peek();
        maxBoostAtt.MaxNonCompetitiveBoost = floor.Boost;
        maxBoostAtt.CompetitiveTerm = floor.Bytes;
    }

    return true;
}