/// <summary>
/// Copies the term statistics and block position held by <paramref name="_other"/>
/// into this instance.
/// </summary>
/// <param name="_other">
/// State to copy from. It is cast to <c>BlockTermState</c>, so any other type
/// fails the cast (and, in assert-enabled builds, trips the assertion first).
/// </param>
public override void CopyFrom(TermState _other)
{
    Debug.Assert(_other is BlockTermState, "can not copy from " + _other.GetType().Name);
    BlockTermState source = (BlockTermState)_other;

    // Let the base class copy whatever it tracks before the fields owned here.
    base.CopyFrom(_other);

    DocFreq = source.DocFreq;
    TotalTermFreq = source.TotalTermFreq;
    TermBlockOrd = source.TermBlockOrd;
    BlockFilePointer = source.BlockFilePointer;
}
/// <summary>
/// Forwards the seek, with both arguments unchanged, to <c>ActualEnum</c>.
/// </summary>
/// <param name="term">Term passed through to the wrapped enum.</param>
/// <param name="state">Term state passed through to the wrapped enum.</param>
public override void SeekExact(BytesRef term, TermState state) => ActualEnum.SeekExact(term, state);
/// <summary>
/// Copies the base state and then the three offsets (frequency, proximity, skip)
/// from <paramref name="_other"/>.
/// </summary>
/// <param name="_other">State to copy from; it is cast to <c>StandardTermState</c>.</param>
public override void CopyFrom(TermState _other)
{
    // The base copy runs before the cast, so it happens even if the cast below fails.
    base.CopyFrom(_other);

    StandardTermState source = (StandardTermState)_other;
    FreqOffset = source.FreqOffset;
    ProxOffset = source.ProxOffset;
    SkipOffset = source.SkipOffset;
}
/// <summary>
/// Adopts a previously captured term state for <paramref name="target"/> without
/// doing any lookup here: the state is copied, the term bytes are recorded, and
/// the enum is flagged as having a pending seek.
/// </summary>
/// <param name="target">Term bytes copied into this enum's current term.</param>
/// <param name="otherState">
/// State to adopt; asserted to be a <c>BlockTermState</c>. When <c>_doOrd</c> is set,
/// its <c>Ord</c> is also asserted to be below <c>_fieldReader._numTerms</c>.
/// </param>
public override void SeekExact(BytesRef target, TermState otherState)
{
    Debug.Assert(otherState is BlockTermState);
    Debug.Assert(!_doOrd || ((BlockTermState)otherState).Ord < _fieldReader._numTerms);

    _state.CopyFrom(otherState);

    // NOTE(review): presumably the actual positioning is deferred until the
    // enum is next read - confirm against the code that consumes _seekPending.
    _seekPending = true;
    _indexIsCurrent = false;

    _term.CopyBytes(target);
}
/// <summary>
/// Copies the base state and then the file pointers, offsets and singleton doc id
/// from <paramref name="_other"/>.
/// </summary>
/// <param name="_other">State to copy from; it is cast to <c>IntBlockTermState</c>.</param>
public override void CopyFrom(TermState _other)
{
    // The base copy runs before the cast, so it happens even if the cast below fails.
    base.CopyFrom(_other);

    IntBlockTermState source = (IntBlockTermState)_other;

    // Start file pointers.
    DocStartFP = source.DocStartFP;
    PosStartFP = source.PosStartFP;
    PayStartFP = source.PayStartFP;

    // Remaining per-term values.
    LastPosBlockOffset = source.LastPosBlockOffset;
    SkipOffset = source.SkipOffset;
    SingletonDocID = source.SingletonDocID;
}
/// <summary>
/// Positions this enum using the ordinal stored in <paramref name="state"/>.
/// </summary>
/// <param name="term">
/// Only used by the assertion, which checks it equals <c>scratch</c> after
/// <c>SetTerm()</c> has run.
/// </param>
/// <param name="state">Cast to <c>OrdTermState</c>; its <c>Ord</c> is narrowed to <c>int</c>.</param>
public override void SeekExact(BytesRef term, TermState state)
{
    OrdTermState ordState = (OrdTermState)state;
    termOrd = (int)ordState.Ord;

    SetTerm();

    Debug.Assert(term.Equals(scratch));
}
/// <summary>
/// Adopts a previously captured term state for <paramref name="target"/>. Nothing
/// is changed when the enum's current term already compares equal to
/// <paramref name="target"/> and <c>TermExists</c> is true.
/// </summary>
/// <param name="target">Term bytes to position on.</param>
/// <param name="otherState">State to adopt; asserted to be a non-null <c>BlockTermState</c>.</param>
public override void SeekExact(BytesRef target, TermState otherState)
{
    // ClearEOF() is an argument to Debug.Assert, so it only runs in builds
    // where Debug.Assert calls are compiled in.
    Debug.Assert(ClearEOF());

    // Skip the seek: already on the target.
    if (target.CompareTo(Term_Renamed) == 0 && TermExists)
    {
        return;
    }

    Debug.Assert(otherState != null && otherState is BlockTermState);

    // Switch to the static frame and load the supplied state into it.
    CurrentFrame = StaticFrame;
    CurrentFrame.State.CopyFrom(otherState);
    Term_Renamed.CopyBytes(target);

    CurrentFrame.MetaDataUpto = CurrentFrame.TermBlockOrd;
    Debug.Assert(CurrentFrame.MetaDataUpto > 0);

    ValidIndexPrefix = 0;
}
/// <summary>
/// Offers one term to the bounded queue of scored terms.
/// </summary>
/// <remarks>
/// Once the queue holds <c>maxSize</c> entries, a term is ignored when its boost is
/// lower than the queue head's, or equal with a term that compares greater than the
/// head's. A term already present in <c>visitedTerms</c> only has the current
/// segment's state registered on its existing entry; otherwise the pending
/// <c>st</c> entry is filled in and queued.
/// </remarks>
/// <param name="bytes">Term bytes; copied into the queue entry, never stored by reference.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool Collect(BytesRef bytes)
{
    float boost = boostAtt.Boost;

    // make sure within a single seg we always collect terms in order
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(CompareToLastTerm(bytes));
    }

    // ignore uncompetitive hits
    if (stQueue.Count == maxSize)
    {
        ScoreTerm head = stQueue.Peek();

        // LUCENENET specific - compare bits rather than using equality operators to prevent
        // these comparisons from failing in x86 in .NET Framework with optimizations enabled
        int boostBits = NumericUtils.SingleToSortableInt32(boost);
        int headBits = NumericUtils.SingleToSortableInt32(head.Boost);

        if (boostBits < headBits
            || (boostBits == headBits && termComp.Compare(bytes, head.Bytes) > 0))
        {
            return true;
        }
    }

    TermState state = termsEnum.GetTermState();
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(state != null);
    }

    if (visitedTerms.TryGetValue(bytes, out ScoreTerm known))
    {
        // if the term is already in the PQ, only update docFreq of term in PQ
        // LUCENENET specific - compare bits rather than using equality operators (see above)
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(NumericUtils.SingleToSortableInt32(known.Boost) == NumericUtils.SingleToSortableInt32(boost), "boost should be equal in all segment TermsEnums");
        }

        known.TermState.Register(state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
        return true;
    }

    // add new entry in PQ, we must clone the term, else it may get overwritten!
    st.Bytes.CopyBytes(bytes);
    st.Boost = boost;
    visitedTerms[st.Bytes] = st;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(st.TermState.DocFreq == 0);
    }

    st.TermState.Register(state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
    stQueue.Add(st);

    // possibly drop entries from queue
    if (stQueue.Count > maxSize)
    {
        // Reuse the dropped entry as the next pending entry.
        st = stQueue.Dequeue();
        visitedTerms.Remove(st.Bytes);
        st.TermState.Clear(); // reset the termstate!
    }
    else
    {
        st = new ScoreTerm(termComp, new TermContext(m_topReaderContext));
    }

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(stQueue.Count <= maxSize, "the PQ size must be limited to maxSize");
    }

    // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
    if (stQueue.Count == maxSize)
    {
        ScoreTerm queueHead = stQueue.Peek();
        maxBoostAtt.MaxNonCompetitiveBoost = queueHead.Boost;
        maxBoostAtt.CompetitiveTerm = queueHead.Bytes;
    }

    return true;
}
/// <summary>
/// Creates the phrase scorer for one segment by opening a positions enum for
/// every term of the phrase.
/// </summary>
/// <param name="context">Segment whose reader and ordinal are used.</param>
/// <param name="acceptDocs">Passed to <c>DocsAndPositions</c> as the accepted-docs filter.</param>
/// <returns>
/// <c>null</c> when the field has no terms in this reader, when any term has no
/// state for this segment, or when the exact scorer reports <c>noDocs</c>;
/// otherwise an exact scorer (slop 0) or a sloppy scorer.
/// </returns>
/// <exception cref="InvalidOperationException">
/// A term has state but no positions enum, i.e. the field was indexed without position data.
/// </exception>
public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
{
    Debug.Assert(outerInstance.terms.Count > 0);

    AtomicReader reader = context.AtomicReader;
    PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[outerInstance.terms.Count];

    Terms fieldTerms = reader.GetTerms(outerInstance.field);
    if (fieldTerms == null)
    {
        return null;
    }

    // Reuse single TermsEnum below:
    TermsEnum te = fieldTerms.GetIterator(null);

    for (int i = 0; i < outerInstance.terms.Count; i++)
    {
        Term t = outerInstance.terms[i];
        TermState state = states[i].Get(context.Ord);

        if (state == null)
        {
            // term doesnt exist in this segment
            Debug.Assert(TermNotInReader(reader, t), "no termstate found but term exists in reader");
            return null;
        }

        te.SeekExact(t.Bytes, state);
        DocsAndPositionsEnum postingsEnum = te.DocsAndPositions(acceptDocs, null, DocsAndPositionsFlags.NONE);

        // PhraseQuery on a field that did not index positions.
        if (postingsEnum == null)
        {
            Debug.Assert(te.SeekExact(t.Bytes), "termstate found but no term exists in reader");
            // term does exist, but has no positions
            throw new InvalidOperationException("field \"" + t.Field + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.Text() + ")");
        }

        postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.DocFreq, (int)outerInstance.positions[i], t);
    }

    if (outerInstance.slop != 0)
    {
        return new SloppyPhraseScorer(this, postingsFreqs, outerInstance.slop, similarity.GetSimScorer(stats, context));
    }

    // Exact case: sort by increasing docFreq order, then use the optimized scorer.
    ArrayUtil.TimSort(postingsFreqs);

    ExactPhraseScorer exact = new ExactPhraseScorer(this, postingsFreqs, similarity.GetSimScorer(stats, context));
    return exact.noDocs ? null : exact;
}
/// <summary>
/// Copies the base state and then either the inlined postings bytes or the
/// wrapped term state from <paramref name="other"/>.
/// </summary>
/// <param name="other">State to copy from; it is cast to <c>PulsingTermState</c>.</param>
public override void CopyFrom(TermState other)
{
    base.CopyFrom(other);

    var source = (PulsingTermState)other;
    PostingsSize = source.PostingsSize;

    // A PostingsSize of -1 selects the wrapped-state path.
    // NOTE(review): presumably -1 means "postings not inlined" - confirm.
    if (source.PostingsSize == -1)
    {
        WrappedTermState.CopyFrom(source.WrappedTermState);
        return;
    }

    // Grow the local buffer only when it is missing or too small.
    bool needsLargerBuffer = Postings == null || Postings.Length < source.PostingsSize;
    if (needsLargerBuffer)
    {
        Postings = new byte[ArrayUtil.Oversize(source.PostingsSize, 1)];
    }

    Array.Copy(source.Postings, 0, Postings, 0, source.PostingsSize);
}
/// <summary>
/// Creates the phrase scorer for one segment. Each phrase position holds one or
/// more alternative terms; a position with several terms is served by a union enum.
/// </summary>
/// <param name="context">Segment whose reader and ordinal are used.</param>
/// <param name="acceptDocs">Passed to the postings enums as the accepted-docs filter.</param>
/// <returns>
/// <c>null</c> when the field has no terms in this reader, when a single-term
/// position has no state for this segment, when a multi-term position sums to a
/// doc frequency of 0, or when the exact scorer reports <c>NoDocs</c>; otherwise
/// an exact scorer (slop 0) or a sloppy scorer.
/// </returns>
/// <exception cref="InvalidOperationException">
/// A single-term position has state but no positions enum, i.e. the field was
/// indexed without position data.
/// </exception>
public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
{
    Debug.Assert(OuterInstance.termArrays.Count > 0);

    AtomicReader reader = context.AtomicReader;
    PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[OuterInstance.termArrays.Count];

    Terms fieldTerms = reader.Terms(OuterInstance.Field);
    if (fieldTerms == null)
    {
        return null;
    }

    // Reuse single TermsEnum below:
    TermsEnum termsEnum = fieldTerms.Iterator(null);

    for (int pos = 0; pos < postingsFreqs.Length; pos++)
    {
        Term[] terms = OuterInstance.termArrays[pos];
        DocsAndPositionsEnum postingsEnum;
        int docFreq;

        if (terms.Length > 1)
        {
            // The union enum is built before the doc-frequency pass, as in the original order.
            postingsEnum = new UnionDocsAndPositionsEnum(acceptDocs, context, terms, TermContexts, termsEnum);

            // coarse -- this overcounts since a given doc can have more than one term:
            docFreq = 0;
            foreach (Term candidate in terms)
            {
                TermState candidateState = TermContexts[candidate].Get(context.Ord);
                if (candidateState == null)
                {
                    // Term not in reader
                    continue;
                }

                termsEnum.SeekExact(candidate.Bytes(), candidateState);
                docFreq += termsEnum.DocFreq();
            }

            if (docFreq == 0)
            {
                // None of the terms are in this reader
                return null;
            }
        }
        else
        {
            Term only = terms[0];
            TermState onlyState = TermContexts[only].Get(context.Ord);
            if (onlyState == null)
            {
                // Term not in reader
                return null;
            }

            termsEnum.SeekExact(only.Bytes(), onlyState);
            postingsEnum = termsEnum.DocsAndPositions(acceptDocs, null, DocsEnum.FLAG_NONE);

            if (postingsEnum == null)
            {
                // term does exist, but has no positions
                Debug.Assert(termsEnum.Docs(acceptDocs, null, DocsEnum.FLAG_NONE) != null, "termstate found but no term exists in reader");
                throw new InvalidOperationException("field \"" + only.Field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + only.Text() + ")");
            }

            docFreq = termsEnum.DocFreq();
        }

        postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, (int)OuterInstance.positions[pos], terms);
    }

    if (OuterInstance.slop != 0)
    {
        return new SloppyPhraseScorer(this, postingsFreqs, OuterInstance.slop, Similarity.DoSimScorer(Stats, context));
    }

    // Exact case: sort by increasing docFreq order before building the scorer.
    ArrayUtil.TimSort(postingsFreqs);

    ExactPhraseScorer exact = new ExactPhraseScorer(this, postingsFreqs, Similarity.DoSimScorer(Stats, context));
    return exact.NoDocs ? null : exact;
}
/// <summary>
/// Seeks by the ordinal stored in <paramref name="state"/>, delegating to the
/// ordinal-based <c>SeekExact</c> overload.
/// </summary>
/// <param name="term">Not read by this method.</param>
/// <param name="state">Asserted non-null and cast to <c>OrdTermState</c>.</param>
public override void SeekExact(BytesRef term, TermState state)
{
    Debug.Assert(state != null);

    OrdTermState ordState = (OrdTermState)state;
    this.SeekExact(ordState.Ord);
}
/// <summary>
/// Offers one term to the bounded queue of scored terms.
/// </summary>
/// <remarks>
/// Once the queue holds <c>maxSize</c> entries, a term is ignored when its boost is
/// lower than the queue head's, or equal with a term that compares greater than the
/// head's. A term already present in <c>visitedTerms</c> only has the current
/// segment's state registered on its existing entry; otherwise the pending
/// <c>st</c> entry is filled in and queued.
/// <para>
/// Boosts are compared through <c>NumericUtils.SingleToSortableInt32</c> rather than
/// with the float <c>&lt;</c> / <c>==</c> operators, which can give wrong results on
/// x86 .NET Framework with optimizations enabled.
/// </para>
/// </remarks>
/// <param name="bytes">Term bytes; copied into the queue entry, never stored by reference.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool Collect(BytesRef bytes)
{
    float boost = boostAtt.Boost;

    // make sure within a single seg we always collect terms in order
    Debug.Assert(CompareToLastTerm(bytes));

    // ignore uncompetitive hits
    if (stQueue.Count == maxSize)
    {
        ScoreTerm t = stQueue.Peek();

        // Compare sortable bit patterns instead of the raw floats (see remarks).
        int boostBits = NumericUtils.SingleToSortableInt32(boost);
        int headBits = NumericUtils.SingleToSortableInt32(t.Boost);

        if (boostBits < headBits)
        {
            return true;
        }

        if (boostBits == headBits && termComp.Compare(bytes, t.Bytes) > 0)
        {
            return true;
        }
    }

    TermState state = termsEnum.GetTermState();
    Debug.Assert(state != null);

    if (visitedTerms.TryGetValue(bytes, out ScoreTerm t2))
    {
        // if the term is already in the PQ, only update docFreq of term in PQ
        // Bit comparison here as well, so the assertion cannot misfire on x86.
        Debug.Assert(NumericUtils.SingleToSortableInt32(t2.Boost) == NumericUtils.SingleToSortableInt32(boost), "boost should be equal in all segment TermsEnums");
        t2.TermState.Register(state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
    }
    else
    {
        // add new entry in PQ, we must clone the term, else it may get overwritten!
        st.Bytes.CopyBytes(bytes);
        st.Boost = boost;
        visitedTerms[st.Bytes] = st;
        Debug.Assert(st.TermState.DocFreq == 0);
        st.TermState.Register(state, m_readerContext.Ord, termsEnum.DocFreq, termsEnum.TotalTermFreq);
        stQueue.Add(st);

        // possibly drop entries from queue
        if (stQueue.Count > maxSize)
        {
            // Reuse the dropped entry as the next pending entry.
            st = stQueue.Dequeue();
            visitedTerms.Remove(st.Bytes);
            st.TermState.Clear(); // reset the termstate!
        }
        else
        {
            st = new ScoreTerm(termComp, new TermContext(m_topReaderContext));
        }

        Debug.Assert(stQueue.Count <= maxSize, "the PQ size must be limited to maxSize");

        // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
        if (stQueue.Count == maxSize)
        {
            t2 = stQueue.Peek();
            maxBoostAtt.MaxNonCompetitiveBoost = t2.Boost;
            maxBoostAtt.CompetitiveTerm = t2.Bytes;
        }
    }

    return true;
}
/// <summary>
/// Copies the base state, the three index objects and the two file pointers from
/// <paramref name="tsOther"/>.
/// </summary>
/// <remarks>
/// An index that already exists here is updated in place via <c>CopyFrom</c>; a
/// missing one is created with <c>Clone()</c>. For the frequency and position
/// indexes, a <c>null</c> on the source side sets the local one to <c>null</c>.
/// The source's doc index is not null-checked.
/// </remarks>
/// <param name="tsOther">State to copy from; it is cast to <c>SepTermState</c>.</param>
public override void CopyFrom(TermState tsOther)
{
    base.CopyFrom(tsOther);
    var source = (SepTermState)tsOther;

    // Doc index: the source's value is dereferenced unconditionally.
    if (DOC_INDEX != null)
    {
        DOC_INDEX.CopyFrom(source.DOC_INDEX);
    }
    else
    {
        DOC_INDEX = source.DOC_INDEX.Clone();
    }

    // Frequency index: may be absent on the source side.
    if (source.FREQ_INDEX == null)
    {
        FREQ_INDEX = null;
    }
    else if (FREQ_INDEX == null)
    {
        FREQ_INDEX = source.FREQ_INDEX.Clone();
    }
    else
    {
        FREQ_INDEX.CopyFrom(source.FREQ_INDEX);
    }

    // Position index: same handling as the frequency index.
    if (source.POS_INDEX == null)
    {
        POS_INDEX = null;
    }
    else if (POS_INDEX == null)
    {
        POS_INDEX = source.POS_INDEX.Clone();
    }
    else
    {
        POS_INDEX.CopyFrom(source.POS_INDEX);
    }

    PAYLOAD_FP = source.PAYLOAD_FP;
    SKIP_FP = source.SKIP_FP;
}