/// <summary>
/// Adds <paramref name="bytes"/> to <c>PendingTerms</c> and accumulates the current
/// term's document frequency into <c>DocVisitCount</c>. Once either the pending-term
/// count reaches <c>TermCountLimit</c> or the visited-document count reaches
/// <c>DocCountCutoff</c>, <c>HasCutOff</c> is set and collection is stopped.
/// Otherwise the current term state is stored for the term's slot.
/// </summary>
/// <param name="bytes">The term bytes being collected.</param>
/// <returns><c>false</c> when a cut-off limit was hit; <c>true</c> otherwise.</returns>
public override bool Collect(BytesRef bytes)
{
    int slot = PendingTerms.Add(bytes);
    DocVisitCount += TermsEnum.DocFreq();

    bool limitReached = PendingTerms.Size() >= TermCountLimit || DocVisitCount >= DocCountCutoff;
    if (limitReached)
    {
        HasCutOff = true;
        return false;
    }

    TermState currentState = TermsEnum.TermState();
    Debug.Assert(currentState != null);

    if (slot >= 0)
    {
        // Non-negative slot: start a fresh TermContext for it.
        Array.TermState[slot] = new TermContext(TopReaderContext, currentState, ReaderContext.Ord, TermsEnum.DocFreq(), TermsEnum.TotalTermFreq());
        return true;
    }

    // Negative slot is decoded as -(index) - 1; the context already stored at
    // that index receives this reader's state and statistics.
    int existingSlot = -slot - 1;
    Array.TermState[existingSlot].Register(currentState, ReaderContext.Ord, TermsEnum.DocFreq(), TermsEnum.TotalTermFreq());
    return true;
}
/// <summary>
/// Walks every leaf in <paramref name="leaves"/> and, for each query term found in
/// that leaf, either creates a <see cref="TermContext"/> in
/// <paramref name="contextArray"/> (when the slot read for this leaf was null) or
/// registers the leaf's term state and statistics on the existing one.
/// </summary>
/// <param name="reader">Reader whose <c>Context</c> is used when creating new term contexts.</param>
/// <param name="leaves">Leaf reader contexts to inspect.</param>
/// <param name="contextArray">Per-term contexts, indexed in parallel with <paramref name="queryTerms"/>; filled in place.</param>
/// <param name="queryTerms">The terms to look up.</param>
public virtual void CollectTermContext(IndexReader reader, IList<AtomicReaderContext> leaves, TermContext[] contextArray, Term[] queryTerms)
{
    // Handed back to Iterator(...) on every call so the enum instance can be reused.
    TermsEnum reusableEnum = null;

    foreach (AtomicReaderContext leaf in leaves)
    {
        Fields leafFields = leaf.AtomicReader.Fields;
        if (leafFields == null)
        {
            // This reader has no fields at all.
            continue;
        }

        for (int idx = 0; idx < queryTerms.Length; idx++)
        {
            Term queryTerm = queryTerms[idx];
            TermContext knownContext = contextArray[idx];

            Terms fieldTerms = leafFields.Terms(queryTerm.Field());
            if (fieldTerms == null)
            {
                // The term's field does not exist in this leaf.
                continue;
            }

            reusableEnum = fieldTerms.Iterator(reusableEnum);
            Debug.Assert(reusableEnum != null);

            // Skip the shared empty enum, and terms that are absent from this leaf.
            if (reusableEnum == TermsEnum.EMPTY || !reusableEnum.SeekExact(queryTerm.Bytes()))
            {
                continue;
            }

            if (knownContext != null)
            {
                knownContext.Register(reusableEnum.TermState(), leaf.Ord, reusableEnum.DocFreq(), reusableEnum.TotalTermFreq());
            }
            else
            {
                contextArray[idx] = new TermContext(reader.Context, reusableEnum.TermState(), leaf.Ord, reusableEnum.DocFreq(), reusableEnum.TotalTermFreq());
            }
        }
    }
}
/// <summary>
/// For each leaf reader and each query term, seeks the term in that leaf and
/// records its term state, document frequency and total term frequency in the
/// matching slot of <paramref name="contextArray"/>. A slot that was null when read
/// for this leaf gets a new <see cref="TermContext"/>; otherwise the leaf's data is
/// registered on the context already there.
/// </summary>
/// <param name="reader">Supplies the <c>Context</c> passed to newly created term contexts.</param>
/// <param name="leaves">The leaf contexts to scan.</param>
/// <param name="contextArray">Output array, parallel to <paramref name="queryTerms"/>.</param>
/// <param name="queryTerms">Terms to resolve in every leaf.</param>
public virtual void CollectTermContext(IndexReader reader, IList<AtomicReaderContext> leaves, TermContext[] contextArray, Term[] queryTerms)
{
    TermsEnum sharedEnum = null; // passed to Iterator(...) for reuse across lookups

    foreach (AtomicReaderContext segment in leaves)
    {
        Fields segmentFields = segment.AtomicReader.Fields;
        if (segmentFields == null)
        {
            continue; // reader has no fields
        }

        for (int n = 0; n < queryTerms.Length; n++)
        {
            Term wanted = queryTerms[n];
            TermContext existing = contextArray[n];

            Terms termsOfField = segmentFields.Terms(wanted.Field);
            if (termsOfField == null)
            {
                continue; // field does not exist
            }

            sharedEnum = termsOfField.Iterator(sharedEnum);
            Debug.Assert(sharedEnum != null);

            bool found = sharedEnum != TermsEnum.EMPTY && sharedEnum.SeekExact(wanted.Bytes);
            if (!found)
            {
                continue;
            }

            if (existing == null)
            {
                contextArray[n] = new TermContext(reader.Context, sharedEnum.TermState(), segment.Ord, sharedEnum.DocFreq(), sharedEnum.TotalTermFreq());
            }
            else
            {
                existing.Register(sharedEnum.TermState(), segment.Ord, sharedEnum.DocFreq(), sharedEnum.TotalTermFreq());
            }
        }
    }
}
/// <summary>
/// Adds <paramref name="bytes"/> to <c>Terms</c> and stores the current term state
/// in the parallel arrays. A new term gets its boost and a fresh
/// <see cref="TermContext"/>, after which the clause-count check is run; a duplicate
/// term only has this reader's state registered on its existing context.
/// </summary>
/// <param name="bytes">The term bytes being collected.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool Collect(BytesRef bytes)
{
    int slot = Terms.Add(bytes);
    TermState segmentState = TermsEnum.TermState();
    Debug.Assert(segmentState != null);

    if (slot >= 0)
    {
        // New entry: populate boost and term context, then check the clause count.
        Array.Boost[slot] = BoostAtt.Boost;
        Array.TermState[slot] = new TermContext(TopReaderContext, segmentState, ReaderContext.Ord, TermsEnum.DocFreq(), TermsEnum.TotalTermFreq());
        OuterInstance.CheckMaxClauseCount(Terms.Size());
        return true;
    }

    // Duplicate term: the negative value decodes to the existing index as -(index) - 1.
    int existingSlot = -slot - 1;
    Array.TermState[existingSlot].Register(segmentState, ReaderContext.Ord, TermsEnum.DocFreq(), TermsEnum.TotalTermFreq());
    Debug.Assert(Array.Boost[existingSlot] == BoostAtt.Boost, "boost should be equal in all segment TermsEnums");
    return true;
}
/// <summary>
/// Builds the spans for this query's term within the given leaf reader.
/// </summary>
/// <param name="context">The leaf reader context to search.</param>
/// <param name="acceptDocs">Passed through to <c>DocsAndPositions</c>.</param>
/// <param name="termContexts">Term contexts keyed by term; may lack an entry for this term.</param>
/// <returns>
/// <c>TermSpans.EMPTY_TERM_SPANS</c> when no term state is found for this reader;
/// otherwise a <c>TermSpans</c> over the term's postings.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The term exists but <c>DocsAndPositions</c> returned null (no position data).
/// </exception>
public override Spans GetSpans(AtomicReaderContext context, Bits acceptDocs, IDictionary<Term, TermContext> termContexts)
{
    TermContext cachedContext;
    termContexts.TryGetValue(term, out cachedContext);

    TermState segmentState = null;
    if (cachedContext != null)
    {
        segmentState = cachedContext.Get(context.Ord);
    }
    else
    {
        // No context was supplied for this term (this happens with span-not queries,
        // whose NOT side is not included in extractTerms()), so seek to the term
        // in this segment directly.
        Fields segmentFields = context.AtomicReader.Fields;
        Terms fieldTerms = segmentFields != null ? segmentFields.Terms(term.Field()) : null;
        if (fieldTerms != null)
        {
            TermsEnum lookupEnum = fieldTerms.Iterator(null);
            if (lookupEnum.SeekExact(term.Bytes()))
            {
                segmentState = lookupEnum.TermState();
            }
        }
    }

    if (segmentState == null)
    {
        // Term is not present in that reader.
        return TermSpans.EMPTY_TERM_SPANS;
    }

    TermsEnum positionedEnum = context.AtomicReader.Terms(term.Field()).Iterator(null);
    positionedEnum.SeekExact(term.Bytes(), segmentState);

    DocsAndPositionsEnum postings = positionedEnum.DocsAndPositions(acceptDocs, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
    if (postings == null)
    {
        // Term does exist, but has no positions.
        throw new InvalidOperationException("field \"" + term.Field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.Text() + ")");
    }

    return new TermSpans(postings, term);
}
/// <summary>
/// Offers a term to the size-bounded queue <c>StQueue</c>. When the queue is
/// already at <c>MaxSize</c>, terms that cannot compete with the queue's top entry
/// are ignored. A term already tracked in <c>visitedTerms</c> only has the current
/// reader's state registered; a new term is copied into the scratch entry
/// <c>st</c>, queued, and the queue is trimmed back to <c>MaxSize</c> if needed.
/// </summary>
/// <param name="bytes">The term bytes being collected.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool Collect(BytesRef bytes)
{
    float boost = boostAtt.Boost;

    // Within a single segment terms must always be collected in order.
    Debug.Assert(CompareToLastTerm(bytes));

    // Ignore uncompetitive hits: lower boost than the queue's top entry, or the
    // same boost but a term that compares greater.
    if (StQueue.Size() == MaxSize)
    {
        ScoreTerm weakest = StQueue.Top();
        bool uncompetitive = boost < weakest.Boost
            || (boost == weakest.Boost && termComp.Compare(bytes, weakest.Bytes) > 0);
        if (uncompetitive)
        {
            return true;
        }
    }

    TermState state = termsEnum.TermState();
    Debug.Assert(state != null);

    ScoreTerm alreadyQueued;
    if (visitedTerms.TryGetValue(bytes, out alreadyQueued))
    {
        // The term is already in the queue: only add this reader's statistics to it.
        Debug.Assert(alreadyQueued.Boost == boost, "boost should be equal in all segment TermsEnums");
        alreadyQueued.TermState.Register(state, ReaderContext.Ord, termsEnum.DocFreq(), termsEnum.TotalTermFreq());
        return true;
    }

    // New entry: the term bytes must be copied, else they may get overwritten.
    st.Bytes.CopyBytes(bytes);
    st.Boost = boost;
    visitedTerms[st.Bytes] = st;
    Debug.Assert(st.TermState.DocFreq == 0);
    st.TermState.Register(state, ReaderContext.Ord, termsEnum.DocFreq(), termsEnum.TotalTermFreq());
    StQueue.Add(st);

    if (StQueue.Size() <= MaxSize)
    {
        // Still room: allocate a fresh scratch entry for the next new term.
        st = new ScoreTerm(termComp, new TermContext(TopReaderContext));
    }
    else
    {
        // Over capacity: pop an entry and recycle it as the next scratch entry.
        st = StQueue.Pop();
        visitedTerms.Remove(st.Bytes);
        st.TermState.Clear(); // reset the term state
    }
    Debug.Assert(StQueue.Size() <= MaxSize, "the PQ size must be limited to maxSize");

    // Publish the current top entry through maxBoostAtt to help FuzzyTermsEnum optimize.
    if (StQueue.Size() == MaxSize)
    {
        ScoreTerm threshold = StQueue.Top();
        maxBoostAtt.MaxNonCompetitiveBoost = threshold.Boost;
        maxBoostAtt.CompetitiveTerm = threshold.Bytes;
    }

    return true;
}