/// <summary>
/// Builds a <see cref="TermContext"/> for <paramref name="term"/> by probing every leaf
/// reader beneath the supplied top-level <see cref="IndexReaderContext"/>. Each leaf in which
/// the term is found is registered in the result under that leaf's ordinal, together with
/// the term state, document frequency and total term frequency reported by that leaf.
/// <para/>
/// Note: <paramref name="context"/> must be a top-level context.
/// </summary>
/// <param name="context">Top-level reader context whose leaves are searched.</param>
/// <param name="term">The term to look up in each leaf.</param>
/// <returns>
/// The populated <see cref="TermContext"/>. Leaves without fields, without the term's field,
/// or without the term itself contribute nothing.
/// </returns>
public static TermContext Build(IndexReaderContext context, Term term)
{
    Debug.Assert(context != null && context.IsTopLevel);

    string fieldName = term.Field();
    BytesRef termBytes = term.Bytes();
    TermContext result = new TermContext(context);

    foreach (AtomicReaderContext leaf in context.Leaves)
    {
        Fields leafFields = leaf.AtomicReader.Fields;
        if (leafFields == null)
        {
            // This leaf exposes no fields at all.
            continue;
        }

        Terms fieldTerms = leafFields.Terms(fieldName);
        if (fieldTerms == null)
        {
            // The field is absent from this leaf.
            continue;
        }

        TermsEnum cursor = fieldTerms.Iterator(null);
        if (!cursor.SeekExact(termBytes))
        {
            // The term is absent from this leaf.
            continue;
        }

        TermState state = cursor.TermState();
        result.Register(state, leaf.Ord, cursor.DocFreq(), cursor.TotalTermFreq());
    }

    return result;
}
/// <summary>
/// Returns the total term frequency that the <see cref="TermsEnum"/> reports for
/// <paramref name="term"/>. Returns 0 when this reader exposes no fields, when the
/// term's field has no terms, or when the term is not found. The value is read
/// from the <see cref="TermsEnum"/> directly; no live-docs filter is applied in this lookup.
/// </summary>
/// <param name="term">The term whose total frequency is requested.</param>
/// <returns>The total term frequency, or 0 if the field or term does not exist.</returns>
public override sealed long TotalTermFreq(Term term)
{
    Fields allFields = Fields;

    // Only touch the term when there are fields to search.
    Terms fieldTerms = allFields == null ? null : allFields.Terms(term.Field());
    if (fieldTerms == null)
    {
        return 0;
    }

    TermsEnum cursor = fieldTerms.Iterator(null);
    return cursor.SeekExact(term.Bytes()) ? cursor.TotalTermFreq() : 0;
}
/// <summary>
/// Looks up <paramref name="term"/> and returns the <see cref="DocsAndPositionsEnum"/>
/// obtained from its <see cref="TermsEnum"/>, filtered by this reader's <c>LiveDocs</c>.
/// Returns <c>null</c> when the reader has no fields, the field has no terms, or the
/// term is not found. The value returned by the enumerator is passed through unchanged,
/// so it may itself be <c>null</c> (per the original documentation, when positions were
/// not indexed).
/// </summary>
/// <param name="term">The term to look up; its field and bytes must be non-null.</param>
/// <returns>The positions enumerator for the term, or <c>null</c>.</returns>
/// <seealso cref="TermsEnum.DocsAndPositions(Bits, DocsAndPositionsEnum)"/>
public DocsAndPositionsEnum TermPositionsEnum(Term term)
{
    Debug.Assert(term.Field() != null);
    Debug.Assert(term.Bytes() != null);

    Fields allFields = Fields;
    if (allFields == null)
    {
        return null;
    }

    Terms fieldTerms = allFields.Terms(term.Field());
    if (fieldTerms == null)
    {
        return null;
    }

    TermsEnum cursor = fieldTerms.Iterator(null);
    if (!cursor.SeekExact(term.Bytes()))
    {
        return null;
    }

    return cursor.DocsAndPositions(LiveDocs, null);
}
// DocValues updates

/// <summary>
/// Applies buffered doc-values updates to one segment. For every update, in enumeration
/// order, the update's term is looked up in <paramref name="reader"/>; each matching
/// document (as filtered by <c>rld.LiveDocs</c>) whose id is below the update's
/// <c>DocIDUpto</c> gets the update's value recorded in the per-field updates object held
/// by <paramref name="dvUpdatesContainer"/>.
/// </summary>
/// <typeparam name="T1">Concrete <see cref="DocValuesUpdate"/> type being applied.</typeparam>
/// <param name="updates">The updates to apply, in the order they must take effect.</param>
/// <param name="rld">Supplies the live-docs filter used when enumerating matching documents.</param>
/// <param name="reader">Segment reader providing the postings and <c>MaxDoc</c>.</param>
/// <param name="dvUpdatesContainer">
/// Holds one updates object per (field, type); a new one is created on first use.
/// </param>
private void ApplyDocValuesUpdates<T1>(IEnumerable<T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer) where T1 : DocValuesUpdate
{
    // NOTE(review): locks on the instance, presumably to match other synchronized members
    // of this class -- confirm before switching to a private lock object.
    lock (this)
    {
        Fields fields = reader.Fields;
        if (fields == null)
        {
            // this reader has no postings
            return;
        }

        // TODO: we can process the updates per DV field, from last to first so that
        // if multiple terms affect same document for the same field, we add an update
        // only once (that of the last term). To do that, we can keep a bitset which
        // marks which documents have already been updated. So e.g. if term T1
        // updates doc 7, and then we process term T2 and it updates doc 7 as well,
        // we don't apply the update since we know T1 came last and therefore wins
        // the update.
        // We can also use that bitset as 'liveDocs' to pass to TermEnum.docs(), so
        // that these documents aren't even returned.

        string currentField = null;
        TermsEnum termsEnum = null;

        // Reuse slot for the docs enumerator; refreshed on every lookup so the
        // underlying implementation can recycle the previous instance.
        DocsEnum docs = null;

        foreach (DocValuesUpdate update in updates)
        {
            Term term = update.Term;
            int limit = update.DocIDUpto;

            // TODO: we traverse the terms in update order (not term order) so that we
            // apply the updates in the correct order, i.e. if two terms update the
            // same document, the last one that came in wins, irrespective of the
            // terms lexical order.
            // we can apply the updates in terms order if we keep an updatesGen (and
            // increment it with every update) and attach it to each NumericUpdate. Note
            // that we cannot rely only on docIDUpto because an app may send two updates
            // which will get same docIDUpto, yet will still need to respect the order
            // those updates arrived.

            if (!term.Field().Equals(currentField))
            {
                // if we change the code to process updates in terms order, enable this assert
                // assert currentField == null || currentField.compareTo(term.field()) < 0;
                currentField = term.Field();
                Terms terms = fields.Terms(currentField);
                if (terms != null)
                {
                    termsEnum = terms.Iterator(termsEnum);
                }
                else
                {
                    termsEnum = null;
                    continue; // no terms in that field
                }
            }

            if (termsEnum == null)
            {
                // Same field as the previous update, and that field has no terms.
                continue;
            }

            if (!termsEnum.SeekExact(term.Bytes()))
            {
                // The term does not occur in this segment.
                continue;
            }

            // We don't need term frequencies for this. The previous enumerator has been
            // fully consumed or abandoned by now, so it is handed back for reuse and the
            // slot is updated with whatever instance comes back.
            docs = termsEnum.Docs(rld.LiveDocs, docs, DocsEnum.FLAG_NONE);

            DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.Field, update.Type);
            if (dvUpdates == null)
            {
                dvUpdates = dvUpdatesContainer.NewUpdates(update.Field, update.Type, reader.MaxDoc);
            }

            int doc;
            while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                if (doc >= limit)
                {
                    break; // no more docs that can be updated for this term
                }
                dvUpdates.Add(doc, update.Value);
            }
        }
    }
}
/// <summary>
/// Opens a reader on <paramref name="dir"/>, counts the documents enumerated for
/// <paramref name="term"/>, and asserts that the count equals <paramref name="numDocs"/>.
/// </summary>
/// <param name="dir">Directory containing the index to inspect.</param>
/// <param name="term">Term whose documents are counted.</param>
/// <param name="numDocs">Expected number of documents for the term.</param>
private void VerifyTermDocs(Directory dir, Term term, int numDocs)
{
    IndexReader reader = DirectoryReader.Open(dir);
    try
    {
        DocsEnum docsEnum = TestUtil.Docs(Random(), reader, term.Field(), term.Bytes(), null, null, DocsEnum.FLAG_NONE);

        int count = 0;
        while (docsEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
        {
            count++;
        }

        Assert.AreEqual(numDocs, count);
    }
    finally
    {
        // Release the reader even when the assertion or the enumeration throws,
        // so a failing test does not leave the index open.
        reader.Dispose();
    }
}
/// <summary>
/// Returns the <see cref="DocsAndPositionsEnum"/> for <paramref name="term"/>, obtained
/// from the term's <see cref="TermsEnum"/> with this reader's <c>LiveDocs</c> as filter.
/// The result is <c>null</c> if the reader has no fields, the field has no terms, or the
/// term is not found. The enumerator's own return value is passed through as-is and may
/// also be <c>null</c> (per the original documentation, when positions were not indexed).
/// </summary>
/// <param name="term">The term to look up; its field and bytes must be non-null.</param>
/// <returns>The positions enumerator for the term, or <c>null</c>.</returns>
/// <seealso cref="TermsEnum.DocsAndPositions(Bits, DocsAndPositionsEnum)"/>
public DocsAndPositionsEnum TermPositionsEnum(Term term)
{
    Debug.Assert(term.Field() != null);
    Debug.Assert(term.Bytes() != null);

    DocsAndPositionsEnum positions = null;

    Fields allFields = Fields;
    Terms fieldTerms = allFields != null ? allFields.Terms(term.Field()) : null;
    if (fieldTerms != null)
    {
        TermsEnum cursor = fieldTerms.Iterator(null);
        if (cursor.SeekExact(term.Bytes()))
        {
            positions = cursor.DocsAndPositions(LiveDocs, null);
        }
    }

    return positions;
}
/// <summary>
/// Returns the total term frequency reported by the <see cref="TermsEnum"/> for
/// <paramref name="term"/>, or 0 if this reader has no fields, the term's field has
/// no terms, or the term is not found. The lookup does not consult <c>LiveDocs</c>.
/// </summary>
/// <param name="term">The term whose total frequency is requested.</param>
/// <returns>The total term frequency, or 0 if the field or term does not exist.</returns>
public override sealed long TotalTermFreq(Term term)
{
    long total = 0;

    Fields allFields = Fields;
    if (allFields != null)
    {
        Terms fieldTerms = allFields.Terms(term.Field());
        if (fieldTerms != null)
        {
            TermsEnum cursor = fieldTerms.Iterator(null);
            if (cursor.SeekExact(term.Bytes()))
            {
                total = cursor.TotalTermFreq();
            }
        }
    }

    return total;
}
/// <summary>
/// Creates a <see cref="TermContext"/> from a top-level <see cref="IndexReaderContext"/>
/// and the given <see cref="Term"/>. The term is looked up in each of the context's leaf
/// readers; every leaf that contains it is registered in the returned
/// <see cref="TermContext"/> under the leaf's ordinal, along with the term state,
/// document frequency and total term frequency reported by that leaf.
/// <para/>
/// Note: the given context must be a top-level context.
/// </summary>
/// <param name="context">Top-level reader context whose leaves are searched.</param>
/// <param name="term">The term to look up in each leaf.</param>
/// <returns>The <see cref="TermContext"/> holding one registration per leaf containing the term.</returns>
public static TermContext Build(IndexReaderContext context, Term term)
{
    Debug.Assert(context != null && context.IsTopLevel);

    string fieldName = term.Field();
    BytesRef termBytes = term.Bytes();
    TermContext termContext = new TermContext(context);

    foreach (AtomicReaderContext leaf in context.Leaves)
    {
        Fields leafFields = leaf.AtomicReader.Fields;

        // A leaf may have no fields, or may lack this particular field.
        Terms fieldTerms = leafFields != null ? leafFields.Terms(fieldName) : null;
        if (fieldTerms == null)
        {
            continue;
        }

        TermsEnum cursor = fieldTerms.Iterator(null);
        if (cursor.SeekExact(termBytes))
        {
            TermState state = cursor.TermState();
            termContext.Register(state, leaf.Ord, cursor.DocFreq(), cursor.TotalTermFreq());
        }
    }

    return termContext;
}