Beispiel #1
0
        /// <summary>
        /// Builds a <see cref="TermContext"/> for <paramref name="term"/> by probing every leaf
        /// of <paramref name="context"/>. Each leaf whose terms for the field contain the exact
        /// term is registered on the result under that leaf's ordinal, together with the term
        /// state, document frequency and total term frequency reported by the leaf's
        /// <see cref="TermsEnum"/>.
        /// </summary>
        /// <param name="context">Reader context to scan; asserted (debug builds only) to be non-null and top-level.</param>
        /// <param name="term">Term to look up; its field and bytes are read once before the scan.</param>
        /// <returns>
        /// The populated <see cref="TermContext"/>. Leaves that expose no fields, have no terms
        /// for the field, or do not contain the term contribute nothing.
        /// </returns>
        public static TermContext Build(IndexReaderContext context, Term term)
        {
            Debug.Assert(context != null && context.IsTopLevel);

            string fieldName = term.Field();
            BytesRef termBytes = term.Bytes();
            TermContext result = new TermContext(context);

            foreach (AtomicReaderContext leaf in context.Leaves)
            {
                Fields leafFields = leaf.AtomicReader.Fields;
                if (leafFields == null)
                {
                    // This leaf exposes no fields at all.
                    continue;
                }

                Terms fieldTerms = leafFields.Terms(fieldName);
                if (fieldTerms == null)
                {
                    // The field is absent from this leaf.
                    continue;
                }

                TermsEnum cursor = fieldTerms.Iterator(null);
                if (!cursor.SeekExact(termBytes))
                {
                    // The field exists here, but the exact term does not.
                    continue;
                }

                TermState state = cursor.TermState();
                result.Register(state, leaf.Ord, cursor.DocFreq(), cursor.TotalTermFreq());
            }

            return result;
        }
Beispiel #2
0
        /// <summary>
        /// Looks up <paramref name="term"/> in this reader and returns the value that the
        /// positioned <see cref="TermsEnum"/> reports from <c>TotalTermFreq()</c>.
        /// No live-docs filter is passed during the lookup, so (as the original
        /// documentation notes) deleted documents that have not yet been merged away
        /// are not excluded.
        /// </summary>
        /// <param name="term">Term whose field and bytes identify the entry to look up.</param>
        /// <returns>
        /// The value of <c>TermsEnum.TotalTermFreq()</c> for the term, or 0 when this reader
        /// exposes no fields, the term's field has no terms, or the exact term is not found.
        /// </returns>
        public override sealed long TotalTermFreq(Term term)
        {
            Fields readerFields = Fields;
            if (readerFields != null)
            {
                Terms fieldTerms = readerFields.Terms(term.Field());
                if (fieldTerms != null)
                {
                    TermsEnum cursor = fieldTerms.Iterator(null);
                    if (cursor.SeekExact(term.Bytes()))
                    {
                        return cursor.TotalTermFreq();
                    }
                }
            }

            // Every miss (no fields, no such field, no such term) ends up here.
            return 0;
        }
Beispiel #3
0
        /// <summary>
        /// Looks up <paramref name="term"/> in this reader and, when the exact term is found,
        /// returns the enum produced by <c>TermsEnum.DocsAndPositions</c> for it, called with
        /// this reader's <c>LiveDocs</c> and <c>null</c> as the second argument.
        /// </summary>
        /// <param name="term">Term to look up; its field and bytes are asserted non-null in debug builds.</param>
        /// <returns>
        /// <c>null</c> when this reader exposes no fields, the term's field has no terms, or the
        /// exact term is not found; otherwise whatever <c>TermsEnum.DocsAndPositions</c> returns
        /// (the original documentation notes that this is <c>null</c> when positions were not indexed).
        /// </returns>
        public DocsAndPositionsEnum TermPositionsEnum(Term term)
        {
            Debug.Assert(term.Field() != null);
            Debug.Assert(term.Bytes() != null);

            Fields readerFields = Fields;
            if (readerFields == null)
            {
                return null;
            }

            Terms fieldTerms = readerFields.Terms(term.Field());
            if (fieldTerms == null)
            {
                return null;
            }

            TermsEnum cursor = fieldTerms.Iterator(null);
            if (!cursor.SeekExact(term.Bytes()))
            {
                return null;
            }

            return cursor.DocsAndPositions(LiveDocs, null);
        }
Beispiel #4
0
        // DocValues updates
        /// <summary>
        /// Walks <paramref name="updates"/> in enumeration order and, for every update whose term
        /// is found in <paramref name="reader"/>, records <c>update.Value</c> for each matching
        /// document whose ID is below <c>update.DocIDUpto</c>. The values are added to the
        /// <c>DocValuesFieldUpdates</c> instance that <paramref name="dvUpdatesContainer"/> holds for
        /// the update's field and type; that instance is created on first use.
        /// The whole method body runs while holding the lock on <c>this</c>.
        /// </summary>
        /// <typeparam name="T1">Concrete update type; constrained to <c>DocValuesUpdate</c>.</typeparam>
        /// <param name="updates">Updates to apply; processed in the order the sequence yields them.</param>
        /// <param name="rld">Supplies the <c>LiveDocs</c> passed to the per-term document enumeration.</param>
        /// <param name="reader">Segment reader whose fields and terms are searched; its <c>MaxDoc</c> sizes newly created update buffers.</param>
        /// <param name="dvUpdatesContainer">Container the per-field update buffers are looked up in and created through.</param>
        private void ApplyDocValuesUpdates <T1>(IEnumerable <T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer) where T1 : DocValuesUpdate
        {
            lock (this)
            {
                Fields fields = reader.Fields;
                if (fields == null)
                {
                    // this reader has no postings
                    return;
                }

                // TODO: we can process the updates per DV field, from last to first so that
                // if multiple terms affect same document for the same field, we add an update
                // only once (that of the last term). To do that, we can keep a bitset which
                // marks which documents have already been updated. So e.g. if term T1
                // updates doc 7, and then we process term T2 and it updates doc 7 as well,
                // we don't apply the update since we know T1 came last and therefore wins
                // the update.
                // We can also use that bitset as 'liveDocs' to pass to TermEnum.docs(), so
                // that these documents aren't even returned.

                // State carried across loop iterations: the field the current termsEnum
                // belongs to, and the enum itself (null while that field has no terms).
                string    currentField = null;
                TermsEnum termsEnum    = null;
                // NOTE(review): 'docs' is never assigned in this method, so null is always
                // what gets passed as the second argument of termsEnum.Docs(...) below.
                DocsEnum  docs         = null;

                //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
                foreach (DocValuesUpdate update in updates)
                {
                    Term term  = update.Term;
                    // Only documents with an ID strictly below this bound receive the update.
                    int  limit = update.DocIDUpto;

                    // TODO: we traverse the terms in update order (not term order) so that we
                    // apply the updates in the correct order, i.e. if two terms update the
                    // same document, the last one that came in wins, irrespective of the
                    // terms lexical order.
                    // we can apply the updates in terms order if we keep an updatesGen (and
                    // increment it with every update) and attach it to each NumericUpdate. Note
                    // that we cannot rely only on docIDUpto because an app may send two updates
                    // which will get same docIDUpto, yet will still need to respect the order
                    // those updates arrived.

                    // The term's field differs from the previous update's field: fetch that
                    // field's Terms and obtain an enum for it.
                    if (!term.Field().Equals(currentField))
                    {
                        // if we change the code to process updates in terms order, enable this assert
                        //        assert currentField == null || currentField.compareTo(term.field()) < 0;
                        currentField = term.Field();
                        Terms terms = fields.Terms(currentField);
                        if (terms != null)
                        {
                            // The previous enum is handed to Iterator (presumably so it can be
                            // reused -- confirm against Terms.Iterator).
                            termsEnum = terms.Iterator(termsEnum);
                        }
                        else
                        {
                            // Remember that this field has no terms so that later updates on
                            // the same field are skipped by the null check below.
                            termsEnum = null;
                            continue; // no terms in that field
                        }
                    }

                    // Same field as an earlier update for which no Terms were found.
                    if (termsEnum == null)
                    {
                        continue;
                    }
                    // System.out.println("  term=" + term);

                    if (termsEnum.SeekExact(term.Bytes()))
                    {
                        // we don't need term frequencies for this
                        DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsEnum.FLAG_NONE);

                        //System.out.println("BDS: got docsEnum=" + docsEnum);

                        // Fetch the update buffer for this field/type, creating it on first use.
                        DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.Field, update.Type);
                        if (dvUpdates == null)
                        {
                            dvUpdates = dvUpdatesContainer.NewUpdates(update.Field, update.Type, reader.MaxDoc);
                        }
                        int doc;
                        while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID);
                            // Stopping at the first doc >= limit assumes NextDoc yields IDs in
                            // increasing order -- TODO confirm.
                            if (doc >= limit)
                            {
                                break; // no more docs that can be updated for this term
                            }
                            dvUpdates.Add(doc, update.Value);
                        }
                    }
                }
            }
        }
Beispiel #5
0
 /// <summary>
 /// Opens a reader on <paramref name="dir"/>, counts the documents enumerated for
 /// <paramref name="term"/>, and asserts that the count equals <paramref name="numDocs"/>.
 /// </summary>
 /// <param name="dir">Directory the reader is opened on.</param>
 /// <param name="term">Term whose field and bytes select the documents to count.</param>
 /// <param name="numDocs">Expected number of documents enumerated for the term.</param>
 private void VerifyTermDocs(Directory dir, Term term, int numDocs)
 {
     // The using block releases the reader even when the enumeration or the
     // assertion below throws; previously a failed assertion skipped Dispose().
     using (IndexReader reader = DirectoryReader.Open(dir))
     {
         DocsEnum docsEnum = TestUtil.Docs(Random(), reader, term.Field(), term.Bytes(), null, null, DocsEnum.FLAG_NONE);
         int count = 0;
         while (docsEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
         {
             count++;
         }
         Assert.AreEqual(numDocs, count);
     }
 }
Beispiel #6
0
 /// <summary>
 /// Looks up <paramref name="term"/> in this reader and, when the exact term is found,
 /// returns the enum produced by <c>TermsEnum.DocsAndPositions</c> for it, called with
 /// this reader's <c>LiveDocs</c> and <c>null</c> as the second argument.
 /// </summary>
 /// <param name="term">Term to look up; its field and bytes are asserted non-null in debug builds.</param>
 /// <returns>
 /// <c>null</c> when this reader exposes no fields, the term's field has no terms, or the
 /// exact term is not found; otherwise whatever <c>TermsEnum.DocsAndPositions</c> returns
 /// (the original documentation notes that this is <c>null</c> when positions were not indexed).
 /// </returns>
 public DocsAndPositionsEnum TermPositionsEnum(Term term)
 {
     Debug.Assert(term.Field() != null);
     Debug.Assert(term.Bytes() != null);

     Fields readerFields = Fields;
     if (readerFields == null)
     {
         return null;
     }

     Terms fieldTerms = readerFields.Terms(term.Field());
     if (fieldTerms == null)
     {
         return null;
     }

     TermsEnum cursor = fieldTerms.Iterator(null);
     if (!cursor.SeekExact(term.Bytes()))
     {
         return null;
     }

     return cursor.DocsAndPositions(LiveDocs, null);
 }
Beispiel #7
0
 /// <summary>
 /// Looks up <paramref name="term"/> in this reader and returns the value that the
 /// positioned <see cref="TermsEnum"/> reports from <c>TotalTermFreq()</c>.
 /// No live-docs filter is passed during the lookup, so (as the original
 /// documentation notes) deleted documents that have not yet been merged away
 /// are not excluded.
 /// </summary>
 /// <param name="term">Term whose field and bytes identify the entry to look up.</param>
 /// <returns>
 /// The value of <c>TermsEnum.TotalTermFreq()</c> for the term, or 0 when this reader
 /// exposes no fields, the term's field has no terms, or the exact term is not found.
 /// </returns>
 public override sealed long TotalTermFreq(Term term)
 {
     Fields readerFields = Fields;
     if (readerFields != null)
     {
         Terms fieldTerms = readerFields.Terms(term.Field());
         if (fieldTerms != null)
         {
             TermsEnum cursor = fieldTerms.Iterator(null);
             if (cursor.SeekExact(term.Bytes()))
             {
                 return cursor.TotalTermFreq();
             }
         }
     }

     // Every miss (no fields, no such field, no such term) ends up here.
     return 0;
 }
Beispiel #8
0
 /// <summary>
 /// Builds a <see cref="TermContext"/> for <paramref name="term"/> by probing every leaf
 /// of <paramref name="context"/>. Each leaf whose terms for the field contain the exact
 /// term is registered on the result under that leaf's ordinal, together with the term
 /// state, document frequency and total term frequency reported by the leaf's
 /// <see cref="TermsEnum"/>.
 /// </summary>
 /// <param name="context">Reader context to scan; asserted (debug builds only) to be non-null and top-level.</param>
 /// <param name="term">Term to look up; its field and bytes are read once before the scan.</param>
 /// <returns>
 /// The populated <see cref="TermContext"/>. Leaves that expose no fields, have no terms
 /// for the field, or do not contain the term contribute nothing.
 /// </returns>
 public static TermContext Build(IndexReaderContext context, Term term)
 {
     Debug.Assert(context != null && context.IsTopLevel);

     string fieldName = term.Field();
     BytesRef termBytes = term.Bytes();
     TermContext result = new TermContext(context);

     foreach (AtomicReaderContext leaf in context.Leaves)
     {
         Fields leafFields = leaf.AtomicReader.Fields;
         if (leafFields == null)
         {
             // This leaf exposes no fields at all.
             continue;
         }

         Terms fieldTerms = leafFields.Terms(fieldName);
         if (fieldTerms == null)
         {
             // The field is absent from this leaf.
             continue;
         }

         TermsEnum cursor = fieldTerms.Iterator(null);
         if (!cursor.SeekExact(termBytes))
         {
             // The field exists here, but the exact term does not.
             continue;
         }

         TermState state = cursor.TermState();
         result.Register(state, leaf.Ord, cursor.DocFreq(), cursor.TotalTermFreq());
     }

     return result;
 }