예제 #1
0
 /// <summary>
 /// Points this enum at the sub-enums of <paramref name="docsEnum"/> and
 /// rewinds the iteration state so that no sub-enum is current.
 /// </summary>
 /// <param name="docsEnum">Source of the sub-enum count and sub-enum array.</param>
 /// <returns>This instance, to allow call chaining.</returns>
 internal MappingMultiDocsEnum Reset(MultiDocsEnum docsEnum)
 {
     // Take over the sub-enums of the supplied multi enum.
     NumSubs_Renamed = docsEnum.NumSubs;
     Subs_Renamed = docsEnum.Subs;

     // Rewind: no sub selected yet, nothing current.
     Upto = -1;
     Current = null;

     return this;
 }
예제 #2
0
        /// <summary>
        /// Resolves <paramref name="categoryPath"/> to its ordinal, trying the
        /// cache first and falling back to the on-disk index only when the cache
        /// cannot give a definitive answer.
        /// </summary>
        /// <param name="categoryPath">The category to look up.</param>
        /// <returns>
        /// The category's ordinal, or a negative number when the category does
        /// not yet exist in the taxonomy.
        /// </returns>
        protected virtual int FindCategory(FacetLabel categoryPath)
        {
            lock (this)
            {
                // A cache hit is authoritative, and so is a miss once the cache is
                // known to contain every category.
                int ordinal = cache.Get(categoryPath);
                if (ordinal >= 0 || cacheIsComplete)
                {
                    return ordinal;
                }

                // Count the miss, then give the cache a chance to load itself from
                // disk. Loading is deliberately deferred until several misses have
                // accumulated: doing it on the first miss (or when the writer is
                // opened) would penalize a taxonomy that is opened just to add one
                // category. Paying the cost only after enough time was already
                // spent on misses is what is known as an "online algorithm".
                cacheMisses.IncrementAndGet();
                PerhapsFillCache();

                // Ask the cache again; the fill above may have added the category
                // or marked the cache as complete.
                ordinal = cache.Get(categoryPath);
                if (ordinal >= 0 || cacheIsComplete)
                {
                    return ordinal;
                }

                // Still unknown and the cache is incomplete: the on-disk index is
                // the only remaining source of truth.
                InitReaderManager();

                int docId = -1;
                DirectoryReader acquired = readerManager.Acquire();
                try
                {
                    BytesRef categoryTerm = new BytesRef(FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length));
                    TermsEnum reusableTerms = null; // reused across leaves
                    DocsEnum reusableDocs = null;   // reused across leaves
                    foreach (AtomicReaderContext leaf in acquired.Leaves)
                    {
                        Terms fullPathTerms = leaf.AtomicReader.GetTerms(Consts.FULL);
                        if (fullPathTerms == null)
                        {
                            continue;
                        }

                        reusableTerms = fullPathTerms.GetIterator(reusableTerms);
                        if (!reusableTerms.SeekExact(categoryTerm))
                        {
                            continue;
                        }

                        // liveDocs is null because the taxonomy has no deletes;
                        // flags are 0 because freqs are not required.
                        reusableDocs = reusableTerms.Docs(null, reusableDocs, 0);

                        // A found term has exactly one document; rebase it from
                        // the leaf to the top-level reader.
                        docId = reusableDocs.NextDoc() + leaf.DocBase;
                        break;
                    }
                }
                finally
                {
                    readerManager.Release(acquired);
                }

                if (docId > 0)
                {
                    AddToCache(categoryPath, docId);
                }

                return docId;
            }
        }
 /// <summary>
 /// Looks up the partner enum that this reader previously associated with
 /// <paramref name="de"/>.
 /// <para>
 /// Example: Pulsing(Standard). During a term range query the caller keeps
 /// switching between the Pulsing and the Standard enum, so each one keeps a
 /// reference to the other: Pulsing.other = Standard and
 /// Standard.other = Pulsing.
 /// </para>
 /// </summary>
 /// <param name="de">The enum whose partner is wanted; may be <c>null</c>.</param>
 /// <returns>
 /// The value left in the <c>out</c> argument by <c>TryGetValue</c> for this
 /// reader, or <c>null</c> when <paramref name="de"/> is <c>null</c>.
 /// </returns>
 private DocsEnum GetOther(DocsEnum de)
 {
     if (de == null)
     {
         return null;
     }

     // The partner enums live in an attribute on the enum, keyed by reader.
     var enumsByReader = de.Attributes().AddAttribute<IPulsingEnumAttribute>().Enums();

     DocsEnum partner;
     enumsByReader.TryGetValue(this, out partner);
     return partner;
 }
예제 #4
0
        /// <summary>
        /// Walks every field of <paramref name="other"/> (wrapped as a single
        /// atomic reader) and asserts that the norms, term statistics, terms and
        /// postings seen through <paramref name="memIndexReader"/> agree.
        /// </summary>
        /// <param name="other">Composite reader supplying the field list and the norms to compare against.</param>
        /// <param name="memIndexReader">Reader whose fields, terms and norms are checked.</param>
        private void DuellReaders(CompositeReader other, AtomicReader memIndexReader)
        {
            AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other);
            Fields       memFields  = memIndexReader.Fields;

            foreach (string field in competitor.Fields)
            {
                Terms memTerms = memFields.GetTerms(field);
                // NOTE(review): iwTerms is read from memIndexReader, not from competitor,
                // so both sides of the term comparison below come from the same reader.
                // Confirm whether competitor.GetTerms(field) was intended.
                Terms iwTerms  = memIndexReader.GetTerms(field);
                if (iwTerms == null)
                {
                    assertNull(memTerms);
                }
                else
                {
                    NumericDocValues normValues    = competitor.GetNormValues(field);
                    NumericDocValues memNormValues = memIndexReader.GetNormValues(field);
                    if (normValues != null)
                    {
                        // mem idx always computes norms on the fly
                        assertNotNull(memNormValues);
                        // Only the norm of document 0 is compared.
                        assertEquals(normValues.Get(0), memNormValues.Get(0));
                    }

                    // Field-level statistics must match before the terms are walked.
                    assertNotNull(memTerms);
                    assertEquals(iwTerms.DocCount, memTerms.DocCount);
                    assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq);
                    assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq);
                    TermsEnum iwTermsIter  = iwTerms.GetIterator(null);
                    TermsEnum memTermsIter = memTerms.GetIterator(null);
                    if (iwTerms.HasPositions)
                    {
                        // Offsets are compared only when both sides report having them.
                        bool offsets = iwTerms.HasOffsets && memTerms.HasOffsets;

                        // Advance both term iterators in lockstep.
                        while (iwTermsIter.Next() != null)
                        {
                            assertNotNull(memTermsIter.Next());
                            assertEquals(iwTermsIter.Term, memTermsIter.Term);
                            DocsAndPositionsEnum iwDocsAndPos  = iwTermsIter.DocsAndPositions(null, null);
                            DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null);
                            // Advance both postings enums in lockstep; the mem side is
                            // advanced inside the assertion itself.
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID, memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq, memDocsAndPos.Freq);
                                for (int i = 0; i < iwDocsAndPos.Freq; i++)
                                {
                                    assertEquals("term: " + iwTermsIter.Term.Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition());
                                    if (offsets)
                                    {
                                        assertEquals(iwDocsAndPos.StartOffset, memDocsAndPos.StartOffset);
                                        assertEquals(iwDocsAndPos.EndOffset, memDocsAndPos.EndOffset);
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        // No positions: compare doc ids and freqs only.
                        // NOTE(review): unlike the positions branch, memTermsIter.Next()
                        // is never called here, so memTermsIter is not advanced alongside
                        // iwTermsIter. Confirm whether that is intentional.
                        while (iwTermsIter.Next() != null)
                        {
                            assertEquals(iwTermsIter.Term, memTermsIter.Term);
                            DocsEnum iwDocsAndPos  = iwTermsIter.Docs(null, null);
                            DocsEnum memDocsAndPos = memTermsIter.Docs(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID, memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq, memDocsAndPos.Freq);
                            }
                        }
                    }
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Builds a randomized test fixture: generates unique random values, indexes
        /// <paramref name="nDocs"/> documents into either <paramref name="fromWriter"/>
        /// or <paramref name="toWriter"/>, and then pre-computes, for every unique
        /// value, a map from document id to <c>JoinScore</c> that is stored in the
        /// returned context.
        /// </summary>
        /// <param name="nDocs">Number of documents to index; <c>nDocs / 2</c> unique values are generated.</param>
        /// <param name="fromWriter">Writer that receives documents whose value was flagged as "from".</param>
        /// <param name="toWriter">Writer that receives all other documents.</param>
        /// <param name="multipleValuesPerDocument">When true each document gets several link values instead of exactly one.</param>
        /// <param name="scoreDocsInOrder">With multiple values per document, selects the terms-enumeration path instead of the collector path.</param>
        /// <returns>The populated context.</returns>
        /// <remarks>
        /// NOTE(review): with <paramref name="nDocs"/> below 2 no unique values exist and the
        /// value lookup in the document loop would presumably index an empty array - confirm
        /// callers always pass a larger count.
        /// The order of <c>Random()</c> draws in this method determines the generated data, so
        /// statements should not be reordered.
        /// </remarks>
        private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter,
                                                    bool multipleValuesPerDocument, bool scoreDocsInOrder)
        {
            IndexIterationContext context = new IndexIterationContext();
            int numRandomValues           = nDocs / 2;

            context.RandomUniqueValues = new string[numRandomValues];
            // Tracks values already handed out so that every generated value is unique.
            ISet <string> trackSet = new HashSet <string>();

            context.RandomFrom = new bool[numRandomValues];
            for (int i = 0; i < numRandomValues; i++)
            {
                string uniqueRandomValue;
                do
                {
                    uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random());
                    //        uniqueRandomValue = TestUtil.randomSimpleString(random);
                } while ("".Equals(uniqueRandomValue, StringComparison.Ordinal) || trackSet.Contains(uniqueRandomValue));
                // Generate unique values and empty strings aren't allowed.
                trackSet.Add(uniqueRandomValue);
                // Each unique value is randomly assigned to the "from" or the "to" side.
                context.RandomFrom[i]         = Random().NextBoolean();
                context.RandomUniqueValues[i] = uniqueRandomValue;
            }

            RandomDoc[] docs = new RandomDoc[nDocs];
            for (int i = 0; i < nDocs; i++)
            {
                string   id       = Convert.ToString(i);
                int      randomI  = Random().Next(context.RandomUniqueValues.Length);
                string   value    = context.RandomUniqueValues[randomI];
                Document document = new Document();
                document.Add(NewTextField(Random(), "id", id, Field.Store.NO));
                document.Add(NewTextField(Random(), "value", value, Field.Store.NO));

                // The side of the document follows the side of its randomly picked value.
                bool from = context.RandomFrom[randomI];
                int  numberOfLinkValues = multipleValuesPerDocument ? 2 + Random().Next(10) : 1;
                docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
                for (int j = 0; j < numberOfLinkValues; j++)
                {
                    // Link values are drawn from the same pool of unique values.
                    string linkValue = context.RandomUniqueValues[Random().Next(context.RandomUniqueValues.Length)];
                    docs[i].LinkValues.Add(linkValue);
                    if (from)
                    {
                        // Register the document under both its link value and its own value,
                        // creating the lists on first use.
                        if (!context.FromDocuments.ContainsKey(linkValue))
                        {
                            context.FromDocuments[linkValue] = new List <RandomDoc>();
                        }
                        if (!context.RandomValueFromDocs.ContainsKey(value))
                        {
                            context.RandomValueFromDocs[value] = new List <RandomDoc>();
                        }

                        context.FromDocuments[linkValue].Add(docs[i]);
                        context.RandomValueFromDocs[value].Add(docs[i]);
                        document.Add(NewTextField(Random(), "from", linkValue, Field.Store.NO));
                    }
                    else
                    {
                        // Same bookkeeping as above, for the "to" side.
                        if (!context.ToDocuments.ContainsKey(linkValue))
                        {
                            context.ToDocuments[linkValue] = new List <RandomDoc>();
                        }
                        if (!context.RandomValueToDocs.ContainsKey(value))
                        {
                            context.RandomValueToDocs[value] = new List <RandomDoc>();
                        }

                        context.ToDocuments[linkValue].Add(docs[i]);
                        context.RandomValueToDocs[value].Add(docs[i]);
                        document.Add(NewTextField(Random(), "to", linkValue, Field.Store.NO));
                    }
                }

                // Route the document to the writer that matches its side.
                RandomIndexWriter w;
                if (from)
                {
                    w = fromWriter;
                }
                else
                {
                    w = toWriter;
                }

                w.AddDocument(document);
                // Commit at random points during indexing.
                if (Random().Next(10) == 4)
                {
                    w.Commit();
                }
                if (VERBOSE)
                {
                    Console.WriteLine("Added document[" + docs[i].Id + "]: " + document);
                }
            }

            // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
            // any ScoreMode.
            IndexSearcher fromSearcher = NewSearcher(fromWriter.Reader);
            IndexSearcher toSearcher   = NewSearcher(toWriter.Reader);

            for (int i = 0; i < context.RandomUniqueValues.Length; i++)
            {
                string uniqueRandomValue = context.RandomUniqueValues[i];
                string fromField;
                string toField;
                IDictionary <string, IDictionary <int, JoinScore> > queryVals;
                // Pick the field names and the result map according to the side of this value.
                if (context.RandomFrom[i])
                {
                    fromField = "from";
                    toField   = "to";
                    queryVals = context.FromHitsToJoinScore;
                }
                else
                {
                    fromField = "to";
                    toField   = "from";
                    queryVals = context.ToHitsToJoinScore;
                }
                // Step 1: search the "value" field and hand joinValueToJoinScores to a collector.
                // The collector classes are defined elsewhere; presumably they fill the map
                // with one JoinScore per join value - verify against their definitions.
                IDictionary <BytesRef, JoinScore> joinValueToJoinScores = new Dictionary <BytesRef, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousInnerClassHelper3(this, context, fromField, joinValueToJoinScores));
                }
                else
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousInnerClassHelper4(this, context, fromField, joinValueToJoinScores));
                }

                // Step 2: map documents on the other side to the JoinScore of their join value.
                IDictionary <int, JoinScore> docToJoinScore = new Dictionary <int, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    if (scoreDocsInOrder)
                    {
                        AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
                        Terms        terms = slowCompositeReader.GetTerms(toField);
                        if (terms != null)
                        {
                            DocsEnum             docsEnum   = null;
                            TermsEnum            termsEnum  = null;
                            // Visit the join values in the order defined by the UTF8-as-unicode comparer.
                            SortedSet <BytesRef> joinValues =
                                new SortedSet <BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
                            joinValues.AddAll(joinValueToJoinScores.Keys);
                            foreach (BytesRef joinValue in joinValues)
                            {
                                termsEnum = terms.GetIterator(termsEnum);
                                if (termsEnum.SeekExact(joinValue))
                                {
                                    docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsFlags.NONE);
                                    JoinScore joinScore = joinValueToJoinScores[joinValue];

                                    for (int doc = docsEnum.NextDoc();
                                         doc != DocIdSetIterator.NO_MORE_DOCS;
                                         doc = docsEnum.NextDoc())
                                    {
                                        // First encountered join value determines the score.
                                        // Something to keep in mind for many-to-many relations.
                                        if (!docToJoinScore.ContainsKey(doc))
                                        {
                                            docToJoinScore[doc] = joinScore;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        // Collector-based path; the collector receives docToJoinScore to populate.
                        toSearcher.Search(new MatchAllDocsQuery(),
                                          new CollectorAnonymousInnerClassHelper5(this, context, toField, joinValueToJoinScores,
                                                                                  docToJoinScore));
                    }
                }
                else
                {
                    // Single value per document: always the collector-based path.
                    toSearcher.Search(new MatchAllDocsQuery(),
                                      new CollectorAnonymousInnerClassHelper6(this, toField, joinValueToJoinScores,
                                                                              docToJoinScore));
                }
                queryVals[uniqueRandomValue] = docToJoinScore;
            }

            // The readers obtained from the writers are no longer needed.
            fromSearcher.IndexReader.Dispose();
            toSearcher.IndexReader.Dispose();

            return(context);
        }
예제 #6
0
 /// <summary>
 /// Decodes the metadata of the current term and asks the postings reader
 /// for its documents.
 /// </summary>
 /// <param name="liveDocs">Passed through to the postings reader.</param>
 /// <param name="reuse">Enum offered for reuse; passed through to the postings reader.</param>
 /// <param name="flags">Passed through to the postings reader.</param>
 /// <returns>The enum returned by the postings reader.</returns>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
 {
     // Metadata is decoded before the term state is handed to the postings reader.
     DecodeMetaData();

     var postingsReader = _blockTermsReader._postingsReader;
     return postingsReader.Docs(_fieldReader._fieldInfo, _state, liveDocs, reuse, flags);
 }
예제 #7
0
        /// <summary>
        /// Advances to the next document across all sub-enums, translating each
        /// sub-enum doc id through the current doc map and adding the current
        /// base. Documents that the map translates to -1 are skipped.
        /// </summary>
        /// <returns>
        /// The remapped doc id (also stored in <c>Doc</c>), or
        /// <c>NO_MORE_DOCS</c> once every sub-enum is exhausted.
        /// </returns>
        public override int NextDoc()
        {
            for (;;)
            {
                if (Current == null)
                {
                    // No active sub-enum: either we are done, or we move on to the next one.
                    if (Upto == NumSubs_Renamed - 1)
                    {
                        this.Doc = NO_MORE_DOCS;
                        return NO_MORE_DOCS;
                    }

                    Upto++;
                    var sub = Subs_Renamed[Upto];
                    int readerIndex = sub.Slice.ReaderIndex;
                    Current = sub.DocsEnum;
                    CurrentBase = MergeState_Renamed.DocBase[readerIndex];
                    CurrentMap = MergeState_Renamed.DocMaps[readerIndex];
                    Debug.Assert(CurrentMap.MaxDoc == sub.Slice.Length, "readerIndex=" + readerIndex + " subs.len=" + Subs_Renamed.Length + " len1=" + CurrentMap.MaxDoc + " vs " + sub.Slice.Length);
                }

                int subDoc = Current.NextDoc();
                if (subDoc == NO_MORE_DOCS)
                {
                    // This sub-enum is exhausted; the next iteration picks the following one.
                    Current = null;
                    continue;
                }

                // compact deletions: -1 means the document is dropped by the map
                int mappedDoc = CurrentMap.Get(subDoc);
                if (mappedDoc == -1)
                {
                    continue;
                }

                int mergedDoc = CurrentBase + mappedDoc;
                this.Doc = mergedDoc;
                return mergedDoc;
            }
        }
예제 #8
0
 /// <summary>
 /// Returns the documents of the current term by delegating to the postings
 /// reader, after decoding the term's metadata.
 /// </summary>
 /// <param name="liveDocs">Forwarded unchanged.</param>
 /// <param name="reuse">Enum offered for reuse; forwarded unchanged.</param>
 /// <param name="flags">Forwarded unchanged.</param>
 /// <returns>Whatever the postings reader returns.</returns>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
 {
     DecodeMetaData();

     var reader = _blockTermsReader._postingsReader;
     DocsEnum result = reader.Docs(_fieldReader._fieldInfo, _state, liveDocs, reuse, flags);
     return result;
 }
예제 #9
0
 /// <summary>
 /// Decodes the metadata of the current frame and asks the postings reader
 /// for the documents of the current term.
 /// </summary>
 /// <param name="skipDocs">Forwarded to the postings reader.</param>
 /// <param name="reuse">Enum offered for reuse; forwarded to the postings reader.</param>
 /// <param name="flags">Forwarded to the postings reader.</param>
 /// <returns>The enum returned by the postings reader.</returns>
 public override DocsEnum Docs(Bits skipDocs, DocsEnum reuse, int flags)
 {
     // Must not be called once the enum has reached its end.
     Debug.Assert(!Eof);

     CurrentFrame.DecodeMetaData();

     var postingsReader = OuterInstance.OuterInstance.PostingsReader;
     return postingsReader.Docs(OuterInstance.fieldInfo, CurrentFrame.State, skipDocs, reuse, flags);
 }
예제 #10
0
 /// <summary>
 /// Decodes the metadata of the current frame and delegates to the postings
 /// reader using the frame's term state.
 /// </summary>
 /// <param name="skipDocs">Forwarded to the postings reader.</param>
 /// <param name="reuse">Enum offered for reuse; forwarded to the postings reader.</param>
 /// <param name="flags">Forwarded to the postings reader.</param>
 /// <returns>The enum returned by the postings reader.</returns>
 public override DocsEnum Docs(Bits skipDocs, DocsEnum reuse, int flags)
 {
     CurrentFrame.DecodeMetaData();

     var postingsReader = OuterInstance.OuterInstance.PostingsReader;
     return postingsReader.Docs(OuterInstance.fieldInfo, CurrentFrame.TermState, skipDocs, reuse, flags);
 }
예제 #11
0
 /// <summary>
 /// Records <paramref name="other"/> as the partner enum of
 /// <paramref name="de"/> for this reader, replacing any earlier entry.
 /// </summary>
 /// <param name="de">The enum whose attribute map is updated.</param>
 /// <param name="other">The partner enum to store.</param>
 /// <returns><paramref name="other"/>, as stored.</returns>
 private DocsEnum SetOther(DocsEnum de, DocsEnum other)
 {
     // The partner enums live in an attribute on the enum, keyed by reader.
     var enumsByReader = de.Attributes().AddAttribute<IPulsingEnumAttribute>().Enums();
     enumsByReader[this] = other;
     return other;
 }
예제 #12
0
        /// <summary>
        /// For a docsenum, gets the 'other' reused enum.
        /// <para>
        /// Example: Pulsing(Standard). When doing a term range query the caller
        /// switches back and forth between Pulsing and Standard. The reuse works
        /// by keeping Pulsing.other = Standard and Standard.other = Pulsing.
        /// </para>
        /// </summary>
        /// <param name="de">The enum whose partner is wanted; may be <c>null</c>.</param>
        /// <returns>
        /// The partner enum registered for this reader, or <c>null</c> when
        /// <paramref name="de"/> is <c>null</c> or no partner has been registered yet.
        /// </returns>
        private DocsEnum GetOther(DocsEnum de)
        {
            if (de == null)
            {
                return null;
            }

            var atts = de.Attributes();

            // Use TryGetValue instead of the indexer: a .NET dictionary indexer throws
            // KeyNotFoundException for a missing key, whereas callers treat
            // "no partner registered yet" as a null result.
            DocsEnum other;
            return atts.AddAttribute<IPulsingEnumAttribute>().Enums().TryGetValue(this, out other)
                ? other
                : null;
        }
예제 #13
0
        /// <summary>
        /// Returns a docs enum for the given term state. Terms whose postings size
        /// is not -1 are served by a <c>PulsingDocsEnum</c>; all other terms are
        /// delegated to the wrapped postings reader. In both cases the returned
        /// enum and <paramref name="reuse"/> are linked through
        /// <c>SetOther</c>/<c>GetOther</c> so that either can be recovered from the
        /// other on a later call.
        /// </summary>
        /// <param name="field">Field the term belongs to.</param>
        /// <param name="_termState">Term state; cast to <c>PulsingTermState</c>.</param>
        /// <param name="liveDocs">Passed on to the enum that is returned.</param>
        /// <param name="reuse">Enum offered for reuse; may be <c>null</c>.</param>
        /// <param name="flags">Passed on to the wrapped postings reader.</param>
        /// <returns>The pulsing enum or the wrapped reader's enum.</returns>
        public override DocsEnum Docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse,
            int flags)
        {
            var pulsingState = (PulsingTermState) _termState;

            if (pulsingState.PostingsSize == -1)
            {
                // Delegate to the wrapped postings reader.
                if (reuse is PulsingDocsEnum)
                {
                    // The caller handed us a pulsing enum: offer its partner to the
                    // wrapped reader and remember the pairing on the result.
                    var delegated = _wrappedPostingsReader.Docs(field, pulsingState.WrappedTermState, liveDocs,
                        GetOther(reuse), flags);

                    SetOther(delegated, reuse); // delegated.other = reuse
                    return delegated;
                }

                return _wrappedPostingsReader.Docs(field, pulsingState.WrappedTermState, liveDocs, reuse, flags);
            }

            // Find a pulsing enum to recycle: either 'reuse' itself or, when
            // 'reuse' is actually the wrapped enum, its registered partner.
            PulsingDocsEnum candidate = reuse is PulsingDocsEnum
                ? (PulsingDocsEnum) reuse
                : (PulsingDocsEnum) GetOther(reuse);

            PulsingDocsEnum pulsing;
            if (candidate != null && candidate.CanReuse(field))
            {
                pulsing = candidate;
            }
            else
            {
                pulsing = new PulsingDocsEnum(field);
            }

            if (reuse != pulsing)
            {
                SetOther(pulsing, reuse); // pulsing.other = reuse
            }

            return pulsing.Reset(liveDocs, pulsingState);
        }
            /// <summary>
            /// Obtains the docs enum of the wrapped terms enum and wraps it in a new
            /// <c>SortingDocsEnum</c>.
            /// </summary>
            /// <param name="liveDocs">Live docs; passed through <c>NewToOld</c> before being handed to the wrapped enum.</param>
            /// <param name="reuse">Enum offered for reuse; may be <c>null</c>.</param>
            /// <param name="flags">Flags for the wrapped enum; also checked for <c>FLAG_FREQS</c>.</param>
            /// <returns>A new <c>SortingDocsEnum</c> around the wrapped enum's result.</returns>
            public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
            {
                // If we're asked to reuse a Sorting enum, hand its wrapped enum to the
                // delegate instead, since some Codecs expect it.
                SortingDocsEnum sortingReuse = reuse as SortingDocsEnum;
                DocsEnum delegateReuse;
                if (sortingReuse != null)
                {
                    delegateReuse = sortingReuse.Wrapped;
                }
                else
                {
                    delegateReuse = reuse;
                }

                DocsEnum delegateDocs = @in.Docs(NewToOld(liveDocs), delegateReuse, flags);

                // Freqs are exposed only when the index options are at least
                // DOCS_AND_FREQS and the caller asked for them.
                bool freqsIndexed = indexOptions.GetValueOrDefault().CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS) >= 0;
                bool freqsRequested = (flags & DocsEnum.FLAG_FREQS) != 0;
                bool withFreqs = freqsIndexed && freqsRequested;

                return new SortingDocsEnum(docMap.Count, sortingReuse, delegateDocs, withFreqs, docMap);
            }
예제 #15
0
        /// <summary>
        /// Returns the ordinal of the category <paramref name="cp"/>, consulting
        /// the ordinal cache first and the index only on a cache miss.
        /// </summary>
        /// <param name="cp">The category path to resolve.</param>
        /// <returns>
        /// <c>ROOT_ORDINAL</c> for an empty path, the category's ordinal when it
        /// is known to this reader, otherwise
        /// <c>TaxonomyReader.INVALID_ORDINAL</c>.
        /// </returns>
        public override int GetOrdinal(FacetLabel cp)
        {
            EnsureOpen();

            if (cp.Length == 0)
            {
                return ROOT_ORDINAL;
            }

            // Step 1: the LRU cache.
            // LUCENENET: Despite LRUHashMap being thread-safe, we get much better performance
            // if reads are separated from writes.
            ordinalCacheLock.EnterReadLock();
            try
            {
                if (ordinalCache.TryGetValue(cp, out Int32Class cachedOrdinal))
                {
                    // The cache is shared with DTR instances allocated from
                    // doOpenIfChanged, so a cached ordinal is only valid here when
                    // this instance's reader actually contains it.
                    if (cachedOrdinal < indexReader.MaxDoc)
                    {
                        return cachedOrdinal;
                    }

                    // Cached but not recognized by this instance: searching the
                    // disk would not find it either, so stop here.
                    return TaxonomyReader.INVALID_ORDINAL;
                }
            }
            finally
            {
                ordinalCacheLock.ExitReadLock();
            }

            // Step 2: cache miss, so look the category up in the index.
            BytesRef pathTerm = new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length));
            DocsEnum postings = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL, pathTerm, 0);

            if (postings == null || postings.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
            {
                // Non-existence is deliberately not cached. The caches are shared
                // with new DTR instances allocated from doOpenIfChanged, so caching
                // only found categories means a newer generation can never be told
                // by accident that a category does not exist.
                return TaxonomyReader.INVALID_ORDINAL;
            }

            int ordinal = postings.DocID;

            ordinalCacheLock.EnterWriteLock();
            try
            {
                ordinalCache[cp] = ordinal;
            }
            finally
            {
                ordinalCacheLock.ExitWriteLock();
            }

            return ordinal;
        }
예제 #16
0
        /// <summary>
        /// Walks <paramref name="leftTermsEnum"/> and <paramref name="rightTermsEnum"/>
        /// sequentially and asserts that they yield the same terms with the same
        /// term statistics, and that the right enum is exhausted when the left one is.
        /// If <paramref name="deep"/> is false, this is a 'shallow' test that does not
        /// go down to the docs/positions enums.
        /// </summary>
        /// <param name="leftTermsEnum">Terms enum that drives the iteration.</param>
        /// <param name="rightTermsEnum">Terms enum that is expected to match the left one.</param>
        /// <param name="deep">When true, the postings of every term are compared as well.</param>
        /// <remarks>
        /// In deep mode every check is run twice: once with <c>null</c> live docs and once
        /// with a randomly generated <c>Bits</c>. The enums returned by each call are fed
        /// back in as the reuse argument of the next call, so the order of the calls matters.
        /// </remarks>
        public virtual void AssertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, bool deep)
        {
            BytesRef             term;
            // Random bit set used as the live docs in the second variant of every check.
            Bits                 randomBits     = new RandomBits(MAXDOC, Random().NextDouble(), Random());
            // Enums carried from call to call as the reuse argument.
            DocsAndPositionsEnum leftPositions  = null;
            DocsAndPositionsEnum rightPositions = null;
            DocsEnum             leftDocs       = null;
            DocsEnum             rightDocs      = null;

            while ((term = leftTermsEnum.Next()) != null)
            {
                Assert.AreEqual(term, rightTermsEnum.Next());
                AssertTermStats(leftTermsEnum, rightTermsEnum);
                if (deep)
                {
                    // default flags (no flags argument); per the original note: with payloads + offsets
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));
                    // with payloads only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));

                    // with offsets only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));

                    // with positions only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsEnum.FLAG_NONE));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_NONE));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsEnum.FLAG_NONE));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq(), leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_NONE));

                    // with freqs:
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs));
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs));

                    // w/o freqs:
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsEnum.FLAG_NONE));
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsEnum.FLAG_NONE));

                    // with freqs:
                    AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs));
                    AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs));

                    // w/o freqs:
                    AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsEnum.FLAG_NONE));
                    AssertDocsSkipping(leftTermsEnum.DocFreq(), leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsEnum.FLAG_NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsEnum.FLAG_NONE));
                }
            }
            // The left enum is exhausted, so the right one must be exhausted too.
            Assert.IsNull(rightTermsEnum.Next());
        }
예제 #17
0
		/// <summary>
		/// Decodes the term metadata and returns an <c>FSTDocsEnum</c> reset with
		/// <c>postingsSpare</c>, <paramref name="liveDocs"/> and <c>docFreq_Renamed</c>.
		/// <paramref name="reuse"/> is recycled only when it is an <c>FSTDocsEnum</c> whose
		/// <c>canReuse</c> check accepts this field's index options and payload setting.
		/// <paramref name="flags"/> is not consulted.
		/// </summary>
		public override DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
		{
		  decodeMetaData();

		  // Recycle the caller's enum only if it has the right concrete type and
		  // reports itself compatible with the current field configuration.
		  bool recyclable = reuse is FSTDocsEnum
			&& ((FSTDocsEnum) reuse).canReuse(field.IndexOptions, field.hasPayloads());

		  FSTDocsEnum result = recyclable
			? (FSTDocsEnum) reuse
			: new FSTDocsEnum(field.IndexOptions, field.hasPayloads());

		  return result.reset(this.postingsSpare, liveDocs, docFreq_Renamed);
		}
예제 #18
0
 /// <summary>
 /// Drains <paramref name="in"/>, maps every doc ID through <paramref name="docMap"/>
 /// (<c>oldToNew</c>) and sorts the collected entries. When <paramref name="withFreqs"/>
 /// is set, the frequency of each document is collected alongside its ID.
 /// The buffers of <paramref name="reuse"/> are recycled when it is non-null; its sorter
 /// is recycled only when it was created for the same <paramref name="maxDoc"/>.
 /// </summary>
 internal SortingDocsEnum(int maxDoc, SortingDocsEnum reuse, DocsEnum @in, bool withFreqs, Sorter.DocMap docMap)
     : base(@in)
 {
     this.maxDoc = maxDoc;
     this.withFreqs = withFreqs;

     if (reuse == null)
     {
         docs = new int[64];
         sorter = new DocFreqSorter(maxDoc);
     }
     else
     {
         sorter = reuse.maxDoc == maxDoc ? reuse.sorter : new DocFreqSorter(maxDoc);
         docs = reuse.docs;
         freqs = reuse.freqs; // maybe null
     }

     docIt = -1;

     if (withFreqs)
     {
         // Make sure the freq buffer can hold at least as many entries as the doc buffer.
         if (freqs == null || freqs.Length < docs.Length)
         {
             freqs = new int[docs.Length];
         }
     }
     else
     {
         freqs = null;
     }

     int count = 0;
     for (int doc = @in.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = @in.nextDoc())
     {
         if (count >= docs.Length)
         {
             docs = ArrayUtil.grow(docs, docs.Length + 1);
             if (withFreqs)
             {
                 freqs = ArrayUtil.grow(freqs, freqs.Length + 1);
             }
         }

         docs[count] = docMap.oldToNew(doc);
         if (withFreqs)
         {
             freqs[count] = @in.freq();
         }
         ++count;
     }

     // TimSort can save much time compared to other sorts in case of
     // reverse sorting, or when sorting a concatenation of sorted readers
     sorter.reset(docs, freqs);
     sorter.sort(0, count);
     upto = count;
 }
 /// <summary>
 /// Returns a freshly allocated <c>SimpleTVDocsEnum</c> for the current term. The enum is
 /// reset with a frequency of 1 unless <paramref name="flags"/> contains
 /// <c>DocsEnum.FLAG_FREQS</c>, in which case the current entry's <c>FREQ</c> is used.
 /// </summary>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
 {
     // TODO: reuse
     var result = new SimpleTVDocsEnum();
     bool freqsRequested = (flags & DocsEnum.FLAG_FREQS) != 0;
     result.Reset(liveDocs, freqsRequested ? _current.Value.FREQ : 1);
     return result;
 }
예제 #20
0
 /// <summary>
 /// Forwards the request, with all arguments unchanged, to <c>actualEnum</c>.
 /// </summary>
 public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags)
     => actualEnum.Docs(liveDocs, reuse, flags);
예제 #21
0
        /// <summary>
        /// Loads the facet data of a field from <paramref name="reader"/>: walks every term of
        /// the field, records for each document the ID of its term in the order array, and
        /// collects the per-term frequency and minimum/maximum document ID. Slot 0 of the
        /// resulting arrays describes the documents that were not assigned any term.
        /// </summary>
        /// <param name="fieldName">Name of the field to load.</param>
        /// <param name="reader">Reader supplying the terms and postings.</param>
        /// <param name="listFactory">
        /// Factory for the term value list; when <c>null</c> a <c>TermStringList</c> is used.
        /// </param>
        /// <exception cref="RuntimeException">
        /// Thrown when the field has more terms than the reader's <c>MaxDoc</c>.
        /// </exception>
        public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory)
        {
#if FEATURE_STRING_INTERN
            string field = string.Intern(fieldName);
#else
            string field = fieldName;
#endif
            int maxDoc = reader.MaxDoc;

            int dictValueCount = GetDictValueCount(reader, fieldName);
            BigSegmentedArray order = NewInstance(dictValueCount, maxDoc);

            this.m_orderArray = order;

            List<int> minIDList = new List<int>();
            List<int> maxIDList = new List<int>();
            List<int> freqList = new List<int>();

            int length = maxDoc + 1;
            ITermValueList list = listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList();
            int negativeValueCount = GetNegativeValueCount(reader, field);

            int t = 1; // valid term id starts from 1

            // Slot 0 is a placeholder entry; its statistics are filled in at the end.
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            int totalFreq = 0;
            Terms terms = reader.GetTerms(field);
            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetIterator(null);
                BytesRef text;
                while ((text = termsEnum.Next()) != null)
                {
                    // store term text
                    // we expect that there is at most one term per document
                    if (t >= length)
                    {
                        throw new RuntimeException("there are more terms than "
                                                   + "documents in field \"" + field + "\", but it's impossible to sort on "
                                                   + "tokenized fields");
                    }
                    string strText = text.Utf8ToString();
                    list.Add(strText);
                    Term term = new Term(field, strText);
                    DocsEnum docsEnum = reader.GetTermDocsEnum(term);
                    // The frequency is counted from the postings rather than taken from the
                    // term's docFreq, which doesn't take deleted docs into account.
                    int minID = -1;
                    int maxID = -1;
                    int docID;
                    int df = 0;
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;

                    // Single pass over the postings. The enum is never advanced again once it
                    // has returned NO_MORE_DOCS, because the result of doing so is undefined.
                    while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                    {
                        if (df == 0)
                        {
                            minID = docID; // first document seen for this term
                        }
                        df++;
                        order.Add(docID, valId);
                        maxID = docID; // last document seen so far
                    }
                    freqList.Add(df);
                    totalFreq += df;
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                    t++;
                }
            }

            list.Seal();
            this.m_valArray = list;
            this.m_freqs = freqList.ToArray();
            this.m_minIDs = minIDList.ToArray();
            this.m_maxIDs = maxIDList.ToArray();

            // Find the first document whose order entry is still 0 (no term assigned).
            int doc = 0;
            while (doc < maxDoc && order.Get(doc) != 0)
            {
                ++doc;
            }
            if (doc < maxDoc)
            {
                this.m_minIDs[0] = doc;
                // Try to get the max
                doc = maxDoc - 1;
                while (doc >= 0 && order.Get(doc) != 0)
                {
                    --doc;
                }
                this.m_maxIDs[0] = doc;
            }
            this.m_freqs[0] = reader.NumDocs - totalFreq;
        }
예제 #22
0
 /// <summary>
 /// Tells whether <paramref name="reuse"/> may be recycled: it must be a
 /// <c>SegmentDocsEnumBase</c> whose <c>StartFreqIn</c> equals this reader's <c>FreqIn</c>
 /// and whose <c>LiveDocs</c> equals <paramref name="liveDocs"/>.
 /// </summary>
 private bool CanReuse(DocsEnum reuse, Bits liveDocs)
 {
     if (!(reuse is SegmentDocsEnumBase))
     {
         return false;
     }

     SegmentDocsEnumBase candidate = (SegmentDocsEnumBase)reuse;

     // If you are using ParallelReader and pass in a reused DocsEnum, it could have
     // come from another reader also using the standard codec. Beyond that, the
     // incoming enum is only reused when it has the same liveDocs as the given liveDocs.
     return candidate.StartFreqIn == FreqIn && liveDocs == candidate.LiveDocs;
 }
예제 #23
0
        /// <summary>
        /// Returns a <c>SepDocsEnum</c> initialized for the given field and term state.
        /// <paramref name="reuse"/> is recycled only when it is a <c>SepDocsEnum</c> whose
        /// <c>START_DOC_IN</c> matches this reader's <c>_docIn</c>; otherwise a new enum is
        /// created. <paramref name="flags"/> is not consulted.
        /// </summary>
        public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState bTermState, Bits liveDocs, DocsEnum reuse,
                                      int flags)
        {
            var termState = (SepTermState)bTermState;

            // If you are using ParallelReader, a reused enum could have come from
            // another reader also using the sep codec, so check its doc input too.
            bool recyclable = reuse is SepDocsEnum && ((SepDocsEnum)reuse).START_DOC_IN == _docIn;
            SepDocsEnum result = recyclable ? (SepDocsEnum)reuse : new SepDocsEnum(this);

            return result.Init(fieldInfo, termState, liveDocs);
        }
예제 #24
0
 /// <summary>
 /// Decodes the term metadata, then asks the outer instance's postings reader for the
 /// <see cref="DocsEnum"/> of the current <c>state</c>.
 /// </summary>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
 {
     DecodeMetaData();

     var postingsReader = outerInstance.outerInstance.postingsReader;
     return postingsReader.Docs(outerInstance.fieldInfo, state, liveDocs, reuse, flags);
 }
예제 #25
0
 /// <summary>
 /// Returns a <c>PreDocsEnum</c> reset to <c>TermEnum</c> and <paramref name="liveDocs"/>.
 /// <paramref name="reuse"/> is recycled only when it is a <c>PreDocsEnum</c> sharing the
 /// outer instance's <c>FreqStream</c>. <paramref name="flags"/> is not consulted.
 /// </summary>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
 {
     bool recyclable = reuse is PreDocsEnum
         && ((PreDocsEnum)reuse).FreqStream == OuterInstance.FreqStream;

     PreDocsEnum result = recyclable ? (PreDocsEnum)reuse : new PreDocsEnum(OuterInstance);
     return result.Reset(TermEnum, liveDocs);
 }
 /// <summary>
 /// Returns a <c>TVDocsEnum</c> reset with <paramref name="liveDocs"/> and <c>Freq</c>,
 /// recycling <paramref name="reuse"/> when it already is a <c>TVDocsEnum</c>.
 /// </summary>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags) // flags ignored
 {
     TVDocsEnum result = reuse is TVDocsEnum ? (TVDocsEnum)reuse : new TVDocsEnum();
     result.Reset(liveDocs, Freq);
     return result;
 }
예제 #27
0
        /// <summary>
        /// Returns a <c>SepDocsEnum</c> initialized for the given field and term state.
        /// <paramref name="reuse"/> is recycled only when it is a <c>SepDocsEnum</c> whose
        /// <c>startDocIn</c> equals this reader's <c>docIn</c>. <paramref name="flags"/> is
        /// not consulted.
        /// </summary>
        public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, IBits liveDocs, DocsEnum reuse,
                                      DocsFlags flags)
        {
            var sepState = (SepTermState)termState;

            // If you are using ParallelReader, a reused enum could have come from
            // another reader also using the sep codec; such an enum is not recycled.
            SepDocsEnum result = reuse as SepDocsEnum;
            if (result is null || result.startDocIn != docIn)
            {
                result = new SepDocsEnum(this);
            }

            return result.Init(fieldInfo, sepState, liveDocs);
        }
 /// <summary>
 /// Returns a new <c>RAMDocsEnum</c> over the <c>termToDocs</c> entry of the current term.
 /// Neither <paramref name="reuse"/> nor <paramref name="flags"/> is consulted.
 /// </summary>
 public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags)
     => new RAMDocsEnum(ramField.termToDocs[current], liveDocs);
예제 #29
0
 /// <summary>
 /// Stores the enclosing helper instance and the term's <see cref="DocsEnum"/> for later use.
 /// </summary>
 public DocIdSetIteratorAnonymousInnerClassHelper(DocIdSetAnonymousInnerClassHelper2 outerInstance, DocsEnum termDocsEnum)
 {
     OuterInstance = outerInstance;
     TermDocsEnum = termDocsEnum;
 }
예제 #30
0
        // The whole method runs under the lock so that, when several threads call it
        // concurrently, only one of them executes the fill; once it returns, the cache
        // has been updated and is either complete or not.
        private void PerhapsFillCache()
        {
            lock (this)
            {
                // Not enough misses yet, or the cache was already filled once:
                // in both cases there is nothing to do.
                if (cacheMisses < cacheMissesUntilFill || !shouldFillCache)
                {
                    return;
                }
                shouldFillCache = false;

                InitReaderManager();

                bool cacheOverflowed = false;
                DirectoryReader reader = readerManager.Acquire();
                try
                {
                    TermsEnum termsEnum = null;
                    DocsEnum docsEnum = null;
                    foreach (AtomicReaderContext leaf in reader.Leaves)
                    {
                        Terms terms = leaf.AtomicReader.GetTerms(Consts.FULL);
                        if (terms == null)
                        {
                            // cannot really happen, but be on the safe side
                            continue;
                        }

                        termsEnum = terms.GetIterator(termsEnum);
                        while (termsEnum.Next() != null)
                        {
                            if (cache.IsFull)
                            {
                                // the next put() would evict entries from the cache, therefore abort the iteration.
                                cacheOverflowed = true;
                                break;
                            }

                            BytesRef termBytes = termsEnum.Term;
                            // Since we guarantee uniqueness of categories, each term has exactly
                            // one document. Also, since we do not allow removing categories (and
                            // hence documents), there are no deletions in the index. Therefore, it
                            // is sufficient to call next(), and then doc(), exactly once with no
                            // 'validation' checks.
                            FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(termBytes.Utf8ToString()));
                            docsEnum = termsEnum.Docs(null, docsEnum, DocsFlags.NONE);
                            bool evicted = cache.Put(label, docsEnum.NextDoc() + leaf.DocBase);
                            Debug.Assert(!evicted, "entries should not have been evicted from the cache");
                        }

                        if (cacheOverflowed)
                        {
                            break;
                        }
                    }
                }
                finally
                {
                    readerManager.Release(reader);
                }

                cacheIsComplete = !cacheOverflowed;
                if (!cacheIsComplete)
                {
                    return;
                }

                lock (this)
                {
                    // everything is in the cache, so no need to keep readerManager open.
                    // this block is executed in a sync block so that it works well with
                    // initReaderManager called in parallel.
                    readerManager.Dispose();
                    readerManager = null;
                    initializedReaderManager = false;
                }
            }
        }
예제 #31
0
        /// <summary>
        /// Looks up <paramref name="term"/> in <paramref name="field"/> and returns a
        /// <see cref="DocsEnum"/> for it, randomly sometimes using a
        /// <see cref="MultiDocsEnum"/> or <see cref="DocsAndPositionsEnum"/>. Returns null
        /// if the field or the term doesn't exist.
        /// </summary>
        public static DocsEnum Docs(Random random, IndexReader r, string field, BytesRef term, IBits liveDocs, DocsEnum reuse, DocsFlags flags)
        {
            Terms fieldTerms = MultiFields.GetTerms(r, field);
            if (fieldTerms == null)
            {
                return null;
            }

            TermsEnum positioned = fieldTerms.GetEnumerator();
            return positioned.SeekExact(term)
                ? Docs(random, positioned, liveDocs, reuse, flags)
                : null;
        }
예제 #32
0
        /// <summary>
        /// Indexes 10050 documents, each holding one zero-padded five-digit term, using the
        /// pulsing postings format, then checks that the terms come back in order and that
        /// every term matches exactly one document.
        /// </summary>
        public virtual void Test10kPulsed()
        {
            const int termCount = 10050;

            // we always run this test with pulsing codec.
            Codec pulsingCodec = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1));

            DirectoryInfo tempDir = CreateTempDir("10kpulsed");
            BaseDirectoryWrapper dir = NewFSDirectory(tempDir);
            dir.CheckIndexOnDispose = false; // we do this ourselves explicitly

            RandomIndexWriter writer = new RandomIndexWriter(
                Random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(pulsingCodec));

            Document document = new Document();
            FieldType fieldType = new FieldType(TextField.TYPE_STORED);

            // Pick one of three index option levels at random.
            int optionsChoice = TestUtil.NextInt32(Random, 0, 2);
            if (optionsChoice == 0)
            {
                fieldType.IndexOptions = IndexOptions.DOCS_ONLY;
            }
            else if (optionsChoice == 1)
            {
                fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS;
            }
            else
            {
                fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            }

            Field field = NewField("field", "", fieldType);
            document.Add(field);

            // The same document instance is re-added with a new field value each time.
            for (int n = 0; n < termCount; n++)
            {
                field.SetStringValue(n.ToString("00000", CultureInfo.InvariantCulture));
                writer.AddDocument(document);
            }

            IndexReader reader = writer.GetReader();
            writer.Dispose();

            TermsEnum termsEnum = MultiFields.GetTerms(reader, "field").GetIterator(null);
            DocsEnum docsEnum = null;

            for (int n = 0; n < termCount; n++)
            {
                string expected = n.ToString("00000", CultureInfo.InvariantCulture);
                assertEquals(expected, termsEnum.Next().Utf8ToString());

                docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
                // exactly one document per term: the first advance hits it, the second is exhausted
                assertTrue(docsEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
            }
            reader.Dispose();

            TestUtil.CheckIndex(dir);
            dir.Dispose();
        }
예제 #33
0
        /// <summary>
        /// Returns a <see cref="DocsEnum"/> from a positioned <see cref="TermsEnum"/>, but
        /// randomly sometimes returns a <see cref="DocsAndPositionsEnum"/> obtained with
        /// randomly chosen position flags, or requests frequencies in addition to the
        /// caller's <paramref name="flags"/>.
        /// </summary>
        public static DocsEnum Docs(Random random, TermsEnum termsEnum, IBits liveDocs, DocsEnum reuse, DocsFlags flags)
        {
            if (!random.NextBoolean())
            {
                return termsEnum.Docs(liveDocs, reuse, flags);
            }

            if (random.NextBoolean())
            {
                DocsAndPositionsFlags posFlags;
                int pick = random.Next(4);
                if (pick == 0)
                {
                    posFlags = 0;
                }
                else if (pick == 1)
                {
                    posFlags = DocsAndPositionsFlags.OFFSETS;
                }
                else if (pick == 2)
                {
                    posFlags = DocsAndPositionsFlags.PAYLOADS;
                }
                else
                {
                    posFlags = DocsAndPositionsFlags.OFFSETS | DocsAndPositionsFlags.PAYLOADS;
                }

                // TODO: cast to DocsAndPositionsEnum?
                DocsAndPositionsEnum withPositions = termsEnum.DocsAndPositions(liveDocs, null, posFlags);
                if (withPositions != null)
                {
                    return withPositions;
                }
            }

            // No positions enum was chosen or available: fall back to a docs enum with freqs.
            flags |= DocsFlags.FREQS;
            return termsEnum.Docs(liveDocs, reuse, flags);
        }
예제 #34
0
 /// <summary>
 /// Stores the enclosing helper instance and the term's <see cref="DocsEnum"/> for later use.
 /// </summary>
 public DocIdSetIteratorAnonymousInnerClassHelper(DocIdSetAnonymousInnerClassHelper2 outerInstance, DocsEnum termDocsEnum)
 {
     OuterInstance = outerInstance;
     TermDocsEnum = termDocsEnum;
 }
예제 #35
0
 /// <summary>
 /// Returns a new <c>RAMDocsEnum</c> over the <c>TermToDocs</c> entry of the current term.
 /// Neither <paramref name="reuse"/> nor <paramref name="flags"/> is consulted.
 /// </summary>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
 {
     var termDocs = RamField.TermToDocs[Current];
     return new RAMDocsEnum(termDocs, liveDocs);
 }
예제 #36
0
            /// <summary>
            /// Obtains the wrapped enum's docs for <paramref name="liveDocs"/> (mapped through
            /// <c>newToOld</c>) and wraps them in a new <c>SortingDocsEnum</c>. Frequencies are
            /// collected only when the field's index options include them and
            /// <paramref name="flags"/> contains <c>DocsEnum.FLAG_FREQS</c>.
            /// </summary>
            public override DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
            {
                // if we're asked to reuse the given DocsEnum and it is Sorting, hand the
                // wrapped one to the delegate, since some Codecs expect it.
                bool reuseIsSorting = reuse is SortingDocsEnum;
                SortingDocsEnum wrapReuse = reuseIsSorting ? (SortingDocsEnum) reuse : null;
                DocsEnum inReuse = reuseIsSorting ? wrapReuse.Wrapped : reuse;

                DocsEnum inDocs = @in.docs(newToOld(liveDocs), inReuse, flags);
                bool withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 && (flags & DocsEnum.FLAG_FREQS) != 0;

                return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap);
            }
예제 #37
0
 /// <summary>
 /// Returns a <see cref="DocsEnum"/> for the given field and term state.
 /// Implementations must fully consume <paramref name="state"/>, since after this call that
 /// <see cref="TermState"/> may be reused.
 /// </summary>
 /// <param name="fieldInfo">The field the term belongs to.</param>
 /// <param name="state">The term state to read from; must be fully consumed before returning.</param>
 /// <param name="skipDocs">NOTE(review): presumably the live-docs bits used to filter documents - confirm against implementations.</param>
 /// <param name="reuse">NOTE(review): presumably a previously returned enum that may be recycled - confirm against implementations.</param>
 /// <param name="flags">Flags describing what the caller requests from the enum.</param>
 public abstract DocsEnum Docs(FieldInfo fieldInfo, BlockTermState state, IBits skipDocs, DocsEnum reuse, DocsFlags flags);
예제 #38
0
 /// <summary>
 /// For testing: tells whether <paramref name="other"/> is a <c>SortingDocsEnum</c>
 /// that shares this instance's <c>docs</c> array.
 /// </summary>
 internal virtual bool reused(DocsEnum other)
 {
     return other is SortingDocsEnum && docs == ((SortingDocsEnum) other).docs;
 }
 /// <summary>
 /// Forwards the request, with all arguments unchanged, to the enum returned by <c>Delegate()</c>.
 /// </summary>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
 {
     var target = Delegate();
     return target.Docs(liveDocs, reuse, flags);
 }
예제 #40
0
 /// <summary>
 /// Returns the result of <c>CreatePagesEnum()</c>; none of the arguments is consulted.
 /// </summary>
 public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags)
     => CreatePagesEnum();
                /// <summary>
                /// Returns a docs enum for the term at <c>termOrd</c>. The concrete enum type
                /// depends on whether the term is a <c>LowFreqTerm</c> or a <c>HighFreqTerm</c>
                /// and, for low-frequency terms, on whether the field has freqs and positions.
                /// <paramref name="reuse"/> is recycled when it has the matching concrete type
                /// and its own reuse check accepts the request. <paramref name="flags"/> is not
                /// consulted.
                /// </summary>
                public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
                {
                    // TODO: implement reuse, something like Pulsing:
                    // it's hairy!

                    if (!(outerInstance.terms[termOrd] is LowFreqTerm))
                    {
                        HighFreqTerm term = (HighFreqTerm) outerInstance.terms[termOrd];

                        bool recycleHigh = reuse is HighFreqDocsEnum && ((HighFreqDocsEnum) reuse).canReuse(liveDocs);
                        HighFreqDocsEnum highFreqEnum = recycleHigh
                            ? (HighFreqDocsEnum) reuse
                            : new HighFreqDocsEnum(liveDocs);

                        //System.out.println("  DE for term=" + new BytesRef(terms[termOrd].term).utf8ToString() + ": " + term.docIDs.length + " docs");
                        return highFreqEnum.Reset(term.docIDs, term.freqs);
                    }

                    int[] postings = ((LowFreqTerm) outerInstance.terms[termOrd]).postings;

                    if (!outerInstance.hasFreq)
                    {
                        bool recycleNoTF = reuse is LowFreqDocsEnumNoTF && ((LowFreqDocsEnumNoTF) reuse).CanReuse(liveDocs);
                        LowFreqDocsEnumNoTF noTFEnum = recycleNoTF
                            ? (LowFreqDocsEnumNoTF) reuse
                            : new LowFreqDocsEnumNoTF(liveDocs);

                        return noTFEnum.Reset(postings);
                    }

                    if (!outerInstance.hasPos)
                    {
                        bool recycleNoPos = reuse is LowFreqDocsEnumNoPos && ((LowFreqDocsEnumNoPos) reuse).CanReuse(liveDocs);
                        LowFreqDocsEnumNoPos noPosEnum = recycleNoPos
                            ? (LowFreqDocsEnumNoPos) reuse
                            : new LowFreqDocsEnumNoPos(liveDocs);

                        return noPosEnum.Reset(postings);
                    }

                    // Freqs and positions are both present: posLen is 3 with offsets, 1 without,
                    // plus one more when payloads are present.
                    int posLen = outerInstance.hasOffsets_Renamed ? 3 : 1;
                    if (outerInstance.hasPayloads_Renamed)
                    {
                        posLen++;
                    }

                    bool recycleLow = reuse is LowFreqDocsEnum && ((LowFreqDocsEnum) reuse).CanReuse(liveDocs, posLen);
                    LowFreqDocsEnum lowFreqEnum = recycleLow
                        ? (LowFreqDocsEnum) reuse
                        : new LowFreqDocsEnum(liveDocs, posLen);

                    return lowFreqEnum.Reset(postings);
                }
예제 #42
0
 /// <summary>
 /// Not supported by this enum; always throws.
 /// </summary>
 /// <exception cref="System.NotSupportedException">Always.</exception>
 public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags)
     => throw new System.NotSupportedException();
예제 #43
0
        /// <summary>
        /// Default merge implementation: replays every document of <paramref name="postings"/>
        /// through <c>StartDoc</c>/<c>AddPosition</c>/<c>FinishDoc</c>, marking each one in
        /// <paramref name="visitedDocs"/>. How much is replayed per document (doc only, freq,
        /// positions with payloads, offsets) depends on <paramref name="indexOptions"/>.
        /// </summary>
        /// <returns>
        /// A <see cref="TermStats"/> holding the number of documents written and the summed
        /// term frequency, or -1 as the total when <paramref name="indexOptions"/> is
        /// <c>DOCS_ONLY</c>.
        /// </returns>
        public virtual TermStats Merge(MergeState mergeState, IndexOptions indexOptions, DocsEnum postings, FixedBitSet visitedDocs)
        {
            int docCount = 0;
            long totalTermFreq = 0;

            if (indexOptions == IndexOptions.DOCS_ONLY)
            {
                for (int doc = postings.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postings.NextDoc())
                {
                    visitedDocs.Set(doc);
                    this.StartDoc(doc, -1);
                    this.FinishDoc();
                    docCount++;
                }
                totalTermFreq = -1;
            }
            else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
            {
                for (int doc = postings.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postings.NextDoc())
                {
                    visitedDocs.Set(doc);
                    int freq = postings.Freq;
                    this.StartDoc(doc, freq);
                    this.FinishDoc();
                    docCount++;
                    totalTermFreq += freq;
                }
            }
            else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                var positionsEnum = (DocsAndPositionsEnum)postings;
                for (int doc = positionsEnum.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = positionsEnum.NextDoc())
                {
                    visitedDocs.Set(doc);
                    int freq = positionsEnum.Freq;
                    this.StartDoc(doc, freq);
                    totalTermFreq += freq;
                    for (int p = 0; p < freq; p++)
                    {
                        int position = positionsEnum.NextPosition();
                        BytesRef payload = positionsEnum.GetPayload();
                        // offsets are not indexed at this level, so -1 is passed for both
                        this.AddPosition(position, payload, -1, -1);
                    }
                    this.FinishDoc();
                    docCount++;
                }
            }
            else
            {
                Debug.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                var positionsEnum = (DocsAndPositionsEnum)postings;
                for (int doc = positionsEnum.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = positionsEnum.NextDoc())
                {
                    visitedDocs.Set(doc);
                    int freq = positionsEnum.Freq;
                    this.StartDoc(doc, freq);
                    totalTermFreq += freq;
                    for (int p = 0; p < freq; p++)
                    {
                        int position = positionsEnum.NextPosition();
                        BytesRef payload = positionsEnum.GetPayload();
                        this.AddPosition(position, payload, positionsEnum.StartOffset, positionsEnum.EndOffset);
                    }
                    this.FinishDoc();
                    docCount++;
                }
            }

            return new TermStats(docCount, indexOptions == IndexOptions.DOCS_ONLY ? -1 : totalTermFreq);
        }
예제 #44
0
 /// <summary>
 /// Returns a docs enum for the given field and term state: <paramref name="reuse"/> is
 /// reset and returned when <c>CanReuse</c> accepts it, otherwise a new enum is created
 /// through <c>NewDocsEnum</c>. <paramref name="flags"/> is not consulted.
 /// </summary>
 public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags)
 {
     if (!CanReuse(reuse, liveDocs))
     {
         return NewDocsEnum(liveDocs, fieldInfo, (StandardTermState)termState);
     }

     // if (DEBUG) System.out.println("SPR.docs ts=" + termState);
     return ((SegmentDocsEnumBase)reuse).Reset(fieldInfo, (StandardTermState)termState);
 }
예제 #45
0
 /// <summary>
 /// Creates a <see cref="TermScorer"/>.
 /// </summary>
 /// <param name="weight">
 ///          Weight of the <see cref="Index.Term"/> in the query; passed to the base class. </param>
 /// <param name="td">
 ///          Iterator over the documents matching the <see cref="Index.Term"/>. </param>
 /// <param name="docScorer">
 ///          <see cref="Similarity.SimScorer"/> implementation
 ///          used for score computations. </param>
 internal TermScorer(Weight weight, DocsEnum td, Similarity.SimScorer docScorer)
     : base(weight)
 {
     docsEnum = td;
     this.docScorer = docScorer;
 }
 /// <summary>
 /// Forwards the request, with all arguments unchanged, to <c>Delegate</c>.
 /// </summary>
 public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags)
     => Delegate.Docs(liveDocs, reuse, flags);
 /// <summary>
 /// Not supported by this enum; always throws.
 /// </summary>
 /// <exception cref="System.NotSupportedException">Always.</exception>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
 {
     throw new System.NotSupportedException();
 }
예제 #48
0
 /// <summary>
 /// Not supported by this enum; always throws the exception produced by
 /// <c>UnsupportedOperationException.Create()</c>.
 /// </summary>
 public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags)
     => throw UnsupportedOperationException.Create();
        /// <summary>
        /// Loads multi-value facet data for one field. This method uses a work area to
        /// prepare loading.
        /// <para>
        /// Every term of the field is appended to the term list, and for each term the
        /// document frequency and the first and last matching document IDs are recorded.
        /// Each (document, value id) pair is fed to a buffered loader, which is then
        /// loaded into <c>m_nestedArray</c>. Slot 0 of the per-term arrays is a
        /// placeholder (null term): its frequency is set to the number of documents in
        /// which no term of the field was seen.
        /// </para>
        /// </summary>
        /// <param name="fieldName">Name of the field whose terms are loaded.</param>
        /// <param name="reader">Segment reader supplying <c>MaxDoc</c>, the field's terms and their postings.</param>
        /// <param name="listFactory">Factory for the term value list; when <c>null</c> a <c>TermStringList</c> is used.</param>
        /// <param name="workArea">Work area passed through to <c>GetBufferedLoader</c>.</param>
        /// <exception cref="RuntimeException">Wraps any exception raised while loading the nested array.</exception>
        public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory, BoboSegmentReader.WorkArea workArea)
        {
#if FEATURE_STRING_INTERN
            string field = string.Intern(fieldName);
#else
            string field = fieldName;
#endif
            int maxdoc = reader.MaxDoc;
            BigNestedInt32Array.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);

            ITermValueList list               = (listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList());
            List<int>      minIDList          = new List<int>();
            List<int>      maxIDList          = new List<int>();
            List<int>      freqList           = new List<int>();
            OpenBitSet     bitset             = new OpenBitSet(maxdoc + 1);
            int            negativeValueCount = GetNegativeValueCount(reader, field);
            int            t = 1; // valid term id starts from 1

            // Slot 0 is the placeholder entry; its statistics are filled in at the end.
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);

            m_overflow = false;
            Terms terms = reader.GetTerms(field);
            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetIterator(null);
                BytesRef  text;
                while ((text = termsEnum.Next()) != null)
                {
                    string strText = text.Utf8ToString();
                    list.Add(strText);

                    Term     term     = new Term(field, strText);
                    DocsEnum docsEnum = reader.GetTermDocsEnum(term);
                    int      df       = 0;
                    int      minID    = -1;
                    int      maxID    = -1;
                    int      docID;

                    // The first negativeValueCount terms get their ids in reverse order;
                    // presumably so negative numeric values end up ascending -- confirm
                    // against GetNegativeValueCount.
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;

                    // Single pass over the postings. NextDoc() is never called again once it
                    // has returned NO_MORE_DOCS, because the iterator contract leaves that
                    // behaviour undefined.
                    while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                    {
                        df++;
                        if (!loader.Add(docID, valId))
                        {
                            LogOverflow(fieldName);
                        }
                        if (minID == -1)
                        {
                            // first document of this term
                            minID = docID;
                        }
                        maxID = docID; // last document seen so far
                        bitset.FastSet(docID);
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                    t++;
                }
            }

            list.Seal();

            try
            {
                m_nestedArray.Load(maxdoc + 1, loader);
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.m_valArray = list;
            this.m_freqs    = freqList.ToArray();
            this.m_minIDs   = minIDList.ToArray();
            this.m_maxIDs   = maxIDList.ToArray();

            // Fill in slot 0: the first and last document for which
            // m_nestedArray.Contains(doc, 0, true) holds (presumably documents without
            // any value for this field -- confirm against BigNestedInt32Array).
            int doc = 0;
            while (doc < maxdoc && !m_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc < maxdoc)
            {
                this.m_minIDs[0] = doc;
                doc = maxdoc - 1;
                while (doc >= 0 && !m_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                this.m_maxIDs[0] = doc;
            }

            // Documents that were never set in the bitset matched no term of this field.
            this.m_freqs[0] = maxdoc - (int)bitset.Cardinality();
        }
예제 #50
0
 /// <summary>
 /// Returns a <see cref="DocsEnum"/> for the given term state, recycling
 /// <paramref name="reuse"/> when <c>CanReuse</c> accepts it and creating a
 /// new enum otherwise.
 /// </summary>
 /// <param name="fieldInfo"> Field the term belongs to. </param>
 /// <param name="termState"> Term state; cast to <see cref="StandardTermState"/>. </param>
 /// <param name="liveDocs"> Live-docs bits handed to <c>CanReuse</c> / <c>NewDocsEnum</c>. </param>
 /// <param name="reuse"> Candidate enum to recycle. </param>
 /// <param name="flags"> Not consulted by this method. </param>
 public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, IBits liveDocs, DocsEnum reuse, DocsFlags flags)
 {
     if (!CanReuse(reuse, liveDocs))
     {
         // Nothing to recycle: build a fresh enum for this term.
         return NewDocsEnum(liveDocs, fieldInfo, (StandardTermState)termState);
     }

     SegmentDocsEnumBase recycled = (SegmentDocsEnumBase)reuse;
     return recycled.Reset(fieldInfo, (StandardTermState)termState);
 }
예제 #51
0
        /// <summary>
        /// Returns a <see cref="DocsEnum"/> for the given term state.
        /// <para>
        /// When <c>PostingsSize</c> is -1 the request is delegated to the wrapped postings
        /// reader. Otherwise a <see cref="PulsingDocsEnum"/> is reused or created and reset
        /// on the term state (presumably because the postings are inlined in the term
        /// state -- confirm against PulsingTermState). In both cases the enum that was not
        /// returned is remembered through <c>SetOther</c> so that it can be found again
        /// through <c>GetOther</c> on a later call.
        /// </para>
        /// </summary>
        /// <param name="field"> Field the term belongs to. </param>
        /// <param name="_termState"> Term state; cast to <see cref="PulsingTermState"/>. </param>
        /// <param name="liveDocs"> Live-docs bits passed on to the returned enum. </param>
        /// <param name="reuse"> Enum offered for reuse. </param>
        /// <param name="flags"> Passed through to the wrapped postings reader. </param>
        public override DocsEnum Docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse,
                                      int flags)
        {
            var termState = (PulsingTermState)_termState;

            if (termState.PostingsSize == -1)
            {
                if (reuse is PulsingDocsEnum)
                {
                    // Hand the wrapped reader the enum paired with 'reuse', then pair the result back.
                    var wrapped = _wrappedPostingsReader.Docs(field, termState.WrappedTermState, liveDocs,
                                                              GetOther(reuse), flags);

                    SetOther(wrapped, reuse); // wrapped.other = reuse
                    return wrapped;
                }

                return _wrappedPostingsReader.Docs(field, termState.WrappedTermState, liveDocs, reuse, flags);
            }

            var candidate = reuse as PulsingDocsEnum;
            if (candidate == null)
            {
                // the 'reuse' is actually the wrapped enum
                candidate = (PulsingDocsEnum)GetOther(reuse);
            }

            PulsingDocsEnum postings;
            if (candidate != null && candidate.CanReuse(field))
            {
                postings = candidate;
            }
            else
            {
                postings = new PulsingDocsEnum(field);
            }

            if (reuse != postings)
            {
                SetOther(postings, reuse); // postings.other = reuse
            }
            return postings.Reset(liveDocs, termState);
        }
예제 #52
0
            /// <summary>
            /// Reads every document (and, when <paramref name="withFreqs"/> is set, its
            /// frequency) from <paramref name="input"/>, maps each document ID through
            /// <paramref name="docMap"/>, and sorts the buffered entries.
            /// </summary>
            /// <param name="maxDoc"> Stored on the instance and used to size a new sorter. </param>
            /// <param name="reuse"> Optional previous instance whose buffers (and sorter, when
            /// its maxDoc matches) are taken over; may be <c>null</c>. </param>
            /// <param name="input"> Enum to drain; also passed to the base constructor. </param>
            /// <param name="withFreqs"> Whether frequencies are buffered alongside the documents. </param>
            /// <param name="docMap"> Maps old document IDs to new ones. </param>
            internal SortingDocsEnum(int maxDoc, SortingDocsEnum reuse, DocsEnum input, bool withFreqs, Sorter.DocMap docMap)
                : base(input)
            {
                this.maxDoc    = maxDoc;
                this.withFreqs = withFreqs;

                if (reuse == null)
                {
                    docs   = new int[64];
                    sorter = new DocFreqSorter(maxDoc);
                }
                else
                {
                    // The sorter can only be taken over when it was built for the same maxDoc.
                    sorter = reuse.maxDoc == maxDoc ? reuse.sorter : new DocFreqSorter(maxDoc);
                    docs   = reuse.docs;
                    freqs  = reuse.freqs; // maybe null
                }

                docIt = -1;
                int count = 0;
                int docId;

                if (!withFreqs)
                {
                    freqs = null;
                    while ((docId = input.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        if (count >= docs.Length)
                        {
                            docs = ArrayUtil.Grow(docs, docs.Length + 1);
                        }
                        docs[count] = docMap.OldToNew(docId);
                        count++;
                    }
                }
                else
                {
                    // The frequency buffer must be at least as long as the doc buffer.
                    if (freqs == null || freqs.Length < docs.Length)
                    {
                        freqs = new int[docs.Length];
                    }
                    while ((docId = input.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        if (count >= docs.Length)
                        {
                            docs  = ArrayUtil.Grow(docs, docs.Length + 1);
                            freqs = ArrayUtil.Grow(freqs, freqs.Length + 1);
                        }
                        docs[count]  = docMap.OldToNew(docId);
                        freqs[count] = input.Freq;
                        count++;
                    }
                }

                // TimSort can save much time compared to other sorts in case of
                // reverse sorting, or when sorting a concatenation of sorted readers
                sorter.Reset(docs, freqs);
                sorter.Sort(0, count);
                upto = count;
            }
예제 #53
0
        /// <summary>
        /// Records <paramref name="other"/> as the 'other' reused enum of
        /// <paramref name="de"/>, keyed by this instance in the enum's
        /// <see cref="IPulsingEnumAttribute"/> map. See GetOther for an example.
        /// </summary>
        /// <param name="de"> Enum whose attributes receive the entry. </param>
        /// <param name="other"> Enum to associate with <paramref name="de"/>. </param>
        /// <returns> <paramref name="other"/>. </returns>
        private DocsEnum SetOther(DocsEnum de, DocsEnum other)
        {
            var enumsByReader = de.Attributes().AddAttribute<IPulsingEnumAttribute>().Enums();

            enumsByReader[this] = other;
            return other;
        }
예제 #54
0
 /// <summary>
 /// Forwards the request unchanged to <c>ActualEnum</c> and returns its result.
 /// </summary>
 public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
 {
     return ActualEnum.Docs(liveDocs, reuse, flags);
 }
예제 #55
0
        /// <summary>
        /// Returns a <see cref="SepDocsEnum"/> initialised for the given term state,
        /// recycling <paramref name="reuse"/> when it is a <see cref="SepDocsEnum"/>
        /// whose <c>START_DOC_IN</c> is this reader's <c>_docIn</c>.
        /// </summary>
        /// <param name="fieldInfo"> Field the term belongs to. </param>
        /// <param name="bTermState"> Term state; cast to <see cref="SepTermState"/>. </param>
        /// <param name="liveDocs"> Live-docs bits passed to <c>Init</c>. </param>
        /// <param name="reuse"> Candidate enum to recycle. </param>
        /// <param name="flags"> Not consulted by this method. </param>
        public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState bTermState, Bits liveDocs, DocsEnum reuse,
            int flags)
        {
            var termState = (SepTermState)bTermState;

            // A reused enum may have come from another reader that also uses the sep
            // codec (e.g. via a parallel reader); it is only recycled when it was
            // created over this reader's doc input.
            var docsEnum = reuse as SepDocsEnum;
            if (docsEnum == null || docsEnum.START_DOC_IN != _docIn)
            {
                docsEnum = new SepDocsEnum(this);
            }

            return docsEnum.Init(fieldInfo, termState, liveDocs);
        }