Example #1
            /// <summary>
            /// Build a new <seealso cref="WAH8DocIdSet"/>. </summary>
            public virtual WAH8DocIdSet Build()
            {
                if (Cardinality == 0)
                {
                    Debug.Assert(LastWordNum == -1);
                    return(EMPTY);
                }
                WriteSequence();
                byte[] data = Arrays.CopyOf((byte[])(Array)@out.Bytes, @out.Length);

                // Now build the index
                int valueCount = (NumSequences - 1) / IndexInterval_Renamed + 1;
                MonotonicAppendingLongBuffer indexPositions, indexWordNums;

                if (valueCount <= 1)
                {
                    indexPositions = indexWordNums = SINGLE_ZERO_BUFFER;
                }
                else
                {
                    const int pageSize                     = 128;
                    int       initialPageCount             = (valueCount + pageSize - 1) / pageSize;
                    MonotonicAppendingLongBuffer positions = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
                    MonotonicAppendingLongBuffer wordNums  = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);

                    positions.Add(0L);
                    wordNums.Add(0L);
                    Iterator it = new Iterator(data, Cardinality, int.MaxValue, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
                    Debug.Assert(it.@in.Position == 0);
                    Debug.Assert(it.WordNum == -1);
                    for (int i = 1; i < valueCount; ++i)
                    {
                        // skip indexInterval sequences
                        for (int j = 0; j < IndexInterval_Renamed; ++j)
                        {
                            bool readSequence = it.ReadSequence();
                            Debug.Assert(readSequence);
                            it.SkipDirtyBytes();
                        }
                        int position = it.@in.Position;
                        int wordNum  = it.WordNum;
                        positions.Add(position);
                        wordNums.Add(wordNum + 1);
                    }
                    positions.Freeze();
                    wordNums.Freeze();
                    indexPositions = positions;
                    indexWordNums  = wordNums;
                }

                return(new WAH8DocIdSet(data, Cardinality, IndexInterval_Renamed, indexPositions, indexWordNums));
            }
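A quick sanity check of the index sizing in the Build() above, with invented numbers. This is only a sketch: numSequences and indexInterval stand in for NumSequences and IndexInterval_Renamed, and none of the concrete values come from the original source.

    // Sketch of the index-sizing arithmetic from Build(), with hypothetical inputs
    // (1,000 encoded sequences, one index entry every 8 sequences).
    static class IndexSizingSketch
    {
        static void Main()
        {
            int numSequences  = 1000;   // stand-in for NumSequences
            int indexInterval = 8;      // stand-in for IndexInterval_Renamed
            const int pageSize = 128;   // same page size as in Build()

            int valueCount       = (numSequences - 1) / indexInterval + 1;   // 125 index entries
            int initialPageCount = (valueCount + pageSize - 1) / pageSize;   // 1 page per monotonic buffer

            System.Console.WriteLine($"{valueCount} index entries, {initialPageCount} page(s) per buffer");
        }
    }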
Example #2
        /// <summary>
        /// Computes the old-to-new permutation over the given comparator.
        /// </summary>
        private static Sorter.DocMap Sort(int maxDoc, DocComparator comparator)
        {
            // check if the index is sorted
            bool sorted = true;

            for (int i = 1; i < maxDoc; ++i)
            {
                if (comparator.Compare(i - 1, i) > 0)
                {
                    sorted = false;
                    break;
                }
            }
            if (sorted)
            {
                return(null);
            }

            // sort doc IDs
            int[] docs = new int[maxDoc];
            for (int i = 0; i < maxDoc; i++)
            {
                docs[i] = i;
            }

            DocValueSorter sorter = new DocValueSorter(docs, comparator);

            // It can be common to sort a reader, add docs, sort it again, ... and in
            // that case timSort can save a lot of time
            sorter.Sort(0, docs.Length); // docs is now the newToOld mapping

            // The reason why we use MonotonicAppendingLongBuffer here is that it
            // wastes very little memory if the index is in random order but can save
            // a lot of memory if the index is already "almost" sorted
            MonotonicAppendingLongBuffer newToOld = new MonotonicAppendingLongBuffer();

            for (int i = 0; i < maxDoc; ++i)
            {
                newToOld.Add(docs[i]);
            }
            newToOld.Freeze();

            for (int i = 0; i < maxDoc; ++i)
            {
                docs[(int)newToOld.Get(i)] = i;
            } // docs is now the oldToNew mapping

            MonotonicAppendingLongBuffer oldToNew = new MonotonicAppendingLongBuffer();

            for (int i = 0; i < maxDoc; ++i)
            {
                oldToNew.Add(docs[i]);
            }
            oldToNew.Freeze();

            return(new DocMapAnonymousInnerClassHelper(maxDoc, newToOld, oldToNew));
        }
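The tail of Sort() is the core MonotonicAppendingLongBuffer idiom on this page: append values, Freeze(), then read them back with Get(). Below is a minimal standalone sketch of the newToOld / oldToNew inversion; the Lucene.Net.Util.Packed using directive is an assumption about where the class lives, and the sample permutation is invented.

    // Minimal sketch, assuming MonotonicAppendingLongBuffer is available from
    // Lucene.Net.Util.Packed and exposes the Add/Freeze/Get/Size members used above.
    using Lucene.Net.Util.Packed;

    static class PermutationInversionSketch
    {
        static void Main()
        {
            int[] newToOldArr = { 2, 0, 3, 1 };  // hypothetical newToOld permutation

            // Despite the name, the buffer accepts arbitrary values; it is simply
            // cheapest when the sequence is close to sorted (see the comment in Sort()).
            var newToOld = new MonotonicAppendingLongBuffer();
            foreach (int oldDoc in newToOldArr)
            {
                newToOld.Add(oldDoc);
            }
            newToOld.Freeze();  // read-only from here on

            // Invert the permutation, like the inversion loop in Sort():
            // oldToNew[old doc id] = new doc id.
            long[] oldToNew = new long[newToOld.Size()];
            for (int i = 0; i < oldToNew.Length; ++i)
            {
                oldToNew[(int)newToOld.Get(i)] = i;
            }
            // oldToNew is now { 1, 3, 0, 2 }
        }
    }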
Example #3
            /// <summary>
            /// Creates an ordinal map that allows mapping ords to/from a merged
            /// space from <code>subs</code>. </summary>
            /// <param name="owner"> a cache key </param>
            /// <param name="subs"> TermsEnums that support <seealso cref="TermsEnum#ord()"/>. They need
            ///             not be dense (e.g. can be FilteredTermsEnums). </param>
            /// <exception cref="IOException"> if an I/O error occurred. </exception>
            public OrdinalMap(object owner, TermsEnum[] subs)
            {
                // create the ordinal mappings by pulling a termsenum over each sub's
                // unique terms, and walking a multitermsenum over those
                this.Owner      = owner;
                GlobalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
                FirstSegments   = new AppendingPackedLongBuffer(PackedInts.COMPACT);
                OrdDeltas       = new MonotonicAppendingLongBuffer[subs.Length];
                for (int i = 0; i < OrdDeltas.Length; i++)
                {
                    OrdDeltas[i] = new MonotonicAppendingLongBuffer();
                }
                long[]           segmentOrds = new long[subs.Length];
                ReaderSlice[]    slices      = new ReaderSlice[subs.Length];
                TermsEnumIndex[] indexes     = new TermsEnumIndex[slices.Length];
                for (int i = 0; i < slices.Length; i++)
                {
                    slices[i]  = new ReaderSlice(0, 0, i);
                    indexes[i] = new TermsEnumIndex(subs[i], i);
                }
                MultiTermsEnum mte = new MultiTermsEnum(slices);

                mte.Reset(indexes);
                long globalOrd = 0;

                while (mte.Next() != null)
                {
                    TermsEnumWithSlice[] matches = mte.MatchArray;
                    for (int i = 0; i < mte.MatchCount; i++)
                    {
                        int  segmentIndex = matches[i].Index;
                        long segmentOrd   = matches[i].Terms.Ord();
                        long delta        = globalOrd - segmentOrd;
                        // for each unique term, just mark the first segment index/delta where it occurs
                        if (i == 0)
                        {
                            FirstSegments.Add(segmentIndex);
                            GlobalOrdDeltas.Add(delta);
                        }
                        // for each per-segment ord, map it back to the global term.
                        while (segmentOrds[segmentIndex] <= segmentOrd)
                        {
                            OrdDeltas[segmentIndex].Add(delta);
                            segmentOrds[segmentIndex]++;
                        }
                    }
                    globalOrd++;
                }
                FirstSegments.Freeze();
                GlobalOrdDeltas.Freeze();
                for (int i = 0; i < OrdDeltas.Length; ++i)
                {
                    OrdDeltas[i].Freeze();
                }
            }
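A tiny made-up case shows what those buffers end up holding. With two segments whose sorted unique terms are { "a", "c" } and { "b", "c" }, the merged terms are a, b, c with global ords 0, 1, 2; FirstSegments comes out as [0, 1, 0] (assuming segment 0 is listed first in MatchArray for "c"), GlobalOrdDeltas as [0, 1, 1], and the per-segment deltas as in the sketch below, which uses plain arrays instead of the packed buffers and is illustrative only.

    // Illustration with made-up data: plain arrays standing in for the packed
    // buffers built by the OrdinalMap constructor above.
    static class OrdinalMapSketch
    {
        static void Main()
        {
            // ordDeltas[segment][segmentOrd]: add this to a segment ord to get the global ord.
            long[][] ordDeltas =
            {
                new long[] { 0, 1 },  // segment 0 terms: "a" (ord 0), "c" (ord 1)
                new long[] { 1, 1 },  // segment 1 terms: "b" (ord 0), "c" (ord 1)
            };

            // "c" has ord 1 in segment 1; its global ord in the merged space is 2.
            long globalOrdOfC = 1 + ordDeltas[1][1];
            System.Console.WriteLine(globalOrdOfC);  // prints 2
        }
    }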
Example #4
            /// <summary>
            /// Build the <seealso cref="PForDeltaDocIdSet"/> instance. </summary>
            public virtual PForDeltaDocIdSet Build()
            {
                Debug.Assert(BufferSize < BLOCK_SIZE);

                if (Cardinality == 0)
                {
                    Debug.Assert(PreviousDoc == -1);
                    return(EMPTY);
                }

                EncodeBlock();
                sbyte[] dataArr = Arrays.CopyOf(Data.Bytes, Data.Length + MAX_BYTE_BLOCK_COUNT);

                int indexSize = (NumBlocks - 1) / IndexInterval_Renamed + 1;
                MonotonicAppendingLongBuffer docIDs, offsets;

                if (indexSize <= 1)
                {
                    docIDs = offsets = SINGLE_ZERO_BUFFER;
                }
                else
                {
                    const int pageSize         = 128;
                    int       initialPageCount = (indexSize + pageSize - 1) / pageSize;
                    docIDs  = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
                    offsets = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
                    // Now build the index
                    Iterator it = new Iterator(dataArr, Cardinality, int.MaxValue, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
                    for (int k = 0; k < indexSize; ++k)
                    {
                        docIDs.Add(it.DocID() + 1);
                        offsets.Add(it.Offset);
                        for (int i = 0; i < IndexInterval_Renamed; ++i)
                        {
                            it.SkipBlock();
                            if (it.DocID() == DocIdSetIterator.NO_MORE_DOCS)
                            {
                                goto indexBreak;
                            }
                        }
                        indexContinue :;
                    }
                    indexBreak :
                    docIDs.Freeze();
                    offsets.Freeze();
                }

                return(new PForDeltaDocIdSet(dataArr, Cardinality, IndexInterval_Renamed, docIDs, offsets));
            }
Example #5
        static PForDeltaDocIdSet()
        {
            SINGLE_ZERO_BUFFER.Add(0);
            SINGLE_ZERO_BUFFER.Freeze();
            int maxByteBlockCount = 0;

            for (int i = 1; i < ITERATIONS.Length; ++i)
            {
                DECODERS[i] = PackedInts.GetDecoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, i);
                Debug.Assert(BLOCK_SIZE % DECODERS[i].ByteValueCount() == 0);
                ITERATIONS[i]        = BLOCK_SIZE / DECODERS[i].ByteValueCount();
                BYTE_BLOCK_COUNTS[i] = ITERATIONS[i] * DECODERS[i].ByteBlockCount();
                maxByteBlockCount    = Math.Max(maxByteBlockCount, DECODERS[i].ByteBlockCount());
            }
            MAX_BYTE_BLOCK_COUNT = maxByteBlockCount;
        }
Example #6
            public static DocMap Build(int maxDoc, Bits liveDocs)
            {
                Debug.Assert(liveDocs != null);
                MonotonicAppendingLongBuffer docMap = new MonotonicAppendingLongBuffer();
                int del = 0;

                for (int i = 0; i < maxDoc; ++i)
                {
                    docMap.Add(i - del);
                    if (!liveDocs.Get(i))
                    {
                        ++del;
                    }
                }
                docMap.Freeze();
                int numDeletedDocs = del;

                Debug.Assert(docMap.Size() == maxDoc);
                return(new DocMapAnonymousInnerClassHelper(maxDoc, liveDocs, docMap, numDeletedDocs));
            }
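With invented data, the contents of that docMap look like the sketch below; a plain bool[] stands in for the Bits interface.

    // Sketch with invented data: what the docMap above holds for maxDoc == 6
    // when documents 1, 3 and 4 are deleted.
    static class DeletionDocMapSketch
    {
        static void Main()
        {
            bool[] liveDocs = { true, false, true, false, false, true };
            long[] docMap = new long[liveDocs.Length];

            int del = 0;
            for (int i = 0; i < liveDocs.Length; ++i)
            {
                docMap[i] = i - del;     // new doc id = old id minus deletions seen so far
                if (!liveDocs[i])
                {
                    ++del;
                }
            }
            // docMap == { 0, 1, 1, 2, 2, 2 }: live docs 0, 2 and 5 map to new ids 0, 1 and 2.
            // Entries at deleted positions are placeholders that keep the sequence monotonic,
            // which is exactly what MonotonicAppendingLongBuffer compresses well.
        }
    }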
Example #7
            internal virtual MonotonicAppendingLongBuffer GetDeletes(IList <AtomicReader> readers)
            {
                MonotonicAppendingLongBuffer deletes = new MonotonicAppendingLongBuffer();
                int deleteCount = 0;

                foreach (AtomicReader reader in readers)
                {
                    int  maxDoc   = reader.MaxDoc;
                    Bits liveDocs = reader.LiveDocs;
                    for (int i = 0; i < maxDoc; ++i)
                    {
                        if (liveDocs != null && !liveDocs.Get(i))
                        {
                            ++deleteCount;
                        }
                        else
                        {
                            deletes.Add(deleteCount);
                        }
                    }
                }
                deletes.Freeze();
                return(deletes);
            }
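GetDeletes() builds the complementary view: one entry per surviving document holding the number of deletions before it, so for a single reader the old doc id is the new doc id plus its entry. The sketch below reuses the invented liveDocs from the sketch after Example #6.

    // Sketch with the same invented liveDocs as above: the delete counts that
    // GetDeletes() appends for each surviving document.
    static class DeleteCountSketch
    {
        static void Main()
        {
            bool[] liveDocs = { true, false, true, false, false, true };

            var deletes = new System.Collections.Generic.List<long>();
            int deleteCount = 0;
            foreach (bool live in liveDocs)
            {
                if (!live)
                {
                    ++deleteCount;
                }
                else
                {
                    deletes.Add(deleteCount);
                }
            }
            // deletes == { 0, 1, 3 }; adding each entry to its index recovers the old
            // doc id of that surviving document: 0 + 0 = 0, 1 + 1 = 2, 2 + 3 = 5.
        }
    }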
Example #8
 static WAH8DocIdSet()
 {
     SINGLE_ZERO_BUFFER.Add(0L);
     SINGLE_ZERO_BUFFER.Freeze();
 }
Example #9
            /// <summary>
            /// Build the <seealso cref="PForDeltaDocIdSet"/> instance. </summary>
            public virtual PForDeltaDocIdSet Build()
            {
                Debug.Assert(BufferSize < BLOCK_SIZE);

                if (Cardinality == 0)
                {
                    Debug.Assert(PreviousDoc == -1);
                    return EMPTY;
                }

                EncodeBlock();
                sbyte[] dataArr = Arrays.CopyOf(Data.Bytes, Data.Length + MAX_BYTE_BLOCK_COUNT);

                int indexSize = (NumBlocks - 1) / IndexInterval_Renamed + 1;
                MonotonicAppendingLongBuffer docIDs, offsets;
                if (indexSize <= 1)
                {
                    docIDs = offsets = SINGLE_ZERO_BUFFER;
                }
                else
                {
                    const int pageSize = 128;
                    int initialPageCount = (indexSize + pageSize - 1) / pageSize;
                    docIDs = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
                    offsets = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
                    // Now build the index
                    Iterator it = new Iterator(dataArr, Cardinality, int.MaxValue, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
                    for (int k = 0; k < indexSize; ++k)
                    {
                        docIDs.Add(it.DocID() + 1);
                        offsets.Add(it.Offset);
                        for (int i = 0; i < IndexInterval_Renamed; ++i)
                        {
                            it.SkipBlock();
                            if (it.DocID() == DocIdSetIterator.NO_MORE_DOCS)
                            {
                                goto indexBreak;
                            }
                        }
                    indexContinue: ;
                    }
                indexBreak:
                    docIDs.Freeze();
                    offsets.Freeze();
                }

                return new PForDeltaDocIdSet(dataArr, Cardinality, IndexInterval_Renamed, docIDs, offsets);
            }
Example #10
 /// <summary>
 /// Creates an ordinal map that allows mapping ords to/from a merged
 /// space from <code>subs</code>. </summary>
 /// <param name="owner"> a cache key </param>
 /// <param name="subs"> TermsEnums that support <seealso cref="TermsEnum#ord()"/>. They need
 ///             not be dense (e.g. can be FilteredTermsEnums). </param>
 /// <exception cref="IOException"> if an I/O error occurred. </exception>
 public OrdinalMap(object owner, TermsEnum[] subs)
 {
     // create the ordinal mappings by pulling a termsenum over each sub's
     // unique terms, and walking a multitermsenum over those
     this.Owner = owner;
     GlobalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
     FirstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);
     OrdDeltas = new MonotonicAppendingLongBuffer[subs.Length];
     for (int i = 0; i < OrdDeltas.Length; i++)
     {
         OrdDeltas[i] = new MonotonicAppendingLongBuffer();
     }
     long[] segmentOrds = new long[subs.Length];
     ReaderSlice[] slices = new ReaderSlice[subs.Length];
     TermsEnumIndex[] indexes = new TermsEnumIndex[slices.Length];
     for (int i = 0; i < slices.Length; i++)
     {
         slices[i] = new ReaderSlice(0, 0, i);
         indexes[i] = new TermsEnumIndex(subs[i], i);
     }
     MultiTermsEnum mte = new MultiTermsEnum(slices);
     mte.Reset(indexes);
     long globalOrd = 0;
     while (mte.Next() != null)
     {
         TermsEnumWithSlice[] matches = mte.MatchArray;
         for (int i = 0; i < mte.MatchCount; i++)
         {
             int segmentIndex = matches[i].Index;
             long segmentOrd = matches[i].Terms.Ord();
             long delta = globalOrd - segmentOrd;
             // for each unique term, just mark the first segment index/delta where it occurs
             if (i == 0)
             {
                 FirstSegments.Add(segmentIndex);
                 GlobalOrdDeltas.Add(delta);
             }
             // for each per-segment ord, map it back to the global term.
             while (segmentOrds[segmentIndex] <= segmentOrd)
             {
                 OrdDeltas[segmentIndex].Add(delta);
                 segmentOrds[segmentIndex]++;
             }
         }
         globalOrd++;
     }
     FirstSegments.Freeze();
     GlobalOrdDeltas.Freeze();
     for (int i = 0; i < OrdDeltas.Length; ++i)
     {
         OrdDeltas[i].Freeze();
     }
 }
Example #11
 public static DocMap Build(int maxDoc, Bits liveDocs)
 {
     Debug.Assert(liveDocs != null);
     MonotonicAppendingLongBuffer docMap = new MonotonicAppendingLongBuffer();
     int del = 0;
     for (int i = 0; i < maxDoc; ++i)
     {
         docMap.Add(i - del);
         if (!liveDocs.Get(i))
         {
             ++del;
         }
     }
     docMap.Freeze();
     int numDeletedDocs = del;
     Debug.Assert(docMap.Size() == maxDoc);
     return new DocMapAnonymousInnerClassHelper(maxDoc, liveDocs, docMap, numDeletedDocs);
 }