/// <summary>
/// Build a new <seealso cref="WAH8DocIdSet"/>.
/// </summary>
public virtual WAH8DocIdSet Build()
{
    if (Cardinality == 0)
    {
        Debug.Assert(LastWordNum == -1);
        return EMPTY;
    }
    WriteSequence();
    byte[] data = Arrays.CopyOf((byte[])(Array)@out.Bytes, @out.Length);

    // Now build the index
    int valueCount = (NumSequences - 1) / IndexInterval_Renamed + 1;
    MonotonicAppendingLongBuffer indexPositions, indexWordNums;
    if (valueCount <= 1)
    {
        indexPositions = indexWordNums = SINGLE_ZERO_BUFFER;
    }
    else
    {
        const int pageSize = 128;
        int initialPageCount = (valueCount + pageSize - 1) / pageSize;
        MonotonicAppendingLongBuffer positions = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
        MonotonicAppendingLongBuffer wordNums = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);

        positions.Add(0L);
        wordNums.Add(0L);
        Iterator it = new Iterator(data, Cardinality, int.MaxValue, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
        Debug.Assert(it.@in.Position == 0);
        Debug.Assert(it.WordNum == -1);
        for (int i = 1; i < valueCount; ++i)
        {
            // skip indexInterval sequences
            for (int j = 0; j < IndexInterval_Renamed; ++j)
            {
                bool readSequence = it.ReadSequence();
                Debug.Assert(readSequence);
                it.SkipDirtyBytes();
            }
            int position = it.@in.Position;
            int wordNum = it.WordNum;
            positions.Add(position);
            wordNums.Add(wordNum + 1);
        }
        positions.Freeze();
        wordNums.Freeze();
        indexPositions = positions;
        indexWordNums = wordNums;
    }
    return new WAH8DocIdSet(data, Cardinality, IndexInterval_Renamed, indexPositions, indexWordNums);
}
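// Illustrative usage sketch (not part of the original source): driving the
// builder above to produce a compressed set. Assumes this port's
// WAH8DocIdSet.Builder exposes Add(int docId) and Build(); treat those member
// names as assumptions if this snapshot differs.
public static WAH8DocIdSet BuildSampleWah8Set()
{
    WAH8DocIdSet.Builder builder = new WAH8DocIdSet.Builder();
    // Doc IDs must be added in strictly increasing order.
    foreach (int doc in new[] { 3, 7, 4096 })
    {
        builder.Add(doc);
    }
    return builder.Build();
}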
/// <summary>
/// Computes the old-to-new permutation over the given comparator.
/// </summary>
private static Sorter.DocMap Sort(int maxDoc, DocComparator comparator)
{
    // check if the index is sorted
    bool sorted = true;
    for (int i = 1; i < maxDoc; ++i)
    {
        if (comparator.Compare(i - 1, i) > 0)
        {
            sorted = false;
            break;
        }
    }
    if (sorted)
    {
        return null;
    }

    // sort doc IDs
    int[] docs = new int[maxDoc];
    for (int i = 0; i < maxDoc; i++)
    {
        docs[i] = i;
    }
    DocValueSorter sorter = new DocValueSorter(docs, comparator);
    // It can be common to sort a reader, add docs, sort it again, ... and in
    // that case timSort can save a lot of time
    sorter.Sort(0, docs.Length); // docs is now the newToOld mapping

    // The reason why we use MonotonicAppendingLongBuffer here is that it
    // wastes very little memory if the index is in random order but can save
    // a lot of memory if the index is already "almost" sorted
    MonotonicAppendingLongBuffer newToOld = new MonotonicAppendingLongBuffer();
    for (int i = 0; i < maxDoc; ++i)
    {
        newToOld.Add(docs[i]);
    }
    newToOld.Freeze();

    for (int i = 0; i < maxDoc; ++i)
    {
        docs[(int)newToOld.Get(i)] = i;
    } // docs is now the oldToNew mapping

    MonotonicAppendingLongBuffer oldToNew = new MonotonicAppendingLongBuffer();
    for (int i = 0; i < maxDoc; ++i)
    {
        oldToNew.Add(docs[i]);
    }
    oldToNew.Freeze();

    return new DocMapAnonymousInnerClassHelper(maxDoc, newToOld, oldToNew);
}
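// Illustrative sketch (not part of the original source): a comparator that
// could be passed to Sort(int, DocComparator) above, ordering documents by a
// precomputed per-document value. Assumes DocComparator is an abstract class
// with an overridable Compare(int, int); the class name here is hypothetical.
private class ValueDocComparator : DocComparator
{
    private readonly long[] values; // one sort key per doc ID

    internal ValueDocComparator(long[] values)
    {
        this.values = values;
    }

    public override int Compare(int docID1, int docID2)
    {
        return values[docID1].CompareTo(values[docID2]);
    }
}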
/// <summary>
/// Creates an ordinal map that allows mapping ords to/from a merged
/// space from <code>subs</code>. </summary>
/// <param name="owner"> a cache key </param>
/// <param name="subs"> TermsEnums that support <seealso cref="TermsEnum#ord()"/>. They need
/// not be dense (e.g. can be FilteredTermsEnums). </param>
/// <exception cref="IOException"> if an I/O error occurred. </exception>
public OrdinalMap(object owner, TermsEnum[] subs)
{
    // create the ordinal mappings by pulling a termsenum over each sub's
    // unique terms, and walking a multitermsenum over those
    this.Owner = owner;
    GlobalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
    FirstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);
    OrdDeltas = new MonotonicAppendingLongBuffer[subs.Length];
    for (int i = 0; i < OrdDeltas.Length; i++)
    {
        OrdDeltas[i] = new MonotonicAppendingLongBuffer();
    }
    long[] segmentOrds = new long[subs.Length];
    ReaderSlice[] slices = new ReaderSlice[subs.Length];
    TermsEnumIndex[] indexes = new TermsEnumIndex[slices.Length];
    for (int i = 0; i < slices.Length; i++)
    {
        slices[i] = new ReaderSlice(0, 0, i);
        indexes[i] = new TermsEnumIndex(subs[i], i);
    }
    MultiTermsEnum mte = new MultiTermsEnum(slices);
    mte.Reset(indexes);
    long globalOrd = 0;
    while (mte.Next() != null)
    {
        TermsEnumWithSlice[] matches = mte.MatchArray;
        for (int i = 0; i < mte.MatchCount; i++)
        {
            int segmentIndex = matches[i].Index;
            long segmentOrd = matches[i].Terms.Ord();
            long delta = globalOrd - segmentOrd;
            // for each unique term, just mark the first segment index/delta where it occurs
            if (i == 0)
            {
                FirstSegments.Add(segmentIndex);
                GlobalOrdDeltas.Add(delta);
            }
            // for each per-segment ord, map it back to the global term.
            while (segmentOrds[segmentIndex] <= segmentOrd)
            {
                OrdDeltas[segmentIndex].Add(delta);
                segmentOrds[segmentIndex]++;
            }
        }
        globalOrd++;
    }
    FirstSegments.Freeze();
    GlobalOrdDeltas.Freeze();
    for (int i = 0; i < OrdDeltas.Length; ++i)
    {
        OrdDeltas[i].Freeze();
    }
}
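// Illustrative sketch (not part of the original source) of how the buffers
// built above are typically consumed, mirroring Lucene's OrdinalMap accessors.
// The method shells are assumptions; only the delta arithmetic comes from the
// constructor above.
public virtual long GetGlobalOrd(int segmentIndex, long segmentOrd)
{
    // A per-segment ord plus its recorded delta yields the merged-space ord.
    return segmentOrd + OrdDeltas[segmentIndex].Get(segmentOrd);
}

public virtual long GetFirstSegmentOrd(long globalOrd)
{
    // Inverse direction: strip the delta recorded for the first segment that
    // introduced this term.
    return globalOrd - GlobalOrdDeltas.Get(globalOrd);
}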
/// <summary>
/// Build the <seealso cref="PForDeltaDocIdSet"/> instance.
/// </summary>
public virtual PForDeltaDocIdSet Build()
{
    Debug.Assert(BufferSize < BLOCK_SIZE);

    if (Cardinality == 0)
    {
        Debug.Assert(PreviousDoc == -1);
        return EMPTY;
    }

    EncodeBlock();
    sbyte[] dataArr = Arrays.CopyOf(Data.Bytes, Data.Length + MAX_BYTE_BLOCK_COUNT);

    int indexSize = (NumBlocks - 1) / IndexInterval_Renamed + 1;
    MonotonicAppendingLongBuffer docIDs, offsets;
    if (indexSize <= 1)
    {
        docIDs = offsets = SINGLE_ZERO_BUFFER;
    }
    else
    {
        const int pageSize = 128;
        int initialPageCount = (indexSize + pageSize - 1) / pageSize;
        docIDs = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
        offsets = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
        // Now build the index
        Iterator it = new Iterator(dataArr, Cardinality, int.MaxValue, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
        for (int k = 0; k < indexSize; ++k)
        {
            docIDs.Add(it.DocID() + 1);
            offsets.Add(it.Offset);
            for (int i = 0; i < IndexInterval_Renamed; ++i)
            {
                it.SkipBlock();
                if (it.DocID() == DocIdSetIterator.NO_MORE_DOCS)
                {
                    goto indexBreak; // labeled break out of both loops
                }
            }
        }
        indexBreak:
        docIDs.Freeze();
        offsets.Freeze();
    }

    return new PForDeltaDocIdSet(dataArr, Cardinality, IndexInterval_Renamed, docIDs, offsets);
}
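// Illustrative usage sketch (not part of the original source), parallel to the
// WAH8 example: feeding strictly increasing doc IDs to this builder. Assumes
// PForDeltaDocIdSet.Builder exposes Add(int doc) and Build(); member names are
// assumptions.
public static PForDeltaDocIdSet BuildSamplePForDeltaSet()
{
    PForDeltaDocIdSet.Builder builder = new PForDeltaDocIdSet.Builder();
    for (int doc = 0; doc < 1000; doc += 3) // strictly increasing doc IDs
    {
        builder.Add(doc);
    }
    return builder.Build();
}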
static PForDeltaDocIdSet()
{
    SINGLE_ZERO_BUFFER.Add(0);
    SINGLE_ZERO_BUFFER.Freeze();
    int maxByteBlockCount = 0;
    for (int i = 1; i < ITERATIONS.Length; ++i)
    {
        DECODERS[i] = PackedInts.GetDecoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, i);
        Debug.Assert(BLOCK_SIZE % DECODERS[i].ByteValueCount() == 0);
        ITERATIONS[i] = BLOCK_SIZE / DECODERS[i].ByteValueCount();
        BYTE_BLOCK_COUNTS[i] = ITERATIONS[i] * DECODERS[i].ByteBlockCount();
        maxByteBlockCount = Math.Max(maxByteBlockCount, DECODERS[i].ByteBlockCount());
    }
    MAX_BYTE_BLOCK_COUNT = maxByteBlockCount;
}
public static DocMap Build(int maxDoc, Bits liveDocs)
{
    Debug.Assert(liveDocs != null);
    MonotonicAppendingLongBuffer docMap = new MonotonicAppendingLongBuffer();
    int del = 0;
    for (int i = 0; i < maxDoc; ++i)
    {
        docMap.Add(i - del);
        if (!liveDocs.Get(i))
        {
            ++del;
        }
    }
    docMap.Freeze();
    int numDeletedDocs = del;
    Debug.Assert(docMap.Size() == maxDoc);
    return new DocMapAnonymousInnerClassHelper(maxDoc, liveDocs, docMap, numDeletedDocs);
}
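// Illustrative sketch (not part of the original source) of what the anonymous
// DocMap returned above typically does in Lucene: a deleted doc maps to -1,
// and a live doc is shifted down by the number of deletions before it. The
// class shell and member names here are assumptions about this port.
private class LiveDocMapSketch : DocMap
{
    private readonly Bits liveDocs;
    private readonly MonotonicAppendingLongBuffer docMap;

    internal LiveDocMapSketch(Bits liveDocs, MonotonicAppendingLongBuffer docMap)
    {
        this.liveDocs = liveDocs;
        this.docMap = docMap;
    }

    public override int Get(int docID)
    {
        return liveDocs.Get(docID) ? (int)docMap.Get(docID) : -1;
    }
}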
internal virtual MonotonicAppendingLongBuffer GetDeletes(IList<AtomicReader> readers)
{
    MonotonicAppendingLongBuffer deletes = new MonotonicAppendingLongBuffer();
    int deleteCount = 0;
    foreach (AtomicReader reader in readers)
    {
        int maxDoc = reader.MaxDoc;
        Bits liveDocs = reader.LiveDocs;
        for (int i = 0; i < maxDoc; ++i)
        {
            if (liveDocs != null && !liveDocs.Get(i))
            {
                ++deleteCount;
            }
            else
            {
                deletes.Add(deleteCount);
            }
        }
    }
    deletes.Freeze();
    return deletes;
}
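// Illustrative sketch (not part of the original source): GetDeletes appends
// one entry per live doc, holding the number of deletions seen before it, so
// the frozen buffer maps a post-merge doc ID back to its position in the
// concatenated readers. The helper below is hypothetical.
internal static int MapToPreMergeDocId(MonotonicAppendingLongBuffer deletes, int mergedDocId)
{
    // mergedDocId indexes live docs in order; adding the deletions that
    // precede it recovers the doc's index before deleted docs were squeezed out.
    return (int)(mergedDocId + deletes.Get(mergedDocId));
}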
static WAH8DocIdSet()
{
    SINGLE_ZERO_BUFFER.Add(0L);
    SINGLE_ZERO_BUFFER.Freeze();
}