// TODO: once we remove the deprecated termText() method
// and switch entirely to char[] termBuffer we don't need
// to use this method anymore, only for late init of the buffer
private void InitTermBuffer()
{
    if (termBuffer == null)
    {
        if (termText == null)
        {
            termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)];
            termLength = 0;
        }
        else
        {
            int length = termText.Length;
            if (length < MIN_BUFFER_SIZE)
            {
                length = MIN_BUFFER_SIZE;
            }
            termBuffer = new char[ArrayUtil.GetNextSize(length)];
            termLength = termText.Length;
            SupportClass.TextSupport.GetCharsFromString(termText, 0, termText.Length, termBuffer, 0);
            termText = null;
        }
    }
    else
    {
        termText = null;
    }
}
internal virtual void AddPosition(int position, int startOffset, int length, int payloadLength)
{
    if (hasPositions)
    {
        if (posStart + totalPositions == outerInstance.positionsBuf.Length)
        {
            outerInstance.positionsBuf = ArrayUtil.Grow(outerInstance.positionsBuf);
        }
        outerInstance.positionsBuf[posStart + totalPositions] = position;
    }
    if (hasOffsets)
    {
        if (offStart + totalPositions == outerInstance.startOffsetsBuf.Length)
        {
            int newLength = ArrayUtil.Oversize(offStart + totalPositions, 4);
            outerInstance.startOffsetsBuf = Arrays.CopyOf(outerInstance.startOffsetsBuf, newLength);
            outerInstance.lengthsBuf = Arrays.CopyOf(outerInstance.lengthsBuf, newLength);
        }
        outerInstance.startOffsetsBuf[offStart + totalPositions] = startOffset;
        outerInstance.lengthsBuf[offStart + totalPositions] = length;
    }
    if (hasPayloads)
    {
        if (payStart + totalPositions == outerInstance.payloadLengthsBuf.Length)
        {
            outerInstance.payloadLengthsBuf = ArrayUtil.Grow(outerInstance.payloadLengthsBuf);
        }
        outerInstance.payloadLengthsBuf[payStart + totalPositions] = payloadLength;
    }
    ++totalPositions;
}
private void CheckReadBytes(IndexInput input, int size, int pos)
{
    // Just to see that "offset" is treated properly in readBytes(), we
    // add an arbitrary offset at the beginning of the array
    int offset = size % 10; // arbitrary
    buffer = ArrayUtil.Grow(buffer, offset + size);
    Assert.AreEqual(pos, input.GetFilePointer());
    long left = TEST_FILE_LENGTH - input.GetFilePointer();
    if (left <= 0)
    {
        return;
    }
    else if (left < size)
    {
        size = (int)left;
    }
    input.ReadBytes(buffer, offset, size);
    Assert.AreEqual(pos + size, input.GetFilePointer());
    for (int i = 0; i < size; i++)
    {
        Assert.AreEqual(Byten(pos + i), buffer[offset + i], "pos=" + i + " filepos=" + (pos + i));
    }
}
public void Reset()
{
    // Shrink back if we are over-allocated now:
    docIDs = ArrayUtil.Shrink(docIDs, upto);
    norms = ArrayUtil.Shrink(norms, upto);
    upto = 0;
}
/// <summary>Grows the termBuffer to at least size newSize, preserving the
/// existing content. Note: If the next operation is to change
/// the contents of the term buffer use
/// {@link #SetTermBuffer(char[], int, int)},
/// {@link #SetTermBuffer(String)}, or
/// {@link #SetTermBuffer(String, int, int)}
/// to optimally combine the resize with the setting of the termBuffer.
/// </summary>
/// <param name="newSize">minimum size of the new termBuffer
/// </param>
/// <returns> newly created termBuffer with length >= newSize
/// </returns>
public virtual char[] ResizeTermBuffer(int newSize)
{
    if (termBuffer == null)
    {
        // The buffer is always at least MIN_BUFFER_SIZE
        newSize = newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize;
        // Preserve termText
        if (termText != null)
        {
            int ttLen = termText.Length;
            newSize = newSize < ttLen ? ttLen : newSize;
            termBuffer = new char[ArrayUtil.GetNextSize(newSize)];
            SupportClass.TextSupport.GetCharsFromString(termText, 0, termText.Length, termBuffer, 0);
            termText = null;
        }
        else
        {
            // no termText, this is the first allocation
            termBuffer = new char[ArrayUtil.GetNextSize(newSize)];
        }
    }
    else if (termBuffer.Length < newSize)
    {
        // Not big enough; create a new array with slight
        // over-allocation and preserve content
        char[] newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)];
        Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length);
        termBuffer = newCharBuffer;
    }
    return termBuffer;
}
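// A minimal usage sketch for the grow-then-append pattern above (hypothetical
// caller; assumes the Token accessors TermLength() and SetTermLength(int)
// from this same class): resize first so the existing content is preserved,
// then write the new chars after it. If the old content is about to be
// replaced entirely, prefer the SetTermBuffer(...) overloads instead.
static void AppendToTerm(Token token, string suffix)
{
    int oldLength = token.TermLength();
    char[] buf = token.ResizeTermBuffer(oldLength + suffix.Length); // grows, keeps content
    suffix.CopyTo(0, buf, oldLength, suffix.Length);                // append in place
    token.SetTermLength(oldLength + suffix.Length);
}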
/// <summary>
/// Sole constructor. </summary>
public CompressingTermVectorsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context,
    string formatName, CompressionMode compressionMode, int chunkSize)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(directory != null);
    }
    this.directory = directory;
    this.segment = si.Name;
    this.segmentSuffix = segmentSuffix;
    this.compressionMode = compressionMode;
    this.compressor = compressionMode.NewCompressor();
    this.chunkSize = chunkSize;

    numDocs = 0;
    pendingDocs = new LinkedList<DocData>();
    termSuffixes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(chunkSize, 1));
    payloadBytes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(1, 1));
    lastTerm = new BytesRef(ArrayUtil.Oversize(30, 1));

    bool success = false;
    IndexOutput indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), context);
    try
    {
        vectorsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);

        string codecNameIdx = formatName + CODEC_SFX_IDX;
        string codecNameDat = formatName + CODEC_SFX_DAT;
        CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
        CodecUtil.WriteHeader(vectorsStream, codecNameDat, VERSION_CURRENT);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.GetFilePointer());
            Debugging.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());
        }

        indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
        indexStream = null;

        vectorsStream.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        vectorsStream.WriteVInt32(chunkSize);
        writer = new BlockPackedWriter(vectorsStream, BLOCK_SIZE);

        positionsBuf = new int[1024];
        startOffsetsBuf = new int[1024];
        lengthsBuf = new int[1024];
        payloadLengthsBuf = new int[1024];

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(indexStream);
            Abort();
        }
    }
}
public override int GetHashCode()
{
    InitTermBuffer();
    int code = termLength;
    code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength);
    return code;
}
private void InitTermBuffer()
{
    if (termBuffer == null)
    {
        termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)];
        termLength = 0;
    }
}
public void GetPostings(RawPostingList[] postings)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings start"));

        System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsFreeList.Length);
        System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsAllocCount, "postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount);

        int numToCopy;
        if (postingsFreeCount < postings.Length)
        {
            numToCopy = postingsFreeCount;
        }
        else
        {
            numToCopy = postings.Length;
        }
        int start = postingsFreeCount - numToCopy;
        System.Diagnostics.Debug.Assert(start >= 0);
        System.Diagnostics.Debug.Assert(start + numToCopy <= postingsFreeList.Length);
        System.Diagnostics.Debug.Assert(numToCopy <= postings.Length);
        Array.Copy(postingsFreeList, start, postings, 0, numToCopy);

        // Directly allocate the remainder if any
        if (numToCopy != postings.Length)
        {
            int extra = postings.Length - numToCopy;
            int newPostingsAllocCount = postingsAllocCount + extra;

            consumer.CreatePostings(postings, numToCopy, extra);
            System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings after create"));
            postingsAllocCount += extra;

            if (trackAllocations)
            {
                docWriter.BytesAllocated(extra * bytesPerPosting);
            }

            if (newPostingsAllocCount > postingsFreeList.Length)
            {
                // Pre-allocate the postingsFreeList so it's large
                // enough to hold all postings we've given out
                postingsFreeList = new RawPostingList[ArrayUtil.GetNextSize(newPostingsAllocCount)];
            }
        }

        postingsFreeCount -= numToCopy;

        if (trackAllocations)
        {
            docWriter.BytesUsed(postings.Length * bytesPerPosting);
        }
    }
}
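// Worked example of the recycling math above (illustrative numbers only):
// with postingsFreeCount = 3, postings.Length = 5 and postingsAllocCount = 8,
// numToCopy = 3 recycled instances are copied from the tail of the free list
// (start = 0), the remaining extra = 2 are freshly allocated by the consumer,
// postingsAllocCount becomes 10, and the free list is pre-grown so that all
// 10 outstanding postings can later be recycled back into it.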
internal void AddField(int fieldNumber)
{
    if (numVectorFields == fieldNumbers.Length)
    {
        fieldNumbers = ArrayUtil.Grow(fieldNumbers);
        fieldPointers = ArrayUtil.Grow(fieldPointers);
    }
    fieldNumbers[numVectorFields] = fieldNumber;
    fieldPointers[numVectorFields] = perDocTvf.FilePointer;
    numVectorFields++;
}
internal void addField(int fieldNumber)
{
    if (numVectorFields == fieldNumbers.Length)
    {
        fieldNumbers = ArrayUtil.Grow(fieldNumbers);
        fieldPointers = ArrayUtil.Grow(fieldPointers);
    }
    fieldNumbers[numVectorFields] = fieldNumber;
    fieldPointers[numVectorFields] = tvf.GetFilePointer();
    numVectorFields++;
}
internal void ShrinkFreePostings(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

    int newSize = ArrayUtil.GetShrinkSize(postingsFreeList.Length, postingsAllocCount);
    if (newSize != postingsFreeList.Length)
    {
        RawPostingList[] newArray = new RawPostingList[newSize];
        Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
        postingsFreeList = newArray;
    }
}
internal void shrinkFreePostings(IDictionary<object, ICollection<object>> threadsAndFields, DocumentsWriter.FlushState state)
{
    System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

    int newSize = ArrayUtil.GetShrinkSize(postingsFreeList.Length, postingsAllocCount);
    if (newSize != postingsFreeList.Length)
    {
        RawPostingList[] newArray = new RawPostingList[newSize];
        System.Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
        postingsFreeList = newArray;
    }
}
public override int GetHashCode()
{
    InitTermBuffer();
    int code = termLength;
    code = code * 31 + startOffset;
    code = code * 31 + endOffset;
    code = code * 31 + flags;
    code = code * 31 + positionIncrement;
    code = code * 31 + type.GetHashCode();
    code = (payload == null ? code : code * 31 + payload.GetHashCode());
    code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength);
    return code;
}
internal virtual FormatPostingsDocsConsumer AddTerm(System.String text)
{
    int len = text.Length;
    if (termBuffer == null || termBuffer.Length < 1 + len)
    {
        termBuffer = new char[ArrayUtil.GetNextSize(1 + len)];
    }
    for (int i = 0; i < len; i++)
    {
        termBuffer[i] = (char)text[i];
    }
    termBuffer[len] = (char)0xffff;
    return AddTerm(termBuffer, 0);
}
public override void StartTerm(BytesRef term, int freq)
{
    Debug.Assert(freq >= 1);
    int prefix = StringHelper.BytesDifference(lastTerm, term);
    curField.AddTerm(freq, prefix, term.Length - prefix);
    termSuffixes.WriteBytes(term.Bytes, term.Offset + prefix, term.Length - prefix);
    // copy last term
    if (lastTerm.Bytes.Length < term.Length)
    {
        lastTerm.Bytes = new byte[ArrayUtil.Oversize(term.Length, 1)];
    }
    lastTerm.Offset = 0;
    lastTerm.Length = term.Length;
    Array.Copy(term.Bytes, term.Offset, lastTerm.Bytes, 0, term.Length);
}
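// Worked example of the prefix sharing above (illustrative terms only):
// with lastTerm = "apple" and term = "apply", BytesDifference returns 4
// (the length of the shared prefix), so only the 1-byte suffix "y" is
// written to termSuffixes; a reader rebuilds "apply" from the previous
// term plus (prefix = 4, suffixLength = 1).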
internal override void Finish()
{
    System.Diagnostics.Debug.Assert(docIDs.Length == norms.Length);
    if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
    {
        if (docIDs.Length <= upto)
        {
            System.Diagnostics.Debug.Assert(docIDs.Length == upto);
            docIDs = ArrayUtil.Grow(docIDs, 1 + upto);
            norms = ArrayUtil.Grow(norms, 1 + upto);
        }
        float norm = docState.similarity.ComputeNorm(fieldInfo.name, fieldState);
        norms[upto] = Similarity.EncodeNorm(norm);
        docIDs[upto] = docState.docID;
        upto++;
    }
}
/// <summary>Allocates a buffer char[] of at least newSize, without preserving the existing content.
/// It is always used in places that set the content afterwards.
/// </summary>
/// <param name="newSize">minimum size of the buffer
/// </param>
private void GrowTermBuffer(int newSize)
{
    if (termBuffer == null)
    {
        // The buffer is always at least MIN_BUFFER_SIZE
        termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
    }
    else if (termBuffer.Length < newSize)
    {
        // Not big enough; create a new array with slight
        // over-allocation:
        termBuffer = new char[ArrayUtil.GetNextSize(newSize)];
    }
}
public override void Decompress(DataInput @in, int originalLength, int offset, int length, BytesRef bytes)
{
    Debug.Assert(offset + length <= originalLength);
    // add 7 padding bytes; this is not necessary but can help decompression run faster
    if (bytes.Bytes.Length < originalLength + 7)
    {
        bytes.Bytes = new byte[ArrayUtil.Oversize(originalLength + 7, 1)];
    }
    int decompressedLength = LZ4.Decompress(@in, offset + length, bytes.Bytes, 0);
    if (decompressedLength > originalLength)
    {
        throw new CorruptIndexException("Corrupted: lengths mismatch: " + decompressedLength + " > " + originalLength + " (resource=" + @in + ")");
    }
    bytes.Offset = offset;
    bytes.Length = length;
}
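// Note on the "+ 7" padding above (an assumption based on how Lucene's LZ4
// fast path is commonly described, not verified against this exact port):
// the decompressor may copy matches in 8-byte chunks and overshoot the
// exact output length, so the extra headroom lets it skip a bounds-checked
// tail; callers never see the overshoot because bytes.Length is set to the
// requested length only.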
/// <summary>Grows the termBuffer to at least size newSize, preserving the
/// existing content. Note: If the next operation is to change
/// the contents of the term buffer use
/// {@link #SetTermBuffer(char[], int, int)},
/// {@link #SetTermBuffer(String)}, or
/// {@link #SetTermBuffer(String, int, int)}
/// to optimally combine the resize with the setting of the termBuffer.
/// </summary>
/// <param name="newSize">minimum size of the new termBuffer
/// </param>
/// <returns> newly created termBuffer with length >= newSize
/// </returns>
public virtual char[] ResizeTermBuffer(int newSize)
{
    if (termBuffer == null)
    {
        // The buffer is always at least MIN_BUFFER_SIZE
        termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
    }
    else if (termBuffer.Length < newSize)
    {
        // Not big enough; create a new array with slight
        // over-allocation and preserve content
        char[] newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)];
        Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length);
        termBuffer = newCharBuffer;
    }
    return termBuffer;
}
/// <summary>
/// Allocates a buffer char[] of at least newSize.
/// </summary>
/// <param name="newSize">minimum size of the buffer</param>
/// <returns>newly created buffer with length >= newSize, or null if the current termBuffer is big enough</returns>
private char[] GrowTermBuffer(int newSize)
{
    if (termBuffer != null)
    {
        if (termBuffer.Length >= newSize)
        {
            // Already big enough
            return null;
        }
        else
        {
            // Not big enough; create a new array with slight
            // over-allocation
            return new char[ArrayUtil.GetNextSize(newSize)];
        }
    }
    else
    {
        // determine the best size
        // The buffer is always at least MIN_BUFFER_SIZE
        if (newSize < MIN_BUFFER_SIZE)
        {
            newSize = MIN_BUFFER_SIZE;
        }

        // If there is already a termText, then the size has to be at least that big
        if (termText != null)
        {
            int ttLength = termText.Length;
            if (newSize < ttLength)
            {
                newSize = ttLength;
            }
        }
        return new char[newSize];
    }
}
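// A minimal caller sketch for the null contract above (hypothetical method
// on the same class; `text` stands in for whatever new content is being
// stored): a null return means the existing termBuffer can be reused as-is,
// while a non-null return must replace it before writing.
private void SetTermText(string text)
{
    char[] newBuffer = GrowTermBuffer(text.Length);
    if (newBuffer != null)
    {
        termBuffer = newBuffer; // old content intentionally discarded
    }
    text.CopyTo(0, termBuffer, 0, text.Length);
    termLength = text.Length;
}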
internal PerDoc GetPerDoc()
{
    lock (this)
    {
        if (freeCount == 0)
        {
            allocCount++;
            if (allocCount > docFreeList.Length)
            {
                // Grow our free list up front to make sure we have
                // enough space to recycle all outstanding PerDoc
                // instances
                System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
                docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)];
            }
            return new PerDoc(this);
        }
        else
        {
            return docFreeList[--freeCount];
        }
    }
}
/// <summary>
/// Subclass &amp; override if you change the encoding.
/// </summary>
protected virtual void Decode(BytesRef buf, Int32sRef ordinals)
{
    // grow the buffer up front, even if by a large number of values (buf.Length);
    // that saves the need to check inside the loop for every decoded value if
    // the buffer needs to grow.
    if (ordinals.Int32s.Length < buf.Length)
    {
        ordinals.Int32s = ArrayUtil.Grow(ordinals.Int32s, buf.Length);
    }

    ordinals.Offset = 0;
    ordinals.Length = 0;

    // it is better if the decoding is inlined like so, and not e.g.
    // in a utility method
    int upto = buf.Offset + buf.Length;
    int value = 0;
    int offset = buf.Offset;
    int prev = 0;
    while (offset < upto)
    {
        byte b = buf.Bytes[offset++];
        if ((sbyte)b >= 0)
        {
            ordinals.Int32s[ordinals.Length] = ((value << 7) | b) + prev;
            value = 0;
            prev = ordinals.Int32s[ordinals.Length];
            ordinals.Length++;
        }
        else
        {
            value = (value << 7) | (b & 0x7F);
        }
    }
}
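// Worked example of the decoding above (illustrative bytes only): each
// ordinal is stored as a delta from the previous one, encoded as a VInt
// whose non-final bytes have the high bit set. For buf = { 0x09, 0x81, 0x02 }:
//   0x09      -> delta 9,                   ordinal = 0 + 9   = 9
//   0x81 0x02 -> delta (1 << 7) | 2 = 130,  ordinal = 9 + 130 = 139
// so ordinals decodes to [9, 139].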
public override int GetHashCode()
{
    return ArrayUtil.HashCode(data, internalOffset, internalOffset + internalLength);
}
public override int GetHashCode()
{
    return ArrayUtil.HashCode(data, offset, offset + length);
}
public override void AddProx(int numProx, DataInput positions, DataInput offsets)
{
    Debug.Assert((curField.hasPositions) == (positions != null));
    Debug.Assert((curField.hasOffsets) == (offsets != null));

    if (curField.hasPositions)
    {
        int posStart = curField.posStart + curField.totalPositions;
        if (posStart + numProx > positionsBuf.Length)
        {
            positionsBuf = ArrayUtil.Grow(positionsBuf, posStart + numProx);
        }
        int position = 0;
        if (curField.hasPayloads)
        {
            int payStart = curField.payStart + curField.totalPositions;
            if (payStart + numProx > payloadLengthsBuf.Length)
            {
                payloadLengthsBuf = ArrayUtil.Grow(payloadLengthsBuf, payStart + numProx);
            }
            for (int i = 0; i < numProx; ++i)
            {
                int code = positions.ReadVInt32();
                if ((code & 1) != 0)
                {
                    // this position has a payload
                    int payloadLength = positions.ReadVInt32();
                    payloadLengthsBuf[payStart + i] = payloadLength;
                    payloadBytes.CopyBytes(positions, payloadLength);
                }
                else
                {
                    payloadLengthsBuf[payStart + i] = 0;
                }
                position += (int)((uint)code >> 1);
                positionsBuf[posStart + i] = position;
            }
        }
        else
        {
            for (int i = 0; i < numProx; ++i)
            {
                position += (int)((uint)positions.ReadVInt32() >> 1);
                positionsBuf[posStart + i] = position;
            }
        }
    }

    if (curField.hasOffsets)
    {
        int offStart = curField.offStart + curField.totalPositions;
        if (offStart + numProx > startOffsetsBuf.Length)
        {
            int newLength = ArrayUtil.Oversize(offStart + numProx, 4);
            startOffsetsBuf = Arrays.CopyOf(startOffsetsBuf, newLength);
            lengthsBuf = Arrays.CopyOf(lengthsBuf, newLength);
        }
        int lastOffset = 0, startOffset, endOffset;
        for (int i = 0; i < numProx; ++i)
        {
            startOffset = lastOffset + offsets.ReadVInt32();
            endOffset = startOffset + offsets.ReadVInt32();
            lastOffset = endOffset;
            startOffsetsBuf[offStart + i] = startOffset;
            lengthsBuf[offStart + i] = endOffset - startOffset;
        }
    }

    curField.totalPositions += numProx;
}
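// Worked example of the position stream format above (illustrative values
// only): each VInt packs a position delta in its upper bits and a
// has-payload flag in bit 0. Reading code = 7 gives delta = 7 >> 1 = 3 with
// flag = 1, so a payload-length VInt follows and the payload bytes are
// copied; reading code = 4 next gives delta = 2 with no payload, so the
// absolute positions decoded so far are [3, 5].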
internal readonly PackedInt32s.Reader[] startPointersDeltas; // delta from the avg

// It is the responsibility of the caller to close fieldsIndexIn after this
// constructor has been called
internal CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si)
{
    maxDoc = si.DocCount;
    int[] docBases = new int[16];
    long[] startPointers = new long[16];
    int[] avgChunkDocs = new int[16];
    long[] avgChunkSizes = new long[16];
    PackedInt32s.Reader[] docBasesDeltas = new PackedInt32s.Reader[16];
    PackedInt32s.Reader[] startPointersDeltas = new PackedInt32s.Reader[16];

    int packedIntsVersion = fieldsIndexIn.ReadVInt32();

    int blockCount = 0;
    for (; ; )
    {
        int numChunks = fieldsIndexIn.ReadVInt32();
        if (numChunks == 0)
        {
            break;
        }
        if (blockCount == docBases.Length)
        {
            int newSize = ArrayUtil.Oversize(blockCount + 1, 8);
            docBases = Arrays.CopyOf(docBases, newSize);
            startPointers = Arrays.CopyOf(startPointers, newSize);
            avgChunkDocs = Arrays.CopyOf(avgChunkDocs, newSize);
            avgChunkSizes = Arrays.CopyOf(avgChunkSizes, newSize);
            docBasesDeltas = Arrays.CopyOf(docBasesDeltas, newSize);
            startPointersDeltas = Arrays.CopyOf(startPointersDeltas, newSize);
        }

        // doc bases
        docBases[blockCount] = fieldsIndexIn.ReadVInt32();
        avgChunkDocs[blockCount] = fieldsIndexIn.ReadVInt32();
        int bitsPerDocBase = fieldsIndexIn.ReadVInt32();
        if (bitsPerDocBase > 32)
        {
            throw new CorruptIndexException("Corrupted bitsPerDocBase (resource=" + fieldsIndexIn + ")");
        }
        docBasesDeltas[blockCount] = PackedInt32s.GetReaderNoHeader(fieldsIndexIn, PackedInt32s.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);

        // start pointers
        startPointers[blockCount] = fieldsIndexIn.ReadVInt64();
        avgChunkSizes[blockCount] = fieldsIndexIn.ReadVInt64();
        int bitsPerStartPointer = fieldsIndexIn.ReadVInt32();
        if (bitsPerStartPointer > 64)
        {
            throw new CorruptIndexException("Corrupted bitsPerStartPointer (resource=" + fieldsIndexIn + ")");
        }
        startPointersDeltas[blockCount] = PackedInt32s.GetReaderNoHeader(fieldsIndexIn, PackedInt32s.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);

        ++blockCount;
    }

    this.docBases = Arrays.CopyOf(docBases, blockCount);
    this.startPointers = Arrays.CopyOf(startPointers, blockCount);
    this.avgChunkDocs = Arrays.CopyOf(avgChunkDocs, blockCount);
    this.avgChunkSizes = Arrays.CopyOf(avgChunkSizes, blockCount);
    this.docBasesDeltas = Arrays.CopyOf(docBasesDeltas, blockCount);
    this.startPointersDeltas = Arrays.CopyOf(startPointersDeltas, blockCount);
}