/// <summary>
/// Records the data of one position in the shared buffers held by <c>outerInstance</c>
/// and then increments <c>totalPositions</c>.
/// Each value is written only when the matching flag
/// (<c>hasPositions</c>, <c>hasOffsets</c>, <c>hasPayloads</c>) is set.
/// </summary>
/// <param name="position">Value written to <c>positionsBuf</c> when positions are enabled.</param>
/// <param name="startOffset">Value written to <c>startOffsetsBuf</c> when offsets are enabled.</param>
/// <param name="length">Value written to <c>lengthsBuf</c> when offsets are enabled.</param>
/// <param name="payloadLength">Value written to <c>payloadLengthsBuf</c> when payloads are enabled.</param>
internal virtual void AddPosition(int position, int startOffset, int length, int payloadLength)
{
    if (hasPositions)
    {
        int slot = posStart + totalPositions;
        if (slot == outerInstance.positionsBuf.Length)
        {
            // The target slot is exactly one past the end: enlarge before writing.
            outerInstance.positionsBuf = ArrayUtil.Grow(outerInstance.positionsBuf);
        }
        outerInstance.positionsBuf[slot] = position;
    }

    if (hasOffsets)
    {
        int slot = offStart + totalPositions;
        if (slot == outerInstance.startOffsetsBuf.Length)
        {
            // Start offsets and lengths are resized together, to the same new length.
            int grownLength = ArrayUtil.Oversize(slot, 4);
            outerInstance.startOffsetsBuf = Arrays.CopyOf(outerInstance.startOffsetsBuf, grownLength);
            outerInstance.lengthsBuf = Arrays.CopyOf(outerInstance.lengthsBuf, grownLength);
        }
        outerInstance.startOffsetsBuf[slot] = startOffset;
        outerInstance.lengthsBuf[slot] = length;
    }

    if (hasPayloads)
    {
        int slot = payStart + totalPositions;
        if (slot == outerInstance.payloadLengthsBuf.Length)
        {
            outerInstance.payloadLengthsBuf = ArrayUtil.Grow(outerInstance.payloadLengthsBuf);
        }
        outerInstance.payloadLengthsBuf[slot] = payloadLength;
    }

    totalPositions++;
}
/// <summary>
/// Sole constructor. Creates the vectors index output and the vectors data output in
/// <paramref name="directory"/>, writes a codec header to each of them, and allocates the
/// in-memory buffers used while writing.
/// <para/>
/// If anything fails after the index output has been created, the index output is disposed
/// (unless it has already been handed to the index writer) and <c>Abort()</c> is called.
/// </summary>
/// <param name="directory">Directory in which both outputs are created.</param>
/// <param name="si">Segment info; only its <c>Name</c> is read here.</param>
/// <param name="segmentSuffix">Suffix used when building both file names.</param>
/// <param name="context">I/O context passed to <c>CreateOutput</c>.</param>
/// <param name="formatName">Prefix of the codec names written into the two headers.</param>
/// <param name="compressionMode">Mode from which the compressor is created.</param>
/// <param name="chunkSize">Chunk size; also written to the vectors data output.</param>
public CompressingTermVectorsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(directory != null);
    }

    this.directory = directory;
    this.segment = si.Name;
    this.segmentSuffix = segmentSuffix;
    this.compressionMode = compressionMode;
    this.compressor = compressionMode.NewCompressor();
    this.chunkSize = chunkSize;

    numDocs = 0;
    pendingDocs = new LinkedList<DocData>();
    termSuffixes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(chunkSize, 1));
    payloadBytes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(1, 1));
    lastTerm = new BytesRef(ArrayUtil.Oversize(30, 1));

    bool initialized = false;
    string indexFileName = IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
    IndexOutput indexOut = directory.CreateOutput(indexFileName, context);
    try
    {
        string dataFileName = IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
        vectorsStream = directory.CreateOutput(dataFileName, context);

        string indexCodecName = formatName + CODEC_SFX_IDX;
        string dataCodecName = formatName + CODEC_SFX_DAT;
        CodecUtil.WriteHeader(indexOut, indexCodecName, VERSION_CURRENT);
        CodecUtil.WriteHeader(vectorsStream, dataCodecName, VERSION_CURRENT);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(CodecUtil.HeaderLength(dataCodecName) == vectorsStream.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            Debugging.Assert(CodecUtil.HeaderLength(indexCodecName) == indexOut.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        }

        indexWriter = new CompressingStoredFieldsIndexWriter(indexOut);
        // The stream has been passed to indexWriter; clear the local so the failure path
        // below does not dispose it a second time through this reference.
        indexOut = null;

        vectorsStream.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        vectorsStream.WriteVInt32(chunkSize);
        writer = new BlockPackedWriter(vectorsStream, BLOCK_SIZE);

        const int initialBufferLength = 1024;
        positionsBuf = new int[initialBufferLength];
        startOffsetsBuf = new int[initialBufferLength];
        lengthsBuf = new int[initialBufferLength];
        payloadLengthsBuf = new int[initialBufferLength];

        initialized = true;
    }
    finally
    {
        if (!initialized)
        {
            IOUtils.DisposeWhileHandlingException(indexOut);
            Abort();
        }
    }
}
/// <summary>
/// Begins a new term: registers it on the current field, appends the part of the term that
/// follows the prefix to <c>termSuffixes</c>, and then copies the whole term into
/// <c>lastTerm</c> so the next call can be compared against it.
/// </summary>
/// <param name="term">The term being started.</param>
/// <param name="freq">Term frequency; asserted to be at least 1 when assertions are enabled.</param>
public override void StartTerm(BytesRef term, int freq)
{
    // Use the runtime-switchable assertion helper, the same form the other assertions in
    // this file use, instead of System.Diagnostics.Debug.Assert.
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(freq >= 1);
    }

    // Value returned by BytesDifference for (lastTerm, term); presumably the length of the
    // prefix shared with the previous term - confirm against StringHelper.
    int prefix = StringHelper.BytesDifference(lastTerm, term);
    int suffixLength = term.Length - prefix;
    curField.AddTerm(freq, prefix, suffixLength);
    termSuffixes.WriteBytes(term.Bytes, term.Offset + prefix, suffixLength);

    // copy last term
    if (lastTerm.Bytes.Length < term.Length)
    {
        lastTerm.Bytes = new byte[ArrayUtil.Oversize(term.Length, 1)];
    }
    lastTerm.Offset = 0;
    lastTerm.Length = term.Length;
    Array.Copy(term.Bytes, term.Offset, lastTerm.Bytes, 0, term.Length);
}
/// <summary>
/// Decompresses data read from <paramref name="in"/> into <paramref name="bytes"/> and
/// points <paramref name="bytes"/> at the requested slice
/// (<paramref name="offset"/>, <paramref name="length"/>).
/// The backing array of <paramref name="bytes"/> is replaced by a larger one when it is
/// shorter than <paramref name="originalLength"/> plus 7 bytes.
/// </summary>
/// <param name="in">Input the compressed data is read from.</param>
/// <param name="originalLength">Upper bound for the number of decompressed bytes.</param>
/// <param name="offset">Offset of the requested slice.</param>
/// <param name="length">Length of the requested slice.</param>
/// <param name="bytes">Receives the decompressed bytes and the slice bounds.</param>
/// <exception cref="CorruptIndexException">
/// The decompressed length reported by <c>LZ4.Decompress</c> exceeds <paramref name="originalLength"/>.
/// </exception>
public override void Decompress(DataInput @in, int originalLength, int offset, int length, BytesRef bytes)
{
    Debug.Assert(offset + length <= originalLength);

    // add 7 padding bytes, this is not necessary but can help decompression run faster
    int paddedLength = originalLength + 7;
    if (bytes.Bytes.Length < paddedLength)
    {
        bytes.Bytes = new byte[ArrayUtil.Oversize(paddedLength, 1)];
    }

    // Only the bytes up to the end of the requested slice are asked for.
    int sliceEnd = offset + length;
    int produced = LZ4.Decompress(@in, sliceEnd, bytes.Bytes, 0);
    if (produced > originalLength)
    {
        throw new CorruptIndexException("Corrupted: lengths mismatch: " + produced + " > " + originalLength + " (resource=" + @in + ")");
    }

    bytes.Offset = offset;
    bytes.Length = length;
}
internal readonly PackedInt32s.Reader[] startPointersDeltas; // delta from the avg

/// <summary>
/// Reads the index from <paramref name="fieldsIndexIn"/>: a packed-ints version followed by
/// a sequence of blocks, ending at the first block whose chunk count is 0. For every block
/// the doc base, average chunk docs, start pointer, average chunk size and the two packed
/// delta readers are stored.
/// <para/>
/// It is the responsibility of the caller to close <paramref name="fieldsIndexIn"/> after
/// this constructor has been called.
/// </summary>
/// <param name="fieldsIndexIn">Input positioned at the start of the index data.</param>
/// <param name="si">Segment info; only its <c>DocCount</c> is read here.</param>
/// <exception cref="CorruptIndexException">
/// A block declares more than 32 bits per doc base or more than 64 bits per start pointer.
/// </exception>
internal CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si)
{
    maxDoc = si.DocCount;

    const int initialCapacity = 16;
    int[] docBaseBuf = new int[initialCapacity];
    long[] startPointerBuf = new long[initialCapacity];
    int[] avgChunkDocsBuf = new int[initialCapacity];
    long[] avgChunkSizeBuf = new long[initialCapacity];
    PackedInt32s.Reader[] docBaseDeltaBuf = new PackedInt32s.Reader[initialCapacity];
    PackedInt32s.Reader[] startPointerDeltaBuf = new PackedInt32s.Reader[initialCapacity];

    int packedIntsVersion = fieldsIndexIn.ReadVInt32();

    int blockCount = 0;
    while (true)
    {
        int numChunks = fieldsIndexIn.ReadVInt32();
        if (numChunks == 0)
        {
            // A chunk count of 0 terminates the list of blocks.
            break;
        }

        if (blockCount == docBaseBuf.Length)
        {
            // All six parallel arrays are enlarged together to the same size.
            int grownSize = ArrayUtil.Oversize(blockCount + 1, 8);
            docBaseBuf = Arrays.CopyOf(docBaseBuf, grownSize);
            startPointerBuf = Arrays.CopyOf(startPointerBuf, grownSize);
            avgChunkDocsBuf = Arrays.CopyOf(avgChunkDocsBuf, grownSize);
            avgChunkSizeBuf = Arrays.CopyOf(avgChunkSizeBuf, grownSize);
            docBaseDeltaBuf = Arrays.CopyOf(docBaseDeltaBuf, grownSize);
            startPointerDeltaBuf = Arrays.CopyOf(startPointerDeltaBuf, grownSize);
        }

        // doc bases
        docBaseBuf[blockCount] = fieldsIndexIn.ReadVInt32();
        avgChunkDocsBuf[blockCount] = fieldsIndexIn.ReadVInt32();
        int bitsPerDocBase = fieldsIndexIn.ReadVInt32();
        if (bitsPerDocBase > 32)
        {
            throw new CorruptIndexException("Corrupted bitsPerDocBase (resource=" + fieldsIndexIn + ")");
        }
        docBaseDeltaBuf[blockCount] = PackedInt32s.GetReaderNoHeader(
            fieldsIndexIn, PackedInt32s.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);

        // start pointers
        startPointerBuf[blockCount] = fieldsIndexIn.ReadVInt64();
        avgChunkSizeBuf[blockCount] = fieldsIndexIn.ReadVInt64();
        int bitsPerStartPointer = fieldsIndexIn.ReadVInt32();
        if (bitsPerStartPointer > 64)
        {
            throw new CorruptIndexException("Corrupted bitsPerStartPointer (resource=" + fieldsIndexIn + ")");
        }
        startPointerDeltaBuf[blockCount] = PackedInt32s.GetReaderNoHeader(
            fieldsIndexIn, PackedInt32s.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);

        blockCount++;
    }

    // Trim every array to the number of blocks actually read.
    this.docBases = Arrays.CopyOf(docBaseBuf, blockCount);
    this.startPointers = Arrays.CopyOf(startPointerBuf, blockCount);
    this.avgChunkDocs = Arrays.CopyOf(avgChunkDocsBuf, blockCount);
    this.avgChunkSizes = Arrays.CopyOf(avgChunkSizeBuf, blockCount);
    this.docBasesDeltas = Arrays.CopyOf(docBaseDeltaBuf, blockCount);
    this.startPointersDeltas = Arrays.CopyOf(startPointerDeltaBuf, blockCount);
}
/// <summary>
/// Bulk-adds <paramref name="numProx"/> positions and/or offsets for the current field by
/// decoding them from the given inputs into the writer's buffers, then advances
/// <c>curField.totalPositions</c> by <paramref name="numProx"/>.
/// <para/>
/// Positions are read as VInts whose value shifted right by one bit is added to a running
/// position. When the field has payloads, a set low bit means a payload length VInt follows
/// and that many bytes are copied into <c>payloadBytes</c>. Offsets are read as pairs of
/// VInts: the first is added to the previous end offset to give the start offset, the second
/// is added to the start offset to give the end offset.
/// </summary>
/// <param name="numProx">Number of entries to read from each supplied input.</param>
/// <param name="positions">Source of position data; asserted non-null exactly when the field has positions.</param>
/// <param name="offsets">Source of offset data; asserted non-null exactly when the field has offsets.</param>
public override void AddProx(int numProx, DataInput positions, DataInput offsets)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert((curField.hasPositions) == (positions != null));
        Debugging.Assert((curField.hasOffsets) == (offsets != null));
    }

    if (curField.hasPositions)
    {
        int posBase = curField.posStart + curField.totalPositions;
        int posEnd = posBase + numProx;
        if (posEnd > positionsBuf.Length)
        {
            positionsBuf = ArrayUtil.Grow(positionsBuf, posEnd);
        }

        int runningPosition = 0;
        if (!curField.hasPayloads)
        {
            for (int i = 0; i < numProx; ++i)
            {
                runningPosition += positions.ReadVInt32().TripleShift(1);
                positionsBuf[posBase + i] = runningPosition;
            }
        }
        else
        {
            int payBase = curField.payStart + curField.totalPositions;
            int payEnd = payBase + numProx;
            if (payEnd > payloadLengthsBuf.Length)
            {
                payloadLengthsBuf = ArrayUtil.Grow(payloadLengthsBuf, payEnd);
            }

            for (int i = 0; i < numProx; ++i)
            {
                int code = positions.ReadVInt32();
                bool hasPayload = (code & 1) != 0;
                if (hasPayload)
                {
                    // this position has a payload
                    int payloadLength = positions.ReadVInt32();
                    payloadLengthsBuf[payBase + i] = payloadLength;
                    payloadBytes.CopyBytes(positions, payloadLength);
                }
                else
                {
                    payloadLengthsBuf[payBase + i] = 0;
                }

                runningPosition += code.TripleShift(1);
                positionsBuf[posBase + i] = runningPosition;
            }
        }
    }

    if (curField.hasOffsets)
    {
        int offBase = curField.offStart + curField.totalPositions;
        int offEnd = offBase + numProx;
        if (offEnd > startOffsetsBuf.Length)
        {
            // Start offsets and lengths are resized together, to the same new length.
            int grownLength = ArrayUtil.Oversize(offEnd, 4);
            startOffsetsBuf = Arrays.CopyOf(startOffsetsBuf, grownLength);
            lengthsBuf = Arrays.CopyOf(lengthsBuf, grownLength);
        }

        int previousEnd = 0;
        for (int i = 0; i < numProx; ++i)
        {
            int start = previousEnd + offsets.ReadVInt32();
            int end = start + offsets.ReadVInt32();
            previousEnd = end;
            startOffsetsBuf[offBase + i] = start;
            lengthsBuf[offBase + i] = end - start;
        }
    }

    curField.totalPositions += numProx;
}