예제 #1
0
 /// <summary>
 /// Appends the data of one term occurrence to the shared buffers of the
 /// enclosing writer and advances <c>totalPositions</c> by one.
 /// <para/>
 /// Each kind of data is only stored when the matching flag
 /// (<c>hasPositions</c>, <c>hasOffsets</c>, <c>hasPayloads</c>) is set. A buffer is
 /// enlarged when the slot about to be written is exactly one past its end.
 /// </summary>
 /// <param name="position">Value stored in <c>positionsBuf</c> when positions are enabled.</param>
 /// <param name="startOffset">Value stored in <c>startOffsetsBuf</c> when offsets are enabled.</param>
 /// <param name="length">Value stored in <c>lengthsBuf</c> when offsets are enabled.</param>
 /// <param name="payloadLength">Value stored in <c>payloadLengthsBuf</c> when payloads are enabled.</param>
 internal virtual void AddPosition(int position, int startOffset, int length, int payloadLength)
 {
     if (hasPositions)
     {
         int posSlot = posStart + totalPositions;
         if (posSlot == outerInstance.positionsBuf.Length)
         {
             outerInstance.positionsBuf = ArrayUtil.Grow(outerInstance.positionsBuf);
         }
         outerInstance.positionsBuf[posSlot] = position;
     }

     if (hasOffsets)
     {
         int offSlot = offStart + totalPositions;
         if (offSlot == outerInstance.startOffsetsBuf.Length)
         {
             // Start offsets and lengths are resized together, to the same new length.
             int grownLength = ArrayUtil.Oversize(offSlot, 4);
             outerInstance.startOffsetsBuf = Arrays.CopyOf(outerInstance.startOffsetsBuf, grownLength);
             outerInstance.lengthsBuf = Arrays.CopyOf(outerInstance.lengthsBuf, grownLength);
         }
         outerInstance.startOffsetsBuf[offSlot] = startOffset;
         outerInstance.lengthsBuf[offSlot] = length;
     }

     if (hasPayloads)
     {
         int paySlot = payStart + totalPositions;
         if (paySlot == outerInstance.payloadLengthsBuf.Length)
         {
             outerInstance.payloadLengthsBuf = ArrayUtil.Grow(outerInstance.payloadLengthsBuf);
         }
         outerInstance.payloadLengthsBuf[paySlot] = payloadLength;
     }

     totalPositions++;
 }
예제 #2
0
        /// <summary>
        /// Sole constructor. Creates the vectors index output and the vectors data output
        /// in <paramref name="directory"/>, writes a codec header to each of them, and
        /// allocates the in-memory buffers used by this writer.
        /// <para/>
        /// If anything inside the guarded section throws, the index output (when it has not
        /// yet been handed to the index writer) is disposed and <c>Abort()</c> is called
        /// before the exception propagates.
        /// </summary>
        /// <param name="directory">Directory the two outputs are created in; asserted non-null when asserts are enabled.</param>
        /// <param name="si">Segment info; only its <c>Name</c> is read here.</param>
        /// <param name="segmentSuffix">Suffix passed to <c>IndexFileNames.SegmentFileName</c> for both file names.</param>
        /// <param name="context">I/O context passed to <c>directory.CreateOutput</c>.</param>
        /// <param name="formatName">Prefix of the codec names written in the two headers.</param>
        /// <param name="compressionMode">Mode that supplies the compressor via <c>NewCompressor()</c>.</param>
        /// <param name="chunkSize">Stored in the writer, written to the vectors stream, and used to size the term-suffix buffer.</param>
        public CompressingTermVectorsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(directory != null);
            }
            this.directory       = directory;
            this.segment         = si.Name;
            this.segmentSuffix   = segmentSuffix;
            this.compressionMode = compressionMode;
            this.compressor      = compressionMode.NewCompressor();
            this.chunkSize       = chunkSize;

            // In-memory state: pending documents plus growable scratch buffers.
            numDocs      = 0;
            pendingDocs  = new LinkedList <DocData>();
            termSuffixes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(chunkSize, 1));
            payloadBytes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(1, 1));
            lastTerm     = new BytesRef(ArrayUtil.Oversize(30, 1));

            // The index output is created outside the try block; the finally block below
            // disposes it on failure as long as the local still references it.
            bool        success     = false;
            IndexOutput indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), context);

            try
            {
                vectorsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);

                // Each file gets its own codec name derived from formatName.
                string codecNameIdx = formatName + CODEC_SFX_IDX;
                string codecNameDat = formatName + CODEC_SFX_DAT;
                CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
                CodecUtil.WriteHeader(vectorsStream, codecNameDat, VERSION_CURRENT);
                if (Debugging.AssertsEnabled)
                {
                    // Sanity check: nothing but the header has been written to either stream so far.
                    Debugging.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                    Debugging.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.Position);   // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                }

                indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
                // Clear the local so the finally block no longer disposes the stream itself;
                // presumably indexWriter owns it from here on (confirm against Abort()).
                indexStream = null;

                // Immediately after the header, the data file records the packed-ints version and the chunk size.
                vectorsStream.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                vectorsStream.WriteVInt32(chunkSize);
                writer = new BlockPackedWriter(vectorsStream, BLOCK_SIZE);

                // Initial capacity of the per-position buffers.
                positionsBuf      = new int[1024];
                startOffsetsBuf   = new int[1024];
                lengthsBuf        = new int[1024];
                payloadLengthsBuf = new int[1024];

                success = true;
            }
            finally
            {
                if (!success)
                {
                    // Failure path: dispose the index output if it is still held locally, then abort.
                    IOUtils.DisposeWhileHandlingException(indexStream);
                    Abort();
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Begins a new term for the current field. Only the bytes of <paramref name="term"/>
        /// from the index returned by <c>StringHelper.BytesDifference(lastTerm, term)</c>
        /// onwards are written to <c>termSuffixes</c>; the full term is then copied into
        /// <c>lastTerm</c> for the next call.
        /// </summary>
        /// <param name="term">The term being started.</param>
        /// <param name="freq">Term frequency; asserted to be at least 1.</param>
        public override void StartTerm(BytesRef term, int freq)
        {
            Debug.Assert(freq >= 1);

            int sharedPrefix = StringHelper.BytesDifference(lastTerm, term);
            int suffixLength = term.Length - sharedPrefix;

            curField.AddTerm(freq, sharedPrefix, suffixLength);
            termSuffixes.WriteBytes(term.Bytes, term.Offset + sharedPrefix, suffixLength);

            // Keep a private copy of this term; reallocate only when the existing array is too small.
            int termLength = term.Length;
            if (termLength > lastTerm.Bytes.Length)
            {
                lastTerm.Bytes = new byte[ArrayUtil.Oversize(termLength, 1)];
            }
            lastTerm.Offset = 0;
            lastTerm.Length = termLength;
            Array.Copy(term.Bytes, term.Offset, lastTerm.Bytes, 0, termLength);
        }
예제 #4
0
            /// <summary>
            /// Decompresses from <paramref name="in"/> into <c>bytes.Bytes</c> starting at index 0,
            /// then points <paramref name="bytes"/> at the requested slice
            /// (<paramref name="offset"/>, <paramref name="length"/>).
            /// </summary>
            /// <param name="in">Input holding the compressed data.</param>
            /// <param name="originalLength">Upper bound for the decompressed length; also used to size the target array.</param>
            /// <param name="offset">Value assigned to <c>bytes.Offset</c>.</param>
            /// <param name="length">Value assigned to <c>bytes.Length</c>.</param>
            /// <param name="bytes">Receives the decompressed data; its array is replaced when too small.</param>
            /// <exception cref="CorruptIndexException">The decompressed length exceeds <paramref name="originalLength"/>.</exception>
            public override void Decompress(DataInput @in, int originalLength, int offset, int length, BytesRef bytes)
            {
                Debug.Assert(offset + length <= originalLength);

                // 7 bytes of padding: not required, but can help decompression run faster.
                int paddedLength = originalLength + 7;
                if (paddedLength > bytes.Bytes.Length)
                {
                    bytes.Bytes = new byte[ArrayUtil.Oversize(paddedLength, 1)];
                }

                int producedLength = LZ4.Decompress(@in, offset + length, bytes.Bytes, 0);
                if (producedLength > originalLength)
                {
                    throw new CorruptIndexException("Corrupted: lengths mismatch: " + producedLength + " > " + originalLength + " (resource=" + @in + ")");
                }

                bytes.Offset = offset;
                bytes.Length = length;
            }
        // One packed reader per index block; the constructor trims the array to the number of blocks read.
        internal readonly PackedInt32s.Reader[] startPointersDeltas; // delta from the avg

        /// <summary>
        /// Reads the whole index from <paramref name="fieldsIndexIn"/>: a packed-ints version,
        /// followed by blocks until a block with a chunk count of 0 is read. For every block
        /// a doc base, an average chunk doc count, a start pointer, an average chunk size and
        /// two packed readers (doc-base deltas and start-pointer deltas) are loaded.
        /// <para/>
        /// This constructor does not close <paramref name="fieldsIndexIn"/>; the caller must
        /// close it after the constructor has returned.
        /// </summary>
        /// <param name="fieldsIndexIn">Input positioned at the start of the index data.</param>
        /// <param name="si">Segment info; only its <c>DocCount</c> is read here.</param>
        /// <exception cref="CorruptIndexException">A bits-per-value entry is larger than 32 (doc bases) or 64 (start pointers).</exception>
        internal CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si)
        {
            maxDoc = si.DocCount;

            // Temporary per-block arrays, grown on demand and trimmed to blockCount at the end.
            int[] blockDocBases = new int[16];
            long[] blockStartPointers = new long[16];
            int[] blockAvgChunkDocs = new int[16];
            long[] blockAvgChunkSizes = new long[16];
            PackedInt32s.Reader[] blockDocBasesDeltas = new PackedInt32s.Reader[16];
            PackedInt32s.Reader[] blockStartPointersDeltas = new PackedInt32s.Reader[16];

            int packedIntsVersion = fieldsIndexIn.ReadVInt32();
            int blockCount = 0;

            // A chunk count of 0 terminates the list of blocks.
            int numChunks;
            while ((numChunks = fieldsIndexIn.ReadVInt32()) != 0)
            {
                if (blockCount == blockDocBases.Length)
                {
                    // All six arrays share one capacity, so they are grown in lock-step.
                    int grownSize = ArrayUtil.Oversize(blockCount + 1, 8);
                    blockDocBases = Arrays.CopyOf(blockDocBases, grownSize);
                    blockStartPointers = Arrays.CopyOf(blockStartPointers, grownSize);
                    blockAvgChunkDocs = Arrays.CopyOf(blockAvgChunkDocs, grownSize);
                    blockAvgChunkSizes = Arrays.CopyOf(blockAvgChunkSizes, grownSize);
                    blockDocBasesDeltas = Arrays.CopyOf(blockDocBasesDeltas, grownSize);
                    blockStartPointersDeltas = Arrays.CopyOf(blockStartPointersDeltas, grownSize);
                }

                // Doc-base section of the block.
                blockDocBases[blockCount] = fieldsIndexIn.ReadVInt32();
                blockAvgChunkDocs[blockCount] = fieldsIndexIn.ReadVInt32();
                int docBaseBits = fieldsIndexIn.ReadVInt32();
                if (docBaseBits > 32)
                {
                    throw new CorruptIndexException("Corrupted bitsPerDocBase (resource=" + fieldsIndexIn + ")");
                }
                blockDocBasesDeltas[blockCount] = PackedInt32s.GetReaderNoHeader(fieldsIndexIn, PackedInt32s.Format.PACKED, packedIntsVersion, numChunks, docBaseBits);

                // Start-pointer section of the block.
                blockStartPointers[blockCount] = fieldsIndexIn.ReadVInt64();
                blockAvgChunkSizes[blockCount] = fieldsIndexIn.ReadVInt64();
                int startPointerBits = fieldsIndexIn.ReadVInt32();
                if (startPointerBits > 64)
                {
                    throw new CorruptIndexException("Corrupted bitsPerStartPointer (resource=" + fieldsIndexIn + ")");
                }
                blockStartPointersDeltas[blockCount] = PackedInt32s.GetReaderNoHeader(fieldsIndexIn, PackedInt32s.Format.PACKED, packedIntsVersion, numChunks, startPointerBits);

                blockCount++;
            }

            // Publish exactly-sized copies to the instance fields.
            this.docBases = Arrays.CopyOf(blockDocBases, blockCount);
            this.startPointers = Arrays.CopyOf(blockStartPointers, blockCount);
            this.avgChunkDocs = Arrays.CopyOf(blockAvgChunkDocs, blockCount);
            this.avgChunkSizes = Arrays.CopyOf(blockAvgChunkSizes, blockCount);
            this.docBasesDeltas = Arrays.CopyOf(blockDocBasesDeltas, blockCount);
            this.startPointersDeltas = Arrays.CopyOf(blockStartPointersDeltas, blockCount);
        }
예제 #6
0
        /// <summary>
        /// Bulk-adds <paramref name="numProx"/> occurrences to the current field by decoding
        /// them from the given inputs into this writer's buffers.
        /// <para/>
        /// Positions are read as VInts whose value shifted right by one bit is a delta added
        /// to a running position. When the field has payloads, a set low bit means a
        /// payload length follows and that many bytes are copied to <c>payloadBytes</c>.
        /// Offsets are read as pairs: a delta from the previous end offset, then a length.
        /// </summary>
        /// <param name="numProx">Number of occurrences to read.</param>
        /// <param name="positions">Source of positions (and payloads); asserted non-null exactly when the field has positions.</param>
        /// <param name="offsets">Source of offsets; asserted non-null exactly when the field has offsets.</param>
        public override void AddProx(int numProx, DataInput positions, DataInput offsets)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert((curField.hasPositions) == (positions != null));
                Debugging.Assert((curField.hasOffsets) == (offsets != null));
            }

            if (curField.hasPositions)
            {
                int posBase = curField.posStart + curField.totalPositions;
                if (posBase + numProx > positionsBuf.Length)
                {
                    positionsBuf = ArrayUtil.Grow(positionsBuf, posBase + numProx);
                }

                int runningPosition = 0;
                if (!curField.hasPayloads)
                {
                    for (int k = 0; k < numProx; k++)
                    {
                        runningPosition += positions.ReadVInt32().TripleShift(1);
                        positionsBuf[posBase + k] = runningPosition;
                    }
                }
                else
                {
                    int payBase = curField.payStart + curField.totalPositions;
                    if (payBase + numProx > payloadLengthsBuf.Length)
                    {
                        payloadLengthsBuf = ArrayUtil.Grow(payloadLengthsBuf, payBase + numProx);
                    }

                    for (int k = 0; k < numProx; k++)
                    {
                        int code = positions.ReadVInt32();
                        if ((code & 1) == 0)
                        {
                            // No payload on this position.
                            payloadLengthsBuf[payBase + k] = 0;
                        }
                        else
                        {
                            // Low bit set: a payload length and the payload bytes follow.
                            int payloadLength = positions.ReadVInt32();
                            payloadLengthsBuf[payBase + k] = payloadLength;
                            payloadBytes.CopyBytes(positions, payloadLength);
                        }
                        runningPosition += code.TripleShift(1);
                        positionsBuf[posBase + k] = runningPosition;
                    }
                }
            }

            if (curField.hasOffsets)
            {
                int offBase = curField.offStart + curField.totalPositions;
                if (offBase + numProx > startOffsetsBuf.Length)
                {
                    // Start offsets and lengths are resized together, to the same new length.
                    int grownLength = ArrayUtil.Oversize(offBase + numProx, 4);
                    startOffsetsBuf = Arrays.CopyOf(startOffsetsBuf, grownLength);
                    lengthsBuf = Arrays.CopyOf(lengthsBuf, grownLength);
                }

                int previousEnd = 0;
                for (int k = 0; k < numProx; k++)
                {
                    int start = previousEnd + offsets.ReadVInt32();
                    int end = start + offsets.ReadVInt32();
                    previousEnd = end;
                    startOffsetsBuf[offBase + k] = start;
                    lengthsBuf[offBase + k] = end - start;
                }
            }

            curField.totalPositions += numProx;
        }