private NumericDocValues LoadNumeric(FieldInfo field)
{
    NumericEntry entry = numerics[field.Number];
    data.Seek(entry.offset + entry.missingBytes);
    switch (entry.format)
    {
        case TABLE_COMPRESSED:
            int size = data.ReadVInt32();
            if (size > 256)
            {
                throw new CorruptIndexException(
                    "TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + data);
            }
            // decode table of distinct values, followed by one packed ordinal per document
            var decode = new long[size];
            for (int i = 0; i < decode.Length; i++)
            {
                decode[i] = data.ReadInt64();
            }
            int formatID = data.ReadVInt32();
            int bitsPerValue = data.ReadVInt32();
            var ordsReader = PackedInt32s.GetReaderNoHeader(data, PackedInt32s.Format.ById(formatID),
                entry.packedIntsVersion, maxDoc, bitsPerValue);
            ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(decode) + ordsReader.RamBytesUsed());
            return new NumericDocValuesAnonymousInnerClassHelper(this, decode, ordsReader);

        case DELTA_COMPRESSED:
            int blockSize = data.ReadVInt32();
            var reader = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc, false);
            ramBytesUsed.AddAndGet(reader.RamBytesUsed());
            return reader;

        case UNCOMPRESSED:
            var bytes = new byte[maxDoc];
            data.ReadBytes(bytes, 0, bytes.Length);
            ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(bytes));
            // LUCENENET: IMPORTANT - some bytes are negative here, so we need to pass as sbyte
            return new NumericDocValuesAnonymousInnerClassHelper2(this, (sbyte[])(Array)bytes);

        case GCD_COMPRESSED:
            // values share a common divisor: store min, mult (the GCD) and per-doc quotients
            long min = data.ReadInt64();
            long mult = data.ReadInt64();
            int quotientBlockSize = data.ReadVInt32();
            var quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, quotientBlockSize, maxDoc, false);
            ramBytesUsed.AddAndGet(quotientReader.RamBytesUsed());
            return new NumericDocValuesAnonymousInnerClassHelper3(this, min, mult, quotientReader);

        default:
            throw new InvalidOperationException();
    }
}
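// A minimal sketch (not part of the producer) of how the GCD_COMPRESSED case
// above is decoded at read time: the writer stores the minimum value and the
// greatest common divisor of all deltas, so each document only needs a small
// packed quotient. The helper name below is hypothetical; the anonymous class
// returned above applies the same formula.
private static long GcdDecode(long min, long mult, BlockPackedReader quotientReader, int docID)
{
    // e.g. values { 100, 130, 190 } are stored as min=100, mult=30 and quotients { 0, 1, 3 }
    return min + mult * quotientReader.Get(docID);
}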
private NumericDocValues LoadNumeric(FieldInfo field)
{
    NumericEntry entry = numerics[field.Number];
    data.Seek(entry.Offset);
    switch (entry.Format)
    {
        case TABLE_COMPRESSED:
            int size = data.ReadVInt32();
            if (size > 256)
            {
                throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + data);
            }
            var decode = new long[size];
            for (int i = 0; i < decode.Length; i++)
            {
                decode[i] = data.ReadInt64();
            }
            int formatID = data.ReadVInt32();
            int bitsPerValue = data.ReadVInt32();
            PackedInt32s.Reader ordsReader = PackedInt32s.GetReaderNoHeader(data, PackedInt32s.Format.ById(formatID),
                entry.PackedInt32sVersion, maxDoc, bitsPerValue);
            ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(decode) + ordsReader.RamBytesUsed());
            return new NumericDocValuesAnonymousClass(decode, ordsReader);

        case DELTA_COMPRESSED:
            int blockSize = data.ReadVInt32();
            var reader = new BlockPackedReader(data, entry.PackedInt32sVersion, blockSize, maxDoc, false);
            ramBytesUsed.AddAndGet(reader.RamBytesUsed());
            return reader;

        case UNCOMPRESSED:
            byte[] bytes = new byte[maxDoc];
            data.ReadBytes(bytes, 0, bytes.Length);
            ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(bytes));
            return new NumericDocValuesAnonymousClass2(bytes);

        case GCD_COMPRESSED:
            long min = data.ReadInt64();
            long mult = data.ReadInt64();
            int quotientBlockSize = data.ReadVInt32();
            BlockPackedReader quotientReader = new BlockPackedReader(data, entry.PackedInt32sVersion, quotientBlockSize, maxDoc, false);
            ramBytesUsed.AddAndGet(quotientReader.RamBytesUsed());
            return new NumericDocValuesAnonymousClass3(min, mult, quotientReader);

        default:
            throw AssertionError.Create();
    }
}
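// A minimal sketch of the TABLE_COMPRESSED lookup path: with at most 256
// distinct values, each document stores only a packed ordinal into the decode
// table read above, so a lookup is a two-step indirection. The helper below is
// hypothetical and mirrors what the anonymous class returned for that case does.
private static long TableDecode(long[] decode, PackedInt32s.Reader ordsReader, int docID)
{
    // e.g. decode = { -1, 0, 42 }: a document whose stored ordinal is 2 has value 42
    return decode[(int)ordsReader.Get(docID)];
}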
/// <summary>
/// Sole constructor. </summary>
public BlockPackedReader(IndexInput @in, int packedIntsVersion, int blockSize, long valueCount, bool direct)
{
    this.valueCount = valueCount;
    blockShift = PackedInt32s.CheckBlockSize(blockSize, AbstractBlockPackedWriter.MIN_BLOCK_SIZE, AbstractBlockPackedWriter.MAX_BLOCK_SIZE);
    blockMask = blockSize - 1;
    int numBlocks = PackedInt32s.NumBlocks(valueCount, blockSize);
    long[] minValues = null;
    subReaders = new PackedInt32s.Reader[numBlocks];
    for (int i = 0; i < numBlocks; ++i)
    {
        int token = @in.ReadByte() & 0xFF;
        int bitsPerValue = token.TripleShift(AbstractBlockPackedWriter.BPV_SHIFT);
        if (bitsPerValue > 64)
        {
            throw new Exception("Corrupted");
        }
        if ((token & AbstractBlockPackedWriter.MIN_VALUE_EQUALS_0) == 0)
        {
            if (minValues == null)
            {
                minValues = new long[numBlocks];
            }
            minValues[i] = BlockPackedReaderIterator.ZigZagDecode(1L + BlockPackedReaderIterator.ReadVInt64(@in));
        }
        if (bitsPerValue == 0)
        {
            subReaders[i] = new PackedInt32s.NullReader(blockSize);
        }
        else
        {
            int size = (int)Math.Min(blockSize, valueCount - (long)i * blockSize);
            if (direct)
            {
                long pointer = @in.GetFilePointer();
                subReaders[i] = PackedInt32s.GetDirectReaderNoHeader(@in, PackedInt32s.Format.PACKED, packedIntsVersion, size, bitsPerValue);
                @in.Seek(pointer + PackedInt32s.Format.PACKED.ByteCount(packedIntsVersion, size, bitsPerValue));
            }
            else
            {
                subReaders[i] = PackedInt32s.GetReaderNoHeader(@in, PackedInt32s.Format.PACKED, packedIntsVersion, size, bitsPerValue);
            }
        }
    }
    this.minValues = minValues;
}
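// A minimal sketch of how a value is addressed once the constructor above has
// built minValues and subReaders: blockShift/blockMask split the global index
// into a block ordinal and an offset within the block, and the per-block
// minimum (when present) is added back. This mirrors BlockPackedReader.Get(long);
// the method name below is hypothetical.
private long GetSketch(long index)
{
    int block = (int)(index >> blockShift);   // index / blockSize
    int idx = (int)(index & blockMask);       // index % blockSize
    return (minValues == null ? 0 : minValues[block]) + subReaders[block].Get(idx);
}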
/// <summary>
/// Sole constructor. </summary>
public MonotonicBlockPackedReader(IndexInput @in, int packedIntsVersion, int blockSize, long valueCount, bool direct)
{
    this.valueCount = valueCount;
    blockShift = PackedInt32s.CheckBlockSize(blockSize, AbstractBlockPackedWriter.MIN_BLOCK_SIZE, AbstractBlockPackedWriter.MAX_BLOCK_SIZE);
    blockMask = blockSize - 1;
    int numBlocks = PackedInt32s.NumBlocks(valueCount, blockSize);
    minValues = new long[numBlocks];
    averages = new float[numBlocks];
    subReaders = new PackedInt32s.Reader[numBlocks];
    for (int i = 0; i < numBlocks; ++i)
    {
        minValues[i] = @in.ReadVInt64();
        averages[i] = J2N.BitConversion.Int32BitsToSingle(@in.ReadInt32());
        int bitsPerValue = @in.ReadVInt32();
        if (bitsPerValue > 64)
        {
            throw new Exception("Corrupted");
        }
        if (bitsPerValue == 0)
        {
            subReaders[i] = new PackedInt32s.NullReader(blockSize);
        }
        else
        {
            int size = (int)Math.Min(blockSize, valueCount - (long)i * blockSize);
            if (direct)
            {
                long pointer = @in.GetFilePointer();
                subReaders[i] = PackedInt32s.GetDirectReaderNoHeader(@in, PackedInt32s.Format.PACKED, packedIntsVersion, size, bitsPerValue);
                @in.Seek(pointer + PackedInt32s.Format.PACKED.ByteCount(packedIntsVersion, size, bitsPerValue));
            }
            else
            {
                subReaders[i] = PackedInt32s.GetReaderNoHeader(@in, PackedInt32s.Format.PACKED, packedIntsVersion, size, bitsPerValue);
            }
        }
    }
}
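// A minimal sketch of the read side for the monotonic case: each block stores
// a start value, an average slope, and zig-zag-encoded deviations from the
// extrapolated line, so a read re-applies the linear model. This mirrors
// MonotonicBlockPackedReader.Get(long); the method name below is hypothetical.
private long GetSketch(long index)
{
    int block = (int)(index >> blockShift);
    int idx = (int)(index & blockMask);
    // expected value on the line through (0, minValues[block]) with slope
    // averages[block], corrected by the stored deviation
    return minValues[block] + (long)(averages[block] * (long)idx)
        + BlockPackedReaderIterator.ZigZagDecode(subReaders[block].Get(idx));
}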
internal readonly PackedInt32s.Reader[] startPointersDeltas; // delta from the avg

// It is the responsibility of the caller to close fieldsIndexIn after this constructor
// has been called
internal CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si)
{
    maxDoc = si.DocCount;
    int[] docBases = new int[16];
    long[] startPointers = new long[16];
    int[] avgChunkDocs = new int[16];
    long[] avgChunkSizes = new long[16];
    PackedInt32s.Reader[] docBasesDeltas = new PackedInt32s.Reader[16];
    PackedInt32s.Reader[] startPointersDeltas = new PackedInt32s.Reader[16];

    int packedIntsVersion = fieldsIndexIn.ReadVInt32();

    int blockCount = 0;
    for (;;)
    {
        int numChunks = fieldsIndexIn.ReadVInt32();
        if (numChunks == 0)
        {
            break;
        }
        if (blockCount == docBases.Length)
        {
            int newSize = ArrayUtil.Oversize(blockCount + 1, 8);
            docBases = Arrays.CopyOf(docBases, newSize);
            startPointers = Arrays.CopyOf(startPointers, newSize);
            avgChunkDocs = Arrays.CopyOf(avgChunkDocs, newSize);
            avgChunkSizes = Arrays.CopyOf(avgChunkSizes, newSize);
            docBasesDeltas = Arrays.CopyOf(docBasesDeltas, newSize);
            startPointersDeltas = Arrays.CopyOf(startPointersDeltas, newSize);
        }

        // doc bases
        docBases[blockCount] = fieldsIndexIn.ReadVInt32();
        avgChunkDocs[blockCount] = fieldsIndexIn.ReadVInt32();
        int bitsPerDocBase = fieldsIndexIn.ReadVInt32();
        if (bitsPerDocBase > 32)
        {
            throw new CorruptIndexException("Corrupted bitsPerDocBase (resource=" + fieldsIndexIn + ")");
        }
        docBasesDeltas[blockCount] = PackedInt32s.GetReaderNoHeader(fieldsIndexIn, PackedInt32s.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);

        // start pointers
        startPointers[blockCount] = fieldsIndexIn.ReadVInt64();
        avgChunkSizes[blockCount] = fieldsIndexIn.ReadVInt64();
        int bitsPerStartPointer = fieldsIndexIn.ReadVInt32();
        if (bitsPerStartPointer > 64)
        {
            throw new CorruptIndexException("Corrupted bitsPerStartPointer (resource=" + fieldsIndexIn + ")");
        }
        startPointersDeltas[blockCount] = PackedInt32s.GetReaderNoHeader(fieldsIndexIn, PackedInt32s.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);

        ++blockCount;
    }

    this.docBases = Arrays.CopyOf(docBases, blockCount);
    this.startPointers = Arrays.CopyOf(startPointers, blockCount);
    this.avgChunkDocs = Arrays.CopyOf(avgChunkDocs, blockCount);
    this.avgChunkSizes = Arrays.CopyOf(avgChunkSizes, blockCount);
    this.docBasesDeltas = Arrays.CopyOf(docBasesDeltas, blockCount);
    this.startPointersDeltas = Arrays.CopyOf(startPointersDeltas, blockCount);
}
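// A minimal sketch (simplified, hypothetical helper) of how the arrays built
// above are used to locate a chunk: within a block, start pointers are stored
// as zig-zag-encoded deltas from a linear estimate based on the average chunk
// size, so a lookup extrapolates the average and adds the stored delta back.
// Finding the right block/relativeChunk (a binary search over doc bases) is
// omitted here.
private long StartPointerSketch(int block, int relativeChunk)
{
    long expected = avgChunkSizes[block] * relativeChunk;        // linear estimate
    long packed = startPointersDeltas[block].Get(relativeChunk);
    long delta = (long)((ulong)packed >> 1) ^ -(packed & 1);     // zig-zag decode
    return startPointers[block] + expected + delta;
}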
public override Fields Get(int doc)
{
    EnsureOpen();

    // seek to the right place
    {
        long startPointer = indexReader.GetStartPointer(doc);
        vectorsStream.Seek(startPointer);
    }

    // decode
    // - docBase: first doc ID of the chunk
    // - chunkDocs: number of docs of the chunk
    int docBase = vectorsStream.ReadVInt32();
    int chunkDocs = vectorsStream.ReadVInt32();
    if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs)
    {
        throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc + " (resource=" + vectorsStream + ")");
    }

    int skip; // number of fields to skip
    int numFields; // number of fields of the document we're looking for
    int totalFields; // total number of fields of the chunk (sum for all docs)
    if (chunkDocs == 1)
    {
        skip = 0;
        numFields = totalFields = vectorsStream.ReadVInt32();
    }
    else
    {
        reader.Reset(vectorsStream, chunkDocs);
        int sum = 0;
        for (int i = docBase; i < doc; ++i)
        {
            sum += (int)reader.Next();
        }
        skip = sum;
        numFields = (int)reader.Next();
        sum += numFields;
        for (int i = doc + 1; i < docBase + chunkDocs; ++i)
        {
            sum += (int)reader.Next();
        }
        totalFields = sum;
    }

    if (numFields == 0)
    {
        // no vectors
        return null;
    }

    // read field numbers that have term vectors
    int[] fieldNums;
    {
        int token = vectorsStream.ReadByte() & 0xFF;
        Debug.Assert(token != 0); // means no term vectors, cannot happen since we checked for numFields == 0
        int bitsPerFieldNum = token & 0x1F;
        int totalDistinctFields = (int)((uint)token >> 5);
        if (totalDistinctFields == 0x07)
        {
            totalDistinctFields += vectorsStream.ReadVInt32();
        }
        ++totalDistinctFields;
        PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
        fieldNums = new int[totalDistinctFields];
        for (int i = 0; i < totalDistinctFields; ++i)
        {
            fieldNums[i] = (int)it.Next();
        }
    }

    // read field numbers and flags
    int[] fieldNumOffs = new int[numFields];
    PackedInt32s.Reader flags;
    {
        int bitsPerOff = PackedInt32s.BitsRequired(fieldNums.Length - 1);
        PackedInt32s.Reader allFieldNumOffs = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
        switch (vectorsStream.ReadVInt32())
        {
            case 0:
                PackedInt32s.Reader fieldFlags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, fieldNums.Length, CompressingTermVectorsWriter.FLAGS_BITS);
                PackedInt32s.Mutable f = PackedInt32s.GetMutable(totalFields, CompressingTermVectorsWriter.FLAGS_BITS, PackedInt32s.COMPACT);
                for (int i = 0; i < totalFields; ++i)
                {
                    int fieldNumOff = (int)allFieldNumOffs.Get(i);
                    Debug.Assert(fieldNumOff >= 0 && fieldNumOff < fieldNums.Length);
                    int fgs = (int)fieldFlags.Get(fieldNumOff);
                    f.Set(i, fgs);
                }
                flags = f;
                break;
            case 1:
                flags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, CompressingTermVectorsWriter.FLAGS_BITS);
                break;
            default:
                throw new Exception();
        }
        for (int i = 0; i < numFields; ++i)
        {
            fieldNumOffs[i] = (int)allFieldNumOffs.Get(skip + i);
        }
    }

    // number of terms per field for all fields
    PackedInt32s.Reader numTerms;
    int totalTerms;
    {
        int bitsRequired = vectorsStream.ReadVInt32();
        numTerms = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsRequired);
        int sum = 0;
        for (int i = 0; i < totalFields; ++i)
        {
            sum += (int)numTerms.Get(i);
        }
        totalTerms = sum;
    }

    // term lengths
    int docOff = 0, docLen = 0, totalLen;
    int[] fieldLengths = new int[numFields];
    int[][] prefixLengths = new int[numFields][];
    int[][] suffixLengths = new int[numFields][];
    {
        reader.Reset(vectorsStream, totalTerms);
        // skip
        int toSkip = 0;
        for (int i = 0; i < skip; ++i)
        {
            toSkip += (int)numTerms.Get(i);
        }
        reader.Skip(toSkip);
        // read prefix lengths
        for (int i = 0; i < numFields; ++i)
        {
            int termCount = (int)numTerms.Get(skip + i);
            int[] fieldPrefixLengths = new int[termCount];
            prefixLengths[i] = fieldPrefixLengths;
            for (int j = 0; j < termCount;)
            {
                Int64sRef next = reader.Next(termCount - j);
                for (int k = 0; k < next.Length; ++k)
                {
                    fieldPrefixLengths[j++] = (int)next.Int64s[next.Offset + k];
                }
            }
        }
        reader.Skip(totalTerms - reader.Ord);

        reader.Reset(vectorsStream, totalTerms);
        // skip
        toSkip = 0;
        for (int i = 0; i < skip; ++i)
        {
            for (int j = 0; j < numTerms.Get(i); ++j)
            {
                docOff += (int)reader.Next();
            }
        }
        for (int i = 0; i < numFields; ++i)
        {
            int termCount = (int)numTerms.Get(skip + i);
            int[] fieldSuffixLengths = new int[termCount];
            suffixLengths[i] = fieldSuffixLengths;
            for (int j = 0; j < termCount;)
            {
                Int64sRef next = reader.Next(termCount - j);
                for (int k = 0; k < next.Length; ++k)
                {
                    fieldSuffixLengths[j++] = (int)next.Int64s[next.Offset + k];
                }
            }
            fieldLengths[i] = Sum(suffixLengths[i]);
            docLen += fieldLengths[i];
        }
        totalLen = docOff + docLen;
        for (int i = skip + numFields; i < totalFields; ++i)
        {
            for (int j = 0; j < numTerms.Get(i); ++j)
            {
                totalLen += (int)reader.Next();
            }
        }
    }

    // term freqs
    int[] termFreqs = new int[totalTerms];
    {
        reader.Reset(vectorsStream, totalTerms);
        for (int i = 0; i < totalTerms;)
        {
            Int64sRef next = reader.Next(totalTerms - i);
            for (int k = 0; k < next.Length; ++k)
            {
                termFreqs[i++] = 1 + (int)next.Int64s[next.Offset + k];
            }
        }
    }

    // total number of positions, offsets and payloads
    int totalPositions = 0, totalOffsets = 0, totalPayloads = 0;
    for (int i = 0, termIndex = 0; i < totalFields; ++i)
    {
        int f = (int)flags.Get(i);
        int termCount = (int)numTerms.Get(i);
        for (int j = 0; j < termCount; ++j)
        {
            int freq = termFreqs[termIndex++];
            if ((f & CompressingTermVectorsWriter.POSITIONS) != 0)
            {
                totalPositions += freq;
            }
            if ((f & CompressingTermVectorsWriter.OFFSETS) != 0)
            {
                totalOffsets += freq;
            }
            if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
            {
                totalPayloads += freq;
            }
        }
        Debug.Assert(i != totalFields - 1 || termIndex == totalTerms, termIndex + " " + totalTerms);
    }

    int[][] positionIndex = PositionIndex(skip, numFields, numTerms, termFreqs);
    int[][] positions, startOffsets, lengths;
    if (totalPositions > 0)
    {
        positions = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.POSITIONS, totalPositions, positionIndex);
    }
    else
    {
        positions = new int[numFields][];
    }

    if (totalOffsets > 0)
    {
        // average number of chars per term
        float[] charsPerTerm = new float[fieldNums.Length];
        for (int i = 0; i < charsPerTerm.Length; ++i)
        {
            charsPerTerm[i] = J2N.BitConversion.Int32BitsToSingle(vectorsStream.ReadInt32());
        }
        startOffsets = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);
        lengths = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);

        for (int i = 0; i < numFields; ++i)
        {
            int[] fStartOffsets = startOffsets[i];
            int[] fPositions = positions[i];
            // patch offsets from positions
            if (fStartOffsets != null && fPositions != null)
            {
                float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]];
                for (int j = 0; j < startOffsets[i].Length; ++j)
                {
                    fStartOffsets[j] += (int)(fieldCharsPerTerm * fPositions[j]);
                }
            }
            if (fStartOffsets != null)
            {
                int[] fPrefixLengths = prefixLengths[i];
                int[] fSuffixLengths = suffixLengths[i];
                int[] fLengths = lengths[i];
                for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
                {
                    // delta-decode start offsets and patch lengths using term lengths
                    int termLength = fPrefixLengths[j] + fSuffixLengths[j];
                    lengths[i][positionIndex[i][j]] += termLength;
                    for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k)
                    {
                        fStartOffsets[k] += fStartOffsets[k - 1];
                        fLengths[k] += termLength;
                    }
                }
            }
        }
    }
    else
    {
        startOffsets = lengths = new int[numFields][];
    }
    if (totalPositions > 0)
    {
        // delta-decode positions
        for (int i = 0; i < numFields; ++i)
        {
            int[] fPositions = positions[i];
            int[] fpositionIndex = positionIndex[i];
            if (fPositions != null)
            {
                for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
                {
                    // delta-decode positions
                    for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k)
                    {
                        fPositions[k] += fPositions[k - 1];
                    }
                }
            }
        }
    }

    // payload lengths
    int[][] payloadIndex = new int[numFields][];
    int totalPayloadLength = 0;
    int payloadOff = 0;
    int payloadLen = 0;
    if (totalPayloads > 0)
    {
        reader.Reset(vectorsStream, totalPayloads);
        // skip
        int termIndex = 0;
        for (int i = 0; i < skip; ++i)
        {
            int f = (int)flags.Get(i);
            int termCount = (int)numTerms.Get(i);
            if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
            {
                for (int j = 0; j < termCount; ++j)
                {
                    int freq = termFreqs[termIndex + j];
                    for (int k = 0; k < freq; ++k)
                    {
                        int l = (int)reader.Next();
                        payloadOff += l;
                    }
                }
            }
            termIndex += termCount;
        }
        totalPayloadLength = payloadOff;
        // read doc payload lengths
        for (int i = 0; i < numFields; ++i)
        {
            int f = (int)flags.Get(skip + i);
            int termCount = (int)numTerms.Get(skip + i);
            if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
            {
                int totalFreq = positionIndex[i][termCount];
                payloadIndex[i] = new int[totalFreq + 1];
                int posIdx = 0;
                payloadIndex[i][posIdx] = payloadLen;
                for (int j = 0; j < termCount; ++j)
                {
                    int freq = termFreqs[termIndex + j];
                    for (int k = 0; k < freq; ++k)
                    {
                        int payloadLength = (int)reader.Next();
                        payloadLen += payloadLength;
                        payloadIndex[i][posIdx + 1] = payloadLen;
                        ++posIdx;
                    }
                }
                Debug.Assert(posIdx == totalFreq);
            }
            termIndex += termCount;
        }
        totalPayloadLength += payloadLen;
        for (int i = skip + numFields; i < totalFields; ++i)
        {
            int f = (int)flags.Get(i);
            int termCount = (int)numTerms.Get(i);
            if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
            {
                for (int j = 0; j < termCount; ++j)
                {
                    int freq = termFreqs[termIndex + j];
                    for (int k = 0; k < freq; ++k)
                    {
                        totalPayloadLength += (int)reader.Next();
                    }
                }
            }
            termIndex += termCount;
        }
        Debug.Assert(termIndex == totalTerms, termIndex + " " + totalTerms);
    }

    // decompress data
    BytesRef suffixBytes = new BytesRef();
    decompressor.Decompress(vectorsStream, totalLen + totalPayloadLength, docOff + payloadOff, docLen + payloadLen, suffixBytes);
    suffixBytes.Length = docLen;
    BytesRef payloadBytes = new BytesRef(suffixBytes.Bytes, suffixBytes.Offset + docLen, payloadLen);

    int[] fieldFlags = new int[numFields];
    for (int i = 0; i < numFields; ++i)
    {
        fieldFlags[i] = (int)flags.Get(skip + i);
    }

    int[] fieldNumTerms = new int[numFields];
    for (int i = 0; i < numFields; ++i)
    {
        fieldNumTerms[i] = (int)numTerms.Get(skip + i);
    }

    int[][] fieldTermFreqs = new int[numFields][];
    {
        int termIdx = 0;
        for (int i = 0; i < skip; ++i)
        {
            termIdx += (int)numTerms.Get(i);
        }
        for (int i = 0; i < numFields; ++i)
        {
            int termCount = (int)numTerms.Get(skip + i);
            fieldTermFreqs[i] = new int[termCount];
            for (int j = 0; j < termCount; ++j)
            {
                fieldTermFreqs[i][j] = termFreqs[termIdx++];
            }
        }
    }

    Debug.Assert(Sum(fieldLengths) == docLen, Sum(fieldLengths) + " != " + docLen);

    return new TVFields(this, fieldNums, fieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths, prefixLengths, suffixLengths, fieldTermFreqs, positionIndex, positions, startOffsets, lengths, payloadBytes, payloadIndex, suffixBytes);
}
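// A minimal sketch (hypothetical helper, not part of this class) of how the
// prefix/suffix lengths and the decompressed suffixBytes produced above turn
// back into full terms: term j keeps the first prefixLengths[j] bytes of its
// predecessor and appends suffixLengths[j] fresh bytes from the shared block.
private static byte[][] RebuildTerms(int[] prefixLengths, int[] suffixLengths, byte[] suffixBytes, int offset)
{
    var terms = new byte[prefixLengths.Length][];
    for (int j = 0; j < terms.Length; ++j)
    {
        terms[j] = new byte[prefixLengths[j] + suffixLengths[j]];
        if (j > 0)
        {
            System.Array.Copy(terms[j - 1], 0, terms[j], 0, prefixLengths[j]); // shared prefix
        }
        System.Array.Copy(suffixBytes, offset, terms[j], prefixLengths[j], suffixLengths[j]); // new suffix
        offset += suffixLengths[j];
    }
    return terms;
}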