private void ReadSortedSetFieldWithAddresses(int fieldNumber, IndexInput meta, FieldInfos infos)
{
    // sortedset = binary + numeric (addresses) + ordIndex
    if (meta.ReadVInt32() != fieldNumber)
    {
        throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
    }
    if (meta.ReadByte() != Lucene45DocValuesFormat.BINARY)
    {
        throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
    }
    BinaryEntry b = ReadBinaryEntry(meta);
    binaries[fieldNumber] = b;

    if (meta.ReadVInt32() != fieldNumber)
    {
        throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
    }
    if (meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC)
    {
        throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
    }
    NumericEntry n1 = ReadNumericEntry(meta);
    ords[fieldNumber] = n1;

    if (meta.ReadVInt32() != fieldNumber)
    {
        throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
    }
    if (meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC)
    {
        throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
    }
    NumericEntry n2 = ReadNumericEntry(meta);
    ordIndexes[fieldNumber] = n2;
}
private void Demo_FSIndexInputBug(Directory fsdir, string file)
{
    // Setup the test file - we need more than 1024 bytes
    IndexOutput os = fsdir.CreateOutput(file, IOContext.DEFAULT);
    for (int i = 0; i < 2000; i++)
    {
        os.WriteByte((byte)i);
    }
    os.Dispose();

    IndexInput @in = fsdir.OpenInput(file, IOContext.DEFAULT);

    // this read primes the buffer in IndexInput
    @in.ReadByte();

    // Close the file
    @in.Dispose();

    // ERROR: this call should fail, but succeeds because the buffer
    // is still filled
    @in.ReadByte();

    // ERROR: this call should fail, but succeeds for some reason as well
    @in.Seek(1099);

    try
    {
        // OK: this call correctly fails. We are now past the 1024 internal
        // buffer, so an actual IO is attempted, which fails
        @in.ReadByte();
        Assert.Fail("expected readByte() to throw exception");
    }
    catch (Exception e) when (e.IsIOException())
    {
        // expected exception
    }
}
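// Hedged usage sketch (not part of the original test): one way the demo above might be driven.
// The wrapper method name, directory path, and file name are assumptions for illustration only.
private void RunFsIndexInputBugDemo()
{
    using (Directory fsdir = FSDirectory.Open("demo-index")) // assumed on-disk location
    {
        Demo_FSIndexInputBug(fsdir, "test-file.bin"); // assumed file name
    }
}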
/// <summary>
/// Skip the next block of data.
/// </summary>
/// <param name="in"> The input where to read data. </param>
/// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception>
internal void SkipBlock(IndexInput @in)
{
    int numBits = @in.ReadByte();
    if (numBits == ALL_VALUES_EQUAL)
    {
        @in.ReadVInt32();
        return;
    }
    Debug.Assert(numBits > 0 && numBits <= 32, numBits.ToString());
    int encodedSize = encodedSizes[numBits];
    @in.Seek(@in.GetFilePointer() + encodedSize);
}
private void Read(IndexInput input, String fileName)
{
    int firstInt = input.ReadVInt();

    if (firstInt < 0)
    {
        // This is a real format
        format = firstInt;
    }
    else
    {
        format = FORMAT_PRE;
    }

    if (format != FORMAT_PRE && format != FORMAT_START)
    {
        throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
    }

    int size;
    if (format == FORMAT_PRE)
    {
        size = firstInt;
    }
    else
    {
        size = input.ReadVInt(); // read in the size
    }

    for (int i = 0; i < size; i++)
    {
        String name = StringHelper.Intern(input.ReadString());
        byte bits = input.ReadByte();
        bool isIndexed = (bits & IS_INDEXED) != 0;
        bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
        bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
        bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
        bool omitNorms = (bits & OMIT_NORMS) != 0;
        bool storePayloads = (bits & STORE_PAYLOADS) != 0;
        bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;

        AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
    }

    if (input.FilePointer != input.Length())
    {
        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length());
    }
}
/// <summary>read as a d-gaps list </summary>
private void ReadDgaps(IndexInput input)
{
    size = input.ReadInt();   // (re)read size
    count = input.ReadInt();  // read count
    bits = new byte[(size >> 3) + 1]; // allocate bits
    int last = 0;
    int n = Count();
    while (n > 0)
    {
        last += input.ReadVInt();
        bits[last] = input.ReadByte();
        n -= BYTE_COUNTS[bits[last] & 0xFF];
    }
}
/// <summary>
/// Read as a d-gaps list. </summary>
private void ReadSetDgaps(IndexInput input)
{
    size = input.ReadInt32();   // (re)read size
    count = input.ReadInt32();  // read count
    bits = new byte[GetNumBytes(size)]; // allocate bits
    int last = 0;
    int n = Count();
    while (n > 0)
    {
        last += input.ReadVInt32();
        bits[last] = input.ReadByte();
        n -= BitUtil.BitCount(bits[last]);
        Debug.Assert(n >= 0);
    }
}
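// Worked example of the d-gap layout consumed above (illustrative values, not from the codec docs):
// if only bytes 3 and 10 of 'bits' are non-zero, the stream holds size, count, and then the pairs
// (VInt gap = 3, value of bits[3]) and (VInt gap = 7, value of bits[10]); 'last' accumulates the
// gaps back into absolute byte positions, and the loop ends once the running bit count 'n' reaches zero.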
/// <summary>
/// read as a d-gaps list </summary>
private void ReadSetDgaps(IndexInput input)
{
    Size_Renamed = input.ReadInt();   // (re)read size
    Count_Renamed = input.ReadInt();  // read count
    Bits = new byte[GetNumBytes(Size_Renamed)]; // allocate bits
    int last = 0;
    int n = Count();
    while (n > 0)
    {
        last += input.ReadVInt();
        Bits[last] = input.ReadByte();
        n -= BitUtil.BitCount(Bits[last]);
        Debug.Assert(n >= 0);
    }
}
/// <summary>
/// Skip the next block of data.
/// </summary>
/// <param name="in"> The input where to read data. </param>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
internal void SkipBlock(IndexInput @in)
{
    int numBits = @in.ReadByte();
    if (numBits == ALL_VALUES_EQUAL)
    {
        @in.ReadVInt32();
        return;
    }
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(numBits > 0 && numBits <= 32, numBits.ToString());
    }
    int encodedSize = encodedSizes[numBits];
    @in.Seek(@in.Position + encodedSize); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
}
/// <summary>
/// Sole constructor. </summary>
public BlockPackedReader(IndexInput @in, int packedIntsVersion, int blockSize, long valueCount, bool direct)
{
    this.valueCount = valueCount;
    blockShift = PackedInt32s.CheckBlockSize(blockSize, AbstractBlockPackedWriter.MIN_BLOCK_SIZE, AbstractBlockPackedWriter.MAX_BLOCK_SIZE);
    blockMask = blockSize - 1;
    int numBlocks = PackedInt32s.NumBlocks(valueCount, blockSize);
    long[] minValues = null;
    subReaders = new PackedInt32s.Reader[numBlocks];
    for (int i = 0; i < numBlocks; ++i)
    {
        int token = @in.ReadByte() & 0xFF;
        int bitsPerValue = token.TripleShift(AbstractBlockPackedWriter.BPV_SHIFT);
        if (bitsPerValue > 64)
        {
            throw new Exception("Corrupted");
        }
        if ((token & AbstractBlockPackedWriter.MIN_VALUE_EQUALS_0) == 0)
        {
            if (minValues == null)
            {
                minValues = new long[numBlocks];
            }
            minValues[i] = BlockPackedReaderIterator.ZigZagDecode(1L + BlockPackedReaderIterator.ReadVInt64(@in));
        }
        if (bitsPerValue == 0)
        {
            subReaders[i] = new PackedInt32s.NullReader(blockSize);
        }
        else
        {
            int size = (int)Math.Min(blockSize, valueCount - (long)i * blockSize);
            if (direct)
            {
                long pointer = @in.GetFilePointer();
                subReaders[i] = PackedInt32s.GetDirectReaderNoHeader(@in, PackedInt32s.Format.PACKED, packedIntsVersion, size, bitsPerValue);
                @in.Seek(pointer + PackedInt32s.Format.PACKED.ByteCount(packedIntsVersion, size, bitsPerValue));
            }
            else
            {
                subReaders[i] = PackedInt32s.GetReaderNoHeader(@in, PackedInt32s.Format.PACKED, packedIntsVersion, size, bitsPerValue);
            }
        }
    }
    this.minValues = minValues;
}
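// Worked header-token example for the ReadByte() call above, assuming the usual block-packed
// constants BPV_SHIFT == 1 and MIN_VALUE_EQUALS_0 == 1 (an assumption about this port):
// token = 0x2B (binary 0010_1011) gives bitsPerValue = 0x2B >>> 1 = 21; the low bit is set,
// so no minimum value follows and the block's minimum is implicitly 0. With the low bit clear,
// a VInt64 would follow and decode to the block minimum via ZigZagDecode(1 + vlong).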
private static NumericEntry ReadNumericEntry(IndexInput meta) // LUCENENET: CA1822: Mark members as static
{
    var entry = new NumericEntry
    {
        offset = meta.ReadInt64(),
        count = meta.ReadInt32(),
        missingOffset = meta.ReadInt64()
    };
    if (entry.missingOffset != -1)
    {
        entry.missingBytes = meta.ReadInt64();
    }
    else
    {
        entry.missingBytes = 0;
    }
    entry.byteWidth = meta.ReadByte();
    return entry;
}
private static NumericEntry ReadNumericEntry(IndexInput meta)
{
    var entry = new NumericEntry
    {
        offset = meta.ReadLong(),
        count = meta.ReadInt(),
        missingOffset = meta.ReadLong()
    };
    if (entry.missingOffset != -1)
    {
        entry.missingBytes = meta.ReadLong();
    }
    else
    {
        entry.missingBytes = 0;
    }
    entry.byteWidth = meta.ReadByte();
    return entry;
}
private SegmentInfo ReadUpgradedSegmentInfo(string name, Directory dir, IndexInput input)
{
    CodecUtil.CheckHeader(input, Lucene3xSegmentInfoFormat.UPGRADED_SI_CODEC_NAME, Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_START, Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_CURRENT);
    string version = input.ReadString();
    int docCount = input.ReadInt();
    IDictionary<string, string> attributes = input.ReadStringStringMap();
    bool isCompoundFile = input.ReadByte() == SegmentInfo.YES;
    IDictionary<string, string> diagnostics = input.ReadStringStringMap();
    ISet<string> files = input.ReadStringSet();

    SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, CollectionsHelper.UnmodifiableMap(attributes));
    info.Files = files;
    return info;
}
public override SegmentInfo Read(Directory dir, string segment, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segment, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
    IndexInput input = dir.OpenInput(fileName, context);
    bool success = false;
    try
    {
        CodecUtil.CheckHeader(input, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_START, Lucene40SegmentInfoFormat.VERSION_CURRENT);
        string version = input.ReadString();
        int docCount = input.ReadInt32();
        if (docCount < 0)
        {
            throw new CorruptIndexException("invalid docCount: " + docCount + " (resource=" + input + ")");
        }
        bool isCompoundFile = input.ReadByte() == SegmentInfo.YES;
        IDictionary<string, string> diagnostics = input.ReadStringStringMap();
        input.ReadStringStringMap(); // read deprecated attributes
        ISet<string> files = input.ReadStringSet();
        CodecUtil.CheckEOF(input);

        SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics);
        si.SetFiles(files);

        success = true;
        return si;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(input);
        }
        else
        {
            input.Dispose();
        }
    }
}
public virtual void TestReadPastEOF()
{
    SetUp_2();
    var cr = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random), false);
    IndexInput @is = cr.OpenInput("f2", NewIOContext(Random));
    @is.Seek(@is.Length - 10);
    var b = new byte[100];
    @is.ReadBytes(b, 0, 10);

    try
    {
        @is.ReadByte();
        Assert.Fail("Single byte read past end of file");
    }
#pragma warning disable 168
    catch (IOException e)
#pragma warning restore 168
    {
        /* success */
        //System.out.println("SUCCESS: single byte read past end of file: " + e);
    }

    @is.Seek(@is.Length - 10);
    try
    {
        @is.ReadBytes(b, 0, 50);
        Assert.Fail("Block read past end of file");
    }
#pragma warning disable 168
    catch (IOException e)
#pragma warning restore 168
    {
        /* success */
        //System.out.println("SUCCESS: block read past end of file: " + e);
    }

    @is.Dispose();
    cr.Dispose();
}
/// <summary>
/// Read as a d-gaps cleared bits list. </summary>
private void ReadClearedDgaps(IndexInput input)
{
    size = input.ReadInt32();   // (re)read size
    count = input.ReadInt32();  // read count
    bits = new byte[GetNumBytes(size)]; // allocate bits
    for (int i = 0; i < bits.Length; ++i)
    {
        bits[i] = 0xff;
    }
    ClearUnusedBits();
    int last = 0;
    int numCleared = Length - Count();
    while (numCleared > 0)
    {
        last += input.ReadVInt32();
        bits[last] = input.ReadByte();
        numCleared -= 8 - BitUtil.BitCount(bits[last]);
        Debug.Assert(numCleared >= 0 || (last == (bits.Length - 1) && numCleared == -(8 - (size & 7))));
    }
}
/// <summary>
/// read as a d-gaps cleared bits list </summary>
private void ReadClearedDgaps(IndexInput input)
{
    Size_Renamed = input.ReadInt();   // (re)read size
    Count_Renamed = input.ReadInt();  // read count
    Bits = new byte[GetNumBytes(Size_Renamed)]; // allocate bits
    for (int i = 0; i < Bits.Length; ++i)
    {
        Bits[i] = 0xff;
    }
    ClearUnusedBits();
    int last = 0;
    int numCleared = Size() - Count();
    while (numCleared > 0)
    {
        last += input.ReadVInt();
        Bits[last] = input.ReadByte();
        numCleared -= 8 - BitUtil.BitCount(Bits[last]);
        Debug.Assert(numCleared >= 0 || (last == (Bits.Length - 1) && numCleared == -(8 - (Size_Renamed & 7))));
    }
}
/// <summary>
/// Read the next block of data (<code>For</code> format).
/// </summary>
/// <param name="in"> the input to use to read data </param>
/// <param name="encoded"> a buffer that can be used to store encoded data </param>
/// <param name="decoded"> where to write decoded data </param>
/// <exception cref="IOException"> If there is a low-level I/O error </exception>
public void ReadBlock(IndexInput @in, sbyte[] encoded, int[] decoded)
{
    int numBits = @in.ReadByte();
    Debug.Assert(numBits <= 32, numBits.ToString());

    if (numBits == ALL_VALUES_EQUAL)
    {
        int value = @in.ReadVInt();
        CollectionsHelper.Fill(decoded, 0, Lucene41PostingsFormat.BLOCK_SIZE, value);
        return;
    }

    int encodedSize = EncodedSizes[numBits];
    @in.ReadBytes(encoded, 0, encodedSize);

    PackedInts.Decoder decoder = Decoders[numBits];
    int iters = Iterations[numBits];
    Debug.Assert(iters * decoder.ByteValueCount() >= Lucene41PostingsFormat.BLOCK_SIZE);

    decoder.Decode(encoded, 0, decoded, 0, iters);
}
/// <summary>
/// Read the next block of data (<c>For</c> format).
/// </summary>
/// <param name="in"> The input to use to read data. </param>
/// <param name="encoded"> A buffer that can be used to store encoded data. </param>
/// <param name="decoded"> Where to write decoded data. </param>
/// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception>
internal void ReadBlock(IndexInput @in, byte[] encoded, int[] decoded)
{
    int numBits = @in.ReadByte();
    Debug.Assert(numBits <= 32, numBits.ToString());

    if (numBits == ALL_VALUES_EQUAL)
    {
        int value = @in.ReadVInt32();
        Arrays.Fill(decoded, 0, Lucene41PostingsFormat.BLOCK_SIZE, value);
        return;
    }

    int encodedSize = encodedSizes[numBits];
    @in.ReadBytes(encoded, 0, encodedSize);

    PackedInt32s.IDecoder decoder = decoders[numBits];
    int iters = iterations[numBits];
    Debug.Assert(iters * decoder.ByteValueCount >= Lucene41PostingsFormat.BLOCK_SIZE);

    decoder.Decode(encoded, 0, decoded, 0, iters);
}
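// Worked example of the header byte read above (assuming ALL_VALUES_EQUAL == 0, its usual value):
// a header of 0 means the whole BLOCK_SIZE-value block collapses to one repeated value carried in a
// single VInt, while a header of, say, 5 means the block was bit-packed at 5 bits per value and
// occupies encodedSizes[5] bytes, which are then decoded in 'iters' passes of decoder.Decode.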
private NumericDocValues LoadVarIntsField(FieldInfo field, IndexInput input)
{
    CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_START, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);
    byte header = input.ReadByte();
    if (header == Lucene40DocValuesFormat.VAR_INTS_FIXED_64)
    {
        int maxDoc = State.SegmentInfo.DocCount;
        long[] values = new long[maxDoc];
        for (int i = 0; i < values.Length; i++)
        {
            values[i] = input.ReadLong();
        }
        RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(values));
        return new NumericDocValuesAnonymousInnerClassHelper(this, values);
    }
    else if (header == Lucene40DocValuesFormat.VAR_INTS_PACKED)
    {
        long minValue = input.ReadLong();
        long defaultValue = input.ReadLong();
        PackedInts.Reader reader = PackedInts.GetReader(input);
        RamBytesUsed_Renamed.AddAndGet(reader.RamBytesUsed());
        return new NumericDocValuesAnonymousInnerClassHelper2(this, minValue, defaultValue, reader);
    }
    else
    {
        throw new CorruptIndexException("invalid VAR_INTS header byte: " + header + " (resource=" + input + ")");
    }
}
public override byte ReadByte()
{
    return _indexInput.ReadByte();
}
public override Fields Get(int doc) { EnsureOpen(); // seek to the right place { long startPointer = indexReader.GetStartPointer(doc); vectorsStream.Seek(startPointer); } // decode // - docBase: first doc ID of the chunk // - chunkDocs: number of docs of the chunk int docBase = vectorsStream.ReadVInt32(); int chunkDocs = vectorsStream.ReadVInt32(); if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs) { throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc + " (resource=" + vectorsStream + ")"); } int skip; // number of fields to skip int numFields; // number of fields of the document we're looking for int totalFields; // total number of fields of the chunk (sum for all docs) if (chunkDocs == 1) { skip = 0; numFields = totalFields = vectorsStream.ReadVInt32(); } else { reader.Reset(vectorsStream, chunkDocs); int sum = 0; for (int i = docBase; i < doc; ++i) { sum += (int)reader.Next(); } skip = sum; numFields = (int)reader.Next(); sum += numFields; for (int i = doc + 1; i < docBase + chunkDocs; ++i) { sum += (int)reader.Next(); } totalFields = sum; } if (numFields == 0) { // no vectors return(null); } // read field numbers that have term vectors int[] fieldNums; { int token = vectorsStream.ReadByte() & 0xFF; Debug.Assert(token != 0); // means no term vectors, cannot happen since we checked for numFields == 0 int bitsPerFieldNum = token & 0x1F; int totalDistinctFields = (int)((uint)token >> 5); if (totalDistinctFields == 0x07) { totalDistinctFields += vectorsStream.ReadVInt32(); } ++totalDistinctFields; PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1); fieldNums = new int[totalDistinctFields]; for (int i = 0; i < totalDistinctFields; ++i) { fieldNums[i] = (int)it.Next(); } } // read field numbers and flags int[] fieldNumOffs = new int[numFields]; PackedInt32s.Reader flags; { int bitsPerOff = PackedInt32s.BitsRequired(fieldNums.Length - 1); PackedInt32s.Reader allFieldNumOffs = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff); switch (vectorsStream.ReadVInt32()) { case 0: PackedInt32s.Reader fieldFlags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, fieldNums.Length, CompressingTermVectorsWriter.FLAGS_BITS); PackedInt32s.Mutable f = PackedInt32s.GetMutable(totalFields, CompressingTermVectorsWriter.FLAGS_BITS, PackedInt32s.COMPACT); for (int i = 0; i < totalFields; ++i) { int fieldNumOff = (int)allFieldNumOffs.Get(i); Debug.Assert(fieldNumOff >= 0 && fieldNumOff < fieldNums.Length); int fgs = (int)fieldFlags.Get(fieldNumOff); f.Set(i, fgs); } flags = f; break; case 1: flags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, CompressingTermVectorsWriter.FLAGS_BITS); break; default: throw new Exception(); } for (int i = 0; i < numFields; ++i) { fieldNumOffs[i] = (int)allFieldNumOffs.Get(skip + i); } } // number of terms per field for all fields PackedInt32s.Reader numTerms; int totalTerms; { int bitsRequired = vectorsStream.ReadVInt32(); numTerms = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsRequired); int sum = 0; for (int i = 0; i < totalFields; ++i) { sum += (int)numTerms.Get(i); } totalTerms = sum; } // term lengths int docOff = 0, docLen = 0, totalLen; int[] 
fieldLengths = new int[numFields]; int[][] prefixLengths = new int[numFields][]; int[][] suffixLengths = new int[numFields][]; { reader.Reset(vectorsStream, totalTerms); // skip int toSkip = 0; for (int i = 0; i < skip; ++i) { toSkip += (int)numTerms.Get(i); } reader.Skip(toSkip); // read prefix lengths for (int i = 0; i < numFields; ++i) { int termCount = (int)numTerms.Get(skip + i); int[] fieldPrefixLengths = new int[termCount]; prefixLengths[i] = fieldPrefixLengths; for (int j = 0; j < termCount;) { Int64sRef next = reader.Next(termCount - j); for (int k = 0; k < next.Length; ++k) { fieldPrefixLengths[j++] = (int)next.Int64s[next.Offset + k]; } } } reader.Skip(totalTerms - reader.Ord); reader.Reset(vectorsStream, totalTerms); // skip toSkip = 0; for (int i = 0; i < skip; ++i) { for (int j = 0; j < numTerms.Get(i); ++j) { docOff += (int)reader.Next(); } } for (int i = 0; i < numFields; ++i) { int termCount = (int)numTerms.Get(skip + i); int[] fieldSuffixLengths = new int[termCount]; suffixLengths[i] = fieldSuffixLengths; for (int j = 0; j < termCount;) { Int64sRef next = reader.Next(termCount - j); for (int k = 0; k < next.Length; ++k) { fieldSuffixLengths[j++] = (int)next.Int64s[next.Offset + k]; } } fieldLengths[i] = Sum(suffixLengths[i]); docLen += fieldLengths[i]; } totalLen = docOff + docLen; for (int i = skip + numFields; i < totalFields; ++i) { for (int j = 0; j < numTerms.Get(i); ++j) { totalLen += (int)reader.Next(); } } } // term freqs int[] termFreqs = new int[totalTerms]; { reader.Reset(vectorsStream, totalTerms); for (int i = 0; i < totalTerms;) { Int64sRef next = reader.Next(totalTerms - i); for (int k = 0; k < next.Length; ++k) { termFreqs[i++] = 1 + (int)next.Int64s[next.Offset + k]; } } } // total number of positions, offsets and payloads int totalPositions = 0, totalOffsets = 0, totalPayloads = 0; for (int i = 0, termIndex = 0; i < totalFields; ++i) { int f = (int)flags.Get(i); int termCount = (int)numTerms.Get(i); for (int j = 0; j < termCount; ++j) { int freq = termFreqs[termIndex++]; if ((f & CompressingTermVectorsWriter.POSITIONS) != 0) { totalPositions += freq; } if ((f & CompressingTermVectorsWriter.OFFSETS) != 0) { totalOffsets += freq; } if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0) { totalPayloads += freq; } } Debug.Assert(i != totalFields - 1 || termIndex == totalTerms, termIndex + " " + totalTerms); } int[][] positionIndex = PositionIndex(skip, numFields, numTerms, termFreqs); int[][] positions, startOffsets, lengths; if (totalPositions > 0) { positions = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.POSITIONS, totalPositions, positionIndex); } else { positions = new int[numFields][]; } if (totalOffsets > 0) { // average number of chars per term float[] charsPerTerm = new float[fieldNums.Length]; for (int i = 0; i < charsPerTerm.Length; ++i) { charsPerTerm[i] = J2N.BitConversion.Int32BitsToSingle(vectorsStream.ReadInt32()); } startOffsets = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex); lengths = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex); for (int i = 0; i < numFields; ++i) { int[] fStartOffsets = startOffsets[i]; int[] fPositions = positions[i]; // patch offsets from positions if (fStartOffsets != null && fPositions != null) { float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]]; for (int j = 0; j < startOffsets[i].Length; ++j) { 
fStartOffsets[j] += (int)(fieldCharsPerTerm * fPositions[j]); } } if (fStartOffsets != null) { int[] fPrefixLengths = prefixLengths[i]; int[] fSuffixLengths = suffixLengths[i]; int[] fLengths = lengths[i]; for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j) { // delta-decode start offsets and patch lengths using term lengths int termLength = fPrefixLengths[j] + fSuffixLengths[j]; lengths[i][positionIndex[i][j]] += termLength; for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k) { fStartOffsets[k] += fStartOffsets[k - 1]; fLengths[k] += termLength; } } } } } else { startOffsets = lengths = new int[numFields][]; } if (totalPositions > 0) { // delta-decode positions for (int i = 0; i < numFields; ++i) { int[] fPositions = positions[i]; int[] fpositionIndex = positionIndex[i]; if (fPositions != null) { for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j) { // delta-decode start offsets for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k) { fPositions[k] += fPositions[k - 1]; } } } } } // payload lengths int[][] payloadIndex = new int[numFields][]; int totalPayloadLength = 0; int payloadOff = 0; int payloadLen = 0; if (totalPayloads > 0) { reader.Reset(vectorsStream, totalPayloads); // skip int termIndex = 0; for (int i = 0; i < skip; ++i) { int f = (int)flags.Get(i); int termCount = (int)numTerms.Get(i); if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0) { for (int j = 0; j < termCount; ++j) { int freq = termFreqs[termIndex + j]; for (int k = 0; k < freq; ++k) { int l = (int)reader.Next(); payloadOff += l; } } } termIndex += termCount; } totalPayloadLength = payloadOff; // read doc payload lengths for (int i = 0; i < numFields; ++i) { int f = (int)flags.Get(skip + i); int termCount = (int)numTerms.Get(skip + i); if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0) { int totalFreq = positionIndex[i][termCount]; payloadIndex[i] = new int[totalFreq + 1]; int posIdx = 0; payloadIndex[i][posIdx] = payloadLen; for (int j = 0; j < termCount; ++j) { int freq = termFreqs[termIndex + j]; for (int k = 0; k < freq; ++k) { int payloadLength = (int)reader.Next(); payloadLen += payloadLength; payloadIndex[i][posIdx + 1] = payloadLen; ++posIdx; } } Debug.Assert(posIdx == totalFreq); } termIndex += termCount; } totalPayloadLength += payloadLen; for (int i = skip + numFields; i < totalFields; ++i) { int f = (int)flags.Get(i); int termCount = (int)numTerms.Get(i); if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0) { for (int j = 0; j < termCount; ++j) { int freq = termFreqs[termIndex + j]; for (int k = 0; k < freq; ++k) { totalPayloadLength += (int)reader.Next(); } } } termIndex += termCount; } Debug.Assert(termIndex == totalTerms, termIndex + " " + totalTerms); } // decompress data BytesRef suffixBytes = new BytesRef(); decompressor.Decompress(vectorsStream, totalLen + totalPayloadLength, docOff + payloadOff, docLen + payloadLen, suffixBytes); suffixBytes.Length = docLen; BytesRef payloadBytes = new BytesRef(suffixBytes.Bytes, suffixBytes.Offset + docLen, payloadLen); int[] FieldFlags = new int[numFields]; for (int i = 0; i < numFields; ++i) { FieldFlags[i] = (int)flags.Get(skip + i); } int[] fieldNumTerms = new int[numFields]; for (int i = 0; i < numFields; ++i) { fieldNumTerms[i] = (int)numTerms.Get(skip + i); } int[][] fieldTermFreqs = new int[numFields][]; { int termIdx = 0; for (int i = 0; i < skip; ++i) { termIdx += (int)numTerms.Get(i); } for (int i = 0; i < numFields; ++i) { int termCount = (int)numTerms.Get(skip + i); fieldTermFreqs[i] = 
new int[termCount]; for (int j = 0; j < termCount; ++j) { fieldTermFreqs[i][j] = termFreqs[termIdx++]; } } } Debug.Assert(Sum(fieldLengths) == docLen, Sum(fieldLengths) + " != " + docLen); return(new TVFields(this, fieldNums, FieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths, prefixLengths, suffixLengths, fieldTermFreqs, positionIndex, positions, startOffsets, lengths, payloadBytes, payloadIndex, suffixBytes)); }
private void ReadFields(IndexInput meta, FieldInfos infos) { int fieldNumber = meta.ReadVInt32(); while (fieldNumber != -1) { int fieldType = meta.ReadByte(); if (fieldType == NUMBER) { var entry = new NumericEntry { offset = meta.ReadInt64(), missingOffset = meta.ReadInt64() }; if (entry.missingOffset != -1) { entry.missingBytes = meta.ReadInt64(); } else { entry.missingBytes = 0; } entry.format = meta.ReadByte(); switch (entry.format) { case DELTA_COMPRESSED: case TABLE_COMPRESSED: case GCD_COMPRESSED: case UNCOMPRESSED: break; default: throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta); } if (entry.format != UNCOMPRESSED) { entry.packedIntsVersion = meta.ReadVInt32(); } numerics[fieldNumber] = entry; } else if (fieldType == BYTES) { var entry = new BinaryEntry { offset = meta.ReadInt64(), numBytes = meta.ReadInt64(), missingOffset = meta.ReadInt64() }; if (entry.missingOffset != -1) { entry.missingBytes = meta.ReadInt64(); } else { entry.missingBytes = 0; } entry.minLength = meta.ReadVInt32(); entry.maxLength = meta.ReadVInt32(); if (entry.minLength != entry.maxLength) { entry.packedIntsVersion = meta.ReadVInt32(); entry.blockSize = meta.ReadVInt32(); } binaries[fieldNumber] = entry; } else if (fieldType == FST) { var entry = new FSTEntry { offset = meta.ReadInt64(), numOrds = meta.ReadVInt64() }; fsts[fieldNumber] = entry; } else { throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta); } fieldNumber = meta.ReadVInt32(); } }
public virtual void TestRandomAccessClones() { SetUp_2(); CompoundFileDirectory cr = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random), false); // Open two files IndexInput e1 = cr.OpenInput("f11", NewIOContext(Random)); IndexInput e2 = cr.OpenInput("f3", NewIOContext(Random)); IndexInput a1 = (IndexInput)e1.Clone(); IndexInput a2 = (IndexInput)e2.Clone(); // Seek the first pair e1.Seek(100); a1.Seek(100); Assert.AreEqual(100, e1.GetFilePointer()); Assert.AreEqual(100, a1.GetFilePointer()); byte be1 = e1.ReadByte(); byte ba1 = a1.ReadByte(); Assert.AreEqual(be1, ba1); // Now seek the second pair e2.Seek(1027); a2.Seek(1027); Assert.AreEqual(1027, e2.GetFilePointer()); Assert.AreEqual(1027, a2.GetFilePointer()); byte be2 = e2.ReadByte(); byte ba2 = a2.ReadByte(); Assert.AreEqual(be2, ba2); // Now make sure the first one didn't move Assert.AreEqual(101, e1.GetFilePointer()); Assert.AreEqual(101, a1.GetFilePointer()); be1 = e1.ReadByte(); ba1 = a1.ReadByte(); Assert.AreEqual(be1, ba1); // Now more the first one again, past the buffer length e1.Seek(1910); a1.Seek(1910); Assert.AreEqual(1910, e1.GetFilePointer()); Assert.AreEqual(1910, a1.GetFilePointer()); be1 = e1.ReadByte(); ba1 = a1.ReadByte(); Assert.AreEqual(be1, ba1); // Now make sure the second set didn't move Assert.AreEqual(1028, e2.GetFilePointer()); Assert.AreEqual(1028, a2.GetFilePointer()); be2 = e2.ReadByte(); ba2 = a2.ReadByte(); Assert.AreEqual(be2, ba2); // Move the second set back, again cross the buffer size e2.Seek(17); a2.Seek(17); Assert.AreEqual(17, e2.GetFilePointer()); Assert.AreEqual(17, a2.GetFilePointer()); be2 = e2.ReadByte(); ba2 = a2.ReadByte(); Assert.AreEqual(be2, ba2); // Finally, make sure the first set didn't move // Now make sure the first one didn't move Assert.AreEqual(1911, e1.GetFilePointer()); Assert.AreEqual(1911, a1.GetFilePointer()); be1 = e1.ReadByte(); ba1 = a1.ReadByte(); Assert.AreEqual(be1, ba1); e1.Dispose(); e2.Dispose(); a1.Dispose(); a2.Dispose(); cr.Dispose(); }
/// <summary> /// reads from legacy 3.x segments_N </summary> private SegmentCommitInfo ReadLegacySegmentInfo(Directory dir, int format, IndexInput input) { // check that it is a format we can understand if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS) { throw new IndexFormatTooOldException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); } if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1) { throw new IndexFormatTooNewException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); } string version; if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1) { version = input.ReadString(); } else { version = null; } string name = input.ReadString(); int docCount = input.ReadInt(); long delGen = input.ReadLong(); int docStoreOffset = input.ReadInt(); IDictionary<string, string> attributes = new Dictionary<string, string>(); // parse the docstore stuff and shove it into attributes string docStoreSegment; bool docStoreIsCompoundFile; if (docStoreOffset != -1) { docStoreSegment = input.ReadString(); docStoreIsCompoundFile = input.ReadByte() == SegmentInfo.YES; attributes[Lucene3xSegmentInfoFormat.DS_OFFSET_KEY] = Convert.ToString(docStoreOffset); attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = docStoreSegment; attributes[Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY] = Convert.ToString(docStoreIsCompoundFile); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } // pre-4.0 indexes write a byte if there is a single norms file byte b = input.ReadByte(); //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format); Debug.Assert(1 == b, "expected 1 but was: " + b + " format: " + format); int numNormGen = input.ReadInt(); IDictionary<int, long> normGen; if (numNormGen == SegmentInfo.NO) { normGen = null; } else { normGen = new Dictionary<int, long>(); for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadLong(); } } bool isCompoundFile = input.ReadByte() == SegmentInfo.YES; int delCount = input.ReadInt(); Debug.Assert(delCount <= docCount); bool hasProx = input.ReadByte() == 1; IDictionary<string, string> diagnostics = input.ReadStringStringMap(); if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS) { // NOTE: unused int hasVectors = input.ReadByte(); } // Replicate logic from 3.x's SegmentInfo.files(): ISet<string> files = new HashSet<string>(); if (isCompoundFile) { files.Add(IndexFileNames.SegmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); } else { AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION)); } if (docStoreOffset != -1) { if (docStoreIsCompoundFile) { files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION)); } else { 
files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); } } else if (!isCompoundFile) { files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); } // parse the normgen stuff and shove it into attributes if (normGen != null) { attributes[Lucene3xSegmentInfoFormat.NORMGEN_KEY] = Convert.ToString(numNormGen); foreach (KeyValuePair<int, long> ent in normGen) { long gen = ent.Value; if (gen >= SegmentInfo.YES) { // Definitely a separate norm file, with generation: files.Add(IndexFileNames.FileNameFromGeneration(name, "s" + ent.Key, gen)); attributes[Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.Key] = Convert.ToString(gen); } else if (gen == SegmentInfo.NO) { // No separate norm } else { // We should have already hit indexformat too old exception Debug.Assert(false); } } } SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, CollectionsHelper.UnmodifiableMap(attributes)); info.Files = files; SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, delCount, delGen, -1); return infoPerCommit; }
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext) { string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION); IndexInput input = directory.OpenInput(fileName, iocontext); bool success = false; try { CodecUtil.CheckHeader(input, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_START, Lucene42FieldInfosFormat.FORMAT_CURRENT); int size = input.ReadVInt32(); //read in the size FieldInfo[] infos = new FieldInfo[size]; for (int i = 0; i < size; i++) { string name = input.ReadString(); int fieldNumber = input.ReadVInt32(); sbyte bits = (sbyte)input.ReadByte(); bool isIndexed = (bits & Lucene42FieldInfosFormat.IS_INDEXED) != 0; bool storeTermVector = (bits & Lucene42FieldInfosFormat.STORE_TERMVECTOR) != 0; bool omitNorms = (bits & Lucene42FieldInfosFormat.OMIT_NORMS) != 0; bool storePayloads = (bits & Lucene42FieldInfosFormat.STORE_PAYLOADS) != 0; IndexOptions indexOptions; if (!isIndexed) { indexOptions = IndexOptions.NONE; } else if ((bits & Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) { indexOptions = IndexOptions.DOCS_ONLY; } else if ((bits & Lucene42FieldInfosFormat.OMIT_POSITIONS) != 0) { indexOptions = IndexOptions.DOCS_AND_FREQS; } else if ((bits & Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) { indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } else { indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } // DV Types are packed in one byte sbyte val = (sbyte)input.ReadByte(); DocValuesType docValuesType = GetDocValuesType(input, (byte)(val & 0x0F)); DocValuesType normsType = GetDocValuesType(input, (byte)((val.TripleShift(4)) & 0x0F)); IDictionary <string, string> attributes = input.ReadStringStringMap(); infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normsType, attributes.AsReadOnly()); } CodecUtil.CheckEOF(input); FieldInfos fieldInfos = new FieldInfos(infos); success = true; return(fieldInfos); } finally { if (success) { input.Dispose(); } else { IOUtils.DisposeWhileHandlingException(input); } } }
public override long Get(int index)
{
    long majorBitPos = (long)index * m_bitsPerValue;
    long elementPos = (long)((ulong)majorBitPos >> 3);
    try
    {
        @in.Seek(startPointer + elementPos);

        int bitPos = (int)(majorBitPos & 7);
        // round up bits to a multiple of 8 to find total bytes needed to read
        int roundedBits = ((bitPos + m_bitsPerValue + 7) & ~7);
        // the number of extra bits read at the end to shift out
        int shiftRightBits = roundedBits - bitPos - m_bitsPerValue;

        long rawValue;
        switch ((int)((uint)roundedBits >> 3))
        {
            case 1:
                rawValue = @in.ReadByte();
                break;

            case 2:
                rawValue = @in.ReadInt16();
                break;

            case 3:
                rawValue = ((long)@in.ReadInt16() << 8) | (@in.ReadByte() & 0xFFL);
                break;

            case 4:
                rawValue = @in.ReadInt32();
                break;

            case 5:
                rawValue = ((long)@in.ReadInt32() << 8) | (@in.ReadByte() & 0xFFL);
                break;

            case 6:
                rawValue = ((long)@in.ReadInt32() << 16) | (@in.ReadInt16() & 0xFFFFL);
                break;

            case 7:
                rawValue = ((long)@in.ReadInt32() << 24) | ((@in.ReadInt16() & 0xFFFFL) << 8) | (@in.ReadByte() & 0xFFL);
                break;

            case 8:
                rawValue = @in.ReadInt64();
                break;

            case 9:
                // We must be very careful not to shift out relevant bits. So we account for right shift
                // we would normally do on return here, and reset it.
                rawValue = (@in.ReadInt64() << (8 - shiftRightBits)) | (((uint)(@in.ReadByte() & 0xFFL) >> shiftRightBits));
                shiftRightBits = 0;
                break;

            default:
                throw new InvalidOperationException("bitsPerValue too large: " + m_bitsPerValue);
        }
        return ((long)((ulong)rawValue >> shiftRightBits)) & valueMask;
    }
    catch (IOException ioe)
    {
        throw new InvalidOperationException("failed", ioe);
    }
}
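// Worked example of the bit arithmetic above, for m_bitsPerValue = 5 and index = 3 (illustrative):
// majorBitPos = 15, elementPos = 1, bitPos = 7, roundedBits = (7 + 5 + 7) & ~7 = 16,
// shiftRightBits = 16 - 7 - 5 = 4, so case 2 reads two bytes with ReadInt16() and the result is
// (rawValue >>> 4) masked to the low 5 bits (valueMask = 0x1F).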
private SegmentInfo ReadUpgradedSegmentInfo(string name, Directory dir, IndexInput input)
{
    CodecUtil.CheckHeader(input, Lucene3xSegmentInfoFormat.UPGRADED_SI_CODEC_NAME, Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_START, Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_CURRENT);
    string version = input.ReadString();
    int docCount = input.ReadInt();
    IDictionary<string, string> attributes = input.ReadStringStringMap();
    bool isCompoundFile = input.ReadByte() == SegmentInfo.YES;
    IDictionary<string, string> diagnostics = input.ReadStringStringMap();
    ISet<string> files = input.ReadStringSet();

    SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, CollectionsHelper.UnmodifiableMap(attributes));
    info.Files = files;
    return info;
}
private void ReadFields(IndexInput meta, FieldInfos infos) { int fieldNumber = meta.ReadVInt32(); while (fieldNumber != -1) { // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616 if (fieldNumber < 0) { // trickier to validate more: because we re-use for norms, because we use multiple entries // for "composite" types like sortedset, etc. throw new CorruptIndexException("Invalid field number: " + fieldNumber + ", input=" + meta); } int fieldType = meta.ReadByte(); if (fieldType == NUMBER) { var entry = new NumericEntry(); entry.Offset = meta.ReadInt64(); entry.Format = (sbyte)meta.ReadByte(); switch (entry.Format) { case DELTA_COMPRESSED: case TABLE_COMPRESSED: case GCD_COMPRESSED: case UNCOMPRESSED: break; default: throw new CorruptIndexException("Unknown format: " + entry.Format + ", input=" + meta); } if (entry.Format != UNCOMPRESSED) { entry.PackedInt32sVersion = meta.ReadVInt32(); } numerics[fieldNumber] = entry; } else if (fieldType == BYTES) { BinaryEntry entry = new BinaryEntry(); entry.Offset = meta.ReadInt64(); entry.NumBytes = meta.ReadInt64(); entry.MinLength = meta.ReadVInt32(); entry.MaxLength = meta.ReadVInt32(); if (entry.MinLength != entry.MaxLength) { entry.PackedInt32sVersion = meta.ReadVInt32(); entry.BlockSize = meta.ReadVInt32(); } binaries[fieldNumber] = entry; } else if (fieldType == FST) { FSTEntry entry = new FSTEntry(); entry.Offset = meta.ReadInt64(); entry.NumOrds = meta.ReadVInt64(); fsts[fieldNumber] = entry; } else { throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta); } fieldNumber = meta.ReadVInt32(); } }
public override byte ReadByte()
{
    return _cacheDirIndexInput.ReadByte();
}
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext) { string fileName = IndexFileNames.SegmentFileName(segmentName, "", FIELD_INFOS_EXTENSION); IndexInput input = directory.OpenInput(fileName, iocontext); bool success = false; try { int format = input.ReadVInt32(); if (format > FORMAT_MINIMUM) { throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT); } if (format < FORMAT_CURRENT) { throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT); } int size = input.ReadVInt32(); //read in the size FieldInfo[] infos = new FieldInfo[size]; for (int i = 0; i < size; i++) { string name = input.ReadString(); int fieldNumber = i; byte bits = input.ReadByte(); bool isIndexed = (bits & IS_INDEXED) != 0; bool storeTermVector = (bits & STORE_TERMVECTOR) != 0; bool omitNorms = (bits & OMIT_NORMS) != 0; bool storePayloads = (bits & STORE_PAYLOADS) != 0; IndexOptions indexOptions; if (!isIndexed) { indexOptions = IndexOptions.NONE; } else if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) { indexOptions = IndexOptions.DOCS_ONLY; } else if ((bits & OMIT_POSITIONS) != 0) { if (format <= FORMAT_OMIT_POSITIONS) { indexOptions = IndexOptions.DOCS_AND_FREQS; } else { throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")"); } } else { indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } // LUCENE-3027: past indices were able to write // storePayloads=true when omitTFAP is also true, // which is invalid. We correct that, here: if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { storePayloads = false; } infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, DocValuesType.NONE, isIndexed && !omitNorms ? DocValuesType.NUMERIC : DocValuesType.NONE, Collections.EmptyMap <string, string>()); } if (input.GetFilePointer() != input.Length) { throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.GetFilePointer() + " vs size " + input.Length + " (resource: " + input + ")"); } FieldInfos fieldInfos = new FieldInfos(infos); success = true; return(fieldInfos); } finally { if (success) { input.Dispose(); } else { IOUtils.DisposeWhileHandlingException(input); } } }
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext) { string fileName = IndexFileNames.SegmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION); IndexInput input = directory.OpenInput(fileName, iocontext); try { int format = input.ReadVInt32(); if (format > FORMAT_MINIMUM) { throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT); } if (format < PreFlexRWFieldInfosWriter.FORMAT_CURRENT && format != PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW) { throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT); } int size = input.ReadVInt32(); //read in the size FieldInfo[] infos = new FieldInfo[size]; for (int i = 0; i < size; i++) { string name = input.ReadString(); int fieldNumber = format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW ? input.ReadInt32() : i; byte bits = input.ReadByte(); bool isIndexed = (bits & PreFlexRWFieldInfosWriter.IS_INDEXED) != 0; bool storeTermVector = (bits & PreFlexRWFieldInfosWriter.STORE_TERMVECTOR) != 0; bool omitNorms = (bits & PreFlexRWFieldInfosWriter.OMIT_NORMS) != 0; bool storePayloads = (bits & PreFlexRWFieldInfosWriter.STORE_PAYLOADS) != 0; IndexOptions indexOptions; if (!isIndexed) { indexOptions = IndexOptions.NONE; } else if ((bits & PreFlexRWFieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0) { indexOptions = IndexOptions.DOCS_ONLY; } else if ((bits & PreFlexRWFieldInfosWriter.OMIT_POSITIONS) != 0) { if (format <= PreFlexRWFieldInfosWriter.FORMAT_OMIT_POSITIONS) { indexOptions = IndexOptions.DOCS_AND_FREQS; } else { throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")"); } } else { indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } // LUCENE-3027: past indices were able to write // storePayloads=true when omitTFAP is also true, // which is invalid. We correct that, here: if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { storePayloads = false; } DocValuesType normType = isIndexed && !omitNorms ? DocValuesType.NUMERIC : DocValuesType.NONE; if (format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW && normType != DocValuesType.NONE) { // RW can have norms but doesn't write them normType = input.ReadByte() != 0 ? DocValuesType.NUMERIC : DocValuesType.NONE; } infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, DocValuesType.NONE, normType, null); } if (input.GetFilePointer() != input.Length) { throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.GetFilePointer() + " vs size " + input.Length + " (resource: " + input + ")"); } return(new FieldInfos(infos)); } finally { input.Dispose(); } }
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext) { string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION); IndexInput input = directory.OpenInput(fileName, iocontext); bool success = false; try { CodecUtil.CheckHeader(input, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_START, Lucene40FieldInfosFormat.FORMAT_CURRENT); int size = input.ReadVInt(); //read in the size FieldInfo[] infos = new FieldInfo[size]; for (int i = 0; i < size; i++) { string name = input.ReadString(); int fieldNumber = input.ReadVInt(); byte bits = input.ReadByte(); bool isIndexed = (bits & Lucene40FieldInfosFormat.IS_INDEXED) != 0; bool storeTermVector = (bits & Lucene40FieldInfosFormat.STORE_TERMVECTOR) != 0; bool omitNorms = (bits & Lucene40FieldInfosFormat.OMIT_NORMS) != 0; bool storePayloads = (bits & Lucene40FieldInfosFormat.STORE_PAYLOADS) != 0; FieldInfo.IndexOptions indexOptions; if (!isIndexed) { indexOptions = default(FieldInfo.IndexOptions); } else if ((bits & Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) { indexOptions = FieldInfo.IndexOptions.DOCS_ONLY; } else if ((bits & Lucene40FieldInfosFormat.OMIT_POSITIONS) != 0) { indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; } else if ((bits & Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) { indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } else { indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } // LUCENE-3027: past indices were able to write // storePayloads=true when omitTFAP is also true, // which is invalid. We correct that, here: if (isIndexed && indexOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { storePayloads = false; } // DV Types are packed in one byte byte val = input.ReadByte(); LegacyDocValuesType oldValuesType = GetDocValuesType((sbyte)(val & 0x0F)); LegacyDocValuesType oldNormsType = GetDocValuesType((sbyte)(((int)((uint)val >> 4)) & 0x0F)); IDictionary <string, string> attributes = input.ReadStringStringMap(); if (oldValuesType.Mapping != null) { attributes[LEGACY_DV_TYPE_KEY] = oldValuesType.Name; } if (oldNormsType.Mapping != null) { if (oldNormsType.Mapping != FieldInfo.DocValuesType_e.NUMERIC) { throw new CorruptIndexException("invalid norm type: " + oldNormsType + " (resource=" + input + ")"); } attributes[LEGACY_NORM_TYPE_KEY] = oldNormsType.Name; } infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, oldValuesType.Mapping, oldNormsType.Mapping, attributes); } CodecUtil.CheckEOF(input); FieldInfos fieldInfos = new FieldInfos(infos); success = true; return(fieldInfos); } finally { if (success) { input.Dispose(); } else { IOUtils.CloseWhileHandlingException(input); } } }
/// <summary> /// reads from legacy 3.x segments_N </summary> private SegmentCommitInfo ReadLegacySegmentInfo(Directory dir, int format, IndexInput input) { // check that it is a format we can understand if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS) { throw new IndexFormatTooOldException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); } if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1) { throw new IndexFormatTooNewException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); } string version; if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1) { version = input.ReadString(); } else { version = null; } string name = input.ReadString(); int docCount = input.ReadInt(); long delGen = input.ReadLong(); int docStoreOffset = input.ReadInt(); IDictionary <string, string> attributes = new Dictionary <string, string>(); // parse the docstore stuff and shove it into attributes string docStoreSegment; bool docStoreIsCompoundFile; if (docStoreOffset != -1) { docStoreSegment = input.ReadString(); docStoreIsCompoundFile = input.ReadByte() == SegmentInfo.YES; attributes[Lucene3xSegmentInfoFormat.DS_OFFSET_KEY] = Convert.ToString(docStoreOffset); attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = docStoreSegment; attributes[Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY] = Convert.ToString(docStoreIsCompoundFile); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } // pre-4.0 indexes write a byte if there is a single norms file byte b = input.ReadByte(); //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format); Debug.Assert(1 == b, "expected 1 but was: " + b + " format: " + format); int numNormGen = input.ReadInt(); IDictionary <int, long> normGen; if (numNormGen == SegmentInfo.NO) { normGen = null; } else { normGen = new Dictionary <int, long>(); for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadLong(); } } bool isCompoundFile = input.ReadByte() == SegmentInfo.YES; int delCount = input.ReadInt(); Debug.Assert(delCount <= docCount); bool hasProx = input.ReadByte() == 1; IDictionary <string, string> diagnostics = input.ReadStringStringMap(); if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS) { // NOTE: unused int hasVectors = input.ReadByte(); } // Replicate logic from 3.x's SegmentInfo.files(): ISet <string> files = new HashSet <string>(); if (isCompoundFile) { files.Add(IndexFileNames.SegmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); } else { AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION)); } if (docStoreOffset != -1) { if (docStoreIsCompoundFile) { files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION)); } else { 
files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); } } else if (!isCompoundFile) { files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); } // parse the normgen stuff and shove it into attributes if (normGen != null) { attributes[Lucene3xSegmentInfoFormat.NORMGEN_KEY] = Convert.ToString(numNormGen); foreach (KeyValuePair <int, long> ent in normGen) { long gen = ent.Value; if (gen >= SegmentInfo.YES) { // Definitely a separate norm file, with generation: files.Add(IndexFileNames.FileNameFromGeneration(name, "s" + ent.Key, gen)); attributes[Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.Key] = Convert.ToString(gen); } else if (gen == SegmentInfo.NO) { // No separate norm } else { // We should have already hit indexformat too old exception Debug.Assert(false); } } } SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, CollectionsHelper.UnmodifiableMap(attributes)); info.Files = files; SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, delCount, delGen, -1); return(infoPerCommit); }