internal static BinaryEntry ReadBinaryEntry(IndexInput meta)
{
    BinaryEntry entry = new BinaryEntry();
    entry.format = meta.ReadVInt32();
    entry.missingOffset = meta.ReadInt64();
    entry.minLength = meta.ReadVInt32();
    entry.maxLength = meta.ReadVInt32();
    entry.Count = meta.ReadVInt64();
    entry.offset = meta.ReadInt64();
    switch (entry.format)
    {
        case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
            break;

        case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
            entry.AddressInterval = meta.ReadVInt32();
            entry.AddressesOffset = meta.ReadInt64();
            entry.PackedInt32sVersion = meta.ReadVInt32();
            entry.BlockSize = meta.ReadVInt32();
            break;

        case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
            entry.AddressesOffset = meta.ReadInt64();
            entry.PackedInt32sVersion = meta.ReadVInt32();
            entry.BlockSize = meta.ReadVInt32();
            break;

        default:
            throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
    }
    return entry;
}
/// <summary> /// NOTE: This was loadVarIntsField() in Lucene. /// </summary> private NumericDocValues LoadVarInt32sField(/*FieldInfo field, // LUCENENET: Never read */ IndexInput input) { CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_START, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT); var header = (sbyte)input.ReadByte(); if (header == Lucene40DocValuesFormat.VAR_INTS_FIXED_64) { int maxDoc = state.SegmentInfo.DocCount; var values = new long[maxDoc]; for (int i = 0; i < values.Length; i++) { values[i] = input.ReadInt64(); } ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values)); return(new NumericDocValuesAnonymousClass(values)); } else if (header == Lucene40DocValuesFormat.VAR_INTS_PACKED) { long minValue = input.ReadInt64(); long defaultValue = input.ReadInt64(); PackedInt32s.Reader reader = PackedInt32s.GetReader(input); ramBytesUsed.AddAndGet(reader.RamBytesUsed()); return(new NumericDocValuesAnonymousClass2(minValue, defaultValue, reader)); } else { throw new CorruptIndexException("invalid VAR_INTS header byte: " + header + " (resource=" + input + ")"); } }
public override void VisitDocument(int n, StoredFieldVisitor visitor)
{
    SeekIndex(n);
    fieldsStream.Seek(indexStream.ReadInt64());

    int numFields = fieldsStream.ReadVInt32();
    for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++)
    {
        int fieldNumber = fieldsStream.ReadVInt32();
        FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

        int bits = fieldsStream.ReadByte() & 0xFF;
        Debug.Assert(bits <= (Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK | Lucene40StoredFieldsWriter.FIELD_IS_BINARY), "bits=" + bits.ToString("x"));

        switch (visitor.NeedsField(fieldInfo))
        {
            case StoredFieldVisitor.Status.YES:
                ReadField(visitor, fieldInfo, bits);
                break;

            case StoredFieldVisitor.Status.NO:
                SkipField(bits);
                break;

            case StoredFieldVisitor.Status.STOP:
                return;
        }
    }
}
/// <summary> /// Retrieve the length (in bytes) of the tvd and tvf /// entries for the next <paramref name="numDocs"/> starting with /// <paramref name="startDocID"/>. This is used for bulk copying when /// merging segments, if the field numbers are /// congruent. Once this returns, the tvf & tvd streams /// are seeked to the <paramref name="startDocID"/>. /// </summary> internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs) { if (tvx == null) { Arrays.Fill(tvdLengths, 0); Arrays.Fill(tvfLengths, 0); return; } SeekTvx(startDocID); long tvdPosition = tvx.ReadInt64(); tvd.Seek(tvdPosition); long tvfPosition = tvx.ReadInt64(); tvf.Seek(tvfPosition); long lastTvdPosition = tvdPosition; long lastTvfPosition = tvfPosition; int count = 0; while (count < numDocs) { int docID = startDocID + count + 1; if (Debugging.AssertsEnabled) { Debugging.Assert(docID <= numTotalDocs); } if (docID < numTotalDocs) { tvdPosition = tvx.ReadInt64(); tvfPosition = tvx.ReadInt64(); } else { tvdPosition = tvd.Length; tvfPosition = tvf.Length; if (Debugging.AssertsEnabled) { Debugging.Assert(count == numDocs - 1); } } tvdLengths[count] = (int)(tvdPosition - lastTvdPosition); tvfLengths[count] = (int)(tvfPosition - lastTvfPosition); count++; lastTvdPosition = tvdPosition; lastTvfPosition = tvfPosition; } }
private void SeekDir(IndexInput input, long dirOffset)
{
    if (version >= BlockTermsWriter.VERSION_CHECKSUM)
    {
        input.Seek(input.Length - CodecUtil.FooterLength() - 8);
        dirOffset = input.ReadInt64();
    }
    else if (version >= BlockTermsWriter.VERSION_APPEND_ONLY)
    {
        input.Seek(input.Length - 8);
        dirOffset = input.ReadInt64();
    }
    input.Seek(dirOffset);
}
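// Tail layout assumed by SeekDir above (a sketch; sizes in bytes):
//
//   VERSION_APPEND_ONLY ..< VERSION_CHECKSUM:
//     [ ... terms dict data ... ][ dirOffset : 8 ]
//
//   VERSION_CHECKSUM and later:
//     [ ... terms dict data ... ][ dirOffset : 8 ][ footer : CodecUtil.FooterLength() ]
//
// In both cases the stored dirOffset overwrites the caller-supplied value; for versions
// older than VERSION_APPEND_ONLY the dirOffset passed in (read from the header) is used as-is.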
public override sealed void VisitDocument(int n, StoredFieldVisitor visitor)
{
    SeekIndex(n);
    fieldsStream.Seek(indexStream.ReadInt64());

    int numFields = fieldsStream.ReadVInt32();
    for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++)
    {
        int fieldNumber = fieldsStream.ReadVInt32();
        FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

        int bits = fieldsStream.ReadByte() & 0xFF;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY), "bits={0:x}", bits);
        }

        switch (visitor.NeedsField(fieldInfo))
        {
            case StoredFieldVisitor.Status.YES:
                ReadField(visitor, fieldInfo, bits);
                break;

            case StoredFieldVisitor.Status.NO:
                SkipField(bits);
                break;

            case StoredFieldVisitor.Status.STOP:
                return;
        }
    }
}
protected override void SeekDir(IndexInput input, long dirOffset)
{
    // The directory offset is stored as the last 8 bytes (one long) of the file.
    input.Seek(input.Length - sizeof(long));
    long offset = input.ReadInt64();
    input.Seek(offset);
}
public static void ReadLegacyInfos(SegmentInfos infos, Directory directory, IndexInput input, int format)
{
    infos.Version = input.ReadInt64(); // read version
    infos.Counter = input.ReadInt32(); // read counter
    Lucene3xSegmentInfoReader reader = new Lucene3xSegmentInfoReader();
    for (int i = input.ReadInt32(); i > 0; i--) // read segmentInfos
    {
        SegmentCommitInfo siPerCommit = reader.ReadLegacySegmentInfo(directory, format, input);
        SegmentInfo si = siPerCommit.Info;

        if (si.Version == null)
        {
            // Could be a 3.0 - try to open the doc stores - if it fails, it's a
            // 2.x segment, and an IndexFormatTooOldException will be thrown,
            // which is what we want.
            Directory dir = directory;
            if (Lucene3xSegmentInfoFormat.GetDocStoreOffset(si) != -1)
            {
                if (Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
                {
                    dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(Lucene3xSegmentInfoFormat.GetDocStoreSegment(si), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), IOContext.READ_ONCE, false);
                }
            }
            else if (si.UseCompoundFile)
            {
                dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READ_ONCE, false);
            }

            try
            {
                Lucene3xStoredFieldsReader.CheckCodeVersion(dir, Lucene3xSegmentInfoFormat.GetDocStoreSegment(si));
            }
            finally
            {
                // If we opened the directory, close it
                if (dir != directory)
                {
                    dir.Dispose();
                }
            }

            // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next
            // time the segment is read, its version won't be null and we won't
            // need to open FieldsReader every time for each such segment.
            si.Version = "3.0";
        }
        else if (si.Version.Equals("2.x", StringComparison.Ordinal))
        {
            // If it's a 3x index touched by 3.1+ code, then segments record their
            // version, whether they are 2.x ones or not. We detect that and throw
            // appropriate exception.
            throw new IndexFormatTooOldException("segment " + si.Name + " in resource " + input, si.Version);
        }
        infos.Add(siPerCommit);
    }

    infos.UserData = input.ReadStringStringMap();
}
private void ReadField(StoredFieldVisitor visitor, FieldInfo info, int bits)
{
    int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK;
    if (numeric != 0)
    {
        switch (numeric)
        {
            case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_INT:
                visitor.Int32Field(info, fieldsStream.ReadInt32());
                return;

            case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_LONG:
                visitor.Int64Field(info, fieldsStream.ReadInt64());
                return;

            case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_FLOAT:
                visitor.SingleField(info, J2N.BitConversion.Int32BitsToSingle(fieldsStream.ReadInt32()));
                return;

            case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
                visitor.DoubleField(info, J2N.BitConversion.Int64BitsToDouble(fieldsStream.ReadInt64()));
                return;

            default:
                throw new CorruptIndexException("Invalid numeric type: " + numeric.ToString("x"));
        }
    }
    else
    {
        int length = fieldsStream.ReadVInt32();
        var bytes = new byte[length];
        fieldsStream.ReadBytes(bytes, 0, length);
        if ((bits & Lucene40StoredFieldsWriter.FIELD_IS_BINARY) != 0)
        {
            visitor.BinaryField(info, bytes);
        }
        else
        {
#pragma warning disable 612, 618
            visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(bytes));
#pragma warning restore 612, 618
        }
    }
}
private NumericEntry ReadNumericEntry(IndexInput meta)
{
    var entry = new NumericEntry
    {
        offset = meta.ReadInt64(),
        count = meta.ReadInt32(),
        missingOffset = meta.ReadInt64()
    };
    if (entry.missingOffset != -1)
    {
        entry.missingBytes = meta.ReadInt64();
    }
    else
    {
        entry.missingBytes = 0;
    }
    entry.byteWidth = meta.ReadByte();

    return entry;
}
private static NumericEntry ReadNumericEntry(IndexInput meta) // LUCENENET: CA1822: Mark members as static
{
    var entry = new NumericEntry
    {
        offset = meta.ReadInt64(),
        count = meta.ReadInt32(),
        missingOffset = meta.ReadInt64()
    };
    if (entry.missingOffset != -1)
    {
        entry.missingBytes = meta.ReadInt64();
    }
    else
    {
        entry.missingBytes = 0;
    }
    entry.byteWidth = meta.ReadByte();

    return entry;
}
private static BinaryEntry ReadBinaryEntry(IndexInput meta) // LUCENENET: CA1822: Mark members as static
{
    var entry = new BinaryEntry();
    entry.offset = meta.ReadInt64();
    entry.numBytes = meta.ReadInt32();
    entry.count = meta.ReadInt32();
    entry.missingOffset = meta.ReadInt64();
    if (entry.missingOffset != -1)
    {
        entry.missingBytes = meta.ReadInt64();
    }
    else
    {
        entry.missingBytes = 0;
    }
    return entry;
}
private BinaryEntry ReadBinaryEntry(IndexInput meta)
{
    var entry = new BinaryEntry();
    entry.offset = meta.ReadInt64();
    entry.numBytes = meta.ReadInt32();
    entry.count = meta.ReadInt32();
    entry.missingOffset = meta.ReadInt64();
    if (entry.missingOffset != -1)
    {
        entry.missingBytes = meta.ReadInt64();
    }
    else
    {
        entry.missingBytes = 0;
    }
    return entry;
}
internal static NumericEntry ReadNumericEntry(IndexInput meta)
{
    NumericEntry entry = new NumericEntry();
    entry.format = meta.ReadVInt32();
    entry.missingOffset = meta.ReadInt64();
    entry.PackedInt32sVersion = meta.ReadVInt32();
    entry.Offset = meta.ReadInt64();
    entry.Count = meta.ReadVInt64();
    entry.BlockSize = meta.ReadVInt32();
    switch (entry.format)
    {
        case Lucene45DocValuesConsumer.GCD_COMPRESSED:
            entry.minValue = meta.ReadInt64();
            entry.gcd = meta.ReadInt64();
            break;

        case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
            if (entry.Count > int.MaxValue)
            {
                throw new CorruptIndexException("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
            }
            int uniqueValues = meta.ReadVInt32();
            if (uniqueValues > 256)
            {
                throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
            }
            entry.table = new long[uniqueValues];
            for (int i = 0; i < uniqueValues; ++i)
            {
                entry.table[i] = meta.ReadInt64();
            }
            break;

        case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
            break;

        default:
            throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
    }
    return entry;
}
private int ReadHeader(IndexInput input)
{
    int version = CodecUtil.CheckHeader(input, VariableGapTermsIndexWriter.CODEC_NAME, VariableGapTermsIndexWriter.VERSION_START, VariableGapTermsIndexWriter.VERSION_CURRENT);
    if (version < VariableGapTermsIndexWriter.VERSION_APPEND_ONLY)
    {
        dirOffset = input.ReadInt64();
    }
    return version;
}
private void SeekDir(IndexInput @in)
{
    if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM)
    {
        @in.Seek(@in.Length - CodecUtil.FooterLength() - 8);
    }
    else
    {
        @in.Seek(@in.Length - 8);
    }
    @in.Seek(@in.ReadInt64());
}
public SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
{
    input = i;
    fieldInfos = fis;
    isIndex = isi;
    maxSkipLevels = 1; // use single-level skip lists for formats > -3

    int firstInt = input.ReadInt32();
    if (firstInt >= 0)
    {
        // original-format file, without explicit format version number
        format = 0;
        size = firstInt;

        // back-compatible settings
        indexInterval = 128;
        skipInterval = int.MaxValue; // switch off skipTo optimization
    }
    else
    {
        // we have a format version number
        format = firstInt;

        // check that it is a format we can understand
        if (format > FORMAT_MINIMUM)
        {
            throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
        }
        if (format < FORMAT_CURRENT)
        {
            throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
        }

        size = input.ReadInt64(); // read the size

        indexInterval = input.ReadInt32();
        skipInterval = input.ReadInt32();
        maxSkipLevels = input.ReadInt32();
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(indexInterval > 0, "indexInterval={0} is negative; must be > 0", indexInterval);
            Debugging.Assert(skipInterval > 0, "skipInterval={0} is negative; must be > 0", skipInterval);
        }
    }
}
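// The first Int32 disambiguates the two legacy layouts accepted above (a sketch derived
// from the reads performed by this constructor):
//
//   firstInt >= 0 (pre-versioned file):
//     [ size : Int32 ][ terms ... ]          // firstInt itself is the term count
//
//   firstInt < 0 (versioned file; firstInt is the format):
//     [ format : Int32 ][ size : Int64 ]
//     [ indexInterval : Int32 ][ skipInterval : Int32 ][ maxSkipLevels : Int32 ]
//     [ terms ... ]
//
// Format numbers grow more negative over time, so format > FORMAT_MINIMUM means the
// file is older than the oldest supported format.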
public override long Get(int index)
{
    int blockOffset = index / valuesPerBlock;
    long skip = ((long)blockOffset) << 3;
    try
    {
        @in.Seek(startPointer + skip);

        long block = @in.ReadInt64();
        int offsetInBlock = index % valuesPerBlock;
        return (block.TripleShift(offsetInBlock * m_bitsPerValue)) & mask;
    }
    catch (Exception e) when (e.IsIOException())
    {
        throw IllegalStateException.Create("failed", e);
    }
}
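// Worked example for the block arithmetic above (hypothetical values): with
// m_bitsPerValue = 21, each 64-bit block holds valuesPerBlock = 64 / 21 = 3 values.
// For index = 7:
//   blockOffset   = 7 / 3 = 2    -> the third block
//   skip          = 2 << 3 = 16  -> 16 bytes past startPointer
//   offsetInBlock = 7 % 3 = 1    -> the second value within that block
// The block is shifted right by 1 * 21 bits (unsigned) and masked with
// mask = (1L << 21) - 1 to extract the value.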
public override long Get(int index)
{
    int blockOffset = index / valuesPerBlock;
    long skip = ((long)blockOffset) << 3;
    try
    {
        @in.Seek(startPointer + skip);

        long block = @in.ReadInt64();
        int offsetInBlock = index % valuesPerBlock;
        return ((long)((ulong)block >> (offsetInBlock * m_bitsPerValue))) & mask;
    }
    catch (IOException e)
    {
        throw new InvalidOperationException("failed", e);
    }
}
private SortedDocValues LoadBytesVarSorted(/*FieldInfo field, // LUCENENET: Never read */ IndexInput data, IndexInput index)
{
    CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
    CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

    long maxAddress = index.ReadInt64();
    PagedBytes bytes = new PagedBytes(16);
    bytes.Copy(data, maxAddress);
    PagedBytes.Reader bytesReader = bytes.Freeze(true);
    PackedInt32s.Reader addressReader = PackedInt32s.GetReader(index);
    PackedInt32s.Reader ordsReader = PackedInt32s.GetReader(index);

    int valueCount = addressReader.Count - 1;
    ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + addressReader.RamBytesUsed() + ordsReader.RamBytesUsed());

    return CorrectBuggyOrds(new SortedDocValuesAnonymousClass2(bytesReader, addressReader, ordsReader, valueCount));
}
private NumericDocValues LoadDoubleField(/*FieldInfo field, // LUCENENET: Never read */ IndexInput input)
{
    CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.FLOATS_CODEC_NAME, Lucene40DocValuesFormat.FLOATS_VERSION_START, Lucene40DocValuesFormat.FLOATS_VERSION_CURRENT);
    int valueSize = input.ReadInt32();
    if (valueSize != 8)
    {
        throw new CorruptIndexException("invalid valueSize: " + valueSize);
    }
    int maxDoc = state.SegmentInfo.DocCount;
    long[] values = new long[maxDoc];
    for (int i = 0; i < values.Length; i++)
    {
        values[i] = input.ReadInt64();
    }
    ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
    return new NumericDocValuesAnonymousClass8(values);
}
private BinaryDocValues LoadBytesVarDeref(FieldInfo field)
{
    string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat");
    string indexName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "idx");

    IndexInput data = null;
    IndexInput index = null;
    bool success = false;
    try
    {
        data = dir.OpenInput(dataName, state.Context);
        CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

        index = dir.OpenInput(indexName, state.Context);
        CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

        long totalBytes = index.ReadInt64();
        PagedBytes bytes = new PagedBytes(16);
        bytes.Copy(data, totalBytes);
        PagedBytes.Reader bytesReader = bytes.Freeze(true);
        PackedInt32s.Reader reader = PackedInt32s.GetReader(index);
        CodecUtil.CheckEOF(data);
        CodecUtil.CheckEOF(index);
        ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed());
        success = true;
        return new BinaryDocValuesAnonymousClass4(bytesReader, reader);
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(data, index);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(data, index);
        }
    }
}
public override FieldsProducer FieldsProducer(SegmentReadState state)
{
    string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
    IndexInput @in = state.Directory.OpenInput(seedFileName, state.Context);
    long seed = @in.ReadInt64();
    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: reading from seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
    }
    @in.Dispose();

    Random random = new J2N.Randomizer(seed);

    int readBufferSize = TestUtil.NextInt32(random, 1, 4096);
    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: readBufferSize=" + readBufferSize);
    }

    PostingsReaderBase postingsReader;
    if (random.NextBoolean())
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading Sep postings");
        }
        postingsReader = new SepPostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, new MockInt32StreamFactory(random), state.SegmentSuffix);
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading Standard postings");
        }
        postingsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
    }

    if (random.NextBoolean())
    {
        int totTFCutoff = TestUtil.NextInt32(random, 1, 20);
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
        }
        postingsReader = new PulsingPostingsReader(state, postingsReader);
    }

    FieldsProducer fields;
    int t1 = random.Next(4);

    if (t1 == 0)
    {
        bool success = false;
        try
        {
            fields = new FSTTermsReader(state, postingsReader);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else if (t1 == 1)
    {
        bool success = false;
        try
        {
            fields = new FSTOrdTermsReader(state, postingsReader);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else if (t1 == 2)
    {
        // Use BlockTree terms dict
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading BlockTree terms dict");
        }
        bool success = false;
        try
        {
            fields = new BlockTreeTermsReader(state.Directory, state.FieldInfos, state.SegmentInfo, postingsReader, state.Context, state.SegmentSuffix, state.TermsIndexDivisor);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading Block terms dict");
        }
        TermsIndexReaderBase indexReader;
        bool success = false;
        try
        {
            bool doFixedGap = random.NextBoolean();

            // randomness diverges from writer, here:
            // if termsIndexDivisor is set to -1, we should not touch it. It means a
            // test explicitly instructed not to load the terms index.
            if (state.TermsIndexDivisor != -1)
            {
                state.TermsIndexDivisor = TestUtil.NextInt32(random, 1, 10);
            }

            if (doFixedGap)
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: fixed-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
                }
                indexReader = new FixedGapTermsIndexReader(state.Directory, state.FieldInfos, state.SegmentInfo.Name, state.TermsIndexDivisor, BytesRef.UTF8SortedAsUnicodeComparer, state.SegmentSuffix, state.Context);
            }
            else
            {
                int n2 = random.Next(3);
                if (n2 == 1)
                {
                    random.Next();
                }
                else if (n2 == 2)
                {
                    random.NextInt64();
                }
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: variable-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
                }
                indexReader = new VariableGapTermsIndexReader(state.Directory, state.FieldInfos, state.SegmentInfo.Name, state.TermsIndexDivisor, state.SegmentSuffix, state.Context);
            }

            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }

        success = false;
        try
        {
            fields = new BlockTermsReader(indexReader, state.Directory, state.FieldInfos, state.SegmentInfo, postingsReader, state.Context, state.SegmentSuffix);
            success = true;
        }
        finally
        {
            if (!success)
            {
                try
                {
                    postingsReader.Dispose();
                }
                finally
                {
                    indexReader.Dispose();
                }
            }
        }
    }

    return fields;
}
private void ReadFields(IndexInput meta, FieldInfos infos)
{
    int fieldNumber = meta.ReadVInt32();
    while (fieldNumber != -1)
    {
        int fieldType = meta.ReadByte();
        if (fieldType == NUMBER)
        {
            var entry = new NumericEntry
            {
                offset = meta.ReadInt64(),
                missingOffset = meta.ReadInt64()
            };
            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadInt64();
            }
            else
            {
                entry.missingBytes = 0;
            }
            entry.format = meta.ReadByte();
            switch (entry.format)
            {
                case DELTA_COMPRESSED:
                case TABLE_COMPRESSED:
                case GCD_COMPRESSED:
                case UNCOMPRESSED:
                    break;

                default:
                    throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
            }
            if (entry.format != UNCOMPRESSED)
            {
                entry.packedIntsVersion = meta.ReadVInt32();
            }
            numerics[fieldNumber] = entry;
        }
        else if (fieldType == BYTES)
        {
            var entry = new BinaryEntry
            {
                offset = meta.ReadInt64(),
                numBytes = meta.ReadInt64(),
                missingOffset = meta.ReadInt64()
            };
            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadInt64();
            }
            else
            {
                entry.missingBytes = 0;
            }
            entry.minLength = meta.ReadVInt32();
            entry.maxLength = meta.ReadVInt32();
            if (entry.minLength != entry.maxLength)
            {
                entry.packedIntsVersion = meta.ReadVInt32();
                entry.blockSize = meta.ReadVInt32();
            }
            binaries[fieldNumber] = entry;
        }
        else if (fieldType == FST)
        {
            var entry = new FSTEntry
            {
                offset = meta.ReadInt64(),
                numOrds = meta.ReadVInt64()
            };
            fsts[fieldNumber] = entry;
        }
        else
        {
            throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
        }
        fieldNumber = meta.ReadVInt32();
    }
}
/// <summary> /// reads from legacy 3.x segments_N </summary> private SegmentCommitInfo ReadLegacySegmentInfo(Directory dir, int format, IndexInput input) { // check that it is a format we can understand if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS) { throw new IndexFormatTooOldException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); } if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1) { throw new IndexFormatTooNewException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); } string version; if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1) { version = input.ReadString(); } else { version = null; } string name = input.ReadString(); int docCount = input.ReadInt32(); long delGen = input.ReadInt64(); int docStoreOffset = input.ReadInt32(); IDictionary <string, string> attributes = new Dictionary <string, string>(); // parse the docstore stuff and shove it into attributes string docStoreSegment; bool docStoreIsCompoundFile; if (docStoreOffset != -1) { docStoreSegment = input.ReadString(); docStoreIsCompoundFile = input.ReadByte() == SegmentInfo.YES; attributes[Lucene3xSegmentInfoFormat.DS_OFFSET_KEY] = Convert.ToString(docStoreOffset, CultureInfo.InvariantCulture); attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = docStoreSegment; attributes[Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY] = Convert.ToString(docStoreIsCompoundFile, CultureInfo.InvariantCulture); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } // pre-4.0 indexes write a byte if there is a single norms file byte b = input.ReadByte(); //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format); Debug.Assert(1 == b, "expected 1 but was: " + b + " format: " + format); int numNormGen = input.ReadInt32(); IDictionary <int, long> normGen; if (numNormGen == SegmentInfo.NO) { normGen = null; } else { normGen = new Dictionary <int, long>(); for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadInt64(); } } bool isCompoundFile = input.ReadByte() == SegmentInfo.YES; int delCount = input.ReadInt32(); Debug.Assert(delCount <= docCount); bool hasProx = input.ReadByte() == 1; IDictionary <string, string> diagnostics = input.ReadStringStringMap(); if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS) { // NOTE: unused int hasVectors = input.ReadByte(); } // Replicate logic from 3.x's SegmentInfo.files(): ISet <string> files = new HashSet <string>(); if (isCompoundFile) { files.Add(IndexFileNames.SegmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); } else { AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION)); } if (docStoreOffset != -1) { if (docStoreIsCompoundFile) { files.Add(IndexFileNames.SegmentFileName(docStoreSegment, 
"", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION)); } else { files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); } } else if (!isCompoundFile) { files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); } // parse the normgen stuff and shove it into attributes if (normGen != null) { attributes[Lucene3xSegmentInfoFormat.NORMGEN_KEY] = Convert.ToString(numNormGen, CultureInfo.InvariantCulture); foreach (KeyValuePair <int, long> ent in normGen) { long gen = ent.Value; if (gen >= SegmentInfo.YES) { // Definitely a separate norm file, with generation: files.Add(IndexFileNames.FileNameFromGeneration(name, "s" + ent.Key, gen)); attributes[Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.Key] = Convert.ToString(gen, CultureInfo.InvariantCulture); } else if (gen == SegmentInfo.NO) { // No separate norm } else { // We should have already hit indexformat too old exception Debug.Assert(false); } } } SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, Collections.UnmodifiableMap(attributes)); info.SetFiles(files); SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, delCount, delGen, -1); return(infoPerCommit); }
/// <summary> /// Returns (but does not validate) the checksum previously written by <see cref="CheckFooter(ChecksumIndexInput)"/>. </summary> /// <returns> actual checksum value </returns> /// <exception cref="IOException"> If the footer is invalid. </exception> public static long RetrieveChecksum(IndexInput @in) { @in.Seek(@in.Length - FooterLength()); ValidateFooter(@in); return(@in.ReadInt64()); }
public override long Get(int index)
{
    long majorBitPos = (long)index * m_bitsPerValue;
    long elementPos = (long)((ulong)majorBitPos >> 3);
    try
    {
        @in.Seek(startPointer + elementPos);

        int bitPos = (int)(majorBitPos & 7);
        // round up bits to a multiple of 8 to find total bytes needed to read
        int roundedBits = ((bitPos + m_bitsPerValue + 7) & ~7);
        // the number of extra bits read at the end to shift out
        int shiftRightBits = roundedBits - bitPos - m_bitsPerValue;

        long rawValue;
        switch ((int)((uint)roundedBits >> 3))
        {
            case 1:
                rawValue = @in.ReadByte();
                break;

            case 2:
                rawValue = @in.ReadInt16();
                break;

            case 3:
                rawValue = ((long)@in.ReadInt16() << 8) | (@in.ReadByte() & 0xFFL);
                break;

            case 4:
                rawValue = @in.ReadInt32();
                break;

            case 5:
                rawValue = ((long)@in.ReadInt32() << 8) | (@in.ReadByte() & 0xFFL);
                break;

            case 6:
                rawValue = ((long)@in.ReadInt32() << 16) | (@in.ReadInt16() & 0xFFFFL);
                break;

            case 7:
                rawValue = ((long)@in.ReadInt32() << 24) | ((@in.ReadInt16() & 0xFFFFL) << 8) | (@in.ReadByte() & 0xFFL);
                break;

            case 8:
                rawValue = @in.ReadInt64();
                break;

            case 9:
                // We must be very careful not to shift out relevant bits. So we account for right shift
                // we would normally do on return here, and reset it.
                rawValue = (@in.ReadInt64() << (8 - shiftRightBits)) | (((uint)(@in.ReadByte() & 0xFFL) >> shiftRightBits));
                shiftRightBits = 0;
                break;

            default:
                throw new InvalidOperationException("bitsPerValue too large: " + m_bitsPerValue);
        }
        return ((long)((ulong)rawValue >> shiftRightBits)) & valueMask;
    }
    catch (IOException ioe)
    {
        throw new InvalidOperationException("failed", ioe);
    }
}
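// Worked example for the unaligned read above (hypothetical values): with
// m_bitsPerValue = 5 and index = 3:
//   majorBitPos    = 3 * 5 = 15              -> value starts at absolute bit 15
//   elementPos     = 15 >>> 3 = 1            -> seek to byte 1
//   bitPos         = 15 & 7 = 7              -> 7 bits into that byte
//   roundedBits    = (7 + 5 + 7) & ~7 = 16   -> read 2 bytes (case 2: ReadInt16)
//   shiftRightBits = 16 - 7 - 5 = 4          -> 4 trailing bits to discard
// The raw 16-bit read is shifted right by 4 (unsigned) and masked with
// valueMask = (1L << 5) - 1 to recover the 5-bit value.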
private void ReadFields(IndexInput meta, FieldInfos infos)
{
    int fieldNumber = meta.ReadVInt32();
    while (fieldNumber != -1)
    {
        // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
        // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
        if (fieldNumber < 0)
        {
            // trickier to validate more: because we re-use for norms, because we use multiple entries
            // for "composite" types like sortedset, etc.
            throw new CorruptIndexException("Invalid field number: " + fieldNumber + ", input=" + meta);
        }
        int fieldType = meta.ReadByte();
        if (fieldType == NUMBER)
        {
            var entry = new NumericEntry();
            entry.Offset = meta.ReadInt64();
            entry.Format = (sbyte)meta.ReadByte();
            switch (entry.Format)
            {
                case DELTA_COMPRESSED:
                case TABLE_COMPRESSED:
                case GCD_COMPRESSED:
                case UNCOMPRESSED:
                    break;

                default:
                    throw new CorruptIndexException("Unknown format: " + entry.Format + ", input=" + meta);
            }
            if (entry.Format != UNCOMPRESSED)
            {
                entry.PackedInt32sVersion = meta.ReadVInt32();
            }
            numerics[fieldNumber] = entry;
        }
        else if (fieldType == BYTES)
        {
            BinaryEntry entry = new BinaryEntry();
            entry.Offset = meta.ReadInt64();
            entry.NumBytes = meta.ReadInt64();
            entry.MinLength = meta.ReadVInt32();
            entry.MaxLength = meta.ReadVInt32();
            if (entry.MinLength != entry.MaxLength)
            {
                entry.PackedInt32sVersion = meta.ReadVInt32();
                entry.BlockSize = meta.ReadVInt32();
            }
            binaries[fieldNumber] = entry;
        }
        else if (fieldType == FST)
        {
            FSTEntry entry = new FSTEntry();
            entry.Offset = meta.ReadInt64();
            entry.NumOrds = meta.ReadVInt64();
            fsts[fieldNumber] = entry;
        }
        else
        {
            throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
        }
        fieldNumber = meta.ReadVInt32();
    }
}
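// Metadata stream layout consumed by ReadFields above (a sketch; one record per field,
// terminated by a fieldNumber of -1):
//
//   [ fieldNumber : VInt32 ][ fieldType : byte ][ type-specific entry ... ]
//   ...
//   [ fieldNumber = -1 : VInt32 ]
//
// NUMBER entries carry offset + format (plus the packed ints version unless
// UNCOMPRESSED); BYTES entries carry offset, numBytes, and min/max lengths (plus
// addressing info when lengths vary); FST entries carry offset + numOrds.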