public override void VisitDocument(int n, StoredFieldVisitor visitor)
{
    SeekIndex(n);
    FieldsStream.Seek(IndexStream.ReadLong());

    int numFields = FieldsStream.ReadVInt();
    for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++)
    {
        int fieldNumber = FieldsStream.ReadVInt();
        FieldInfo fieldInfo = FieldInfos.FieldInfo(fieldNumber);

        int bits = FieldsStream.ReadByte() & 0xFF;
        Debug.Assert(bits <= (Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK | Lucene40StoredFieldsWriter.FIELD_IS_BINARY), "bits=" + bits.ToString("x"));

        switch (visitor.NeedsField(fieldInfo))
        {
            case StoredFieldVisitor.Status.YES:
                ReadField(visitor, fieldInfo, bits);
                break;

            case StoredFieldVisitor.Status.NO:
                SkipField(bits);
                break;

            case StoredFieldVisitor.Status.STOP:
                return;
        }
    }
}
private NumericDocValues LoadVarIntsField(FieldInfo field, IndexInput input)
{
    CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_START, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);
    byte header = input.ReadByte();
    if (header == Lucene40DocValuesFormat.VAR_INTS_FIXED_64)
    {
        // Every value is stored as a raw 64-bit long.
        int maxDoc = State.SegmentInfo.DocCount;
        long[] values = new long[maxDoc];
        for (int i = 0; i < values.Length; i++)
        {
            values[i] = input.ReadLong();
        }
        RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(values));
        return new NumericDocValuesAnonymousInnerClassHelper(this, values);
    }
    else if (header == Lucene40DocValuesFormat.VAR_INTS_PACKED)
    {
        // Values are stored as packed deltas against a shared minimum value.
        long minValue = input.ReadLong();
        long defaultValue = input.ReadLong();
        PackedInts.Reader reader = PackedInts.GetReader(input);
        RamBytesUsed_Renamed.AddAndGet(reader.RamBytesUsed());
        return new NumericDocValuesAnonymousInnerClassHelper2(this, minValue, defaultValue, reader);
    }
    else
    {
        throw new CorruptIndexException("invalid VAR_INTS header byte: " + header + " (resource=" + input + ")");
    }
}
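// Note: the VAR_INTS_PACKED branch above reconstructs each value as the stored
// minimum plus a per-document delta read from a PackedInts.Reader. A minimal
// standalone sketch of that decoding idea (the PackedInts reader is stood in
// by a plain long[], and the defaultValue handling is omitted):
static long[] DecodePackedDeltas(long minValue, long[] deltas)
{
    var values = new long[deltas.Length];
    for (int doc = 0; doc < deltas.Length; doc++)
    {
        // Each stored delta is a small non-negative offset from minValue.
        values[doc] = minValue + deltas[doc];
    }
    return values;
}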
internal static BinaryEntry ReadBinaryEntry(IndexInput meta)
{
    BinaryEntry entry = new BinaryEntry();
    entry.Format = meta.ReadVInt();
    entry.MissingOffset = meta.ReadLong();
    entry.MinLength = meta.ReadVInt();
    entry.MaxLength = meta.ReadVInt();
    entry.Count = meta.ReadVLong();
    entry.Offset = meta.ReadLong();
    switch (entry.Format)
    {
        case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
            break;

        case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
            entry.AddressInterval = meta.ReadVInt();
            entry.AddressesOffset = meta.ReadLong();
            entry.PackedIntsVersion = meta.ReadVInt();
            entry.BlockSize = meta.ReadVInt();
            break;

        case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
            entry.AddressesOffset = meta.ReadLong();
            entry.PackedIntsVersion = meta.ReadVInt();
            entry.BlockSize = meta.ReadVInt();
            break;

        default:
            throw new Exception("Unknown format: " + entry.Format + ", input=" + meta);
    }
    return entry;
}
/// <summary>
/// Read the segments.gen file to get the generation number.
/// </summary>
private void ReadDirectory()
{
    IndexInput genInput = null;
    try
    {
        genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN);
        if (genInput != null)
        {
            int version = genInput.ReadInt();
            if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS)
            {
                long gen0 = genInput.ReadLong();
                long gen1 = genInput.ReadLong();
                if (gen0 == gen1)
                {
                    // The two copies agree, so the file is consistent; use this generation.
                    this.generation = gen0;
                }
            }
        }
    }
    finally
    {
        // Guard against OpenInput having thrown before genInput was assigned;
        // the original unconditional Close() would throw a NullReferenceException here.
        if (genInput != null)
        {
            genInput.Close();
        }
    }
}
public /*internal*/ Document.Document Doc(int n, FieldSelector fieldSelector)
{
    SeekIndex(n);
    long position = indexStream.ReadLong();
    fieldsStream.Seek(position);

    var doc = new Document.Document();
    int numFields = fieldsStream.ReadVInt();
    for (int i = 0; i < numFields; i++)
    {
        int fieldNumber = fieldsStream.ReadVInt();
        FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
        FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);

        byte bits = fieldsStream.ReadByte();
        System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);

        bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
        System.Diagnostics.Debug.Assert(
            (!compressed || (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS)),
            "compressed fields are only allowed in indexes of version <= 2.9");
        bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
        bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;

        // TODO: Find an alternative approach here if this list continues to grow beyond the
        // list of 5 or 6 currently here. See Lucene 762 for discussion.
        if (acceptField.Equals(FieldSelectorResult.LOAD))
        {
            AddField(doc, fi, binary, compressed, tokenize);
        }
        else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
        {
            AddField(doc, fi, binary, compressed, tokenize);
            break; // Get out of this loop
        }
        else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
        {
            AddFieldLazy(doc, fi, binary, compressed, tokenize);
        }
        else if (acceptField.Equals(FieldSelectorResult.SIZE))
        {
            SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
        }
        else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
        {
            AddFieldSize(doc, fi, binary, compressed);
            break;
        }
        else
        {
            SkipField(binary, compressed);
        }
    }
    return doc;
}
/// <summary>
/// Retrieve the length (in bytes) of the tvd and tvf entries for the next
/// numDocs starting with startDocID. This is used for bulk copying when
/// merging segments, if the field numbers are congruent. Once this returns,
/// the tvf and tvd streams are positioned at the startDocID.
/// </summary>
internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
{
    if (tvx == null)
    {
        for (int i = 0; i < tvdLengths.Length; i++)
        {
            tvdLengths[i] = 0;
        }
        for (int i = 0; i < tvfLengths.Length; i++)
        {
            tvfLengths[i] = 0;
        }
        return;
    }

    // SegmentMerger calls canReadRawDocs() first and should
    // not call us if that returns false.
    if (format < FORMAT_VERSION2)
    {
        throw new System.SystemException("cannot read raw docs with older term vector formats");
    }

    SeekTvx(startDocID);

    long tvdPosition = tvx.ReadLong();
    tvd.Seek(tvdPosition);

    long tvfPosition = tvx.ReadLong();
    tvf.Seek(tvfPosition);

    long lastTvdPosition = tvdPosition;
    long lastTvfPosition = tvfPosition;

    int count = 0;
    while (count < numDocs)
    {
        int docID = docStoreOffset + startDocID + count + 1;
        System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
        if (docID < numTotalDocs)
        {
            tvdPosition = tvx.ReadLong();
            tvfPosition = tvx.ReadLong();
        }
        else
        {
            tvdPosition = tvd.Length();
            tvfPosition = tvf.Length();
            System.Diagnostics.Debug.Assert(count == numDocs - 1);
        }
        tvdLengths[count] = (int)(tvdPosition - lastTvdPosition);
        tvfLengths[count] = (int)(tvfPosition - lastTvfPosition);
        count++;
        lastTvdPosition = tvdPosition;
        lastTvfPosition = tvfPosition;
    }
}
private void SeekDir(IndexInput input, long dirOffset)
{
    if (_version >= BlockTermsWriter.VERSION_CHECKSUM)
    {
        input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
        dirOffset = input.ReadLong();
    }
    else if (_version >= BlockTermsWriter.VERSION_APPEND_ONLY)
    {
        input.Seek(input.Length() - 8);
        dirOffset = input.ReadLong();
    }
    input.Seek(dirOffset);
}
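// All of the SeekDir variants in this listing rely on the same file layout:
// the writer ends the file with the directory offset as one fixed-width
// big-endian long (optionally followed by a checksum footer), so a reader can
// seek to Length - 8, read the offset, and jump there. A self-contained
// sketch of that round trip over a MemoryStream (codec headers, versions and
// footers omitted; all names here are illustrative):
using System;
using System.IO;

static class DirOffsetSketch
{
    static void WriteLongBE(Stream s, long v)
    {
        // Big-endian byte order, mirroring Lucene's DataOutput.WriteLong.
        for (int shift = 56; shift >= 0; shift -= 8)
        {
            s.WriteByte((byte)(v >> shift));
        }
    }

    static long ReadLongBE(Stream s)
    {
        long v = 0;
        for (int i = 0; i < 8; i++)
        {
            v = (v << 8) | (long)s.ReadByte();
        }
        return v;
    }

    static void Main()
    {
        var file = new MemoryStream();
        file.Write(new byte[123], 0, 123);  // stand-in for the terms data
        long dirOffset = file.Position;     // the "directory" starts here
        file.Write(new byte[40], 0, 40);    // stand-in for the directory itself
        WriteLongBE(file, dirOffset);       // trailer: the last 8 bytes

        // Reader side, as in the append-only branch of SeekDir:
        file.Seek(file.Length - 8, SeekOrigin.Begin);
        long readOffset = ReadLongBE(file);
        file.Seek(readOffset, SeekOrigin.Begin);
        Console.WriteLine(readOffset == dirOffset); // True
    }
}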
protected override void SeekDir(IndexInput input, long dirOffset)
{
    // The directory offset is stored in the last 8 bytes of the file. Note
    // that sizeof(long) is already 8 in C#; the original "sizeof(long) / 8"
    // (a literal port of Java's Long.SIZE / 8) evaluated to 1 and only
    // backed up a single byte.
    input.Seek(input.Length() - sizeof(long));
    long offset = input.ReadLong();
    input.Seek(offset);
}
public static void ReadLegacyInfos(SegmentInfos infos, Directory directory, IndexInput input, int format)
{
    infos.Version = input.ReadLong(); // read version
    infos.Counter = input.ReadInt(); // read counter
    Lucene3xSegmentInfoReader reader = new Lucene3xSegmentInfoReader();
    for (int i = input.ReadInt(); i > 0; i--) // read segmentInfos
    {
        SegmentCommitInfo siPerCommit = reader.ReadLegacySegmentInfo(directory, format, input);
        SegmentInfo si = siPerCommit.Info;
        if (si.Version == null)
        {
            // Could be a 3.0 - try to open the doc stores - if it fails, it's a
            // 2.x segment, and an IndexFormatTooOldException will be thrown,
            // which is what we want.
            Directory dir = directory;
            if (Lucene3xSegmentInfoFormat.GetDocStoreOffset(si) != -1)
            {
                if (Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
                {
                    dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(Lucene3xSegmentInfoFormat.GetDocStoreSegment(si), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), IOContext.READONCE, false);
                }
            }
            else if (si.UseCompoundFile)
            {
                dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READONCE, false);
            }

            try
            {
                Lucene3xStoredFieldsReader.CheckCodeVersion(dir, Lucene3xSegmentInfoFormat.GetDocStoreSegment(si));
            }
            finally
            {
                // If we opened the directory, close it
                if (dir != directory)
                {
                    dir.Dispose();
                }
            }

            // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next
            // time the segment is read, its version won't be null and we won't
            // need to open FieldsReader every time for each such segment.
            si.Version = "3.0";
        }
        else if (si.Version.Equals("2.x"))
        {
            // If it's a 3x index touched by 3.1+ code, then segments record their
            // version, whether they are 2.x ones or not. We detect that and throw
            // appropriate exception.
            throw new IndexFormatTooOldException("segment " + si.Name + " in resource " + input, si.Version);
        }
        infos.Add(siPerCommit);
    }

    infos.UserData = input.ReadStringStringMap();
}
/// <summary>
/// Retrieve the length (in bytes) of the tvd and tvf entries for the next
/// numDocs starting with startDocID. This is used for bulk copying when
/// merging segments, if the field numbers are congruent. Once this returns,
/// the tvf and tvd streams are positioned at the startDocID.
/// </summary>
internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
{
    if (Tvx == null)
    {
        CollectionsHelper.Fill(tvdLengths, 0);
        CollectionsHelper.Fill(tvfLengths, 0);
        return;
    }

    SeekTvx(startDocID);

    long tvdPosition = Tvx.ReadLong();
    Tvd.Seek(tvdPosition);

    long tvfPosition = Tvx.ReadLong();
    Tvf.Seek(tvfPosition);

    long lastTvdPosition = tvdPosition;
    long lastTvfPosition = tvfPosition;

    int count = 0;
    while (count < numDocs)
    {
        int docID = startDocID + count + 1;
        Debug.Assert(docID <= NumTotalDocs);
        if (docID < NumTotalDocs)
        {
            tvdPosition = Tvx.ReadLong();
            tvfPosition = Tvx.ReadLong();
        }
        else
        {
            tvdPosition = Tvd.Length();
            tvfPosition = Tvf.Length();
            Debug.Assert(count == numDocs - 1);
        }
        tvdLengths[count] = (int)(tvdPosition - lastTvdPosition);
        tvfLengths[count] = (int)(tvfPosition - lastTvfPosition);
        count++;
        lastTvdPosition = tvdPosition;
        lastTvfPosition = tvfPosition;
    }
}
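// Both RawDocs variants above recover each document's tvd/tvf byte length by
// differencing consecutive file pointers read from the tvx index, using the
// data file's total length as the final sentinel. That core calculation,
// extracted into a standalone sketch (names are illustrative):
static int[] LengthsFromOffsets(long[] startOffsets, long fileLength)
{
    var lengths = new int[startOffsets.Length];
    for (int i = 0; i < startOffsets.Length; i++)
    {
        // The next entry's start (or the file length, for the last entry)
        // marks the end of entry i.
        long next = (i + 1 < startOffsets.Length) ? startOffsets[i + 1] : fileLength;
        lengths[i] = (int)(next - startOffsets[i]);
    }
    return lengths;
}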
private static NumericEntry ReadNumericEntry(IndexInput meta)
{
    var entry = new NumericEntry
    {
        offset = meta.ReadLong(),
        count = meta.ReadInt(),
        missingOffset = meta.ReadLong()
    };
    if (entry.missingOffset != -1)
    {
        entry.missingBytes = meta.ReadLong();
    }
    else
    {
        entry.missingBytes = 0;
    }
    entry.byteWidth = meta.ReadByte();
    return entry;
}
private BinaryEntry ReadBinaryEntry(IndexInput meta)
{
    var entry = new BinaryEntry();
    entry.offset = meta.ReadLong();
    entry.numBytes = meta.ReadInt();
    entry.count = meta.ReadInt();
    entry.missingOffset = meta.ReadLong();
    if (entry.missingOffset != -1)
    {
        entry.missingBytes = meta.ReadLong();
    }
    else
    {
        entry.missingBytes = 0;
    }
    return entry;
}
private void ReadField(StoredFieldVisitor visitor, FieldInfo info, int bits)
{
    int numeric = bits & FIELD_IS_NUMERIC_MASK;
    if (numeric != 0)
    {
        switch (numeric)
        {
            case FIELD_IS_NUMERIC_INT:
                visitor.IntField(info, FieldsStream.ReadInt());
                return;

            case FIELD_IS_NUMERIC_LONG:
                visitor.LongField(info, FieldsStream.ReadLong());
                return;

            case FIELD_IS_NUMERIC_FLOAT:
                visitor.FloatField(info, Number.IntBitsToFloat(FieldsStream.ReadInt()));
                return;

            case FIELD_IS_NUMERIC_DOUBLE:
                visitor.DoubleField(info, BitConverter.Int64BitsToDouble(FieldsStream.ReadLong()));
                return;

            default:
                throw new CorruptIndexException("Invalid numeric type: " + numeric.ToString("x"));
        }
    }
    else
    {
        int length = FieldsStream.ReadVInt();
        var bytes = new byte[length];
        FieldsStream.ReadBytes(bytes, 0, length);
        if ((bits & FIELD_IS_BINARY) != 0)
        {
            visitor.BinaryField(info, bytes);
        }
        else
        {
            visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(bytes));
        }
    }
}
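// ReadField above dispatches on a packed type byte: a mask isolates a small
// numeric-type code, and a separate flag bit distinguishes binary from string
// payloads. A sketch of that dispatch with illustrative constants -- the
// shift and the exact values are assumptions here, not the on-disk spec; the
// authoritative constants live in Lucene40StoredFieldsWriter:
static class FieldBitsSketch
{
    const int FIELD_IS_BINARY         = 1 << 1;             // assumed flag bit
    const int NUMERIC_SHIFT           = 2;                  // assumed shift
    const int FIELD_IS_NUMERIC_INT    = 1 << NUMERIC_SHIFT;
    const int FIELD_IS_NUMERIC_FLOAT  = 2 << NUMERIC_SHIFT;
    const int FIELD_IS_NUMERIC_LONG   = 3 << NUMERIC_SHIFT;
    const int FIELD_IS_NUMERIC_DOUBLE = 4 << NUMERIC_SHIFT;
    const int FIELD_IS_NUMERIC_MASK   = 0x07 << NUMERIC_SHIFT;

    public static string DescribeFieldBits(int bits)
    {
        int numeric = bits & FIELD_IS_NUMERIC_MASK;
        if (numeric != 0)
        {
            switch (numeric)
            {
                case FIELD_IS_NUMERIC_INT:    return "int";
                case FIELD_IS_NUMERIC_LONG:   return "long";
                case FIELD_IS_NUMERIC_FLOAT:  return "float";
                case FIELD_IS_NUMERIC_DOUBLE: return "double";
                default:                      return "invalid numeric type";
            }
        }
        return (bits & FIELD_IS_BINARY) != 0 ? "binary" : "string";
    }
}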
internal static NumericEntry ReadNumericEntry(IndexInput meta)
{
    NumericEntry entry = new NumericEntry();
    entry.Format = meta.ReadVInt();
    entry.MissingOffset = meta.ReadLong();
    entry.PackedIntsVersion = meta.ReadVInt();
    entry.Offset = meta.ReadLong();
    entry.Count = meta.ReadVLong();
    entry.BlockSize = meta.ReadVInt();
    switch (entry.Format)
    {
        case Lucene45DocValuesConsumer.GCD_COMPRESSED:
            entry.MinValue = meta.ReadLong();
            entry.Gcd = meta.ReadLong();
            break;

        case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
            if (entry.Count > int.MaxValue)
            {
                throw new Exception("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
            }
            int uniqueValues = meta.ReadVInt();
            if (uniqueValues > 256)
            {
                throw new Exception("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
            }
            entry.Table = new long[uniqueValues];
            for (int i = 0; i < uniqueValues; ++i)
            {
                entry.Table[i] = meta.ReadLong();
            }
            break;

        case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
            break;

        default:
            throw new Exception("Unknown format: " + entry.Format + ", input=" + meta);
    }
    return entry;
}
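// TABLE_COMPRESSED above works because the field has at most 256 distinct
// values: the metadata stores the table of distinct longs, so the data stream
// only needs a small per-document ordinal into it. Decoding then reduces to a
// table lookup, sketched here with the ordinals stood in by a byte[]:
static long LookupTableCompressed(long[] table, byte[] ordPerDoc, int docId)
{
    // table holds the <= 256 distinct values; ordPerDoc[docId] selects one.
    return table[ordPerDoc[docId]];
}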
private int ReadHeader(IndexInput input)
{
    int version = CodecUtil.CheckHeader(input, VariableGapTermsIndexWriter.CODEC_NAME, VariableGapTermsIndexWriter.VERSION_START, VariableGapTermsIndexWriter.VERSION_CURRENT);
    if (version < VariableGapTermsIndexWriter.VERSION_APPEND_ONLY)
    {
        _dirOffset = input.ReadLong();
    }
    return version;
}
public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
{
    directory = dir;
    fileName = name;
    this.readBufferSize = readBufferSize;

    bool success = false;
    try
    {
        stream = dir.OpenInput(name, readBufferSize);

        // read the directory and init files
        int count = stream.ReadVInt();
        FileEntry entry = null;
        for (int i = 0; i < count; i++)
        {
            long offset = stream.ReadLong();
            System.String id = stream.ReadString();

            if (entry != null)
            {
                // set length of the previous entry
                entry.length = offset - entry.offset;
            }

            entry = new FileEntry { offset = offset };
            entries[id] = entry;
        }

        // set the length of the final entry
        if (entry != null)
        {
            entry.length = stream.Length() - entry.offset;
        }

        success = true;
    }
    finally
    {
        if (!success && (stream != null))
        {
            try
            {
                stream.Close();
            }
            catch (System.IO.IOException)
            {
            }
        }
    }
}
private void SeekDir(IndexInput @in)
{
    if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM)
    {
        @in.Seek(@in.Length() - CodecUtil.FooterLength() - 8);
    }
    else
    {
        @in.Seek(@in.Length() - 8);
    }
    @in.Seek(@in.ReadLong());
}
public SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
{
    Input = i;
    FieldInfos = fis;
    IsIndex = isi;
    MaxSkipLevels = 1; // use single-level skip lists for formats > -3

    int firstInt = Input.ReadInt();
    if (firstInt >= 0)
    {
        // original-format file, without explicit format version number
        Format = 0;
        Size = firstInt;

        // back-compatible settings
        IndexInterval = 128;
        SkipInterval = int.MaxValue; // switch off skipTo optimization
    }
    else
    {
        // we have a format version number
        Format = firstInt;

        // check that it is a format we can understand
        if (Format > FORMAT_MINIMUM)
        {
            throw new IndexFormatTooOldException(Input, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
        }
        if (Format < FORMAT_CURRENT)
        {
            throw new IndexFormatTooNewException(Input, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
        }

        Size = Input.ReadLong(); // read the size
        IndexInterval = Input.ReadInt();
        SkipInterval = Input.ReadInt();
        MaxSkipLevels = Input.ReadInt();
        Debug.Assert(IndexInterval > 0, "indexInterval=" + IndexInterval + " is negative; must be > 0");
        Debug.Assert(SkipInterval > 0, "skipInterval=" + SkipInterval + " is negative; must be > 0");
    }
}
private NumericDocValues LoadDoubleField(FieldInfo field, IndexInput input)
{
    CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.FLOATS_CODEC_NAME, Lucene40DocValuesFormat.FLOATS_VERSION_START, Lucene40DocValuesFormat.FLOATS_VERSION_CURRENT);
    int valueSize = input.ReadInt();
    if (valueSize != 8)
    {
        throw new CorruptIndexException("invalid valueSize: " + valueSize);
    }
    int maxDoc = State.SegmentInfo.DocCount;
    long[] values = new long[maxDoc];
    for (int i = 0; i < values.Length; i++)
    {
        values[i] = input.ReadLong();
    }
    RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(values));
    return new NumericDocValuesAnonymousInnerClassHelper8(this, values);
}
private SortedDocValues LoadBytesVarSorted(FieldInfo field, IndexInput data, IndexInput index)
{
    CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
    CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

    long maxAddress = index.ReadLong();
    PagedBytes bytes = new PagedBytes(16);
    bytes.Copy(data, maxAddress);
    PagedBytes.Reader bytesReader = bytes.Freeze(true);

    PackedInts.Reader addressReader = PackedInts.GetReader(index);
    PackedInts.Reader ordsReader = PackedInts.GetReader(index);

    int valueCount = addressReader.Size() - 1;
    RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + addressReader.RamBytesUsed() + ordsReader.RamBytesUsed());

    return CorrectBuggyOrds(new SortedDocValuesAnonymousInnerClassHelper2(this, bytesReader, addressReader, ordsReader, valueCount));
}
private BinaryDocValues LoadBytesVarDeref(FieldInfo field)
{
    string dataName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat");
    string indexName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "idx");

    IndexInput data = null;
    IndexInput index = null;
    bool success = false;
    try
    {
        data = Dir.OpenInput(dataName, State.Context);
        CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);
        index = Dir.OpenInput(indexName, State.Context);
        CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

        long totalBytes = index.ReadLong();
        PagedBytes bytes = new PagedBytes(16);
        bytes.Copy(data, totalBytes);
        PagedBytes.Reader bytesReader = bytes.Freeze(true);

        PackedInts.Reader reader = PackedInts.GetReader(index);
        CodecUtil.CheckEOF(data);
        CodecUtil.CheckEOF(index);

        RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed());
        success = true;
        return new BinaryDocValuesAnonymousInnerClassHelper4(this, bytesReader, reader);
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(data, index);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(data, index);
        }
    }
}
/// <summary>
/// Seek <c>input</c> to the directory offset. </summary>
protected internal virtual void SeekDir(IndexInput input, long dirOffset)
{
    if (Version >= BlockTreeTermsWriter.VERSION_CHECKSUM)
    {
        input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
        dirOffset = input.ReadLong();
    }
    else if (Version >= BlockTreeTermsWriter.VERSION_APPEND_ONLY)
    {
        input.Seek(input.Length() - 8);
        dirOffset = input.ReadLong();
    }
    input.Seek(dirOffset);
}
/// <summary>
/// Reads from legacy 3.x segments_N. </summary>
private SegmentCommitInfo ReadLegacySegmentInfo(Directory dir, int format, IndexInput input)
{
    // check that it is a format we can understand
    if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS)
    {
        throw new IndexFormatTooOldException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1);
    }
    if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1)
    {
        throw new IndexFormatTooNewException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1);
    }

    string version;
    if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1)
    {
        version = input.ReadString();
    }
    else
    {
        version = null;
    }

    string name = input.ReadString();
    int docCount = input.ReadInt();
    long delGen = input.ReadLong();
    int docStoreOffset = input.ReadInt();

    IDictionary<string, string> attributes = new Dictionary<string, string>();

    // parse the docstore stuff and shove it into attributes
    string docStoreSegment;
    bool docStoreIsCompoundFile;
    if (docStoreOffset != -1)
    {
        docStoreSegment = input.ReadString();
        docStoreIsCompoundFile = input.ReadByte() == SegmentInfo.YES;
        attributes[Lucene3xSegmentInfoFormat.DS_OFFSET_KEY] = Convert.ToString(docStoreOffset);
        attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = docStoreSegment;
        attributes[Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY] = Convert.ToString(docStoreIsCompoundFile);
    }
    else
    {
        docStoreSegment = name;
        docStoreIsCompoundFile = false;
    }

    // pre-4.0 indexes write a byte if there is a single norms file
    byte b = input.ReadByte();
    //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format);
    Debug.Assert(1 == b, "expected 1 but was: " + b + " format: " + format);

    int numNormGen = input.ReadInt();
    IDictionary<int, long> normGen;
    if (numNormGen == SegmentInfo.NO)
    {
        normGen = null;
    }
    else
    {
        normGen = new Dictionary<int, long>();
        for (int j = 0; j < numNormGen; j++)
        {
            normGen[j] = input.ReadLong();
        }
    }

    bool isCompoundFile = input.ReadByte() == SegmentInfo.YES;

    int delCount = input.ReadInt();
    Debug.Assert(delCount <= docCount);

    bool hasProx = input.ReadByte() == 1;

    IDictionary<string, string> diagnostics = input.ReadStringStringMap();

    if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS)
    {
        // NOTE: unused
        int hasVectors = input.ReadByte();
    }

    // Replicate logic from 3.x's SegmentInfo.files():
    ISet<string> files = new HashSet<string>();
    if (isCompoundFile)
    {
        files.Add(IndexFileNames.SegmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
    }
    else
    {
        AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION));
        AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION));
        AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION));
        AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION));
        AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION));
        AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION));
    }

    if (docStoreOffset != -1)
    {
        if (docStoreIsCompoundFile)
        {
            files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
        }
        else
        {
            files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
            files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
            AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
            AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
            AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
        }
    }
    else if (!isCompoundFile)
    {
        files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
        files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
        AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
        AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
        AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
    }

    // parse the normgen stuff and shove it into attributes
    if (normGen != null)
    {
        attributes[Lucene3xSegmentInfoFormat.NORMGEN_KEY] = Convert.ToString(numNormGen);
        foreach (KeyValuePair<int, long> ent in normGen)
        {
            long gen = ent.Value;
            if (gen >= SegmentInfo.YES)
            {
                // Definitely a separate norm file, with generation:
                files.Add(IndexFileNames.FileNameFromGeneration(name, "s" + ent.Key, gen));
                attributes[Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.Key] = Convert.ToString(gen);
            }
            else if (gen == SegmentInfo.NO)
            {
                // No separate norm
            }
            else
            {
                // We should have already hit indexformat too old exception
                Debug.Assert(false);
            }
        }
    }

    SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, CollectionsHelper.UnmodifiableMap(attributes));
    info.Files = files;

    SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, delCount, delGen, -1);
    return infoPerCommit;
}
/// <summary>
/// Construct a new SegmentInfo instance by reading a
/// previously saved SegmentInfo from input.
/// </summary>
/// <param name="dir">directory to load from</param>
/// <param name="format">format of the segments info file</param>
/// <param name="input">input handle to read segment info from</param>
internal SegmentInfo(Directory dir, int format, IndexInput input)
{
    this.dir = dir;
    name = input.ReadString();
    docCount = input.ReadInt();
    if (format <= SegmentInfos.FORMAT_LOCKLESS)
    {
        delGen = input.ReadLong();
        if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
        {
            docStoreOffset = input.ReadInt();
            if (docStoreOffset != -1)
            {
                docStoreSegment = input.ReadString();
                docStoreIsCompoundFile = (1 == input.ReadByte());
            }
            else
            {
                docStoreSegment = name;
                docStoreIsCompoundFile = false;
            }
        }
        else
        {
            docStoreOffset = -1;
            docStoreSegment = name;
            docStoreIsCompoundFile = false;
        }
        if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
        {
            hasSingleNormFile = (1 == input.ReadByte());
        }
        else
        {
            hasSingleNormFile = false;
        }

        int numNormGen = input.ReadInt();
        if (numNormGen == NO)
        {
            normGen = null;
        }
        else
        {
            normGen = new long[numNormGen];
            for (int j = 0; j < numNormGen; j++)
            {
                normGen[j] = input.ReadLong();
            }
        }

        isCompoundFile = (sbyte)input.ReadByte();
        preLockless = (isCompoundFile == CHECK_DIR);

        if (format <= SegmentInfos.FORMAT_DEL_COUNT)
        {
            delCount = input.ReadInt();
            System.Diagnostics.Debug.Assert(delCount <= docCount);
        }
        else
        {
            delCount = -1;
        }

        if (format <= SegmentInfos.FORMAT_HAS_PROX)
        {
            hasProx = input.ReadByte() == 1;
        }
        else
        {
            hasProx = true;
        }

        if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
        {
            diagnostics = input.ReadStringStringMap();
        }
        else
        {
            diagnostics = new Dictionary<string, string>();
        }
    }
    else
    {
        delGen = CHECK_DIR;
        normGen = null;
        isCompoundFile = (sbyte)CHECK_DIR;
        preLockless = true;
        hasSingleNormFile = false;
        docStoreOffset = -1;
        docStoreIsCompoundFile = false;
        docStoreSegment = null;
        delCount = -1;
        hasProx = true;
        diagnostics = new Dictionary<string, string>();
    }
}
private void ReadFields(IndexInput meta, FieldInfos infos)
{
    int fieldNumber = meta.ReadVInt();
    while (fieldNumber != -1)
    {
        // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
        // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
        if (fieldNumber < 0)
        {
            // trickier to validate more: because we re-use for norms, because we use multiple entries
            // for "composite" types like sortedset, etc.
            throw new CorruptIndexException("Invalid field number: " + fieldNumber + ", input=" + meta);
        }
        int fieldType = meta.ReadByte();
        if (fieldType == NUMBER)
        {
            var entry = new NumericEntry
            {
                Offset = meta.ReadLong(),
                Format = (sbyte)meta.ReadByte()
            };
            switch (entry.Format)
            {
                case DELTA_COMPRESSED:
                case TABLE_COMPRESSED:
                case GCD_COMPRESSED:
                case UNCOMPRESSED:
                    break;

                default:
                    throw new CorruptIndexException("Unknown format: " + entry.Format + ", input=" + meta);
            }
            if (entry.Format != UNCOMPRESSED)
            {
                entry.PackedIntsVersion = meta.ReadVInt();
            }
            Numerics[fieldNumber] = entry;
        }
        else if (fieldType == BYTES)
        {
            BinaryEntry entry = new BinaryEntry();
            entry.Offset = meta.ReadLong();
            entry.NumBytes = meta.ReadLong();
            entry.MinLength = meta.ReadVInt();
            entry.MaxLength = meta.ReadVInt();
            if (entry.MinLength != entry.MaxLength)
            {
                entry.PackedIntsVersion = meta.ReadVInt();
                entry.BlockSize = meta.ReadVInt();
            }
            Binaries[fieldNumber] = entry;
        }
        else if (fieldType == FST)
        {
            FSTEntry entry = new FSTEntry();
            entry.Offset = meta.ReadLong();
            entry.NumOrds = meta.ReadVLong();
            Fsts[fieldNumber] = entry;
        }
        else
        {
            throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
        }
        fieldNumber = meta.ReadVInt();
    }
}
/// <summary>
/// Returns (but does not validate) the checksum previously written by <seealso cref="CheckFooter"/>. </summary>
/// <returns> actual checksum value </returns>
/// <exception cref="IOException"> if the footer is invalid </exception>
public static long RetrieveChecksum(IndexInput @in)
{
    @in.Seek(@in.Length() - FooterLength());
    ValidateFooter(@in);
    return @in.ReadLong();
}
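// RetrieveChecksum above depends on the fixed-size codec footer introduced in
// Lucene 4.8. To the best of my reading, the footer is 16 trailing bytes:
// a footer magic (4 bytes), an algorithm id (4 bytes, 0 meaning CRC32), and
// the checksum itself (8 bytes), all big-endian. A hedged sketch of parsing
// such a footer from a raw byte array -- the magic value (the complement of
// CodecUtil's CODEC_MAGIC, 0x3fd76c17) is stated as an assumption:
using System.IO;

static class FooterSketch
{
    const int FooterMagic = ~0x3fd76c17;   // assumed: ~CODEC_MAGIC
    public const int FooterLength = 4 + 4 + 8;

    public static long ReadFooterChecksum(byte[] file)
    {
        int p = file.Length - FooterLength;
        int magic = ReadIntBE(file, ref p);
        int algorithm = ReadIntBE(file, ref p);
        if (magic != FooterMagic || algorithm != 0)
        {
            throw new InvalidDataException("invalid or missing codec footer");
        }
        return ReadLongBE(file, ref p);    // the stored checksum
    }

    static int ReadIntBE(byte[] b, ref int p)
    {
        return (b[p++] << 24) | (b[p++] << 16) | (b[p++] << 8) | b[p++];
    }

    static long ReadLongBE(byte[] b, ref int p)
    {
        // High 32 bits are read first, then the low 32 bits.
        return ((long)(uint)ReadIntBE(b, ref p) << 32) | (uint)ReadIntBE(b, ref p);
    }
}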
internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
{
    input = i;
    fieldInfos = fis;
    isIndex = isi;
    maxSkipLevels = 1; // use single-level skip lists for formats > -3

    int firstInt = input.ReadInt();
    if (firstInt >= 0)
    {
        // original-format file, without explicit format version number
        format = 0;
        size = firstInt;

        // back-compatible settings
        indexInterval = 128;
        skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
    }
    else
    {
        // we have a format version number
        format = firstInt;

        // check that it is a format we can understand
        if (format < TermInfosWriter.FORMAT_CURRENT)
        {
            throw new CorruptIndexException("Unknown format version: " + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");
        }

        size = input.ReadLong(); // read the size

        if (format == -1)
        {
            if (!isIndex)
            {
                indexInterval = input.ReadInt();
                formatM1SkipInterval = input.ReadInt();
            }
            // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
            // skipTo implementation of these versions
            skipInterval = System.Int32.MaxValue;
        }
        else
        {
            indexInterval = input.ReadInt();
            skipInterval = input.ReadInt();
            if (format <= TermInfosWriter.FORMAT)
            {
                // this new format introduces multi-level skipping
                maxSkipLevels = input.ReadInt();
            }
        }
        System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
        System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
    }

    if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
    {
        termBuffer.SetPreUTF8Strings();
        scanBuffer.SetPreUTF8Strings();
        prevBuffer.SetPreUTF8Strings();
    }
}
/// <summary>
/// Reads index file header. </summary>
protected internal virtual int ReadIndexHeader(IndexInput input)
{
    int version = CodecUtil.CheckHeader(input, BlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME, BlockTreeTermsWriter.VERSION_START, BlockTreeTermsWriter.VERSION_CURRENT);
    if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY)
    {
        IndexDirOffset = input.ReadLong();
    }
    return version;
}
private void ReadFields(IndexInput meta, FieldInfos infos)
{
    int fieldNumber = meta.ReadVInt();
    while (fieldNumber != -1)
    {
        int fieldType = meta.ReadByte();
        if (fieldType == NUMBER)
        {
            var entry = new NumericEntry
            {
                offset = meta.ReadLong(),
                missingOffset = meta.ReadLong()
            };
            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadLong();
            }
            else
            {
                entry.missingBytes = 0;
            }
            entry.format = meta.ReadByte();
            switch (entry.format)
            {
                case DELTA_COMPRESSED:
                case TABLE_COMPRESSED:
                case GCD_COMPRESSED:
                case UNCOMPRESSED:
                    break;

                default:
                    throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
            }
            if (entry.format != UNCOMPRESSED)
            {
                entry.packedIntsVersion = meta.ReadVInt();
            }
            numerics[fieldNumber] = entry;
        }
        else if (fieldType == BYTES)
        {
            var entry = new BinaryEntry
            {
                offset = meta.ReadLong(),
                numBytes = meta.ReadLong(),
                missingOffset = meta.ReadLong()
            };
            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadLong();
            }
            else
            {
                entry.missingBytes = 0;
            }
            entry.minLength = meta.ReadVInt();
            entry.maxLength = meta.ReadVInt();
            if (entry.minLength != entry.maxLength)
            {
                entry.packedIntsVersion = meta.ReadVInt();
                entry.blockSize = meta.ReadVInt();
            }
            binaries[fieldNumber] = entry;
        }
        else if (fieldType == FST)
        {
            var entry = new FSTEntry
            {
                offset = meta.ReadLong(),
                numOrds = meta.ReadVLong()
            };
            fsts[fieldNumber] = entry;
        }
        else
        {
            throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
        }
        fieldNumber = meta.ReadVInt();
    }
}
private void SeekDir(IndexInput input, long dirOffset)
{
    if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
    {
        input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
        dirOffset = input.ReadLong();
    }
    else if (_version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY)
    {
        input.Seek(input.Length() - 8);
        dirOffset = input.ReadLong();
    }
    input.Seek(dirOffset);
}
public System.Object Run(IndexCommit commit)
{
    if (commit != null)
    {
        if (directory != commit.Directory)
        {
            throw new System.IO.IOException("the specified commit does not match the specified Directory");
        }
        return DoBody(commit.SegmentsFileName);
    }

    System.String segmentFileName = null;
    long lastGen = -1;
    long gen = 0;
    int genLookaheadCount = 0;
    System.IO.IOException exc = null;
    bool retry = false;

    int method = 0;

    // Loop until we succeed in calling doBody() without
    // hitting an IOException. An IOException most likely
    // means a commit was in process and has finished, in
    // the time it took us to load the now-old infos files
    // (and segments files). It's also possible it's a
    // true error (corrupt index). To distinguish these,
    // on each retry we must see "forward progress" on
    // which generation we are trying to load. If we
    // don't, then the original error is real and we throw
    // it.

    // We have three methods for determining the current
    // generation. We try the first two in parallel, and
    // fall back to the third when necessary.
    while (true)
    {
        if (0 == method)
        {
            // Method 1: list the directory and use the highest
            // segments_N file. This method works well as long
            // as there is no stale caching on the directory
            // contents (NOTE: NFS clients often have such stale
            // caching):
            System.String[] files = null;
            long genA = -1;

            files = directory.ListAll();
            if (files != null)
            {
                genA = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files);
            }

            Lucene.Net.Index.SegmentInfos.Message("directory listing genA=" + genA);

            // Method 2: open segments.gen and read its
            // contents. Then we take the larger of the two
            // gens. This way, if either approach is hitting
            // a stale cache (NFS) we have a better chance of
            // getting the right generation.
            long genB = -1;
            for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++)
            {
                IndexInput genInput = null;
                try
                {
                    genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN);
                }
                catch (System.IO.FileNotFoundException e)
                {
                    Lucene.Net.Index.SegmentInfos.Message("segments.gen open: FileNotFoundException " + e);
                    break;
                }
                catch (System.IO.IOException e)
                {
                    Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e);
                }

                if (genInput != null)
                {
                    try
                    {
                        int version = genInput.ReadInt();
                        if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS)
                        {
                            long gen0 = genInput.ReadLong();
                            long gen1 = genInput.ReadLong();
                            Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1);
                            if (gen0 == gen1)
                            {
                                // The file is consistent.
                                genB = gen0;
                                break;
                            }
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // will retry
                    }
                    finally
                    {
                        genInput.Close();
                    }
                }

                // 10,000 ticks per millisecond, so this sleeps for
                // defaultGenFileRetryPauseMsec milliseconds.
                System.Threading.Thread.Sleep(new TimeSpan((System.Int64)10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec));
            }

            Lucene.Net.Index.SegmentInfos.Message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB);

            // Pick the larger of the two gen's:
            if (genA > genB)
            {
                gen = genA;
            }
            else
            {
                gen = genB;
            }

            if (gen == -1)
            {
                throw new System.IO.FileNotFoundException("no segments* file found in " + directory + ": files:" + string.Join(" ", files));
            }
        }

        // Third method (fallback if first & second methods
        // are not reliable): since both directory cache and
        // file contents cache seem to be stale, just
        // advance the generation.
        if (1 == method || (0 == method && lastGen == gen && retry))
        {
            method = 1;

            if (genLookaheadCount < Lucene.Net.Index.SegmentInfos.defaultGenLookaheadCount)
            {
                gen++;
                genLookaheadCount++;
                Lucene.Net.Index.SegmentInfos.Message("look ahead increment gen to " + gen);
            }
        }

        if (lastGen == gen)
        {
            // This means we're about to try the same
            // segments_N last tried. This is allowed,
            // exactly once, because writer could have been in
            // the process of writing segments_N last time.
            if (retry)
            {
                // OK, we've tried the same segments_N file
                // twice in a row, so this must be a real
                // error. We throw the original exception we
                // got.
                throw exc;
            }
            retry = true;
        }
        else if (0 == method)
        {
            // Segment file has advanced since our last loop, so
            // reset retry:
            retry = false;
        }

        lastGen = gen;

        segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

        try
        {
            System.Object v = DoBody(segmentFileName);
            Lucene.Net.Index.SegmentInfos.Message("success on " + segmentFileName);
            return v;
        }
        catch (System.IO.IOException err)
        {
            // Save the original root cause:
            if (exc == null)
            {
                exc = err;
            }

            Lucene.Net.Index.SegmentInfos.Message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen);

            if (!retry && gen > 1)
            {
                // This is our first time trying this segments
                // file (because retry is false), and, there is
                // possibly a segments_(N-1) (because gen > 1).
                // So, check if the segments_(N-1) exists and
                // try it if so:
                System.String prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1);

                bool prevExists;
                prevExists = directory.FileExists(prevSegmentFileName);

                if (prevExists)
                {
                    Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'");
                    try
                    {
                        System.Object v = DoBody(prevSegmentFileName);
                        if (exc != null)
                        {
                            Lucene.Net.Index.SegmentInfos.Message("success on fallback " + prevSegmentFileName);
                        }
                        return v;
                    }
                    catch (System.IO.IOException err2)
                    {
                        Lucene.Net.Index.SegmentInfos.Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
                    }
                }
            }
        }
    }
}