public /*internal*/ Document Doc(int n, FieldSelector fieldSelector, IState state)
{
    SeekIndex(n, state);
    long position = indexStream.ReadLong(state);
    fieldsStream.Seek(position, state);

    var doc = new Document();
    int numFields = fieldsStream.ReadVInt(state);
    for (int i = 0; i < numFields; i++)
    {
        int fieldNumber = fieldsStream.ReadVInt(state);
        FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
        FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);

        byte bits = fieldsStream.ReadByte(state);
        System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);

        bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
        System.Diagnostics.Debug.Assert(
            (!compressed || (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS)),
            "compressed fields are only allowed in indexes of version <= 2.9");
        bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
        bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;

        // TODO: Find an alternative approach here if this list continues to grow beyond the
        // list of 5 or 6 currently here. See Lucene 762 for discussion
        if (acceptField.Equals(FieldSelectorResult.LOAD))
        {
            AddField(doc, fi, binary, compressed, tokenize, state);
        }
        else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
        {
            AddField(doc, fi, binary, compressed, tokenize, state);
            break; // Get out of this loop
        }
        else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
        {
            AddFieldLazy(doc, fi, binary, compressed, tokenize, state);
        }
        else if (acceptField.Equals(FieldSelectorResult.SIZE))
        {
            SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed, state), state);
        }
        else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
        {
            AddFieldSize(doc, fi, binary, compressed, state);
            break;
        }
        else
        {
            SkipField(binary, compressed, state);
        }
    }

    return doc;
}
/// <summary>Retrieve the length (in bytes) of the tvd and tvf
/// entries for the next numDocs starting with
/// startDocID.  This is used for bulk copying when
/// merging segments, if the field numbers are
/// congruent.  Once this returns, the tvf &amp; tvd streams
/// are seeked to the startDocID.
/// </summary>
internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs, IState state)
{
    if (tvx == null)
    {
        for (int i = 0; i < tvdLengths.Length; i++)
        {
            tvdLengths[i] = 0;
        }
        for (int i = 0; i < tvfLengths.Length; i++)
        {
            tvfLengths[i] = 0;
        }
        return;
    }

    // SegmentMerger calls canReadRawDocs() first and should
    // not call us if that returns false.
    if (format < FORMAT_VERSION2)
    {
        throw new System.SystemException("cannot read raw docs with older term vector formats");
    }

    SeekTvx(startDocID, state);

    long tvdPosition = tvx.ReadLong(state);
    tvd.Seek(tvdPosition, state);

    long tvfPosition = tvx.ReadLong(state);
    tvf.Seek(tvfPosition, state);

    long lastTvdPosition = tvdPosition;
    long lastTvfPosition = tvfPosition;

    int count = 0;
    while (count < numDocs)
    {
        int docID = docStoreOffset + startDocID + count + 1;
        System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
        if (docID < numTotalDocs)
        {
            tvdPosition = tvx.ReadLong(state);
            tvfPosition = tvx.ReadLong(state);
        }
        else
        {
            tvdPosition = tvd.Length(state);
            tvfPosition = tvf.Length(state);
            System.Diagnostics.Debug.Assert(count == numDocs - 1);
        }
        tvdLengths[count] = (int)(tvdPosition - lastTvdPosition);
        tvfLengths[count] = (int)(tvfPosition - lastTvfPosition);
        count++;
        lastTvdPosition = tvdPosition;
        lastTvfPosition = tvfPosition;
    }
}
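// A minimal usage sketch (not part of this class) of how a segment merge might consume RawDocs
// for bulk copying. The writer-side call (AddRawDocuments) and the local names below are
// illustrative assumptions, not a verbatim excerpt from this codebase.
//
//   int[] rawTvdLengths = new int[numDocs];   // tvd length per document
//   int[] rawTvfLengths = new int[numDocs];   // tvf length per document
//   termVectorsReader.RawDocs(rawTvdLengths, rawTvfLengths, startDocID, numDocs, state);
//   // The tvd/tvf streams are now positioned at startDocID, so the merge can copy each
//   // document's term vectors as raw bytes instead of decoding and re-encoding them:
//   termVectorsWriter.AddRawDocuments(termVectorsReader, rawTvdLengths, rawTvfLengths, numDocs);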
public CompoundFileReader(Directory dir, System.String name, int readBufferSize, IState state)
{
    directory = dir;
    fileName = name;
    this.readBufferSize = readBufferSize;

    bool success = false;

    try
    {
        stream = dir.OpenInput(name, readBufferSize, state);

        // read the directory and init files
        int count = stream.ReadVInt(state);
        FileEntry entry = null;
        for (int i = 0; i < count; i++)
        {
            long offset = stream.ReadLong(state);
            System.String id = stream.ReadString(state);

            if (entry != null)
            {
                // set length of the previous entry
                entry.length = offset - entry.offset;
            }

            entry = new FileEntry { offset = offset };
            entries[id] = entry;
        }

        // set the length of the final entry
        if (entry != null)
        {
            entry.length = stream.Length(state) - entry.offset;
        }

        success = true;
    }
    finally
    {
        if (!success && (stream != null))
        {
            try
            {
                stream.Close();
            }
            catch (System.IO.IOException)
            {
            }
        }
    }
}
private BinaryEntry ReadBinaryEntry(IndexInput meta)
{
    var entry = new BinaryEntry();
    entry.offset = meta.ReadLong();
    entry.numBytes = meta.ReadInt();
    entry.count = meta.ReadInt();
    entry.missingOffset = meta.ReadLong();
    if (entry.missingOffset != -1)
    {
        entry.missingBytes = meta.ReadLong();
    }
    else
    {
        entry.missingBytes = 0;
    }
    return entry;
}
private static NumericEntry ReadNumericEntry(IndexInput meta)
{
    var entry = new NumericEntry
    {
        offset = meta.ReadLong(),
        count = meta.ReadInt(),
        missingOffset = meta.ReadLong()
    };
    if (entry.missingOffset != -1)
    {
        entry.missingBytes = meta.ReadLong();
    }
    else
    {
        entry.missingBytes = 0;
    }
    entry.byteWidth = meta.ReadByte();
    return entry;
}
protected override void SeekDir(IndexInput input, long dirOffset)
{
    // The directory offset is stored as a single long (8 bytes) at the very end of the file,
    // so seek to length - sizeof(long); "sizeof(long)/8" would only back up one byte.
    input.Seek(input.Length() - sizeof(long));
    long offset = input.ReadLong();
    input.Seek(offset);
}
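// For context, a hedged sketch of the write side that SeekDir expects: the writer remembers the
// file position where the directory starts and appends it as a trailing 8-byte long. The names
// (output, dirStart) and the GetFilePointer call are illustrative, not taken from this codebase.
//
//   long dirStart = output.GetFilePointer();  // position where the directory begins
//   // ... write the directory entries ...
//   output.WriteLong(dirStart);               // trailing pointer that SeekDir reads back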
internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi, IState state)
{
    input = i;
    fieldInfos = fis;
    isIndex = isi;
    maxSkipLevels = 1; // use single-level skip lists for formats > -3

    int firstInt = input.ReadInt(state);
    if (firstInt >= 0)
    {
        // original-format file, without explicit format version number
        format = 0;
        size = firstInt;

        // back-compatible settings
        indexInterval = 128;
        skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
    }
    else
    {
        // we have a format version number
        format = firstInt;

        // check that it is a format we can understand
        if (format < TermInfosWriter.FORMAT_CURRENT)
        {
            throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");
        }

        size = input.ReadLong(state); // read the size

        if (format == -1)
        {
            if (!isIndex)
            {
                indexInterval = input.ReadInt(state);
                formatM1SkipInterval = input.ReadInt(state);
            }
            // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
            // skipTo implementation of these versions
            skipInterval = System.Int32.MaxValue;
        }
        else
        {
            indexInterval = input.ReadInt(state);
            skipInterval = input.ReadInt(state);
            if (format <= TermInfosWriter.FORMAT)
            {
                // this new format introduces multi-level skipping
                maxSkipLevels = input.ReadInt(state);
            }
        }
        System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
        System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
    }

    if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
    {
        termBuffer.SetPreUTF8Strings();
        scanBuffer.SetPreUTF8Strings();
        prevBuffer.SetPreUTF8Strings();
    }
}
public override long ReadLong()
{
    EnsureOpen();
    return @delegate.ReadLong();
}
private void ReadFields(IndexInput meta, FieldInfos infos)
{
    int fieldNumber = meta.ReadVInt();
    while (fieldNumber != -1)
    {
        // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
        // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
        if (fieldNumber < 0)
        {
            // trickier to validate more: because we re-use for norms, because we use multiple entries
            // for "composite" types like sortedset, etc.
            throw new CorruptIndexException("Invalid field number: " + fieldNumber + ", input=" + meta);
        }

        int fieldType = meta.ReadByte();
        if (fieldType == NUMBER)
        {
            var entry = new NumericEntry { Offset = meta.ReadLong(), Format = (sbyte)meta.ReadByte() };
            switch (entry.Format)
            {
                case DELTA_COMPRESSED:
                case TABLE_COMPRESSED:
                case GCD_COMPRESSED:
                case UNCOMPRESSED:
                    break;

                default:
                    throw new CorruptIndexException("Unknown format: " + entry.Format + ", input=" + meta);
            }
            if (entry.Format != UNCOMPRESSED)
            {
                entry.PackedIntsVersion = meta.ReadVInt();
            }
            Numerics[fieldNumber] = entry;
        }
        else if (fieldType == BYTES)
        {
            BinaryEntry entry = new BinaryEntry();
            entry.Offset = meta.ReadLong();
            entry.NumBytes = meta.ReadLong();
            entry.MinLength = meta.ReadVInt();
            entry.MaxLength = meta.ReadVInt();
            if (entry.MinLength != entry.MaxLength)
            {
                entry.PackedIntsVersion = meta.ReadVInt();
                entry.BlockSize = meta.ReadVInt();
            }
            Binaries[fieldNumber] = entry;
        }
        else if (fieldType == FST)
        {
            FSTEntry entry = new FSTEntry();
            entry.Offset = meta.ReadLong();
            entry.NumOrds = meta.ReadVLong();
            Fsts[fieldNumber] = entry;
        }
        else
        {
            throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
        }
        fieldNumber = meta.ReadVInt();
    }
}
private static IDictionary<string, FileEntry> ReadLegacyEntries(IndexInput stream, int firstInt)
{
    IDictionary<string, FileEntry> entries = new Dictionary<string, FileEntry>();
    int count;
    bool stripSegmentName;

    if (firstInt < CompoundFileWriter.FORMAT_PRE_VERSION)
    {
        if (firstInt < CompoundFileWriter.FORMAT_NO_SEGMENT_PREFIX)
        {
            throw new CorruptIndexException("Incompatible format version: " + firstInt + " expected >= " + CompoundFileWriter.FORMAT_NO_SEGMENT_PREFIX + " (resource: " + stream + ")");
        }
        // It's a post-3.1 index, read the count.
        count = stream.ReadVInt();
        stripSegmentName = false;
    }
    else
    {
        count = firstInt;
        stripSegmentName = true;
    }

    // read the directory and init files
    long streamLength = stream.Length();
    FileEntry entry = null;
    for (int i = 0; i < count; i++)
    {
        long offset = stream.ReadLong();
        if (offset < 0 || offset > streamLength)
        {
            throw new CorruptIndexException("Invalid CFS entry offset: " + offset + " (resource: " + stream + ")");
        }

        string id = stream.ReadString();

        if (stripSegmentName)
        {
            // Fix the id to not include the segment names. this is relevant for
            // pre-3.1 indexes.
            id = IndexFileNames.StripSegmentName(id);
        }

        if (entry != null)
        {
            // set length of the previous entry
            entry.Length = offset - entry.Offset;
        }

        entry = new FileEntry();
        entry.Offset = offset;

        // A given file id must appear only once in the CFS directory.
        if (entries.ContainsKey(id))
        {
            throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS: " + stream);
        }
        entries[id] = entry;
    }

    // set the length of the final entry
    if (entry != null)
    {
        entry.Length = streamLength - entry.Offset;
    }

    return entries;
}
/// <summary> Construct a new SegmentInfo instance by reading a
/// previously saved SegmentInfo from input.
/// </summary>
/// <param name="dir">directory to load from
/// </param>
/// <param name="format">format of the segments info file
/// </param>
/// <param name="input">input handle to read segment info from
/// </param>
internal SegmentInfo(Directory dir, int format, IndexInput input, IState state)
{
    this.dir = dir;
    name = input.ReadString(state);
    docCount = input.ReadInt(state);
    if (format <= SegmentInfos.FORMAT_LOCKLESS)
    {
        delGen = input.ReadLong(state);
        if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
        {
            docStoreOffset = input.ReadInt(state);
            if (docStoreOffset != -1)
            {
                docStoreSegment = input.ReadString(state);
                docStoreIsCompoundFile = (1 == input.ReadByte(state));
            }
            else
            {
                docStoreSegment = name;
                docStoreIsCompoundFile = false;
            }
        }
        else
        {
            docStoreOffset = -1;
            docStoreSegment = name;
            docStoreIsCompoundFile = false;
        }
        if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
        {
            hasSingleNormFile = (1 == input.ReadByte(state));
        }
        else
        {
            hasSingleNormFile = false;
        }

        int numNormGen = input.ReadInt(state);
        if (numNormGen == NO)
        {
            normGen = null;
        }
        else
        {
            normGen = new long[numNormGen];
            for (int j = 0; j < numNormGen; j++)
            {
                normGen[j] = input.ReadLong(state);
            }
        }

        isCompoundFile = (sbyte)input.ReadByte(state);
        preLockless = (isCompoundFile == CHECK_DIR);

        if (format <= SegmentInfos.FORMAT_DEL_COUNT)
        {
            delCount = input.ReadInt(state);
            System.Diagnostics.Debug.Assert(delCount <= docCount);
        }
        else
        {
            delCount = -1;
        }

        if (format <= SegmentInfos.FORMAT_HAS_PROX)
        {
            hasProx = input.ReadByte(state) == 1;
        }
        else
        {
            hasProx = true;
        }

        if (format <= SegmentInfos.FORMAT_DIAGNOSTICS)
        {
            diagnostics = input.ReadStringStringMap(state);
        }
        else
        {
            diagnostics = new Dictionary<string, string>();
        }
    }
    else
    {
        delGen = CHECK_DIR;
        normGen = null;
        isCompoundFile = (sbyte)CHECK_DIR;
        preLockless = true;
        hasSingleNormFile = false;
        docStoreOffset = -1;
        docStoreIsCompoundFile = false;
        docStoreSegment = null;
        delCount = -1;
        hasProx = true;
        diagnostics = new Dictionary<string, string>();
    }
}
/// <summary>
/// Returns (but does not validate) the checksum previously written by <seealso cref="#checkFooter"/>. </summary>
/// <returns> actual checksum value </returns>
/// <exception cref="IOException"> if the footer is invalid </exception>
public static long RetrieveChecksum(IndexInput @in)
{
    @in.Seek(@in.Length() - FooterLength());
    ValidateFooter(@in);
    return @in.ReadLong();
}
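// Hedged usage sketch: reading back the stored footer checksum so it can be compared against an
// independently computed value. The enclosing class name (CodecUtil) and the surrounding
// verification logic are assumptions for illustration, not verified against this codebase.
//
//   long stored = CodecUtil.RetrieveChecksum(input);   // checksum recorded in the footer
//   // ... recompute the checksum over the file body, then:
//   // if (computed != stored) throw new CorruptIndexException("checksum mismatch: " + input);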
private void ReadFields(IndexInput meta, FieldInfos infos)
{
    int fieldNumber = meta.ReadVInt();
    while (fieldNumber != -1)
    {
        int fieldType = meta.ReadByte();
        if (fieldType == NUMBER)
        {
            var entry = new NumericEntry { offset = meta.ReadLong(), missingOffset = meta.ReadLong() };
            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadLong();
            }
            else
            {
                entry.missingBytes = 0;
            }
            entry.format = meta.ReadByte();
            switch (entry.format)
            {
                case DELTA_COMPRESSED:
                case TABLE_COMPRESSED:
                case GCD_COMPRESSED:
                case UNCOMPRESSED:
                    break;

                default:
                    throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
            }
            if (entry.format != UNCOMPRESSED)
            {
                entry.packedIntsVersion = meta.ReadVInt();
            }
            numerics[fieldNumber] = entry;
        }
        else if (fieldType == BYTES)
        {
            var entry = new BinaryEntry
            {
                offset = meta.ReadLong(),
                numBytes = meta.ReadLong(),
                missingOffset = meta.ReadLong()
            };
            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadLong();
            }
            else
            {
                entry.missingBytes = 0;
            }
            entry.minLength = meta.ReadVInt();
            entry.maxLength = meta.ReadVInt();
            if (entry.minLength != entry.maxLength)
            {
                entry.packedIntsVersion = meta.ReadVInt();
                entry.blockSize = meta.ReadVInt();
            }
            binaries[fieldNumber] = entry;
        }
        else if (fieldType == FST)
        {
            var entry = new FSTEntry { offset = meta.ReadLong(), numOrds = meta.ReadVLong() };
            fsts[fieldNumber] = entry;
        }
        else
        {
            throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
        }
        fieldNumber = meta.ReadVInt();
    }
}
public System.Object Run(IndexCommit commit, IState state)
{
    if (commit != null)
    {
        if (directory != commit.Directory)
        {
            throw new System.IO.IOException("the specified commit does not match the specified Directory");
        }
        return DoBody(commit.SegmentsFileName, state);
    }

    System.String segmentFileName = null;
    long lastGen = -1;
    long gen = 0;
    int genLookaheadCount = 0;
    System.IO.IOException exc = null;
    bool retry = false;

    int method = 0;

    // Loop until we succeed in calling doBody() without
    // hitting an IOException. An IOException most likely
    // means a commit was in process and has finished, in
    // the time it took us to load the now-old infos files
    // (and segments files). It's also possible it's a
    // true error (corrupt index). To distinguish these,
    // on each retry we must see "forward progress" on
    // which generation we are trying to load. If we
    // don't, then the original error is real and we throw
    // it.

    // We have three methods for determining the current
    // generation. We try the first two in parallel, and
    // fall back to the third when necessary.

    while (true)
    {
        if (0 == method)
        {
            // Method 1: list the directory and use the highest
            // segments_N file. This method works well as long
            // as there is no stale caching on the directory
            // contents (NOTE: NFS clients often have such stale
            // caching):
            System.String[] files = null;

            long genA = -1;

            files = directory.ListAll(state);

            if (files != null)
            {
                genA = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files);
            }

            Lucene.Net.Index.SegmentInfos.Message("directory listing genA=" + genA);

            // Method 2: open segments.gen and read its
            // contents. Then we take the larger of the two
            // gens. This way, if either approach is hitting
            // a stale cache (NFS) we have a better chance of
            // getting the right generation.
            long genB = -1;
            for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++)
            {
                IndexInput genInput = null;
                try
                {
                    genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN, state);
                }
                catch (System.IO.FileNotFoundException e)
                {
                    Lucene.Net.Index.SegmentInfos.Message("segments.gen open: FileNotFoundException " + e);
                    break;
                }
                catch (System.IO.IOException e)
                {
                    Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e);
                }

                if (genInput != null)
                {
                    try
                    {
                        int version = genInput.ReadInt(state);
                        if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS)
                        {
                            long gen0 = genInput.ReadLong(state);
                            long gen1 = genInput.ReadLong(state);
                            Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1);
                            if (gen0 == gen1)
                            {
                                // The file is consistent.
                                genB = gen0;
                                break;
                            }
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // will retry
                    }
                    finally
                    {
                        genInput.Close();
                    }
                }

                System.Threading.Thread.Sleep(new TimeSpan((System.Int64)10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec));
            }

            Lucene.Net.Index.SegmentInfos.Message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB);

            // Pick the larger of the two gen's:
            if (genA > genB)
            {
                gen = genA;
            }
            else
            {
                gen = genB;
            }

            if (gen == -1)
            {
                throw new System.IO.FileNotFoundException("no segments* file found in " + directory + ": files:" + string.Join(" ", files));
            }
        }

        // Third method (fallback if first & second methods
        // are not reliable): since both directory cache and
        // file contents cache seem to be stale, just
        // advance the generation.
        if (1 == method || (0 == method && lastGen == gen && retry))
        {
            method = 1;

            if (genLookaheadCount < Lucene.Net.Index.SegmentInfos.defaultGenLookaheadCount)
            {
                gen++;
                genLookaheadCount++;
                Lucene.Net.Index.SegmentInfos.Message("look ahead increment gen to " + gen);
            }
        }

        if (lastGen == gen)
        {
            // This means we're about to try the same
            // segments_N last tried. This is allowed,
            // exactly once, because writer could have been in
            // the process of writing segments_N last time.

            if (retry)
            {
                // OK, we've tried the same segments_N file
                // twice in a row, so this must be a real
                // error. We throw the original exception we
                // got.
                throw exc;
            }

            retry = true;
        }
        else if (0 == method)
        {
            // Segment file has advanced since our last loop, so
            // reset retry:
            retry = false;
        }

        lastGen = gen;

        segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

        try
        {
            System.Object v = DoBody(segmentFileName, state);
            Lucene.Net.Index.SegmentInfos.Message("success on " + segmentFileName);
            return v;
        }
        catch (System.IO.IOException err)
        {
            // Save the original root cause:
            if (exc == null)
            {
                exc = err;
            }

            Lucene.Net.Index.SegmentInfos.Message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen);

            if (!retry && gen > 1)
            {
                // This is our first time trying this segments
                // file (because retry is false), and, there is
                // possibly a segments_(N-1) (because gen > 1).
                // So, check if the segments_(N-1) exists and
                // try it if so:
                System.String prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1);

                bool prevExists;
                prevExists = directory.FileExists(prevSegmentFileName, state);

                if (prevExists)
                {
                    Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'");
                    try
                    {
                        System.Object v = DoBody(prevSegmentFileName, state);
                        if (exc != null)
                        {
                            Lucene.Net.Index.SegmentInfos.Message("success on fallback " + prevSegmentFileName);
                        }
                        return v;
                    }
                    catch (System.IO.IOException err2)
                    {
                        Lucene.Net.Index.SegmentInfos.Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
                    }
                }
            }
        }
    }
}