internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi) {
    input = i;
    fieldInfos = fis;
    isIndex = isi;
    maxSkipLevels = 1; // use single-level skip lists for formats > -3

    int firstInt = input.ReadInt();
    if (firstInt >= 0) {
        // original-format file, without explicit format version number
        format = 0;
        size = firstInt;

        // back-compatible settings
        indexInterval = 128;
        skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
    } else {
        // we have a format version number
        format = firstInt;

        // check that it is a format we can understand
        if (format < TermInfosWriter.FORMAT_CURRENT)
            throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");

        size = input.ReadLong(); // read the size

        if (format == -1) {
            if (!isIndex) {
                indexInterval = input.ReadInt();
                formatM1SkipInterval = input.ReadInt();
            }
            // switch off skipTo optimization for file formats prior to 1.4rc2
            // in order to avoid a bug in the skipTo implementation of those versions
            skipInterval = System.Int32.MaxValue;
        } else {
            indexInterval = input.ReadInt();
            skipInterval = input.ReadInt();
            if (format <= TermInfosWriter.FORMAT) {
                // this new format introduces multi-level skipping
                maxSkipLevels = input.ReadInt();
            }
        }
        System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
        System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
    }
    if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
        // pre-UTF8 indexes wrote string lengths in Java chars, not bytes
        termBuffer.SetPreUTF8Strings();
        scanBuffer.SetPreUTF8Strings();
        prevBuffer.SetPreUTF8Strings();
    }
}
internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi) {
    input = i;
    fieldInfos = fis;
    isIndex = isi;

    int firstInt = input.ReadInt();
    if (firstInt >= 0) {
        // original-format file, without explicit format version number
        format = 0;
        size = firstInt;

        // back-compatible settings
        indexInterval = 128;
        skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
    } else {
        // we have a format version number
        format = firstInt;

        // check that it is a format we can understand
        if (format < TermInfosWriter.FORMAT)
            throw new System.IO.IOException("Unknown format version:" + format);

        size = input.ReadLong(); // read the size

        if (format == -1) {
            if (!isIndex) {
                indexInterval = input.ReadInt();
                formatM1SkipInterval = input.ReadInt();
            }
            // switch off skipTo optimization for file formats prior to 1.4rc2
            // in order to avoid a bug in the skipTo implementation of those versions
            skipInterval = System.Int32.MaxValue;
        } else {
            indexInterval = input.ReadInt();
            skipInterval = input.ReadInt();
        }
    }
}
private int CheckValidFormat(IndexInput in_Renamed) {
    int format = in_Renamed.ReadInt();
    if (format > TermVectorsWriter.FORMAT_VERSION) {
        throw new System.IO.IOException("Incompatible format version: " + format + " expected " + TermVectorsWriter.FORMAT_VERSION + " or less");
    }
    return format;
}
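// Hedged illustration (not part of the original source): the readers in this file
// all treat the first int of a file as a version stamp, but the conventions differ.
// Term-vector formats count upward from 0, so "format > FORMAT_VERSION" means the
// file is newer than this reader understands. The segments/term-infos formats use
// non-positive numbers that decrease as the format evolves, which is why
// SegmentTermEnum instead rejects "format < FORMAT_CURRENT". A minimal standalone
// sketch of the counting-up variant, with a hypothetical version constant and a
// readInt delegate standing in for IndexInput.ReadInt():
internal static class FormatCheckSketch {
    private const int SUPPORTED_FORMAT_VERSION = 2; // hypothetical current version

    internal static int CheckValidFormat(System.Func<int> readInt) {
        int format = readInt();
        if (format > SUPPORTED_FORMAT_VERSION)
            throw new System.IO.IOException("Incompatible format version: " + format + " expected " + SUPPORTED_FORMAT_VERSION + " or less");
        return format; // older formats remain readable; callers branch on the value
    }
}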
/// <summary> Construct a new SegmentInfo instance by reading a /// previously saved SegmentInfo from input. /// /// </summary> /// <param name="dir">directory to load from /// </param> /// <param name="format">format of the segments info file /// </param> /// <param name="input">input handle to read segment info from /// </param> internal SegmentInfo(Directory dir, int format, IndexInput input) { this.dir = dir; name = input.ReadString(); docCount = input.ReadInt(); if (format <= SegmentInfos.FORMAT_LOCKLESS) { delGen = input.ReadLong(); if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) { docStoreOffset = input.ReadInt(); if (docStoreOffset != -1) { docStoreSegment = input.ReadString(); docStoreIsCompoundFile = (1 == input.ReadByte()); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } } else { docStoreOffset = -1; docStoreSegment = name; docStoreIsCompoundFile = false; } if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) { hasSingleNormFile = (1 == input.ReadByte()); } else { hasSingleNormFile = false; } int numNormGen = input.ReadInt(); if (numNormGen == NO) { normGen = null; } else { normGen = new long[numNormGen]; for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadLong(); } } isCompoundFile = (sbyte)input.ReadByte(); preLockless = (isCompoundFile == CHECK_DIR); } else { delGen = CHECK_DIR; normGen = null; isCompoundFile = (sbyte)(CHECK_DIR); preLockless = true; hasSingleNormFile = false; docStoreOffset = -1; docStoreIsCompoundFile = false; docStoreSegment = null; } }
/// <summary> Construct a new SegmentInfo instance by reading a /// previously saved SegmentInfo from input. /// /// </summary> /// <param name="dir">directory to load from /// </param> /// <param name="format">format of the segments info file /// </param> /// <param name="input">input handle to read segment info from /// </param> public SegmentInfo(Directory dir, int format, IndexInput input) { this.dir = dir; name = input.ReadString(); docCount = input.ReadInt(); if (format <= SegmentInfos.FORMAT_LOCKLESS) { delGen = input.ReadLong(); if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) { hasSingleNormFile = (1 == input.ReadByte()); } else { hasSingleNormFile = false; } int numNormGen = input.ReadInt(); if (numNormGen == - 1) { normGen = null; } else { normGen = new long[numNormGen]; for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadLong(); } } isCompoundFile = (sbyte) input.ReadByte(); preLockless = isCompoundFile == 0; } else { delGen = 0; normGen = null; isCompoundFile = 0; preLockless = true; hasSingleNormFile = false; } }
/// <summary>Returns a {@link Status} instance detailing
/// the state of the index.
/// </summary>
/// <param name="onlySegments">list of specific segment names to check
///
/// <p/>As this method checks every byte in the specified
/// segments, on a large index it can take quite a long
/// time to run.
///
/// <p/><b>WARNING</b>: make sure you only call this when the
/// index is not opened by any writer.
/// </param>
public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments) {
    System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();
    Status result = new Status();
    result.dir = dir;
    try {
        sis.Read(dir);
    } catch (System.Exception t) {
        Msg("ERROR: could not read any segments file in directory");
        result.missingSegments = true;
        if (infoStream != null)
            infoStream.WriteLine(t.StackTrace);
        return result;
    }

    int numSegments = sis.Count;
    System.String segmentsFileName = sis.GetCurrentSegmentFileName();
    IndexInput input = null;
    try {
        input = dir.OpenInput(segmentsFileName);
    } catch (System.Exception t) {
        Msg("ERROR: could not open segments file in directory");
        if (infoStream != null)
            infoStream.WriteLine(t.StackTrace);
        result.cantOpenSegments = true;
        return result;
    }

    int format = 0;
    try {
        format = input.ReadInt();
    } catch (System.Exception t) {
        Msg("ERROR: could not read segment file version in directory");
        if (infoStream != null)
            infoStream.WriteLine(t.StackTrace);
        result.missingSegmentVersion = true;
        return result;
    } finally {
        if (input != null)
            input.Close();
    }

    System.String sFormat = "";
    bool skip = false;

    // Note: this must be a single else-if chain; the original code restarted the
    // chain after the first test, so a pre-2.1 format fell through to the final
    // else and overwrote sFormat with "[Lucene 1.3 or prior]".
    if (format == SegmentInfos.FORMAT)
        sFormat = "FORMAT [Lucene Pre-2.1]";
    else if (format == SegmentInfos.FORMAT_LOCKLESS)
        sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
    else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
        sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
        sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    else if (format == SegmentInfos.FORMAT_CHECKSUM)
        sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
    else if (format == SegmentInfos.FORMAT_DEL_COUNT)
        sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
    else if (format == SegmentInfos.FORMAT_HAS_PROX)
        sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
    else if (format == SegmentInfos.FORMAT_USER_DATA)
        sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
    else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
        sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
    else if (format < SegmentInfos.CURRENT_FORMAT) {
        sFormat = "int=" + format + " [newer version of Lucene than this tool]";
        skip = true;
    } else {
        sFormat = format + " [Lucene 1.3 or prior]";
    }

    result.segmentsFileName = segmentsFileName;
    result.numSegments = numSegments;
    result.segmentFormat = sFormat;
    result.userData = sis.GetUserData();
    System.String userDataString;
    if (sis.GetUserData().Count > 0) {
        userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
    } else {
        userDataString = "";
    }

    Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

    if (onlySegments != null) {
        result.partial = true;
        if (infoStream != null)
            infoStream.Write("\nChecking only these segments:");
        System.Collections.IEnumerator it = onlySegments.GetEnumerator();
        while (it.MoveNext()) {
            if (infoStream != null)
                infoStream.Write(" " + it.Current);
        }
        System.Collections.IEnumerator e = onlySegments.GetEnumerator();
        while (e.MoveNext()) {
            result.segmentsChecked.Add(e.Current);
        }
        Msg(":");
    }

    if (skip) {
        Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        result.toolOutOfDate = true;
        return result;
    }

    result.newSegments = (SegmentInfos) sis.Clone();
    result.newSegments.Clear();

    for (int i = 0; i < numSegments; i++) {
        SegmentInfo info = sis.Info(i);
        if (onlySegments != null && !onlySegments.Contains(info.name))
            continue;
        Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
        result.segmentInfos.Add(segInfoStat);
        Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
        segInfoStat.name = info.name;
        segInfoStat.docCount = info.docCount;

        int toLoseDocCount = info.docCount;

        SegmentReader reader = null;
        try {
            Msg(" compound=" + info.GetUseCompoundFile());
            segInfoStat.compound = info.GetUseCompoundFile();
            Msg(" hasProx=" + info.GetHasProx());
            segInfoStat.hasProx = info.GetHasProx();
            Msg(" numFiles=" + info.Files().Count);
            segInfoStat.numFiles = info.Files().Count;
            Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
            segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);

            System.Collections.Generic.IDictionary<string, string> diagnostics = info.GetDiagnostics();
            segInfoStat.diagnostics = diagnostics;
            if (diagnostics.Count > 0)
                Msg(" diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));

            int docStoreOffset = info.GetDocStoreOffset();
            if (docStoreOffset != -1) {
                Msg(" docStoreOffset=" + docStoreOffset);
                segInfoStat.docStoreOffset = docStoreOffset;
                Msg(" docStoreSegment=" + info.GetDocStoreSegment());
                segInfoStat.docStoreSegment = info.GetDocStoreSegment();
                Msg(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
            }

            System.String delFileName = info.GetDelFileName();
            if (delFileName == null) {
                Msg(" no deletions");
                segInfoStat.hasDeletions = false;
            } else {
                Msg(" has deletions [delFileName=" + delFileName + "]");
                segInfoStat.hasDeletions = true;
                segInfoStat.deletionsFileName = delFileName;
            }

            if (infoStream != null)
                infoStream.Write(" test: open reader.........");
            reader = SegmentReader.Get(info);
            segInfoStat.openReaderPassed = true;

            int numDocs = reader.NumDocs();
            toLoseDocCount = numDocs;
            if (reader.HasDeletions()) {
                if (reader.deletedDocs.Count() != info.GetDelCount())
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                if (reader.deletedDocs.Count() > reader.MaxDoc())
                    throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                if (info.docCount - numDocs != info.GetDelCount())
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                segInfoStat.numDeleted = info.docCount - numDocs;
                Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
            } else {
                if (info.GetDelCount() != 0)
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                Msg("OK");
            }
            if (reader.MaxDoc() != info.docCount)
                throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);

            // Test getFieldNames()
            if (infoStream != null)
                infoStream.Write(" test: fields..............");
            System.Collections.Generic.ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
            Msg("OK [" + fieldNames.Count + " fields]");
            segInfoStat.numFields = fieldNames.Count;

            // Test Field Norms
            segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

            // Test the Term Index
            segInfoStat.termIndexStatus = TestTermIndex(info, reader);

            // Test Stored Fields
            segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

            // Test Term Vectors
            segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

            // Rethrow the first exception we encountered.
            // This will cause stats for failed segments to be incremented properly
            if (segInfoStat.fieldNormStatus.error != null) {
                throw new System.SystemException("Field Norm test failed");
            } else if (segInfoStat.termIndexStatus.error != null) {
                throw new System.SystemException("Term Index test failed");
            } else if (segInfoStat.storedFieldStatus.error != null) {
                throw new System.SystemException("Stored Field test failed");
            } else if (segInfoStat.termVectorStatus.error != null) {
                throw new System.SystemException("Term Vector test failed");
            }

            Msg("");
        } catch (System.Exception t) {
            Msg("FAILED");
            System.String comment = "fixIndex() would remove reference to this segment";
            Msg(" WARNING: " + comment + "; full exception:");
            if (infoStream != null)
                infoStream.WriteLine(t.StackTrace);
            Msg("");
            result.totLoseDocCount += toLoseDocCount;
            result.numBadSegments++;
            continue;
        } finally {
            if (reader != null)
                reader.Close();
        }

        // Keeper
        result.newSegments.Add(info.Clone());
    }

    if (0 == result.numBadSegments) {
        result.clean = true;
        Msg("No problems were detected with this index.\n");
    } else {
        Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
    }

    return result;
}
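// Hedged usage sketch (not from the original source): driving the checker above
// against an index directory. Names follow the Lucene.Net 2.9 port, but treat
// the exact constructor and overload shapes as assumptions:
//
// CheckIndex checker = new CheckIndex(FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/index")));
// CheckIndex.Status status = checker.CheckIndex_Renamed_Method(null); // null = check all segments
// if (!status.clean) {
//     // status.newSegments holds only the segments that passed; a repair tool
//     // could write it back, dropping the broken segments (and their documents).
// }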
/// <summary> Construct a new SegmentInfo instance by reading a /// previously saved SegmentInfo from input. /// /// </summary> /// <param name="dir">directory to load from /// </param> /// <param name="format">format of the segments info file /// </param> /// <param name="input">input handle to read segment info from /// </param> internal SegmentInfo(Directory dir, int format, IndexInput input) { this.dir = dir; name = input.ReadString(); docCount = input.ReadInt(); if (format <= SegmentInfos.FORMAT_LOCKLESS) { delGen = input.ReadLong(); if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) { docStoreOffset = input.ReadInt(); if (docStoreOffset != -1) { docStoreSegment = input.ReadString(); docStoreIsCompoundFile = (1 == input.ReadByte()); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } } else { docStoreOffset = -1; docStoreSegment = name; docStoreIsCompoundFile = false; } if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) { hasSingleNormFile = (1 == input.ReadByte()); } else { hasSingleNormFile = false; } int numNormGen = input.ReadInt(); if (numNormGen == NO) { normGen = null; } else { normGen = new long[numNormGen]; for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadLong(); } } isCompoundFile = (sbyte)input.ReadByte(); preLockless = (isCompoundFile == CHECK_DIR); if (format <= SegmentInfos.FORMAT_DEL_COUNT) { delCount = input.ReadInt(); System.Diagnostics.Debug.Assert(delCount <= docCount); } else { delCount = -1; } if (format <= SegmentInfos.FORMAT_HAS_PROX) { hasProx = input.ReadByte() == 1; } else { hasProx = true; } if (format <= SegmentInfos.FORMAT_DIAGNOSTICS) { diagnostics = input.ReadStringStringMap(); } else { diagnostics = new Dictionary <string, string>(); } } else { delGen = CHECK_DIR; normGen = null; isCompoundFile = (sbyte)(CHECK_DIR); preLockless = true; hasSingleNormFile = false; docStoreOffset = -1; docStoreIsCompoundFile = false; docStoreSegment = null; delCount = -1; hasProx = true; diagnostics = new Dictionary <string, string>(); } }
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) {
    bool success = false;
    isOriginal = true;
    try {
        fieldInfos = fn;

        cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
        cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);

        // First version of fdx did not include a format
        // header, but, the first int will always be 0 in that case
        int firstInt = cloneableIndexStream.ReadInt();
        if (firstInt == 0) {
            format = 0;
        } else {
            format = firstInt;
        }

        if (format > FieldsWriter.FORMAT_CURRENT) {
            throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
        }

        if (format > FieldsWriter.FORMAT) {
            formatSize = 4;
        } else {
            formatSize = 0;
        }

        if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
            cloneableFieldsStream.SetModifiedUTF8StringsMode();
        }

        fieldsStream = (IndexInput) cloneableFieldsStream.Clone();

        long indexSize = cloneableIndexStream.Length() - formatSize;

        if (docStoreOffset != -1) {
            // We read only a slice out of this shared fields file
            this.docStoreOffset = docStoreOffset;
            this.size = size;

            // Verify the file is long enough to hold all of our docs
            System.Diagnostics.Debug.Assert(((int) (indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
        } else {
            this.docStoreOffset = 0;
            this.size = (int) (indexSize >> 3);
        }

        indexStream = (IndexInput) cloneableIndexStream.Clone();
        numTotalDocs = (int) (indexSize >> 3);
        success = true;
    } finally {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success) {
            Close();
        }
    }
}
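// Hedged note (not from the original source): the arithmetic above relies on the
// fdx index file holding one 8-byte file pointer per document, hence
// "indexSize >> 3" for the document count, and the assertion
// "indexSize / 8 >= size + docStoreOffset" when several segments share one doc
// store. A sketch of the implied lookup, with hypothetical helper names:
//
// long SeekToDoc(IndexInput indexStream, int formatSize, int docStoreOffset, int docId) {
//     indexStream.Seek(formatSize + 8L * (docStoreOffset + docId)); // 8 bytes per doc
//     return indexStream.ReadLong(); // file pointer into the .fdt fields data
// }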
/// <summary>Read as a bit set</summary>
private void ReadBits(IndexInput input) {
    Count_Renamed = input.ReadInt(); // read count
    Bits = new byte[GetNumBytes(Size_Renamed)]; // allocate bits
    input.ReadBytes(Bits, 0, Bits.Length);
}
public System.Object Run() {
    System.String segmentFileName = null;
    long lastGen = -1;
    long gen = 0;
    int genLookaheadCount = 0;
    System.IO.IOException exc = null;
    bool retry = false;

    int method = 0;

    // Loop until we succeed in calling doBody() without
    // hitting an IOException. An IOException most likely
    // means a commit was in process and has finished, in
    // the time it took us to load the now-old infos files
    // (and segments files). It's also possible it's a
    // true error (corrupt index). To distinguish these,
    // on each retry we must see "forward progress" on
    // which generation we are trying to load. If we
    // don't, then the original error is real and we throw it.

    // We have three methods for determining the current
    // generation. We try the first two in parallel, and
    // fall back to the third when necessary.

    while (true) {
        if (0 == method) {
            // Method 1: list the directory and use the highest
            // segments_N file. This method works well as long
            // as there is no stale caching on the directory
            // contents (NOTE: NFS clients often have such stale caching):
            System.String[] files = null;
            long genA = -1;

            if (directory != null)
                files = directory.List();
            else
                files = System.IO.Directory.GetFileSystemEntries(fileDirectory.FullName);

            if (files != null)
                genA = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files);

            Lucene.Net.Index.SegmentInfos.Message("directory listing genA=" + genA);

            // Method 2: open segments.gen and read its
            // contents. Then we take the larger of the two
            // gens. This way, if either approach is hitting
            // a stale cache (NFS) we have a better chance of
            // getting the right generation.
            long genB = -1;
            if (directory != null) {
                for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++) {
                    IndexInput genInput = null;
                    try {
                        genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN);
                    } catch (System.IO.FileNotFoundException e) {
                        Lucene.Net.Index.SegmentInfos.Message("segments.gen open: FileNotFoundException " + e);
                        break;
                    } catch (System.IO.IOException e) {
                        Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e);
                    }

                    if (genInput != null) {
                        try {
                            int version = genInput.ReadInt();
                            if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS) {
                                long gen0 = genInput.ReadLong();
                                long gen1 = genInput.ReadLong();
                                Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1);
                                if (gen0 == gen1) {
                                    // The file is consistent.
                                    genB = gen0;
                                    break;
                                }
                            }
                        } catch (System.IO.IOException) {
                            // will retry
                        } finally {
                            genInput.Close();
                        }
                    }

                    try {
                        System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec));
                    } catch (System.Threading.ThreadInterruptedException) {
                        // will retry
                    }
                }
            }

            Lucene.Net.Index.SegmentInfos.Message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB);

            // Pick the larger of the two gens:
            if (genA > genB)
                gen = genA;
            else
                gen = genB;

            if (gen == -1) {
                // Neither approach found a generation
                System.String s;
                if (files != null) {
                    s = "";
                    for (int i = 0; i < files.Length; i++)
                        s += (" " + files[i]);
                } else {
                    s = " null";
                }
                throw new System.IO.FileNotFoundException("no segments* file found in " + directory + ": files:" + s);
            }
        }

        // Third method (fallback if the first & second methods
        // are not reliable): since both the directory cache and
        // the file contents cache seem to be stale, just
        // advance the generation.
        if (1 == method || (0 == method && lastGen == gen && retry)) {
            method = 1;
            if (genLookaheadCount < Lucene.Net.Index.SegmentInfos.defaultGenLookaheadCount) {
                gen++;
                genLookaheadCount++;
                Lucene.Net.Index.SegmentInfos.Message("look ahead increment gen to " + gen);
            }
        }

        if (lastGen == gen) {
            // This means we're about to try the same
            // segments_N we last tried. This is allowed,
            // exactly once, because the writer could have been in
            // the process of writing segments_N last time.
            if (retry) {
                // OK, we've tried the same segments_N file
                // twice in a row, so this must be a real
                // error. We throw the original exception we got.
                throw exc;
            } else {
                retry = true;
            }
        } else {
            // Segment file has advanced since our last loop, so
            // reset retry:
            retry = false;
        }

        lastGen = gen;

        segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

        try {
            System.Object v = DoBody(segmentFileName);
            if (exc != null)
                Lucene.Net.Index.SegmentInfos.Message("success on " + segmentFileName);
            return v;
        } catch (System.IO.IOException err) {
            // Save the original root cause:
            if (exc == null)
                exc = err;

            Lucene.Net.Index.SegmentInfos.Message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen);

            if (!retry && gen > 1) {
                // This is our first time trying this segments
                // file (because retry is false), and, there is
                // possibly a segments_(N-1) (because gen > 1).
                // So, check if the segments_(N-1) exists and
                // try it if so:
                System.String prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1);

                bool prevExists;
                if (directory != null) {
                    prevExists = directory.FileExists(prevSegmentFileName);
                } else {
                    // use Path.Combine rather than a hard-coded "\\" so this also
                    // works on non-Windows file systems
                    System.String prevPath = new System.IO.FileInfo(System.IO.Path.Combine(fileDirectory.FullName, prevSegmentFileName)).FullName;
                    prevExists = System.IO.File.Exists(prevPath) || System.IO.Directory.Exists(prevPath);
                }

                if (prevExists) {
                    Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'");
                    try {
                        System.Object v = DoBody(prevSegmentFileName);
                        if (exc != null)
                            Lucene.Net.Index.SegmentInfos.Message("success on fallback " + prevSegmentFileName);
                        return v;
                    } catch (System.IO.IOException err2) {
                        Lucene.Net.Index.SegmentInfos.Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
                    }
                }
            }
        }
    }
}
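// Hedged illustration (not from the original source): the retry loop above keys
// everything off the "generation" embedded in the segments file name. In Lucene,
// FileNameFromGeneration maps generation 0 to "segments" and generation N > 0 to
// "segments_" + N rendered in base 36 ("segments_1", ..., "segments_a", ...).
// A standalone sketch of that mapping, stated as an assumption about the real helper:
static string SegmentsFileNameSketch(long gen) {
    if (gen < 0) return null;        // no commit yet
    if (gen == 0) return "segments"; // pre-lockless single name
    const string digits = "0123456789abcdefghijklmnopqrstuvwxyz";
    string suffix = "";
    for (long g = gen; g > 0; g /= 36)
        suffix = digits[(int) (g % 36)] + suffix; // base-36 rendering
    return "segments_" + suffix;
}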
/// <summary>Read as a bit set</summary>
private void ReadBits(IndexInput input) {
    count = input.ReadInt(); // read count
    bits = new byte[(size >> 3) + 1]; // allocate bits
    input.ReadBytes(bits, 0, bits.Length);
}
/// <summary>Read as a d-gaps cleared-bits list</summary>
private void ReadClearedDgaps(IndexInput input) {
    Size_Renamed = input.ReadInt(); // (re)read size
    Count_Renamed = input.ReadInt(); // read count
    Bits = new byte[GetNumBytes(Size_Renamed)]; // allocate bits
    for (int i = 0; i < Bits.Length; ++i) {
        Bits[i] = 0xff;
    }
    ClearUnusedBits();
    int last = 0;
    int numCleared = Size() - Count();
    while (numCleared > 0) {
        last += input.ReadVInt();
        Bits[last] = input.ReadByte();
        numCleared -= 8 - BitUtil.BitCount(Bits[last]);
        Debug.Assert(numCleared >= 0 || (last == (Bits.Length - 1) && numCleared == -(8 - (Size_Renamed & 7))));
    }
}
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext) {
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION);
    IndexInput input = directory.OpenInput(fileName, iocontext);

    try {
        int format = input.ReadVInt();

        if (format > FORMAT_MINIMUM) {
            throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
        }
        if (format < PreFlexRWFieldInfosWriter.FORMAT_CURRENT && format != PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW) {
            throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
        }

        int size = input.ReadVInt(); // read in the size
        FieldInfo[] infos = new FieldInfo[size];

        for (int i = 0; i < size; i++) {
            string name = input.ReadString();
            int fieldNumber = format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW ? input.ReadInt() : i;
            byte bits = input.ReadByte();
            bool isIndexed = (bits & PreFlexRWFieldInfosWriter.IS_INDEXED) != 0;
            bool storeTermVector = (bits & PreFlexRWFieldInfosWriter.STORE_TERMVECTOR) != 0;
            bool omitNorms = (bits & PreFlexRWFieldInfosWriter.OMIT_NORMS) != 0;
            bool storePayloads = (bits & PreFlexRWFieldInfosWriter.STORE_PAYLOADS) != 0;

            FieldInfo.IndexOptions? indexOptions;
            if (!isIndexed) {
                indexOptions = null;
            } else if ((bits & PreFlexRWFieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
                indexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            } else if ((bits & PreFlexRWFieldInfosWriter.OMIT_POSITIONS) != 0) {
                if (format <= PreFlexRWFieldInfosWriter.FORMAT_OMIT_POSITIONS) {
                    indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
                } else {
                    throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
                }
            } else {
                indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            }

            // LUCENE-3027: past indices were able to write
            // storePayloads=true when omitTFAP is also true,
            // which is invalid. We correct that, here:
            if (indexOptions != FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
                storePayloads = false;
            }

            DocValuesType_e? normType = isIndexed && !omitNorms ? (DocValuesType_e?) DocValuesType_e.NUMERIC : null;
            if (format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW && normType != null) {
                // RW can have norms but doesn't write them
                normType = input.ReadByte() != 0 ? (DocValuesType_e?) DocValuesType_e.NUMERIC : null;
            }

            infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, null, normType, null);
        }

        if (input.FilePointer != input.Length()) {
            throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length() + " (resource: " + input + ")");
        }
        return new FieldInfos(infos);
    } finally {
        input.Dispose();
    }
}
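// Hedged reference (not from the original source): the single flags byte decoded
// above packs several booleans. The values below match the historical pre-flex
// field-infos format as best I recall; treat them as assumptions rather than a
// copy of PreFlexRWFieldInfosWriter's constants:
//
// const byte IS_INDEXED                   = 0x01;
// const byte STORE_TERMVECTOR             = 0x02;
// const byte OMIT_NORMS                   = 0x10;
// const byte STORE_PAYLOADS               = 0x20;
// const byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
// const byte OMIT_POSITIONS               = 0x80; // only in later formats
//
// e.g. bits == 0x21 would mean: indexed, with payloads, norms kept.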
private int CheckValidFormat(IndexInput in_Renamed) {
    int format = in_Renamed.ReadInt();
    if (format > FORMAT_CURRENT) {
        throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FORMAT_CURRENT + " or less");
    }
    return format;
}
internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi) {
    input = i;
    fieldInfos = fis;
    isIndex = isi;
    maxSkipLevels = 1; // use single-level skip lists for formats > -3

    int firstInt = input.ReadInt();
    if (firstInt >= 0) {
        // original-format file, without explicit format version number
        format = 0;
        size = firstInt;

        // back-compatible settings
        indexInterval = 128;
        skipInterval = System.Int32.MaxValue; // switch off skipTo optimization
    } else {
        // we have a format version number
        format = firstInt;

        // check that it is a format we can understand
        if (format < TermInfosWriter.FORMAT_CURRENT) {
            throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");
        }

        size = input.ReadLong(); // read the size

        if (format == -1) {
            if (!isIndex) {
                indexInterval = input.ReadInt();
                formatM1SkipInterval = input.ReadInt();
            }
            // switch off skipTo optimization for file formats prior to 1.4rc2
            // in order to avoid a bug in the skipTo implementation of those versions
            skipInterval = System.Int32.MaxValue;
        } else {
            indexInterval = input.ReadInt();
            skipInterval = input.ReadInt();
            if (format <= TermInfosWriter.FORMAT) {
                // this new format introduces multi-level skipping
                maxSkipLevels = input.ReadInt();
            }
        }
        System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
        System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
    }
    if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
        // pre-UTF8 indexes wrote string lengths in Java chars, not bytes
        termBuffer.SetPreUTF8Strings();
        scanBuffer.SetPreUTF8Strings();
        prevBuffer.SetPreUTF8Strings();
    }
}
private NumericDocValues LoadShortField(FieldInfo field, IndexInput input) {
    CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME, Lucene40DocValuesFormat.INTS_VERSION_START, Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
    int valueSize = input.ReadInt();
    if (valueSize != 2) {
        throw new CorruptIndexException("invalid valueSize: " + valueSize);
    }
    int maxDoc = State.SegmentInfo.DocCount;
    short[] values = new short[maxDoc];
    for (int i = 0; i < values.Length; i++) {
        values[i] = input.ReadShort();
    }
    RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(values));
    return new NumericDocValuesAnonymousInnerClassHelper4(values);
}
private SortedDocValues LoadBytesFixedSorted(FieldInfo field, IndexInput data, IndexInput index) {
    CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
    CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);

    int fixedLength = data.ReadInt();
    int valueCount = index.ReadInt();

    PagedBytes bytes = new PagedBytes(16);
    bytes.Copy(data, fixedLength * (long) valueCount);
    PagedBytes.Reader bytesReader = bytes.Freeze(true);
    PackedInts.Reader reader = PackedInts.GetReader(index);
    RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed());

    return CorrectBuggyOrds(new SortedDocValuesAnonymousInnerClassHelper(fixedLength, valueCount, bytesReader, reader));
}
/// <summary>Read as a d-gaps list</summary>
private void ReadSetDgaps(IndexInput input) {
    Size_Renamed = input.ReadInt(); // (re)read size
    Count_Renamed = input.ReadInt(); // read count
    Bits = new byte[GetNumBytes(Size_Renamed)]; // allocate bits
    int last = 0;
    int n = Count();
    while (n > 0) {
        last += input.ReadVInt();
        Bits[last] = input.ReadByte();
        n -= BitUtil.BitCount(Bits[last]);
        Debug.Assert(n >= 0);
    }
}
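// Hedged illustration (not from the original source): the d-gap readers above all
// decode the same scheme: only "interesting" bytes of the bit array are stored,
// each as a vInt gap from the previously stored byte's index followed by the byte
// itself. ReadSetDgaps stores the non-zero bytes of a sparse set, while
// ReadClearedDgaps stores the non-0xff bytes of a dense one. A minimal encoder
// for the sparse variant, with hypothetical writer delegates standing in for
// IndexOutput.WriteVInt/WriteByte:
internal static class SetDgapsSketch {
    internal static void Write(byte[] bits, int count, System.Action<int> writeVInt, System.Action<byte> writeByte) {
        int last = 0;  // index of the previously written byte
        int n = count; // set bits not yet accounted for
        for (int i = 0; i < bits.Length && n > 0; i++) {
            if (bits[i] != 0) {
                writeVInt(i - last); // gap to this non-zero byte
                writeByte(bits[i]);  // the byte's actual contents
                last = i;
                for (int b = bits[i]; b != 0; b >>= 1)
                    n -= b & 1;      // subtract this byte's popcount
            }
        }
    }
}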
/// <summary> Construct a new SegmentInfo instance by reading a /// previously saved SegmentInfo from input. /// /// </summary> /// <param name="dir">directory to load from /// </param> /// <param name="format">format of the segments info file /// </param> /// <param name="input">input handle to read segment info from /// </param> internal SegmentInfo(Directory dir, int format, IndexInput input) { this.dir = dir; name = input.ReadString(); docCount = input.ReadInt(); if (format <= SegmentInfos.FORMAT_LOCKLESS) { delGen = input.ReadLong(); if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) { docStoreOffset = input.ReadInt(); if (docStoreOffset != - 1) { docStoreSegment = input.ReadString(); docStoreIsCompoundFile = (1 == input.ReadByte()); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } } else { docStoreOffset = - 1; docStoreSegment = name; docStoreIsCompoundFile = false; } if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) { hasSingleNormFile = (1 == input.ReadByte()); } else { hasSingleNormFile = false; } int numNormGen = input.ReadInt(); if (numNormGen == NO) { normGen = null; } else { normGen = new long[numNormGen]; for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadLong(); } } isCompoundFile = (sbyte) input.ReadByte(); preLockless = (isCompoundFile == CHECK_DIR); if (format <= SegmentInfos.FORMAT_DEL_COUNT) { delCount = input.ReadInt(); System.Diagnostics.Debug.Assert(delCount <= docCount); } else delCount = - 1; if (format <= SegmentInfos.FORMAT_HAS_PROX) hasProx = input.ReadByte() == 1; else hasProx = true; if (format <= SegmentInfos.FORMAT_DIAGNOSTICS) { diagnostics = input.ReadStringStringMap(); } else { diagnostics = new System.Collections.Generic.Dictionary<string,string>(); } } else { delGen = CHECK_DIR; normGen = null; isCompoundFile = (sbyte) (CHECK_DIR); preLockless = true; hasSingleNormFile = false; docStoreOffset = - 1; docStoreIsCompoundFile = false; docStoreSegment = null; delCount = - 1; hasProx = true; diagnostics = new System.Collections.Generic.Dictionary<string,string>(); } }
/// <summary> Construct a new SegmentInfo instance by reading a /// previously saved SegmentInfo from input. /// /// </summary> /// <param name="dir">directory to load from /// </param> /// <param name="format">format of the segments info file /// </param> /// <param name="input">input handle to read segment info from /// </param> internal SegmentInfo(Directory dir, int format, IndexInput input) { this.dir = dir; name = input.ReadString(); docCount = input.ReadInt(); if (format <= SegmentInfos.FORMAT_LOCKLESS) { delGen = input.ReadLong(); if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) { docStoreOffset = input.ReadInt(); if (docStoreOffset != - 1) { docStoreSegment = input.ReadString(); docStoreIsCompoundFile = (1 == input.ReadByte()); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } } else { docStoreOffset = - 1; docStoreSegment = name; docStoreIsCompoundFile = false; } if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) { hasSingleNormFile = (1 == input.ReadByte()); } else { hasSingleNormFile = false; } int numNormGen = input.ReadInt(); if (numNormGen == NO) { normGen = null; } else { normGen = new long[numNormGen]; for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadLong(); } } isCompoundFile = (sbyte) input.ReadByte(); preLockless = (isCompoundFile == CHECK_DIR); } else { delGen = CHECK_DIR; normGen = null; isCompoundFile = (sbyte) (CHECK_DIR); preLockless = true; hasSingleNormFile = false; docStoreOffset = - 1; docStoreIsCompoundFile = false; docStoreSegment = null; } }
/// <summary>Read as a d-gaps list</summary>
private void ReadDgaps(IndexInput input) {
    size = input.ReadInt(); // (re)read size
    count = input.ReadInt(); // read count
    bits = new byte[(size >> 3) + 1]; // allocate bits
    int last = 0;
    int n = Count();
    while (n > 0) {
        last += input.ReadVInt();
        bits[last] = input.ReadByte();
        n -= BYTE_COUNTS[bits[last] & 0xFF];
    }
}
/// <summary>Read a particular segmentFileName. Note that this may
/// throw an IOException if a commit is in process.
/// </summary>
/// <param name="directory">-- directory containing the segments file</param>
/// <param name="segmentFileName">-- segment file to load</param>
/// <throws>CorruptIndexException if the index is corrupt</throws>
/// <throws>IOException if there is a low-level IO error</throws>
public void Read(Directory directory, System.String segmentFileName) {
    bool success = false;

    // Clear any previous segments:
    Clear();

    IndexInput input = directory.OpenInput(segmentFileName);

    generation = GenerationFromSegmentsFileName(segmentFileName);
    lastGeneration = generation;

    try {
        int format = input.ReadInt();
        if (format < 0) {
            // file contains explicit format info
            // check that it is a format we can understand
            if (format < CURRENT_FORMAT)
                throw new CorruptIndexException("Unknown format version: " + format);
            version = input.ReadLong(); // read version
            counter = input.ReadInt(); // read counter
        } else {
            // file is in old format without explicit format info
            counter = format;
        }

        for (int i = input.ReadInt(); i > 0; i--) {
            // read segmentInfos
            Add(new SegmentInfo(directory, format, input));
        }

        if (format >= 0) {
            // in old format the version number may be at the end of the file
            if (input.GetFilePointer() >= input.Length()) {
                // old file format without version number: use the current time in
                // milliseconds since the Unix epoch (the port's equivalent of
                // Java's System.currentTimeMillis())
                version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
            } else {
                version = input.ReadLong(); // read version
            }
        }
        success = true;
    } finally {
        input.Close();
        if (!success) {
            // Clear any segment infos we had loaded so we
            // have a clean slate on retry:
            Clear();
        }
    }
}
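// Hedged usage sketch (not from the original source): reading the current commit
// point with the method above. The no-argument Read(Directory) overload used
// elsewhere in this codebase resolves the latest segments_N itself; here the file
// name is passed explicitly:
//
// SegmentInfos infos = new SegmentInfos();
// infos.Read(dir, SegmentInfos.GetCurrentSegmentFileName(dir)); // or infos.Read(dir)
// for (int i = 0; i < infos.Count; i++) {
//     SegmentInfo si = infos.Info(i);
//     System.Console.WriteLine(si.name + " docCount=" + si.docCount);
// }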
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) {
    bool success = false;
    isOriginal = true;
    try {
        fieldInfos = fn;

        cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
        cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);

        // First version of fdx did not include a format
        // header, but, the first int will always be 0 in that case
        int firstInt = cloneableIndexStream.ReadInt();
        format = firstInt == 0 ? 0 : firstInt;

        if (format > FieldsWriter.FORMAT_CURRENT)
            throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");

        formatSize = format > FieldsWriter.FORMAT ? 4 : 0;

        if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
            cloneableFieldsStream.SetModifiedUTF8StringsMode();

        fieldsStream = (IndexInput) cloneableFieldsStream.Clone();

        long indexSize = cloneableIndexStream.Length() - formatSize;

        if (docStoreOffset != -1) {
            // We read only a slice out of this shared fields file
            this.docStoreOffset = docStoreOffset;
            this.size = size;

            // Verify the file is long enough to hold all of our docs
            System.Diagnostics.Debug.Assert(((int) (indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
        } else {
            this.docStoreOffset = 0;
            this.size = (int) (indexSize >> 3);
        }

        indexStream = (IndexInput) cloneableIndexStream.Clone();
        numTotalDocs = (int) (indexSize >> 3);
        success = true;
    } finally {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success) {
            Dispose();
        }
    }
}
public override long Get(int index) {
    long majorBitPos = (long) index * bitsPerValue;
    long elementPos = (long) ((ulong) majorBitPos >> 3);
    try {
        @in.Seek(StartPointer + elementPos);

        int bitPos = (int) (majorBitPos & 7);
        // round up bits to a multiple of 8 to find total bytes needed to read
        int roundedBits = ((bitPos + bitsPerValue + 7) & ~7);
        // the number of extra bits read at the end to shift out
        int shiftRightBits = roundedBits - bitPos - bitsPerValue;

        long rawValue;
        switch ((int) ((uint) roundedBits >> 3)) {
            case 1:
                rawValue = @in.ReadByte();
                break;
            case 2:
                rawValue = @in.ReadShort();
                break;
            case 3:
                rawValue = ((long) @in.ReadShort() << 8) | (@in.ReadByte() & 0xFFL);
                break;
            case 4:
                rawValue = @in.ReadInt();
                break;
            case 5:
                rawValue = ((long) @in.ReadInt() << 8) | (@in.ReadByte() & 0xFFL);
                break;
            case 6:
                rawValue = ((long) @in.ReadInt() << 16) | (@in.ReadShort() & 0xFFFFL);
                break;
            case 7:
                rawValue = ((long) @in.ReadInt() << 24) | ((@in.ReadShort() & 0xFFFFL) << 8) | (@in.ReadByte() & 0xFFL);
                break;
            case 8:
                rawValue = @in.ReadLong();
                break;
            case 9:
                // We must be very careful not to shift out relevant bits. So we account for the right shift
                // we would normally do on return here, and reset it.
                rawValue = (@in.ReadLong() << (8 - shiftRightBits)) | ((int) ((uint) (@in.ReadByte() & 0xFFL) >> shiftRightBits));
                shiftRightBits = 0;
                break;
            default:
                throw new InvalidOperationException("bitsPerValue too large: " + bitsPerValue);
        }
        return ((long) ((ulong) rawValue >> shiftRightBits)) & ValueMask;
    } catch (System.IO.IOException ioe) {
        throw new InvalidOperationException("failed", ioe);
    }
}
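// Hedged worked example (not from the original source), tracing Get() above for
// bitsPerValue = 5 and index = 3:
//   majorBitPos    = 3 * 5 = 15            (absolute bit offset of the value)
//   elementPos     = 15 >> 3 = 1           (seek to byte 1 of the packed stream)
//   bitPos         = 15 & 7 = 7            (value starts at bit 7 of that byte)
//   roundedBits    = (7 + 5 + 7) & ~7 = 16 (read 2 whole bytes, so case 2)
//   shiftRightBits = 16 - 7 - 5 = 4        (4 trailing bits belong to the next value)
// The raw 16-bit read is shifted right by 4 and masked with ValueMask, assumed
// here to be (1 << 5) - 1 = 0x1F, to isolate the 5-bit value.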