/// <summary>
/// Returns a <seealso cref="Status"/> instance detailing
/// the state of the index.
/// </summary>
/// <param name="onlySegments"> list of specific segment names to check
///
/// <p>As this method checks every byte in the specified
/// segments, on a large index it can take quite a long
/// time to run.
///
/// <p><b>WARNING</b>: make sure
/// you only call this when the index is not opened by any
/// writer. </param>
public virtual Status DoCheckIndex(IList<string> onlySegments)
{
    NumberFormatInfo nf = CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();
    Status result = new Status();
    result.Dir = Dir;

    // Step 1: read the segments file; if this fails there is nothing more we can check.
    try
    {
        sis.Read(Dir);
    }
    catch (Exception t)
    {
        Msg(infoStream, "ERROR: could not read any segments file in directory");
        result.MissingSegments = true;
        if (infoStream != null)
        {
            // LUCENENET NOTE: Some tests rely on the error type being in
            // the message. We can't get the error type with StackTrace, we
            // need ToString() for that.
            infoStream.WriteLine(t.ToString());
            //infoStream.WriteLine(t.StackTrace);
        }
        return result;
    }

    // find the oldest and newest segment versions
    string oldest = Convert.ToString(int.MaxValue), newest = Convert.ToString(int.MinValue);
    string oldSegs = null;
    bool foundNonNullVersion = false;
    IComparer<string> versionComparator = StringHelper.VersionComparator;
    foreach (SegmentCommitInfo si in sis.Segments)
    {
        string version = si.Info.Version;
        if (version == null)
        {
            // pre-3.1 segment
            oldSegs = "pre-3.1";
        }
        else
        {
            foundNonNullVersion = true;
            if (versionComparator.Compare(version, oldest) < 0)
            {
                oldest = version;
            }
            if (versionComparator.Compare(version, newest) > 0)
            {
                newest = version;
            }
        }
    }

    int numSegments = sis.Size();
    string segmentsFileName = sis.SegmentsFileName;

    // Step 2: make sure the segments file itself can be opened and its
    // leading format int read.
    // note: we only read the format byte (required preamble) here!
    IndexInput input = null;
    try
    {
        input = Dir.OpenInput(segmentsFileName, IOContext.READONCE);
    }
    catch (Exception t)
    {
        Msg(infoStream, "ERROR: could not open segments file in directory");
        if (infoStream != null)
        {
            // LUCENENET NOTE: ToString() is required so the error type
            // appears in the message (some tests rely on this).
            infoStream.WriteLine(t.ToString());
            //infoStream.WriteLine(t.StackTrace);
        }
        result.CantOpenSegments = true;
        return result;
    }
    // 'format' is only read to verify the preamble is present; the value
    // itself is not interpreted here.
    int format = 0;
    try
    {
        format = input.ReadInt();
    }
    catch (Exception t)
    {
        Msg(infoStream, "ERROR: could not read segment file version in directory");
        if (infoStream != null)
        {
            // LUCENENET NOTE: ToString() is required so the error type
            // appears in the message (some tests rely on this).
            infoStream.WriteLine(t.ToString());
            //infoStream.WriteLine(t.StackTrace);
        }
        result.MissingSegmentVersion = true;
        return result;
    }
    finally
    {
        if (input != null)
        {
            input.Dispose();
        }
    }

    // Legacy placeholders: sFormat is only used in the summary message below,
    // and skip is never set to true in this implementation (the "tool out of
    // date" branch is effectively dead but kept for parity with upstream).
    string sFormat = "";
    bool skip = false;

    result.SegmentsFileName = segmentsFileName;
    result.NumSegments = numSegments;
    result.UserData = sis.UserData;
    string userDataString;
    if (sis.UserData.Count > 0)
    {
        userDataString = " userData=" + sis.UserData;
    }
    else
    {
        userDataString = "";
    }

    // Build a human-readable version range summary for the header line.
    string versionString = null;
    if (oldSegs != null)
    {
        if (foundNonNullVersion)
        {
            versionString = "versions=[" + oldSegs + " .. " + newest + "]";
        }
        else
        {
            versionString = "version=" + oldSegs;
        }
    }
    else
    {
        versionString = oldest.Equals(newest) ? ("version=" + oldest) : ("versions=[" + oldest + " .. " + newest + "]");
    }

    Msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments + " " + versionString + " format=" + sFormat + userDataString);

    if (onlySegments != null)
    {
        result.Partial = true;
        if (infoStream != null)
        {
            infoStream.Write("\nChecking only these segments:");
            foreach (string s in onlySegments)
            {
                infoStream.Write(" " + s);
            }
        }
        result.SegmentsChecked.AddRange(onlySegments);
        Msg(infoStream, ":");
    }

    if (skip)
    {
        Msg(infoStream, "\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        result.ToolOutOfDate = true;
        return result;
    }

    // NewSegments collects only the segments that pass all checks ("keepers").
    result.NewSegments = (SegmentInfos)sis.Clone();
    result.NewSegments.Clear();
    result.MaxSegmentName = -1;

    // Step 3: check each segment in turn. A failure in one segment is
    // recorded and we continue with the next one.
    for (int i = 0; i < numSegments; i++)
    {
        SegmentCommitInfo info = sis.Info(i);

        // Derive the numeric part of the segment name ("_N" -> N) to track
        // the largest name in use. TryParse leaves segmentName at 0 on
        // failure, matching the previous parse-and-swallow behavior without
        // using exceptions for control flow.
        // NOTE(review): Java Lucene parses this suffix in radix 36
        // (Character.MAX_RADIX); confirm this port names segments in decimal.
        int segmentName = 0;
        string infoName = info.Info.Name;
        if (infoName.Length > 1)
        {
            int.TryParse(infoName.Substring(1), NumberStyles.Integer, CultureInfo.InvariantCulture, out segmentName);
        }
        if (segmentName > result.MaxSegmentName)
        {
            result.MaxSegmentName = segmentName;
        }
        if (onlySegments != null && !onlySegments.Contains(info.Info.Name))
        {
            continue;
        }
        Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
        result.SegmentInfos.Add(segInfoStat);
        Msg(infoStream, " " + (1 + i) + " of " + numSegments + ": name=" + info.Info.Name + " docCount=" + info.Info.DocCount);
        segInfoStat.Name = info.Info.Name;
        segInfoStat.DocCount = info.Info.DocCount;

        string version = info.Info.Version;
        // 4.5+ segments must have a positive doc count.
        if (info.Info.DocCount <= 0 && version != null && versionComparator.Compare(version, "4.5") >= 0)
        {
            throw new Exception("illegal number of documents: maxDoc=" + info.Info.DocCount);
        }

        // Docs we would lose if this segment turns out to be broken; refined
        // to the live-doc count once the reader is open.
        int toLoseDocCount = info.Info.DocCount;
        AtomicReader reader = null;
        try
        {
            Codec codec = info.Info.Codec;
            Msg(infoStream, " codec=" + codec);
            segInfoStat.Codec = codec;
            Msg(infoStream, " compound=" + info.Info.UseCompoundFile);
            segInfoStat.Compound = info.Info.UseCompoundFile;
            Msg(infoStream, " numFiles=" + info.Files().Count);
            segInfoStat.NumFiles = info.Files().Count;
            segInfoStat.SizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
            if (info.Info.GetAttribute(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY) == null)
            {
                // don't print size in bytes if its a 3.0 segment with shared docstores
                Msg(infoStream, " size (MB)=" + segInfoStat.SizeMB.ToString(nf));
            }
            IDictionary<string, string> diagnostics = info.Info.Diagnostics;
            segInfoStat.Diagnostics = diagnostics;
            if (diagnostics.Count > 0)
            {
                Msg(infoStream, " diagnostics = " + diagnostics);
            }
            if (!info.HasDeletions())
            {
                Msg(infoStream, " no deletions");
                segInfoStat.HasDeletions = false;
            }
            else
            {
                Msg(infoStream, " has deletions [delGen=" + info.DelGen + "]");
                segInfoStat.HasDeletions = true;
                segInfoStat.DeletionsGen = info.DelGen;
            }

            // Test: open a reader on the segment.
            if (infoStream != null)
            {
                infoStream.Write(" test: open reader.........");
            }
            reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT);
            Msg(infoStream, "OK");
            segInfoStat.OpenReaderPassed = true;

            // Test: checksum/structural integrity of the segment files.
            if (infoStream != null)
            {
                infoStream.Write(" test: check integrity.....");
            }
            reader.CheckIntegrity();
            Msg(infoStream, "OK");

            // Test: live-docs bookkeeping is consistent with the commit info.
            if (infoStream != null)
            {
                infoStream.Write(" test: check live docs.....");
            }
            int numDocs = reader.NumDocs;
            toLoseDocCount = numDocs;
            if (reader.HasDeletions)
            {
                if (reader.NumDocs != info.Info.DocCount - info.DelCount)
                {
                    throw new Exception("delete count mismatch: info=" + (info.Info.DocCount - info.DelCount) + " vs reader=" + reader.NumDocs);
                }
                if ((info.Info.DocCount - reader.NumDocs) > reader.MaxDoc)
                {
                    throw new Exception("too many deleted docs: maxDoc()=" + reader.MaxDoc + " vs del count=" + (info.Info.DocCount - reader.NumDocs));
                }
                if (info.Info.DocCount - numDocs != info.DelCount)
                {
                    throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs));
                }
                Bits liveDocs = reader.LiveDocs;
                if (liveDocs == null)
                {
                    throw new Exception("segment should have deletions, but liveDocs is null");
                }
                else
                {
                    // Count the set bits and cross-check against numDocs.
                    int numLive = 0;
                    for (int j = 0; j < liveDocs.Length(); j++)
                    {
                        if (liveDocs.Get(j))
                        {
                            numLive++;
                        }
                    }
                    if (numLive != numDocs)
                    {
                        throw new Exception("liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive);
                    }
                }
                segInfoStat.NumDeleted = info.Info.DocCount - numDocs;
                Msg(infoStream, "OK [" + (segInfoStat.NumDeleted) + " deleted docs]");
            }
            else
            {
                if (info.DelCount != 0)
                {
                    throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs));
                }
                Bits liveDocs = reader.LiveDocs;
                if (liveDocs != null)
                {
                    // its ok for it to be non-null here, as long as none are set right?
                    for (int j = 0; j < liveDocs.Length(); j++)
                    {
                        if (!liveDocs.Get(j))
                        {
                            throw new Exception("liveDocs mismatch: info says no deletions but doc " + j + " is deleted.");
                        }
                    }
                }
                Msg(infoStream, "OK");
            }
            if (reader.MaxDoc != info.Info.DocCount)
            {
                throw new Exception("SegmentReader.maxDoc() " + reader.MaxDoc + " != SegmentInfos.docCount " + info.Info.DocCount);
            }

            // Test getFieldInfos()
            if (infoStream != null)
            {
                infoStream.Write(" test: fields..............");
            }
            FieldInfos fieldInfos = reader.FieldInfos;
            Msg(infoStream, "OK [" + fieldInfos.Size() + " fields]");
            segInfoStat.NumFields = fieldInfos.Size();

            // Test Field Norms
            segInfoStat.FieldNormStatus = TestFieldNorms(reader, infoStream);

            // Test the Term Index
            segInfoStat.TermIndexStatus = TestPostings(reader, infoStream, Verbose);

            // Test Stored Fields
            segInfoStat.StoredFieldStatus = TestStoredFields(reader, infoStream);

            // Test Term Vectors
            segInfoStat.TermVectorStatus = TestTermVectors(reader, infoStream, Verbose, CrossCheckTermVectors_Renamed);

            segInfoStat.DocValuesStatus = TestDocValues(reader, infoStream);

            // Rethrow the first exception we encountered
            // this will cause stats for failed segments to be incremented properly
            if (segInfoStat.FieldNormStatus.Error != null)
            {
                throw new Exception("Field Norm test failed");
            }
            else if (segInfoStat.TermIndexStatus.Error != null)
            {
                throw new Exception("Term Index test failed");
            }
            else if (segInfoStat.StoredFieldStatus.Error != null)
            {
                throw new Exception("Stored Field test failed");
            }
            else if (segInfoStat.TermVectorStatus.Error != null)
            {
                throw new Exception("Term Vector test failed");
            }
            else if (segInfoStat.DocValuesStatus.Error != null)
            {
                throw new Exception("DocValues test failed");
            }

            Msg(infoStream, "");
        }
        catch (Exception t)
        {
            // Any failure marks this segment as bad; record it and move on
            // to the next segment rather than aborting the whole check.
            Msg(infoStream, "FAILED");
            string comment = "fixIndex() would remove reference to this segment";
            Msg(infoStream, " WARNING: " + comment + "; full exception:");
            if (infoStream != null)
            {
                // LUCENENET NOTE: ToString() is required so the error type
                // appears in the message (some tests rely on this).
                infoStream.WriteLine(t.ToString());
                //infoStream.WriteLine(t.StackTrace);
            }
            Msg(infoStream, "");
            result.TotLoseDocCount += toLoseDocCount;
            result.NumBadSegments++;
            continue;
        }
        finally
        {
            // Always release the segment reader, whether the checks passed
            // or the catch above handled a failure.
            if (reader != null)
            {
                reader.Dispose();
            }
        }

        // Keeper
        result.NewSegments.Add((SegmentCommitInfo)info.Clone());
    }

    if (0 == result.NumBadSegments)
    {
        result.Clean = true;
    }
    else
    {
        Msg(infoStream, "WARNING: " + result.NumBadSegments + " broken segments (containing " + result.TotLoseDocCount + " documents) detected");
    }

    // The next segment name counter must be strictly greater than every
    // segment name in use (assignment inside the condition is intentional:
    // ValidCounter records the outcome of the comparison).
    if (!(result.ValidCounter = (result.MaxSegmentName < sis.Counter)))
    {
        result.Clean = false;
        result.NewSegments.Counter = result.MaxSegmentName + 1;
        Msg(infoStream, "ERROR: Next segment name counter " + sis.Counter + " is not greater than max segment name " + result.MaxSegmentName);
    }

    if (result.Clean)
    {
        Msg(infoStream, "No problems were detected with this index.\n");
    }

    return result;
}