/// <summary> Should internally checkpoint state that will change /// during commit so that we can rollback if necessary. /// </summary> internal virtual void StartCommit() { if (directoryOwner) { rollbackSegmentInfos = (SegmentInfos)segmentInfos.Clone(); } rollbackHasChanges = hasChanges; }
/// <summary> Should internally checkpoint state that will change /// during commit so that we can rollback if necessary. /// </summary> internal virtual void StartCommit() { if (segmentInfos != null) { rollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone(); } rollbackHasChanges = hasChanges; }
/// <summary>Returns a <see cref="Status" /> instance detailing /// the state of the index. /// /// </summary> /// <param name="onlySegments">list of specific segment names to check /// /// <p/>As this method checks every byte in the specified /// segments, on a large index it can take quite a long /// time to run. /// /// <p/><b>WARNING</b>: make sure /// you only call this when the index is not opened by any /// writer. /// </param> public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments) { System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { sis.Read(dir); } catch (System.Exception t) { Msg("ERROR: could not read any segments file in directory"); result.missingSegments = true; if (infoStream != null) infoStream.WriteLine(t.StackTrace); return result; } int numSegments = sis.Count; var segmentsFileName = sis.GetCurrentSegmentFileName(); IndexInput input = null; try { input = dir.OpenInput(segmentsFileName); } catch (System.Exception t) { Msg("ERROR: could not open segments file in directory"); if (infoStream != null) infoStream.WriteLine(t.StackTrace); result.cantOpenSegments = true; return result; } int format = 0; try { format = input.ReadInt(); } catch (System.Exception t) { Msg("ERROR: could not read segment file version in directory"); if (infoStream != null) infoStream.WriteLine(t.StackTrace); result.missingSegmentVersion = true; return result; } finally { if (input != null) input.Close(); } System.String sFormat = ""; bool skip = false; if (format == SegmentInfos.FORMAT) sFormat = "FORMAT [Lucene Pre-2.1]"; if (format == SegmentInfos.FORMAT_LOCKLESS) sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; else { if (format == SegmentInfos.FORMAT_CHECKSUM) sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_DEL_COUNT) sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_HAS_PROX) sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_USER_DATA) sFormat = "FORMAT_USER_DATA [Lucene 2.9]"; else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; else if (format < SegmentInfos.CURRENT_FORMAT) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } } result.segmentsFileName = segmentsFileName; result.numSegments = numSegments; result.segmentFormat = sFormat; result.userData = sis.UserData; System.String userDataString; if (sis.UserData.Count > 0) { userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData); } else { userDataString = ""; } Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString); if (onlySegments != null) { result.partial = true; if (infoStream != null) infoStream.Write("\nChecking only these segments:"); foreach(string s in onlySegments) { if (infoStream != null) { infoStream.Write(" " + s); } } result.segmentsChecked.AddRange(onlySegments); Msg(":"); } if (skip) { Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); result.toolOutOfDate = true; return result; } result.newSegments = (SegmentInfos) sis.Clone(); result.newSegments.Clear(); for (int i = 0; i < numSegments; i++) { SegmentInfo info = sis.Info(i); if (onlySegments != null && !onlySegments.Contains(info.name)) continue; var segInfoStat = new Status.SegmentInfoStatus(); result.segmentInfos.Add(segInfoStat); Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); segInfoStat.name = info.name; segInfoStat.docCount = info.docCount; int toLoseDocCount = info.docCount; SegmentReader reader = null; try { Msg(" compound=" + info.GetUseCompoundFile()); segInfoStat.compound = info.GetUseCompoundFile(); Msg(" hasProx=" + info.HasProx); segInfoStat.hasProx = info.HasProx; Msg(" numFiles=" + info.Files().Count); segInfoStat.numFiles = info.Files().Count; Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) })); segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0); IDictionary<string, string> diagnostics = info.Diagnostics; segInfoStat.diagnostics = diagnostics; if (diagnostics.Count > 0) { Msg(" diagnostics = " + CollectionsHelper.CollectionToString(diagnostics)); } int docStoreOffset = info.DocStoreOffset; if (docStoreOffset != - 1) { Msg(" docStoreOffset=" + docStoreOffset); segInfoStat.docStoreOffset = docStoreOffset; Msg(" docStoreSegment=" + info.DocStoreSegment); segInfoStat.docStoreSegment = info.DocStoreSegment; Msg(" docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile); segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile; } System.String delFileName = info.GetDelFileName(); if (delFileName == null) { Msg(" no deletions"); segInfoStat.hasDeletions = false; } else { Msg(" has deletions [delFileName=" + delFileName + "]"); segInfoStat.hasDeletions = true; segInfoStat.deletionsFileName = delFileName; } if (infoStream != null) infoStream.Write(" test: open reader........."); reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); segInfoStat.openReaderPassed = true; int numDocs = reader.NumDocs(); toLoseDocCount = numDocs; if (reader.HasDeletions) { if (reader.deletedDocs.Count() != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (reader.deletedDocs.Count() > reader.MaxDoc) { throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (info.docCount - numDocs != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } segInfoStat.numDeleted = info.docCount - numDocs; Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); } else { if (info.GetDelCount() != 0) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } Msg("OK"); } if (reader.MaxDoc != info.docCount) throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount); // Test getFieldNames() if (infoStream != null) { infoStream.Write(" test: fields.............."); } ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL); Msg("OK [" + fieldNames.Count + " fields]"); segInfoStat.numFields = fieldNames.Count; // Test Field Norms segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader); // Test the Term Index segInfoStat.termIndexStatus = TestTermIndex(info, reader); // Test Stored Fields segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf); // Test Term Vectors segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf); // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly if (segInfoStat.fieldNormStatus.error != null) { throw new SystemException("Field Norm test failed"); } else if (segInfoStat.termIndexStatus.error != null) { throw new SystemException("Term Index test failed"); } else if (segInfoStat.storedFieldStatus.error != null) { throw new SystemException("Stored Field test failed"); } else if (segInfoStat.termVectorStatus.error != null) { throw new System.SystemException("Term Vector test failed"); } Msg(""); } catch (System.Exception t) { Msg("FAILED"); const string comment = "fixIndex() would remove reference to this segment"; Msg(" WARNING: " + comment + "; full exception:"); if (infoStream != null) infoStream.WriteLine(t.StackTrace); Msg(""); result.totLoseDocCount += toLoseDocCount; result.numBadSegments++; continue; } finally { if (reader != null) reader.Close(); } // Keeper result.newSegments.Add((SegmentInfo)info.Clone()); } if (0 == result.numBadSegments) { result.clean = true; Msg("No problems were detected with this index.\n"); } else Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); return result; }
/// <summary> /// Used by near real-time search </summary> internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes) { // IndexWriter synchronizes externally before calling // us, which ensures infos will not change; so there's // no need to process segments in reverse order int numSegments = infos.Count; IList <SegmentReader> readers = new List <SegmentReader>(); Directory dir = writer.Directory; SegmentInfos segmentInfos = (SegmentInfos)infos.Clone(); int infosUpto = 0; bool success = false; try { for (int i = 0; i < numSegments; i++) { // NOTE: important that we use infos not // segmentInfos here, so that we are passing the // actual instance of SegmentInfoPerCommit in // IndexWriter's segmentInfos: SegmentCommitInfo info = infos.Info(i); Debug.Assert(info.Info.Dir == dir); ReadersAndUpdates rld = writer.readerPool.Get(info, true); try { SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ); if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments) { // Steal the ref: readers.Add(reader); infosUpto++; } else { reader.DecRef(); segmentInfos.Remove(infosUpto); } } finally { writer.readerPool.Release(rld); } } writer.IncRefDeleter(segmentInfos); StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes); success = true; return(result); } finally { if (!success) { foreach (SegmentReader r in readers) { try { r.DecRef(); } #pragma warning disable 168 catch (Exception th) #pragma warning restore 168 { // ignore any exception that is thrown here to not mask any original // exception. } } } } }
/// <summary>Returns a {@link Status} instance detailing /// the state of the index. /// /// </summary> /// <param name="onlySegments">list of specific segment names to check /// /// <p/>As this method checks every byte in the specified /// segments, on a large index it can take quite a long /// time to run. /// /// <p/><b>WARNING</b>: make sure /// you only call this when the index is not opened by any /// writer. /// </param> public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments) { System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { sis.Read(dir); } catch (System.Exception t) { Msg("ERROR: could not read any segments file in directory"); result.missingSegments = true; if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } return(result); } int numSegments = sis.Count; System.String segmentsFileName = sis.GetCurrentSegmentFileName(); IndexInput input = null; try { input = dir.OpenInput(segmentsFileName); } catch (System.Exception t) { Msg("ERROR: could not open segments file in directory"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } result.cantOpenSegments = true; return(result); } int format = 0; try { format = input.ReadInt(); } catch (System.Exception t) { Msg("ERROR: could not read segment file version in directory"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } result.missingSegmentVersion = true; return(result); } finally { if (input != null) { input.Close(); } } System.String sFormat = ""; bool skip = false; if (format == SegmentInfos.FORMAT) { sFormat = "FORMAT [Lucene Pre-2.1]"; } if (format == SegmentInfos.FORMAT_LOCKLESS) { sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; } else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) { sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; } else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) { sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; } else { if (format == SegmentInfos.FORMAT_CHECKSUM) { sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_DEL_COUNT) { sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_HAS_PROX) { sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_USER_DATA) { sFormat = "FORMAT_USER_DATA [Lucene 2.9]"; } else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) { sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; } else if (format < SegmentInfos.CURRENT_FORMAT) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } } result.segmentsFileName = segmentsFileName; result.numSegments = numSegments; result.segmentFormat = sFormat; result.userData = sis.GetUserData(); System.String userDataString; if (sis.GetUserData().Count > 0) { userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData()); } else { userDataString = ""; } Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString); if (onlySegments != null) { result.partial = true; if (infoStream != null) { infoStream.Write("\nChecking only these segments:"); } System.Collections.IEnumerator it = onlySegments.GetEnumerator(); while (it.MoveNext()) { if (infoStream != null) { infoStream.Write(" " + it.Current); } } System.Collections.IEnumerator e = onlySegments.GetEnumerator(); while (e.MoveNext() == true) { result.segmentsChecked.Add(e.Current); } Msg(":"); } if (skip) { Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); result.toolOutOfDate = true; return(result); } result.newSegments = (SegmentInfos)sis.Clone(); result.newSegments.Clear(); for (int i = 0; i < numSegments; i++) { SegmentInfo info = sis.Info(i); if (onlySegments != null && !onlySegments.Contains(info.name)) { continue; } Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); result.segmentInfos.Add(segInfoStat); Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); segInfoStat.name = info.name; segInfoStat.docCount = info.docCount; int toLoseDocCount = info.docCount; SegmentReader reader = null; try { Msg(" compound=" + info.GetUseCompoundFile()); segInfoStat.compound = info.GetUseCompoundFile(); Msg(" hasProx=" + info.GetHasProx()); segInfoStat.hasProx = info.GetHasProx(); Msg(" numFiles=" + info.Files().Count); segInfoStat.numFiles = info.Files().Count; Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) })); segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0); System.Collections.Generic.IDictionary <string, string> diagnostics = info.GetDiagnostics(); segInfoStat.diagnostics = diagnostics; if (diagnostics.Count > 0) { Msg(" diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics)); } int docStoreOffset = info.GetDocStoreOffset(); if (docStoreOffset != -1) { Msg(" docStoreOffset=" + docStoreOffset); segInfoStat.docStoreOffset = docStoreOffset; Msg(" docStoreSegment=" + info.GetDocStoreSegment()); segInfoStat.docStoreSegment = info.GetDocStoreSegment(); Msg(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile()); segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile(); } System.String delFileName = info.GetDelFileName(); if (delFileName == null) { Msg(" no deletions"); segInfoStat.hasDeletions = false; } else { Msg(" has deletions [delFileName=" + delFileName + "]"); segInfoStat.hasDeletions = true; segInfoStat.deletionsFileName = delFileName; } if (infoStream != null) { infoStream.Write(" test: open reader........."); } reader = SegmentReader.Get(info); segInfoStat.openReaderPassed = true; int numDocs = reader.NumDocs(); toLoseDocCount = numDocs; if (reader.HasDeletions()) { if (reader.deletedDocs.Count() != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (reader.deletedDocs.Count() > reader.MaxDoc()) { throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (info.docCount - numDocs != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } segInfoStat.numDeleted = info.docCount - numDocs; Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); } else { if (info.GetDelCount() != 0) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } Msg("OK"); } if (reader.MaxDoc() != info.docCount) { throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount); } // Test getFieldNames() if (infoStream != null) { infoStream.Write(" test: fields.............."); } System.Collections.Generic.ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL); Msg("OK [" + fieldNames.Count + " fields]"); segInfoStat.numFields = fieldNames.Count; // Test Field Norms segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader); // Test the Term Index segInfoStat.termIndexStatus = TestTermIndex(info, reader); // Test Stored Fields segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf); // Test Term Vectors segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf); // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly if (segInfoStat.fieldNormStatus.error != null) { throw new System.SystemException("Field Norm test failed"); } else if (segInfoStat.termIndexStatus.error != null) { throw new System.SystemException("Term Index test failed"); } else if (segInfoStat.storedFieldStatus.error != null) { throw new System.SystemException("Stored Field test failed"); } else if (segInfoStat.termVectorStatus.error != null) { throw new System.SystemException("Term Vector test failed"); } Msg(""); } catch (System.Exception t) { Msg("FAILED"); System.String comment; comment = "fixIndex() would remove reference to this segment"; Msg(" WARNING: " + comment + "; full exception:"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } Msg(""); result.totLoseDocCount += toLoseDocCount; result.numBadSegments++; continue; } finally { if (reader != null) { reader.Close(); } } // Keeper result.newSegments.Add(info.Clone()); } if (0 == result.numBadSegments) { result.clean = true; Msg("No problems were detected with this index.\n"); } else { Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); } return(result); }
private void SetRollbackSegmentInfos(SegmentInfos infos) { lock (this) { rollbackSegmentInfos = (SegmentInfos) infos.Clone(); System.Diagnostics.Debug.Assert(!rollbackSegmentInfos.HasExternalSegments(directory)); rollbackSegments = new System.Collections.Hashtable(); int size = rollbackSegmentInfos.Count; for (int i = 0; i < size; i++) rollbackSegments[rollbackSegmentInfos.Info(i)] = (System.Int32) i; } }
/// <summary> /// Returns a <seealso cref="Status"/> instance detailing /// the state of the index. /// </summary> /// <param name="onlySegments"> list of specific segment names to check /// /// <p>As this method checks every byte in the specified /// segments, on a large index it can take quite a long /// time to run. /// /// <p><b>WARNING</b>: make sure /// you only call this when the index is not opened by any /// writer. </param> public virtual Status DoCheckIndex(IList<string> onlySegments) { NumberFormatInfo nf = CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.Dir = Dir; try { sis.Read(Dir); } catch (Exception t) { Msg(infoStream, "ERROR: could not read any segments file in directory"); result.MissingSegments = true; if (infoStream != null) { // LUCENENET NOTE: Some tests rely on the error type being in // the message. We can't get the error type with StackTrace, we // need ToString() for that. infoStream.WriteLine(t.ToString()); //infoStream.WriteLine(t.StackTrace); } return result; } // find the oldest and newest segment versions string oldest = Convert.ToString(int.MaxValue), newest = Convert.ToString(int.MinValue); string oldSegs = null; bool foundNonNullVersion = false; IComparer<string> versionComparator = StringHelper.VersionComparator; foreach (SegmentCommitInfo si in sis.Segments) { string version = si.Info.Version; if (version == null) { // pre-3.1 segment oldSegs = "pre-3.1"; } else { foundNonNullVersion = true; if (versionComparator.Compare(version, oldest) < 0) { oldest = version; } if (versionComparator.Compare(version, newest) > 0) { newest = version; } } } int numSegments = sis.Size(); string segmentsFileName = sis.SegmentsFileName; // note: we only read the format byte (required preamble) here! IndexInput input = null; try { input = Dir.OpenInput(segmentsFileName, IOContext.READONCE); } catch (Exception t) { Msg(infoStream, "ERROR: could not open segments file in directory"); if (infoStream != null) { // LUCENENET NOTE: Some tests rely on the error type being in // the message. We can't get the error type with StackTrace, we // need ToString() for that. infoStream.WriteLine(t.ToString()); //infoStream.WriteLine(t.StackTrace); } result.CantOpenSegments = true; return result; } int format = 0; try { format = input.ReadInt(); } catch (Exception t) { Msg(infoStream, "ERROR: could not read segment file version in directory"); if (infoStream != null) { // LUCENENET NOTE: Some tests rely on the error type being in // the message. We can't get the error type with StackTrace, we // need ToString() for that. infoStream.WriteLine(t.ToString()); //infoStream.WriteLine(t.StackTrace); } result.MissingSegmentVersion = true; return result; } finally { if (input != null) { input.Dispose(); } } string sFormat = ""; bool skip = false; result.SegmentsFileName = segmentsFileName; result.NumSegments = numSegments; result.UserData = sis.UserData; string userDataString; if (sis.UserData.Count > 0) { userDataString = " userData=" + sis.UserData; } else { userDataString = ""; } string versionString = null; if (oldSegs != null) { if (foundNonNullVersion) { versionString = "versions=[" + oldSegs + " .. " + newest + "]"; } else { versionString = "version=" + oldSegs; } } else { versionString = oldest.Equals(newest) ? ("version=" + oldest) : ("versions=[" + oldest + " .. " + newest + "]"); } Msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments + " " + versionString + " format=" + sFormat + userDataString); if (onlySegments != null) { result.Partial = true; if (infoStream != null) { infoStream.Write("\nChecking only these segments:"); foreach (string s in onlySegments) { infoStream.Write(" " + s); } } result.SegmentsChecked.AddRange(onlySegments); Msg(infoStream, ":"); } if (skip) { Msg(infoStream, "\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); result.ToolOutOfDate = true; return result; } result.NewSegments = (SegmentInfos)sis.Clone(); result.NewSegments.Clear(); result.MaxSegmentName = -1; for (int i = 0; i < numSegments; i++) { SegmentCommitInfo info = sis.Info(i); int segmentName = 0; try { segmentName = int.Parse /*Convert.ToInt32*/(info.Info.Name.Substring(1)); } catch { } if (segmentName > result.MaxSegmentName) { result.MaxSegmentName = segmentName; } if (onlySegments != null && !onlySegments.Contains(info.Info.Name)) { continue; } Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); result.SegmentInfos.Add(segInfoStat); Msg(infoStream, " " + (1 + i) + " of " + numSegments + ": name=" + info.Info.Name + " docCount=" + info.Info.DocCount); segInfoStat.Name = info.Info.Name; segInfoStat.DocCount = info.Info.DocCount; string version = info.Info.Version; if (info.Info.DocCount <= 0 && version != null && versionComparator.Compare(version, "4.5") >= 0) { throw new Exception("illegal number of documents: maxDoc=" + info.Info.DocCount); } int toLoseDocCount = info.Info.DocCount; AtomicReader reader = null; try { Codec codec = info.Info.Codec; Msg(infoStream, " codec=" + codec); segInfoStat.Codec = codec; Msg(infoStream, " compound=" + info.Info.UseCompoundFile); segInfoStat.Compound = info.Info.UseCompoundFile; Msg(infoStream, " numFiles=" + info.Files().Count); segInfoStat.NumFiles = info.Files().Count; segInfoStat.SizeMB = info.SizeInBytes() / (1024.0 * 1024.0); if (info.Info.GetAttribute(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY) == null) { // don't print size in bytes if its a 3.0 segment with shared docstores Msg(infoStream, " size (MB)=" + segInfoStat.SizeMB.ToString(nf)); } IDictionary<string, string> diagnostics = info.Info.Diagnostics; segInfoStat.Diagnostics = diagnostics; if (diagnostics.Count > 0) { Msg(infoStream, " diagnostics = " + diagnostics); } if (!info.HasDeletions()) { Msg(infoStream, " no deletions"); segInfoStat.HasDeletions = false; } else { Msg(infoStream, " has deletions [delGen=" + info.DelGen + "]"); segInfoStat.HasDeletions = true; segInfoStat.DeletionsGen = info.DelGen; } if (infoStream != null) { infoStream.Write(" test: open reader........."); } reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT); Msg(infoStream, "OK"); segInfoStat.OpenReaderPassed = true; if (infoStream != null) { infoStream.Write(" test: check integrity....."); } reader.CheckIntegrity(); Msg(infoStream, "OK"); if (infoStream != null) { infoStream.Write(" test: check live docs....."); } int numDocs = reader.NumDocs; toLoseDocCount = numDocs; if (reader.HasDeletions) { if (reader.NumDocs != info.Info.DocCount - info.DelCount) { throw new Exception("delete count mismatch: info=" + (info.Info.DocCount - info.DelCount) + " vs reader=" + reader.NumDocs); } if ((info.Info.DocCount - reader.NumDocs) > reader.MaxDoc) { throw new Exception("too many deleted docs: maxDoc()=" + reader.MaxDoc + " vs del count=" + (info.Info.DocCount - reader.NumDocs)); } if (info.Info.DocCount - numDocs != info.DelCount) { throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs)); } Bits liveDocs = reader.LiveDocs; if (liveDocs == null) { throw new Exception("segment should have deletions, but liveDocs is null"); } else { int numLive = 0; for (int j = 0; j < liveDocs.Length(); j++) { if (liveDocs.Get(j)) { numLive++; } } if (numLive != numDocs) { throw new Exception("liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive); } } segInfoStat.NumDeleted = info.Info.DocCount - numDocs; Msg(infoStream, "OK [" + (segInfoStat.NumDeleted) + " deleted docs]"); } else { if (info.DelCount != 0) { throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs)); } Bits liveDocs = reader.LiveDocs; if (liveDocs != null) { // its ok for it to be non-null here, as long as none are set right? for (int j = 0; j < liveDocs.Length(); j++) { if (!liveDocs.Get(j)) { throw new Exception("liveDocs mismatch: info says no deletions but doc " + j + " is deleted."); } } } Msg(infoStream, "OK"); } if (reader.MaxDoc != info.Info.DocCount) { throw new Exception("SegmentReader.maxDoc() " + reader.MaxDoc + " != SegmentInfos.docCount " + info.Info.DocCount); } // Test getFieldInfos() if (infoStream != null) { infoStream.Write(" test: fields.............."); } FieldInfos fieldInfos = reader.FieldInfos; Msg(infoStream, "OK [" + fieldInfos.Size() + " fields]"); segInfoStat.NumFields = fieldInfos.Size(); // Test Field Norms segInfoStat.FieldNormStatus = TestFieldNorms(reader, infoStream); // Test the Term Index segInfoStat.TermIndexStatus = TestPostings(reader, infoStream, Verbose); // Test Stored Fields segInfoStat.StoredFieldStatus = TestStoredFields(reader, infoStream); // Test Term Vectors segInfoStat.TermVectorStatus = TestTermVectors(reader, infoStream, Verbose, CrossCheckTermVectors_Renamed); segInfoStat.DocValuesStatus = TestDocValues(reader, infoStream); // Rethrow the first exception we encountered // this will cause stats for failed segments to be incremented properly if (segInfoStat.FieldNormStatus.Error != null) { throw new Exception("Field Norm test failed"); } else if (segInfoStat.TermIndexStatus.Error != null) { throw new Exception("Term Index test failed"); } else if (segInfoStat.StoredFieldStatus.Error != null) { throw new Exception("Stored Field test failed"); } else if (segInfoStat.TermVectorStatus.Error != null) { throw new Exception("Term Vector test failed"); } else if (segInfoStat.DocValuesStatus.Error != null) { throw new Exception("DocValues test failed"); } Msg(infoStream, ""); } catch (Exception t) { Msg(infoStream, "FAILED"); string comment; comment = "fixIndex() would remove reference to this segment"; Msg(infoStream, " WARNING: " + comment + "; full exception:"); if (infoStream != null) { // LUCENENET NOTE: Some tests rely on the error type being in // the message. We can't get the error type with StackTrace, we // need ToString() for that. infoStream.WriteLine(t.ToString()); //infoStream.WriteLine(t.StackTrace); } Msg(infoStream, ""); result.TotLoseDocCount += toLoseDocCount; result.NumBadSegments++; continue; } finally { if (reader != null) { reader.Dispose(); } } // Keeper result.NewSegments.Add((SegmentCommitInfo)info.Clone()); } if (0 == result.NumBadSegments) { result.Clean = true; } else { Msg(infoStream, "WARNING: " + result.NumBadSegments + " broken segments (containing " + result.TotLoseDocCount + " documents) detected"); } if (!(result.ValidCounter = (result.MaxSegmentName < sis.Counter))) { result.Clean = false; result.NewSegments.Counter = result.MaxSegmentName + 1; Msg(infoStream, "ERROR: Next segment name counter " + sis.Counter + " is not greater than max segment name " + result.MaxSegmentName); } if (result.Clean) { Msg(infoStream, "No problems were detected with this index.\n"); } return result; }
// Used by near real-time search internal DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor) { this.internalDirectory = writer.Directory; this.readOnly = true; segmentInfos = infos; segmentInfosStart = (SegmentInfos) infos.Clone(); this.termInfosIndexDivisor = termInfosIndexDivisor; if (!readOnly) { // We assume that this segments_N was previously // properly sync'd: synced.UnionWith(infos.Files(internalDirectory, true)); } // IndexWriter synchronizes externally before calling // us, which ensures infos will not change; so there's // no need to process segments in reverse order int numSegments = infos.Count; var readers = new SegmentReader[numSegments]; Directory dir = writer.Directory; int upto = 0; for (int i = 0; i < numSegments; i++) { bool success = false; try { SegmentInfo info = infos.Info(i); if (info.dir == dir) { readers[upto++] = writer.readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor); } success = true; } finally { if (!success) { // Close all readers we had opened: for (upto--; upto >= 0; upto--) { try { readers[upto].Close(); } catch (System.Exception) { // keep going - we want to clean up as much as possible } } } } } this.writer = writer; if (upto < readers.Length) { // This means some segments were in a foreign Directory var newReaders = new SegmentReader[upto]; Array.Copy(readers, 0, newReaders, 0, upto); readers = newReaders; } Initialize(readers); }
/// <summary>Returns true if index is clean, else false.</summary> public static bool Check(Directory dir, bool doFix) { System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); try { sis.Read(dir); } catch (System.Exception t) { out_Renamed.WriteLine("ERROR: could not read any segments file in directory"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); return(false); } int numSegments = sis.Count; System.String segmentsFileName = sis.GetCurrentSegmentFileName(); IndexInput input = null; try { input = dir.OpenInput(segmentsFileName); } catch (System.Exception t) { out_Renamed.WriteLine("ERROR: could not open segments file in directory"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); return(false); } int format = 0; try { format = input.ReadInt(); } catch (System.Exception t) { out_Renamed.WriteLine("ERROR: could not read segment file version in directory"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); return(false); } finally { if (input != null) { input.Close(); } } System.String sFormat = ""; bool skip = false; if (format == SegmentInfos.FORMAT) { sFormat = "FORMAT [Lucene Pre-2.1]"; } if (format == SegmentInfos.FORMAT_LOCKLESS) { sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; } else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) { sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; } else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) { sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; } else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat); if (skip) { out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); return(false); } SegmentInfos newSIS = (SegmentInfos)sis.Clone(); newSIS.Clear(); bool changed = false; int totLoseDocCount = 0; int numBadSegments = 0; for (int i = 0; i < numSegments; i++) { SegmentInfo info = sis.Info(i); out_Renamed.WriteLine(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); int toLoseDocCount = info.docCount; SegmentReader reader = null; try { out_Renamed.WriteLine(" compound=" + info.GetUseCompoundFile()); out_Renamed.WriteLine(" numFiles=" + info.Files().Count); out_Renamed.WriteLine(String.Format(nf, " size (MB)={0:f}", new Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) })); int docStoreOffset = info.GetDocStoreOffset(); if (docStoreOffset != -1) { out_Renamed.WriteLine(" docStoreOffset=" + docStoreOffset); out_Renamed.WriteLine(" docStoreSegment=" + info.GetDocStoreSegment()); out_Renamed.WriteLine(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile()); } System.String delFileName = info.GetDelFileName(); if (delFileName == null) { out_Renamed.WriteLine(" no deletions"); } else { out_Renamed.WriteLine(" has deletions [delFileName=" + delFileName + "]"); } out_Renamed.Write(" test: open reader........."); reader = SegmentReader.Get(info); int numDocs = reader.NumDocs(); toLoseDocCount = numDocs; if (reader.HasDeletions()) { out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]"); } else { out_Renamed.WriteLine("OK"); } out_Renamed.Write(" test: fields, norms......."); System.Collections.IDictionary fieldNames = (System.Collections.IDictionary)reader.GetFieldNames(IndexReader.FieldOption.ALL); System.Collections.IEnumerator it = fieldNames.Keys.GetEnumerator(); while (it.MoveNext()) { System.String fieldName = (System.String)it.Current; byte[] b = reader.Norms(fieldName); if (b.Length != info.docCount) { throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount); } } out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]"); out_Renamed.Write(" test: terms, freq, prox..."); TermEnum termEnum = reader.Terms(); TermPositions termPositions = reader.TermPositions(); // Used only to count up # deleted docs for this // term MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader); long termCount = 0; long totFreq = 0; long totPos = 0; while (termEnum.Next()) { termCount++; Term term = termEnum.Term(); int docFreq = termEnum.DocFreq(); termPositions.Seek(term); int lastDoc = -1; int freq0 = 0; totFreq += docFreq; while (termPositions.Next()) { freq0++; int doc = termPositions.Doc(); int freq = termPositions.Freq(); if (doc <= lastDoc) { throw new System.SystemException("term " + term + ": doc " + doc + " < lastDoc " + lastDoc); } lastDoc = doc; if (freq <= 0) { throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); } int lastPos = -1; totPos += freq; for (int j = 0; j < freq; j++) { int pos = termPositions.NextPosition(); if (pos < 0) { throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds"); } if (pos <= lastPos) { throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos); } } } // Now count how many deleted docs occurred in // this term: int delCount; if (reader.HasDeletions()) { myTermDocs.Seek(term); while (myTermDocs.Next()) { } delCount = myTermDocs.delCount; } else { delCount = 0; } if (freq0 + delCount != docFreq) { throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount); } } out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]"); out_Renamed.Write(" test: stored fields......."); int docCount = 0; long totFields = 0; for (int j = 0; j < info.docCount; j++) { if (!reader.IsDeleted(j)) { docCount++; Document doc = reader.Document(j); totFields += doc.GetFields().Count; } } if (docCount != reader.NumDocs()) { throw new System.SystemException("docCount=" + docCount + " but saw " + docCount + " undeleted docs"); } out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new Object[] { totFields, (((float)totFields) / docCount) })); out_Renamed.Write(" test: term vectors........"); int totVectors = 0; for (int j = 0; j < info.docCount; j++) { if (!reader.IsDeleted(j)) { TermFreqVector[] tfv = reader.GetTermFreqVectors(j); if (tfv != null) { totVectors += tfv.Length; } } } out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new Object[] { totVectors, (((float)totVectors) / docCount) })); out_Renamed.WriteLine(""); } catch (System.Exception t) { out_Renamed.WriteLine("FAILED"); System.String comment; if (doFix) { comment = "will remove reference to this segment (-fix is specified)"; } else { comment = "would remove reference to this segment (-fix was not specified)"; } out_Renamed.WriteLine(" WARNING: " + comment + "; full exception:"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); out_Renamed.WriteLine(""); totLoseDocCount += toLoseDocCount; numBadSegments++; changed = true; continue; } finally { if (reader != null) { reader.Close(); } } // Keeper newSIS.Add(info.Clone()); } if (!changed) { out_Renamed.WriteLine("No problems were detected with this index.\n"); return(true); } else { out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected"); if (doFix) { out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost"); } else { out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified"); } out_Renamed.WriteLine(); } if (doFix) { out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!"); for (int i = 0; i < 5; i++) { try { System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000)); } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); i--; continue; } out_Renamed.WriteLine(" " + (5 - i) + "..."); } out_Renamed.Write("Writing..."); try { newSIS.Write(dir); } catch (System.Exception t) { out_Renamed.WriteLine("FAILED; exiting"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); return(false); } out_Renamed.WriteLine("OK"); out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\""); } else { out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]"); } out_Renamed.WriteLine(""); return(false); }
private void SetRollbackSegmentInfos(SegmentInfos infos) { lock (this) { rollbackSegmentInfos = (SegmentInfos)infos.Clone(); System.Diagnostics.Debug.Assert(!HasExternalSegments(rollbackSegmentInfos)); rollbackSegments = new Dictionary<SegmentInfo, int>(); int size = rollbackSegmentInfos.Count; for (int i = 0; i < size; i++) rollbackSegments[rollbackSegmentInfos.Info(i)] = i; } }
/// <summary>Returns true if index is clean, else false.</summary> public static bool Check(Directory dir, bool doFix) { System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); try { sis.Read(dir); } catch (System.Exception t) { out_Renamed.WriteLine("ERROR: could not read any segments file in directory"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); return false; } int numSegments = sis.Count; System.String segmentsFileName = sis.GetCurrentSegmentFileName(); IndexInput input = null; try { input = dir.OpenInput(segmentsFileName); } catch (System.Exception t) { out_Renamed.WriteLine("ERROR: could not open segments file in directory"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); return false; } int format = 0; try { format = input.ReadInt(); } catch (System.Exception t) { out_Renamed.WriteLine("ERROR: could not read segment file version in directory"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); return false; } finally { if (input != null) input.Close(); } System.String sFormat = ""; bool skip = false; if (format == SegmentInfos.FORMAT) sFormat = "FORMAT [Lucene Pre-2.1]"; if (format == SegmentInfos.FORMAT_LOCKLESS) sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat); if (skip) { out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); return false; } SegmentInfos newSIS = (SegmentInfos) sis.Clone(); newSIS.Clear(); bool changed = false; int totLoseDocCount = 0; int numBadSegments = 0; for (int i = 0; i < numSegments; i++) { SegmentInfo info = sis.Info(i); out_Renamed.WriteLine(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); int toLoseDocCount = info.docCount; SegmentReader reader = null; try { out_Renamed.WriteLine(" compound=" + info.GetUseCompoundFile()); out_Renamed.WriteLine(" numFiles=" + info.Files().Count); out_Renamed.WriteLine(String.Format(nf, " size (MB)={0:f}", new Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) })); int docStoreOffset = info.GetDocStoreOffset(); if (docStoreOffset != - 1) { out_Renamed.WriteLine(" docStoreOffset=" + docStoreOffset); out_Renamed.WriteLine(" docStoreSegment=" + info.GetDocStoreSegment()); out_Renamed.WriteLine(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile()); } System.String delFileName = info.GetDelFileName(); if (delFileName == null) out_Renamed.WriteLine(" no deletions"); else out_Renamed.WriteLine(" has deletions [delFileName=" + delFileName + "]"); out_Renamed.Write(" test: open reader........."); reader = SegmentReader.Get(info); int numDocs = reader.NumDocs(); toLoseDocCount = numDocs; if (reader.HasDeletions()) out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]"); else out_Renamed.WriteLine("OK"); out_Renamed.Write(" test: fields, norms......."); System.Collections.ICollection fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL); System.Collections.IEnumerator it = fieldNames.GetEnumerator(); while (it.MoveNext()) { System.String fieldName = (System.String) it.Current; byte[] b = reader.Norms(fieldName); if (b.Length != info.docCount) throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount); } out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]"); out_Renamed.Write(" test: terms, freq, prox..."); TermEnum termEnum = reader.Terms(); TermPositions termPositions = reader.TermPositions(); // Used only to count up # deleted docs for this // term MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader); long termCount = 0; long totFreq = 0; long totPos = 0; while (termEnum.Next()) { termCount++; Term term = termEnum.Term(); int docFreq = termEnum.DocFreq(); termPositions.Seek(term); int lastDoc = - 1; int freq0 = 0; totFreq += docFreq; while (termPositions.Next()) { freq0++; int doc = termPositions.Doc(); int freq = termPositions.Freq(); if (doc <= lastDoc) { throw new System.SystemException("term " + term + ": doc " + doc + " < lastDoc " + lastDoc); } lastDoc = doc; if (freq <= 0) { throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); } int lastPos = - 1; totPos += freq; for (int j = 0; j < freq; j++) { int pos = termPositions.NextPosition(); if (pos < -1) { throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds"); } if (pos < lastPos) { throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos); } } } // Now count how many deleted docs occurred in // this term: int delCount; if (reader.HasDeletions()) { myTermDocs.Seek(term); while (myTermDocs.Next()) { } delCount = myTermDocs.delCount; } else delCount = 0; if (freq0 + delCount != docFreq) { throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount); } } out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]"); out_Renamed.Write(" test: stored fields......."); int docCount = 0; long totFields = 0; for (int j = 0; j < info.docCount; j++) if (!reader.IsDeleted(j)) { docCount++; Document doc = reader.Document(j); totFields += doc.GetFields().Count; } if (docCount != reader.NumDocs()) throw new System.SystemException("docCount=" + docCount + " but saw " + docCount + " undeleted docs"); out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new Object[] { totFields, (((float)totFields) / docCount) })); out_Renamed.Write(" test: term vectors........"); int totVectors = 0; for (int j = 0; j < info.docCount; j++) if (!reader.IsDeleted(j)) { TermFreqVector[] tfv = reader.GetTermFreqVectors(j); if (tfv != null) totVectors += tfv.Length; } out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new Object[] { totVectors, (((float)totVectors) / docCount) })); out_Renamed.WriteLine(""); } catch (System.Exception t) { out_Renamed.WriteLine("FAILED"); System.String comment; if (doFix) comment = "will remove reference to this segment (-fix is specified)"; else comment = "would remove reference to this segment (-fix was not specified)"; out_Renamed.WriteLine(" WARNING: " + comment + "; full exception:"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); out_Renamed.WriteLine(""); totLoseDocCount += toLoseDocCount; numBadSegments++; changed = true; continue; } finally { if (reader != null) reader.Close(); } // Keeper newSIS.Add(info.Clone()); } if (!changed) { out_Renamed.WriteLine("No problems were detected with this index.\n"); return true; } else { out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected"); if (doFix) out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost"); else out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified"); out_Renamed.WriteLine(); } if (doFix) { out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!"); for (int i = 0; i < 5; i++) { try { System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000)); } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); i--; continue; } out_Renamed.WriteLine(" " + (5 - i) + "..."); } out_Renamed.Write("Writing..."); try { newSIS.Write(dir); } catch (System.Exception t) { out_Renamed.WriteLine("FAILED; exiting"); out_Renamed.Write(t.StackTrace); out_Renamed.Flush(); return false; } out_Renamed.WriteLine("OK"); out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\""); } else { out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]"); } out_Renamed.WriteLine(""); return false; }
/// <summary> /// Used by near real-time search </summary> internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes) { // IndexWriter synchronizes externally before calling // us, which ensures infos will not change; so there's // no need to process segments in reverse order int numSegments = infos.Size(); IList<SegmentReader> readers = new List<SegmentReader>(); Directory dir = writer.Directory; SegmentInfos segmentInfos = (SegmentInfos)infos.Clone(); int infosUpto = 0; bool success = false; try { for (int i = 0; i < numSegments; i++) { // NOTE: important that we use infos not // segmentInfos here, so that we are passing the // actual instance of SegmentInfoPerCommit in // IndexWriter's segmentInfos: SegmentCommitInfo info = infos.Info(i); Debug.Assert(info.Info.Dir == dir); ReadersAndUpdates rld = writer.readerPool.Get(info, true); try { SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ); if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments) { // Steal the ref: readers.Add(reader); infosUpto++; } else { reader.DecRef(); segmentInfos.Remove(infosUpto); } } finally { writer.readerPool.Release(rld); } } writer.IncRefDeleter(segmentInfos); StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes); success = true; return result; } finally { if (!success) { foreach (SegmentReader r in readers) { try { r.DecRef(); } catch (Exception th) { // ignore any exception that is thrown here to not mask any original // exception. } } } } }