public CommitPoint(IndexFileDeleter enclosingInstance, System.Collections.ICollection commitsToDelete, Directory directory, SegmentInfos segmentInfos) { InitBlock(enclosingInstance); this.directory = directory; this.commitsToDelete = commitsToDelete; userData = segmentInfos.GetUserData(); segmentsFileName = segmentInfos.GetCurrentSegmentFileName(); version = segmentInfos.GetVersion(); generation = segmentInfos.GetGeneration(); files = segmentInfos.Files(directory, true); gen = segmentInfos.GetGeneration(); isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions(); System.Diagnostics.Debug.Assert(!segmentInfos.HasExternalSegments(directory)); }
public CommitPoint(IndexFileDeleter enclosingInstance, System.Collections.ICollection commitsToDelete, Directory directory, SegmentInfos segmentInfos) { InitBlock(enclosingInstance); this.directory = directory; this.commitsToDelete = commitsToDelete; userData = segmentInfos.GetUserData(); segmentsFileName = segmentInfos.GetCurrentSegmentFileName(); version = segmentInfos.GetVersion(); generation = segmentInfos.GetGeneration(); files = segmentInfos.Files(directory, true); gen = segmentInfos.GetGeneration(); isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions(); System.Diagnostics.Debug.Assert(!segmentInfos.HasExternalSegments(directory)); }
/// <summary> For definition of "check point" see IndexWriter comments: /// "Clarification: Check Points (and commits)". /// /// Writer calls this when it has made a "consistent /// change" to the index, meaning new files are written to /// the index and the in-memory SegmentInfos have been /// modified to point to those files. /// /// This may or may not be a commit (segments_N may or may /// not have been written). /// /// We simply incref the files referenced by the new /// SegmentInfos and decref the files we had previously /// seen (if any). /// /// If this is a commit, we also call the policy to give it /// a chance to remove other commits. If any commits are /// removed, we decref their files as well. /// </summary> public void Checkpoint(SegmentInfos segmentInfos, bool isCommit) { if (infoStream != null) { Message("now checkpoint \"" + segmentInfos.GetCurrentSegmentFileName() + "\" [" + segmentInfos.Count + " segments " + "; isCommit = " + isCommit + "]"); } // Try again now to delete any previously un-deletable // files (because they were in use, on Windows): DeletePendingFiles(); // Incref the files: IncRef(segmentInfos, isCommit); if (isCommit) { // Append to our commits list: commits.Add(new CommitPoint(this, commitsToDelete, directory, segmentInfos)); // Tell policy so it can remove commits: policy.OnCommit(commits); // Decref files for commits that were deleted by the policy: DeleteCommits(); } else { System.Collections.Generic.IList<string> docWriterFiles; if (docWriter != null) { docWriterFiles = docWriter.OpenFiles(); if (docWriterFiles != null) // We must incRef these files before decRef'ing // last files to make sure we don't accidentally // delete them: IncRef(docWriterFiles); } else docWriterFiles = null; // DecRef old files from the last checkpoint, if any: int size = lastFiles.Count; if (size > 0) { for (int i = 0; i < size; i++) DecRef(lastFiles[i]); lastFiles.Clear(); } // Save files so we can decr on next checkpoint/commit: foreach (string fname in segmentInfos.Files(directory, false)) { lastFiles.Add(fname); } if (docWriterFiles != null) { foreach (string fname in docWriterFiles) { lastFiles.Add(fname); } } } }
/// <summary> Initialize the deleter: find all previous commits in /// the Directory, incref the files they reference, call /// the policy to let it delete commits. This will remove /// any files not referenced by any of the commits. /// </summary> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.StreamWriter infoStream, DocumentsWriter docWriter, System.Collections.Generic.Dictionary<string, string> synced) { this.docWriter = docWriter; this.infoStream = infoStream; this.synced = synced; if (infoStream != null) { Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy); } this.policy = policy; this.directory = directory; // First pass: walk the files and initialize our ref // counts: long currentGen = segmentInfos.GetGeneration(); IndexFileNameFilter filter = IndexFileNameFilter.GetFilter(); System.String[] files = directory.ListAll(); CommitPoint currentCommitPoint = null; for (int i = 0; i < files.Length; i++) { System.String fileName = files[i]; if (filter.Accept(null, fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN)) { // Add this file to refCounts with initial count 0: GetRefCount(fileName); if (fileName.StartsWith(IndexFileNames.SEGMENTS)) { // This is a commit (segments or segments_N), and // it's valid (<= the max gen). Load it, then // incref all files it refers to: if (infoStream != null) { Message("init: load commit \"" + fileName + "\""); } SegmentInfos sis = new SegmentInfos(); try { sis.Read(directory, fileName); } catch (System.IO.FileNotFoundException e) { // LUCENE-948: on NFS (and maybe others), if // you have writers switching back and forth // between machines, it's very likely that the // dir listing will be stale and will claim a // file segments_X exists when in fact it // doesn't. So, we catch this and handle it // as if the file does not exist if (infoStream != null) { Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point"); } sis = null; } catch (System.IO.IOException e) { if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen) { throw e; } else { // Most likely we are opening an index that // has an aborted "future" commit, so suppress // exc in this case sis = null; } } if (sis != null) { CommitPoint commitPoint = new CommitPoint(this,commitsToDelete, directory, sis); if (sis.GetGeneration() == segmentInfos.GetGeneration()) { currentCommitPoint = commitPoint; } commits.Add(commitPoint); IncRef(sis, true); if (lastSegmentInfos == null || sis.GetGeneration() > lastSegmentInfos.GetGeneration()) { lastSegmentInfos = sis; } } } } } if (currentCommitPoint == null) { // We did not in fact see the segments_N file // corresponding to the segmentInfos that was passed // in. Yet, it must exist, because our caller holds // the write lock. This can happen when the directory // listing was stale (eg when index accessed via NFS // client with stale directory listing cache). So we // try now to explicitly open this commit point: SegmentInfos sis = new SegmentInfos(); try { sis.Read(directory, segmentInfos.GetCurrentSegmentFileName()); } catch (System.IO.IOException e) { throw new CorruptIndexException("failed to locate current segments_N file"); } if (infoStream != null) Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName()); currentCommitPoint = new CommitPoint(this, commitsToDelete, directory, sis); commits.Add(currentCommitPoint); IncRef(sis, true); } // We keep commits list in sorted order (oldest to newest): commits.Sort(); // Now delete anything with ref count at 0. These are // presumably abandoned files eg due to crash of // IndexWriter. System.Collections.Generic.IEnumerator<System.Collections.Generic.KeyValuePair<System.String, RefCount>> it = refCounts.GetEnumerator(); while (it.MoveNext()) { System.String fileName = (System.String) it.Current.Key; RefCount rc = (RefCount) refCounts[fileName]; if (0 == rc.count) { if (infoStream != null) { Message("init: removing unreferenced file \"" + fileName + "\""); } DeleteFile(fileName); } } // Finally, give policy a chance to remove things on // startup: policy.OnInit(commits); // Always protect the incoming segmentInfos since // sometime it may not be the most recent commit Checkpoint(segmentInfos, false); startingCommitDeleted = currentCommitPoint.IsDeleted(); DeleteCommits(); }
/// <summary>Returns a {@link Status} instance detailing /// the state of the index. /// /// </summary> /// <param name="onlySegments">list of specific segment names to check /// /// <p/>As this method checks every byte in the specified /// segments, on a large index it can take quite a long /// time to run. /// /// <p/><b>WARNING</b>: make sure /// you only call this when the index is not opened by any /// writer. /// </param> public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments) { System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { sis.Read(dir); } catch (System.Exception t) { Msg("ERROR: could not read any segments file in directory"); result.missingSegments = true; if (infoStream != null) infoStream.WriteLine(t.StackTrace); return result; } int numSegments = sis.Count; System.String segmentsFileName = sis.GetCurrentSegmentFileName(); IndexInput input = null; try { input = dir.OpenInput(segmentsFileName); } catch (System.Exception t) { Msg("ERROR: could not open segments file in directory"); if (infoStream != null) infoStream.WriteLine(t.StackTrace); result.cantOpenSegments = true; return result; } int format = 0; try { format = input.ReadInt(); } catch (System.Exception t) { Msg("ERROR: could not read segment file version in directory"); if (infoStream != null) infoStream.WriteLine(t.StackTrace); result.missingSegmentVersion = true; return result; } finally { if (input != null) input.Close(); } System.String sFormat = ""; bool skip = false; if (format == SegmentInfos.FORMAT) sFormat = "FORMAT [Lucene Pre-2.1]"; if (format == SegmentInfos.FORMAT_LOCKLESS) sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; else { if (format == SegmentInfos.FORMAT_CHECKSUM) sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_DEL_COUNT) sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_HAS_PROX) sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_USER_DATA) sFormat = "FORMAT_USER_DATA [Lucene 2.9]"; else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; else if (format < SegmentInfos.CURRENT_FORMAT) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } } result.segmentsFileName = segmentsFileName; result.numSegments = numSegments; result.segmentFormat = sFormat; result.userData = sis.GetUserData(); System.String userDataString; if (sis.GetUserData().Count > 0) { userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData()); } else { userDataString = ""; } Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString); if (onlySegments != null) { result.partial = true; if (infoStream != null) infoStream.Write("\nChecking only these segments:"); System.Collections.IEnumerator it = onlySegments.GetEnumerator(); while (it.MoveNext()) { if (infoStream != null) { infoStream.Write(" " + it.Current); } } System.Collections.IEnumerator e = onlySegments.GetEnumerator(); while (e.MoveNext() == true) { result.segmentsChecked.Add(e.Current); } Msg(":"); } if (skip) { Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); result.toolOutOfDate = true; return result; } result.newSegments = (SegmentInfos) sis.Clone(); result.newSegments.Clear(); for (int i = 0; i < numSegments; i++) { SegmentInfo info = sis.Info(i); if (onlySegments != null && !onlySegments.Contains(info.name)) continue; Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); result.segmentInfos.Add(segInfoStat); Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); segInfoStat.name = info.name; segInfoStat.docCount = info.docCount; int toLoseDocCount = info.docCount; SegmentReader reader = null; try { Msg(" compound=" + info.GetUseCompoundFile()); segInfoStat.compound = info.GetUseCompoundFile(); Msg(" hasProx=" + info.GetHasProx()); segInfoStat.hasProx = info.GetHasProx(); Msg(" numFiles=" + info.Files().Count); segInfoStat.numFiles = info.Files().Count; Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) })); segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0); System.Collections.Generic.IDictionary<string, string> diagnostics = info.GetDiagnostics(); segInfoStat.diagnostics = diagnostics; if (diagnostics.Count > 0) { Msg(" diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics)); } int docStoreOffset = info.GetDocStoreOffset(); if (docStoreOffset != - 1) { Msg(" docStoreOffset=" + docStoreOffset); segInfoStat.docStoreOffset = docStoreOffset; Msg(" docStoreSegment=" + info.GetDocStoreSegment()); segInfoStat.docStoreSegment = info.GetDocStoreSegment(); Msg(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile()); segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile(); } System.String delFileName = info.GetDelFileName(); if (delFileName == null) { Msg(" no deletions"); segInfoStat.hasDeletions = false; } else { Msg(" has deletions [delFileName=" + delFileName + "]"); segInfoStat.hasDeletions = true; segInfoStat.deletionsFileName = delFileName; } if (infoStream != null) infoStream.Write(" test: open reader........."); reader = SegmentReader.Get(info); segInfoStat.openReaderPassed = true; int numDocs = reader.NumDocs(); toLoseDocCount = numDocs; if (reader.HasDeletions()) { if (reader.deletedDocs.Count() != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (reader.deletedDocs.Count() > reader.MaxDoc()) { throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (info.docCount - numDocs != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } segInfoStat.numDeleted = info.docCount - numDocs; Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); } else { if (info.GetDelCount() != 0) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } Msg("OK"); } if (reader.MaxDoc() != info.docCount) throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount); // Test getFieldNames() if (infoStream != null) { infoStream.Write(" test: fields.............."); } System.Collections.Generic.ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL); Msg("OK [" + fieldNames.Count + " fields]"); segInfoStat.numFields = fieldNames.Count; // Test Field Norms segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader); // Test the Term Index segInfoStat.termIndexStatus = TestTermIndex(info, reader); // Test Stored Fields segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf); // Test Term Vectors segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf); // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly if (segInfoStat.fieldNormStatus.error != null) { throw new System.SystemException("Field Norm test failed"); } else if (segInfoStat.termIndexStatus.error != null) { throw new System.SystemException("Term Index test failed"); } else if (segInfoStat.storedFieldStatus.error != null) { throw new System.SystemException("Stored Field test failed"); } else if (segInfoStat.termVectorStatus.error != null) { throw new System.SystemException("Term Vector test failed"); } Msg(""); } catch (System.Exception t) { Msg("FAILED"); System.String comment; comment = "fixIndex() would remove reference to this segment"; Msg(" WARNING: " + comment + "; full exception:"); if (infoStream != null) infoStream.WriteLine(t.StackTrace); Msg(""); result.totLoseDocCount += toLoseDocCount; result.numBadSegments++; continue; } finally { if (reader != null) reader.Close(); } // Keeper result.newSegments.Add(info.Clone()); } if (0 == result.numBadSegments) { result.clean = true; Msg("No problems were detected with this index.\n"); } else Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); return result; }
internal ReaderCommit(SegmentInfos infos, Directory dir) { segmentsFileName = infos.GetCurrentSegmentFileName(); this.dir = dir; userData = infos.GetUserData(); files = infos.Files(dir, true); version = infos.GetVersion(); generation = infos.GetGeneration(); isOptimized = infos.Count == 1 && !infos.Info(0).HasDeletions(); }
/// <summary>Returns a {@link Status} instance detailing /// the state of the index. /// /// </summary> /// <param name="onlySegments">list of specific segment names to check /// /// <p/>As this method checks every byte in the specified /// segments, on a large index it can take quite a long /// time to run. /// /// <p/><b>WARNING</b>: make sure /// you only call this when the index is not opened by any /// writer. /// </param> public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments) { System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { sis.Read(dir); } catch (System.Exception t) { Msg("ERROR: could not read any segments file in directory"); result.missingSegments = true; if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } return(result); } int numSegments = sis.Count; System.String segmentsFileName = sis.GetCurrentSegmentFileName(); IndexInput input = null; try { input = dir.OpenInput(segmentsFileName); } catch (System.Exception t) { Msg("ERROR: could not open segments file in directory"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } result.cantOpenSegments = true; return(result); } int format = 0; try { format = input.ReadInt(); } catch (System.Exception t) { Msg("ERROR: could not read segment file version in directory"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } result.missingSegmentVersion = true; return(result); } finally { if (input != null) { input.Close(); } } System.String sFormat = ""; bool skip = false; if (format == SegmentInfos.FORMAT) { sFormat = "FORMAT [Lucene Pre-2.1]"; } if (format == SegmentInfos.FORMAT_LOCKLESS) { sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; } else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) { sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; } else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) { sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; } else { if (format == SegmentInfos.FORMAT_CHECKSUM) { sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_DEL_COUNT) { sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_HAS_PROX) { sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_USER_DATA) { sFormat = "FORMAT_USER_DATA [Lucene 2.9]"; } else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) { sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; } else if (format < SegmentInfos.CURRENT_FORMAT) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } } result.segmentsFileName = segmentsFileName; result.numSegments = numSegments; result.segmentFormat = sFormat; result.userData = sis.GetUserData(); System.String userDataString; if (sis.GetUserData().Count > 0) { userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData()); } else { userDataString = ""; } Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString); if (onlySegments != null) { result.partial = true; if (infoStream != null) { infoStream.Write("\nChecking only these segments:"); } System.Collections.IEnumerator it = onlySegments.GetEnumerator(); while (it.MoveNext()) { if (infoStream != null) { infoStream.Write(" " + it.Current); } } System.Collections.IEnumerator e = onlySegments.GetEnumerator(); while (e.MoveNext() == true) { result.segmentsChecked.Add(e.Current); } Msg(":"); } if (skip) { Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); result.toolOutOfDate = true; return(result); } result.newSegments = (SegmentInfos)sis.Clone(); result.newSegments.Clear(); for (int i = 0; i < numSegments; i++) { SegmentInfo info = sis.Info(i); if (onlySegments != null && !onlySegments.Contains(info.name)) { continue; } Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); result.segmentInfos.Add(segInfoStat); Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); segInfoStat.name = info.name; segInfoStat.docCount = info.docCount; int toLoseDocCount = info.docCount; SegmentReader reader = null; try { Msg(" compound=" + info.GetUseCompoundFile()); segInfoStat.compound = info.GetUseCompoundFile(); Msg(" hasProx=" + info.GetHasProx()); segInfoStat.hasProx = info.GetHasProx(); Msg(" numFiles=" + info.Files().Count); segInfoStat.numFiles = info.Files().Count; Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) })); segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0); System.Collections.Generic.IDictionary <string, string> diagnostics = info.GetDiagnostics(); segInfoStat.diagnostics = diagnostics; if (diagnostics.Count > 0) { Msg(" diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics)); } int docStoreOffset = info.GetDocStoreOffset(); if (docStoreOffset != -1) { Msg(" docStoreOffset=" + docStoreOffset); segInfoStat.docStoreOffset = docStoreOffset; Msg(" docStoreSegment=" + info.GetDocStoreSegment()); segInfoStat.docStoreSegment = info.GetDocStoreSegment(); Msg(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile()); segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile(); } System.String delFileName = info.GetDelFileName(); if (delFileName == null) { Msg(" no deletions"); segInfoStat.hasDeletions = false; } else { Msg(" has deletions [delFileName=" + delFileName + "]"); segInfoStat.hasDeletions = true; segInfoStat.deletionsFileName = delFileName; } if (infoStream != null) { infoStream.Write(" test: open reader........."); } reader = SegmentReader.Get(info); segInfoStat.openReaderPassed = true; int numDocs = reader.NumDocs(); toLoseDocCount = numDocs; if (reader.HasDeletions()) { if (reader.deletedDocs.Count() != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (reader.deletedDocs.Count() > reader.MaxDoc()) { throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (info.docCount - numDocs != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } segInfoStat.numDeleted = info.docCount - numDocs; Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); } else { if (info.GetDelCount() != 0) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } Msg("OK"); } if (reader.MaxDoc() != info.docCount) { throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount); } // Test getFieldNames() if (infoStream != null) { infoStream.Write(" test: fields.............."); } System.Collections.Generic.ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL); Msg("OK [" + fieldNames.Count + " fields]"); segInfoStat.numFields = fieldNames.Count; // Test Field Norms segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader); // Test the Term Index segInfoStat.termIndexStatus = TestTermIndex(info, reader); // Test Stored Fields segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf); // Test Term Vectors segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf); // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly if (segInfoStat.fieldNormStatus.error != null) { throw new System.SystemException("Field Norm test failed"); } else if (segInfoStat.termIndexStatus.error != null) { throw new System.SystemException("Term Index test failed"); } else if (segInfoStat.storedFieldStatus.error != null) { throw new System.SystemException("Stored Field test failed"); } else if (segInfoStat.termVectorStatus.error != null) { throw new System.SystemException("Term Vector test failed"); } Msg(""); } catch (System.Exception t) { Msg("FAILED"); System.String comment; comment = "fixIndex() would remove reference to this segment"; Msg(" WARNING: " + comment + "; full exception:"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } Msg(""); result.totLoseDocCount += toLoseDocCount; result.numBadSegments++; continue; } finally { if (reader != null) { reader.Close(); } } // Keeper result.newSegments.Add(info.Clone()); } if (0 == result.numBadSegments) { result.clean = true; Msg("No problems were detected with this index.\n"); } else { Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); } return(result); }
/// <summary> For definition of "check point" see IndexWriter comments: /// "Clarification: Check Points (and commits)". /// /// Writer calls this when it has made a "consistent /// change" to the index, meaning new files are written to /// the index and the in-memory SegmentInfos have been /// modified to point to those files. /// /// This may or may not be a commit (segments_N may or may /// not have been written). /// /// We simply incref the files referenced by the new /// SegmentInfos and decref the files we had previously /// seen (if any). /// /// If this is a commit, we also call the policy to give it /// a chance to remove other commits. If any commits are /// removed, we decref their files as well. /// </summary> public void Checkpoint(SegmentInfos segmentInfos, bool isCommit) { if (infoStream != null) { Message("now checkpoint \"" + segmentInfos.GetCurrentSegmentFileName() + "\" [" + segmentInfos.Count + " segments " + "; isCommit = " + isCommit + "]"); } // Try again now to delete any previously un-deletable // files (because they were in use, on Windows): DeletePendingFiles(); // Incref the files: IncRef(segmentInfos, isCommit); if (isCommit) { // Append to our commits list: commits.Add(new CommitPoint(this, commitsToDelete, directory, segmentInfos)); // Tell policy so it can remove commits: policy.OnCommit(commits); // Decref files for commits that were deleted by the policy: DeleteCommits(); } else { System.Collections.Generic.IList <string> docWriterFiles; if (docWriter != null) { docWriterFiles = docWriter.OpenFiles(); if (docWriterFiles != null) { // We must incRef these files before decRef'ing // last files to make sure we don't accidentally // delete them: IncRef(docWriterFiles); } } else { docWriterFiles = null; } // DecRef old files from the last checkpoint, if any: int size = lastFiles.Count; if (size > 0) { for (int i = 0; i < size; i++) { DecRef(lastFiles[i]); } lastFiles.Clear(); } // Save files so we can decr on next checkpoint/commit: foreach (string fname in segmentInfos.Files(directory, false)) { lastFiles.Add(fname); } if (docWriterFiles != null) { foreach (string fname in docWriterFiles) { lastFiles.Add(fname); } } } }
/// <summary> Initialize the deleter: find all previous commits in /// the Directory, incref the files they reference, call /// the policy to let it delete commits. This will remove /// any files not referenced by any of the commits. /// </summary> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.StreamWriter infoStream, DocumentsWriter docWriter, System.Collections.Generic.Dictionary <string, string> synced) { this.docWriter = docWriter; this.infoStream = infoStream; this.synced = synced; if (infoStream != null) { Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy); } this.policy = policy; this.directory = directory; // First pass: walk the files and initialize our ref // counts: long currentGen = segmentInfos.GetGeneration(); IndexFileNameFilter filter = IndexFileNameFilter.GetFilter(); System.String[] files = directory.ListAll(); CommitPoint currentCommitPoint = null; for (int i = 0; i < files.Length; i++) { System.String fileName = files[i]; if (filter.Accept(null, fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN)) { // Add this file to refCounts with initial count 0: GetRefCount(fileName); if (fileName.StartsWith(IndexFileNames.SEGMENTS)) { // This is a commit (segments or segments_N), and // it's valid (<= the max gen). Load it, then // incref all files it refers to: if (infoStream != null) { Message("init: load commit \"" + fileName + "\""); } SegmentInfos sis = new SegmentInfos(); try { sis.Read(directory, fileName); } catch (System.IO.FileNotFoundException e) { // LUCENE-948: on NFS (and maybe others), if // you have writers switching back and forth // between machines, it's very likely that the // dir listing will be stale and will claim a // file segments_X exists when in fact it // doesn't. So, we catch this and handle it // as if the file does not exist if (infoStream != null) { Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point"); } sis = null; } catch (System.IO.IOException e) { if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen) { throw e; } else { // Most likely we are opening an index that // has an aborted "future" commit, so suppress // exc in this case sis = null; } } if (sis != null) { CommitPoint commitPoint = new CommitPoint(this, commitsToDelete, directory, sis); if (sis.GetGeneration() == segmentInfos.GetGeneration()) { currentCommitPoint = commitPoint; } commits.Add(commitPoint); IncRef(sis, true); if (lastSegmentInfos == null || sis.GetGeneration() > lastSegmentInfos.GetGeneration()) { lastSegmentInfos = sis; } } } } } if (currentCommitPoint == null) { // We did not in fact see the segments_N file // corresponding to the segmentInfos that was passed // in. Yet, it must exist, because our caller holds // the write lock. This can happen when the directory // listing was stale (eg when index accessed via NFS // client with stale directory listing cache). So we // try now to explicitly open this commit point: SegmentInfos sis = new SegmentInfos(); try { sis.Read(directory, segmentInfos.GetCurrentSegmentFileName()); } catch (System.IO.IOException e) { throw new CorruptIndexException("failed to locate current segments_N file"); } if (infoStream != null) { Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName()); } currentCommitPoint = new CommitPoint(this, commitsToDelete, directory, sis); commits.Add(currentCommitPoint); IncRef(sis, true); } // We keep commits list in sorted order (oldest to newest): commits.Sort(); // Now delete anything with ref count at 0. These are // presumably abandoned files eg due to crash of // IndexWriter. System.Collections.Generic.IEnumerator <System.Collections.Generic.KeyValuePair <System.String, RefCount> > it = refCounts.GetEnumerator(); while (it.MoveNext()) { System.String fileName = (System.String)it.Current.Key; RefCount rc = (RefCount)refCounts[fileName]; if (0 == rc.count) { if (infoStream != null) { Message("init: removing unreferenced file \"" + fileName + "\""); } DeleteFile(fileName); } } // Finally, give policy a chance to remove things on // startup: policy.OnInit(commits); // Always protect the incoming segmentInfos since // sometime it may not be the most recent commit Checkpoint(segmentInfos, false); startingCommitDeleted = currentCommitPoint.IsDeleted(); DeleteCommits(); }