/// <summary>Returns true if this single info is optimized (has no
/// pending norms or deletes, is in the same dir as the
/// writer, and matches the current compound file setting).
/// </summary>
private bool IsOptimized(SegmentInfo info) {
    bool hasDeletions = writer.NumDeletedDocs(info) > 0;
    return !hasDeletions &&
        !info.HasSeparateNorms() &&
        info.dir == writer.GetDirectory() &&
        (info.GetUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
}
protected internal override DirectoryIndexReader DoReopen(SegmentInfos infos) {
    lock (this) {
        DirectoryIndexReader newReader;
        if (infos.Count == 1) {
            SegmentInfo si = infos.Info(0);
            if (segment.Equals(si.name) && si.GetUseCompoundFile() == this.si.GetUseCompoundFile()) {
                newReader = ReopenSegment(si);
            } else {
                // segment not referenced anymore, reopen not possible
                // or segment format changed
                newReader = SegmentReader.Get(readOnly, infos, infos.Info(0), false);
            }
        } else {
            if (readOnly) {
                return new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] { this }, null, null);
            } else {
                return new MultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] { this }, null, null, false);
            }
        }
        return newReader;
    }
}
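// DoReopen above is the synchronized core of reader refreshing. A minimal
// caller-side sketch of the contract it serves, assuming the Lucene.NET
// 2.x-era IndexReader.Reopen() API: Reopen may return the very same instance
// when nothing changed, so the old reader is only closed when a genuinely new
// one came back. This helper class is illustrative, not part of the library.
public static class ReopenExample {
    public static Lucene.Net.Index.IndexReader RefreshReader(Lucene.Net.Index.IndexReader reader) {
        Lucene.Net.Index.IndexReader newReader = reader.Reopen();
        if (newReader != reader) {
            // A changed index produced a fresh reader; release the old one.
            reader.Close();
        }
        return newReader;
    }
}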
/// <summary>Returns true if this single info is optimized (has no
/// pending norms or deletes, is in the same dir as the
/// writer, and matches the current compound file setting).
/// </summary>
private bool IsOptimized(SegmentInfo info, IState state) {
    bool hasDeletions = writer.NumDeletedDocs(info, state) > 0;
    return !hasDeletions &&
        !info.HasSeparateNorms(state) &&
        info.dir == writer.Directory &&
        (info.GetUseCompoundFile(state) == useCompoundFile || internalNoCFSRatio < 1.0);
}
internal virtual System.Collections.ArrayList Files() {
    System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));
    if (si.GetUseCompoundFile()) {
        System.String name = segment + ".cfs";
        if (Directory().FileExists(name)) {
            files.Add(name);
        }
    } else {
        for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++) {
            System.String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
            if (Directory().FileExists(name)) {
                files.Add(name);
            }
        }
    }
    if (si.HasDeletions()) {
        files.Add(si.GetDelFileName());
    }
    bool addedNrm = false;
    for (int i = 0; i < fieldInfos.Size(); i++) {
        System.String name = si.GetNormFileName(i);
        if (name != null && Directory().FileExists(name)) {
            if (name.EndsWith("." + IndexFileNames.NORMS_EXTENSION)) {
                if (addedNrm) {
                    continue; // add .nrm just once
                }
                addedNrm = true;
            }
            files.Add(name);
        }
    }
    return files;
}
internal static bool UsesCompoundFile(SegmentInfo si) {
    return si.GetUseCompoundFile();
}
internal virtual SegmentReader ReopenSegment(SegmentInfo si) {
    lock (this) {
        bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) &&
            (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
        bool normsUpToDate = true;
        bool[] fieldNormsChanged = new bool[fieldInfos.Size()];
        if (normsUpToDate) {
            for (int i = 0; i < fieldInfos.Size(); i++) {
                if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i))) {
                    normsUpToDate = false;
                    fieldNormsChanged[i] = true;
                }
            }
        }
        if (normsUpToDate && deletionsUpToDate) {
            return this;
        }
        // clone reader
        SegmentReader clone;
        if (readOnly) {
            clone = new ReadOnlySegmentReader();
        } else {
            clone = new SegmentReader();
        }
        bool success = false;
        try {
            clone.readOnly = readOnly;
            clone.directory = directory;
            clone.si = si;
            clone.segment = segment;
            clone.readBufferSize = readBufferSize;
            clone.cfsReader = cfsReader;
            clone.storeCFSReader = storeCFSReader;
            clone.fieldInfos = fieldInfos;
            clone.tis = tis;
            clone.freqStream = freqStream;
            clone.proxStream = proxStream;
            clone.termVectorsReaderOrig = termVectorsReaderOrig;
            // we have to open a new FieldsReader, because it is not thread-safe
            // and can thus not be shared among multiple SegmentReaders
            // TODO: Change this in case FieldsReader becomes thread-safe in the future
            System.String fieldsSegment;
            Directory storeDir = Directory();
            if (si.GetDocStoreOffset() != -1) {
                fieldsSegment = si.GetDocStoreSegment();
                if (storeCFSReader != null) {
                    storeDir = storeCFSReader;
                }
            } else {
                fieldsSegment = segment;
                if (cfsReader != null) {
                    storeDir = cfsReader;
                }
            }
            if (fieldsReader != null) {
                clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
            }
            if (!deletionsUpToDate) {
                // load deleted docs
                clone.deletedDocs = null;
                clone.LoadDeletedDocs();
            } else {
                clone.deletedDocs = this.deletedDocs;
            }
            clone.norms = new System.Collections.Hashtable();
            if (!normsUpToDate) {
                // load norms
                for (int i = 0; i < fieldNormsChanged.Length; i++) {
                    // copy unchanged norms to the cloned reader and incRef those norms
                    if (!fieldNormsChanged[i]) {
                        System.String curField = fieldInfos.FieldInfo(i).name;
                        Norm norm = (Norm) this.norms[curField];
                        norm.IncRef();
                        clone.norms[curField] = norm;
                    }
                }
                clone.OpenNorms(si.GetUseCompoundFile() ? cfsReader : Directory(), readBufferSize);
            } else {
                System.Collections.IEnumerator it = norms.Keys.GetEnumerator();
                while (it.MoveNext()) {
                    System.String field = (System.String) it.Current;
                    Norm norm = (Norm) norms[field];
                    norm.IncRef();
                    clone.norms[field] = norm;
                }
            }
            if (clone.singleNormStream == null) {
                for (int i = 0; i < fieldInfos.Size(); i++) {
                    FieldInfo fi = fieldInfos.FieldInfo(i);
                    if (fi.isIndexed && !fi.omitNorms) {
                        Directory d = si.GetUseCompoundFile() ? cfsReader : Directory();
                        System.String fileName = si.GetNormFileName(fi.number);
                        if (si.HasSeparateNorms(fi.number)) {
                            continue;
                        }
                        if (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION)) {
                            clone.singleNormStream = d.OpenInput(fileName, readBufferSize);
                            break;
                        }
                    }
                }
            }
            success = true;
        } finally {
            if (this.referencedSegmentReader != null) {
                // this reader shares resources with another SegmentReader,
                // so we increment the other reader's refCount. We don't
                // increment the refCount of the norms because we did
                // that already for the shared norms
                clone.referencedSegmentReader = this.referencedSegmentReader;
                referencedSegmentReader.IncRefReaderNotNorms();
            } else {
                // this reader wasn't reopened, so we increment this
                // reader's refCount
                clone.referencedSegmentReader = this;
                IncRefReaderNotNorms();
            }
            if (!success) {
                // An exception occurred during reopen; we have to decRef the norms
                // that we incRef'ed already and close singleNormStream and FieldsReader
                clone.DecRef();
            }
        }
        return clone;
    }
}
private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores) {
    segment = si.name;
    this.si = si;
    this.readBufferSize = readBufferSize;
    bool success = false;
    try {
        // Use compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile()) {
            cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            cfsDir = cfsReader;
        }
        Directory storeDir;
        if (doOpenStores) {
            if (si.GetDocStoreOffset() != -1) {
                if (si.GetDocStoreIsCompoundFile()) {
                    storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                    storeDir = storeCFSReader;
                } else {
                    storeDir = Directory();
                }
            } else {
                storeDir = cfsDir;
            }
        } else {
            storeDir = null;
        }
        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
        bool anyProx = false;
        int numFields = fieldInfos.Size();
        for (int i = 0; !anyProx && i < numFields; i++) {
            if (!fieldInfos.FieldInfo(i).omitTf) {
                anyProx = true;
            }
        }
        System.String fieldsSegment;
        if (si.GetDocStoreOffset() != -1) {
            fieldsSegment = si.GetDocStoreSegment();
        } else {
            fieldsSegment = segment;
        }
        if (doOpenStores) {
            fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
            // Verify two sources of "maxDoc" agree:
            if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount) {
                throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
            }
        }
        tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);
        LoadDeletedDocs();
        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
        if (anyProx) {
            proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
        }
        OpenNorms(cfsDir, readBufferSize);
        if (doOpenStores && fieldInfos.HasVectors()) {
            // open term vector files only as needed
            System.String vectorsSegment;
            if (si.GetDocStoreOffset() != -1) {
                vectorsSegment = si.GetDocStoreSegment();
            } else {
                vectorsSegment = segment;
            }
            termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
        }
        success = true;
    } finally {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success) {
            DoClose();
        }
    }
}
private void Initialize(SegmentInfo si) {
    segment = si.name;
    this.si = si;
    bool success = false;
    try {
        // Use compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile()) {
            cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
            cfsDir = cfsReader;
        }
        // No compound file exists - use the multi-file format
        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
        fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
        // Verify two sources of "maxDoc" agree:
        if (fieldsReader.Size() != si.docCount) {
            throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
        }
        tis = new TermInfosReader(cfsDir, segment, fieldInfos);
        // NOTE: the bitvector is stored using the regular directory, not cfs
        if (HasDeletions(si)) {
            deletedDocs = new BitVector(Directory(), si.GetDelFileName());
            // Verify # deletes does not exceed maxDoc for this segment:
            if (deletedDocs.Count() > MaxDoc()) {
                throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
            }
        }
        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq");
        proxStream = cfsDir.OpenInput(segment + ".prx");
        OpenNorms(cfsDir);
        if (fieldInfos.HasVectors()) {
            // open term vector files only as needed
            termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
        }
        success = true;
    } finally {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success) {
            DoClose();
        }
    }
}
internal void OpenDocStores(SegmentInfo si) {
    lock (this) {
        System.Diagnostics.Debug.Assert(si.name.Equals(segment));
        if (fieldsReaderOrig == null) {
            Directory storeDir;
            if (si.GetDocStoreOffset() != -1) {
                if (si.GetDocStoreIsCompoundFile()) {
                    System.Diagnostics.Debug.Assert(storeCFSReader == null);
                    storeCFSReader = new CompoundFileReader(dir, si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                    storeDir = storeCFSReader;
                    System.Diagnostics.Debug.Assert(storeDir != null);
                } else {
                    storeDir = dir;
                    System.Diagnostics.Debug.Assert(storeDir != null);
                }
            } else if (si.GetUseCompoundFile()) {
                // In some cases, we were originally opened when CFS
                // was not used, but then we are asked to open doc
                // stores after the segment has switched to CFS
                if (cfsReader == null) {
                    cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                }
                storeDir = cfsReader;
                System.Diagnostics.Debug.Assert(storeDir != null);
            } else {
                storeDir = dir;
                System.Diagnostics.Debug.Assert(storeDir != null);
            }
            System.String storesSegment;
            if (si.GetDocStoreOffset() != -1) {
                storesSegment = si.GetDocStoreSegment();
            } else {
                storesSegment = segment;
            }
            fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
            // Verify two sources of "maxDoc" agree:
            if (si.GetDocStoreOffset() == -1 && fieldsReaderOrig.Size() != si.docCount) {
                throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.Size() + " but segmentInfo shows " + si.docCount);
            }
            if (fieldInfos.HasVectors()) {
                // open term vector files only as needed
                termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
            }
        }
    }
}
internal CoreReaders(Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor) {
    segment = si.name;
    this.readBufferSize = readBufferSize;
    this.dir = dir;
    bool success = false;
    try {
        Directory dir0 = dir;
        if (si.GetUseCompoundFile()) {
            cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            dir0 = cfsReader;
        }
        cfsDir = dir0;
        fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
        this.termsIndexDivisor = termsIndexDivisor;
        TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
        if (termsIndexDivisor == -1) {
            tisNoIndex = reader;
        } else {
            tis = reader;
            tisNoIndex = null;
        }
        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);
        if (fieldInfos.HasProx()) {
            proxStream = cfsDir.OpenInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize);
        } else {
            proxStream = null;
        }
        success = true;
    } finally {
        if (!success) {
            DecRef();
        }
    }
}
internal CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor) {
    segment = si.name;
    this.readBufferSize = readBufferSize;
    this.dir = dir;
    bool success = false;
    try {
        Directory dir0 = dir;
        if (si.GetUseCompoundFile()) {
            cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            dir0 = cfsReader;
        }
        cfsDir = dir0;
        fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
        this.termsIndexDivisor = termsIndexDivisor;
        TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
        if (termsIndexDivisor == -1) {
            tisNoIndex = reader;
        } else {
            tis = reader;
            tisNoIndex = null;
        }
        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);
        if (fieldInfos.HasProx()) {
            proxStream = cfsDir.OpenInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize);
        } else {
            proxStream = null;
        }
        success = true;
    } finally {
        if (!success) {
            DecRef();
        }
    }
    // Must assign this at the end -- if we hit an
    // exception above core, we don't want to attempt to
    // purge the FieldCache (will hit NPE because core is
    // not assigned yet).
    this.origInstance = origInstance;
}
/// <summary>Returns true if this single info is optimized (has no
/// pending norms or deletes, is in the same dir as the
/// writer, and matches the current compound file setting).
/// </summary>
private bool IsOptimized(IndexWriter writer, SegmentInfo info) {
    return !info.HasDeletions() &&
        !info.HasSeparateNorms() &&
        info.dir == writer.GetDirectory() &&
        info.GetUseCompoundFile() == useCompoundFile;
}
internal virtual SegmentReader ReopenSegment(SegmentInfo si) {
    lock (this) {
        bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) &&
            (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
        bool normsUpToDate = true;
        bool[] fieldNormsChanged = new bool[fieldInfos.Size()];
        if (normsUpToDate) {
            for (int i = 0; i < fieldInfos.Size(); i++) {
                if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i))) {
                    normsUpToDate = false;
                    fieldNormsChanged[i] = true;
                }
            }
        }
        if (normsUpToDate && deletionsUpToDate) {
            return this;
        }
        // clone reader
        SegmentReader clone;
        if (readOnly) {
            clone = new ReadOnlySegmentReader();
        } else {
            clone = new SegmentReader();
        }
        bool success = false;
        try {
            clone.readOnly = readOnly;
            clone.directory = directory;
            clone.si = si;
            clone.segment = segment;
            clone.readBufferSize = readBufferSize;
            clone.cfsReader = cfsReader;
            clone.storeCFSReader = storeCFSReader;
            clone.fieldInfos = fieldInfos;
            clone.tis = tis;
            clone.freqStream = freqStream;
            clone.proxStream = proxStream;
            clone.termVectorsReaderOrig = termVectorsReaderOrig;
            // we have to open a new FieldsReader, because it is not thread-safe
            // and can thus not be shared among multiple SegmentReaders
            // TODO: Change this in case FieldsReader becomes thread-safe in the future
            System.String fieldsSegment;
            Directory storeDir = Directory();
            if (si.GetDocStoreOffset() != -1) {
                fieldsSegment = si.GetDocStoreSegment();
                if (storeCFSReader != null) {
                    storeDir = storeCFSReader;
                }
            } else {
                fieldsSegment = segment;
                if (cfsReader != null) {
                    storeDir = cfsReader;
                }
            }
            if (fieldsReader != null) {
                clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
            }
            if (!deletionsUpToDate) {
                // load deleted docs
                clone.deletedDocs = null;
                clone.LoadDeletedDocs();
            } else {
                clone.deletedDocs = this.deletedDocs;
            }
            clone.norms = new System.Collections.Hashtable();
            if (!normsUpToDate) {
                // load norms
                for (int i = 0; i < fieldNormsChanged.Length; i++) {
                    // copy unchanged norms to the cloned reader and incRef those norms
                    if (!fieldNormsChanged[i]) {
                        System.String curField = fieldInfos.FieldInfo(i).name;
                        Norm norm = (Norm) this.norms[curField];
                        norm.IncRef();
                        clone.norms[curField] = norm;
                    }
                }
                clone.OpenNorms(si.GetUseCompoundFile() ? cfsReader : Directory(), readBufferSize);
            } else {
                System.Collections.IEnumerator it = norms.Keys.GetEnumerator();
                while (it.MoveNext()) {
                    System.String field = (System.String) it.Current;
                    Norm norm = (Norm) norms[field];
                    norm.IncRef();
                    clone.norms[field] = norm;
                }
            }
            if (clone.singleNormStream == null) {
                for (int i = 0; i < fieldInfos.Size(); i++) {
                    FieldInfo fi = fieldInfos.FieldInfo(i);
                    if (fi.isIndexed && !fi.omitNorms) {
                        Directory d = si.GetUseCompoundFile() ? cfsReader : Directory();
                        System.String fileName = si.GetNormFileName(fi.number);
                        if (si.HasSeparateNorms(fi.number)) {
                            continue;
                        }
                        if (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION)) {
                            clone.singleNormStream = d.OpenInput(fileName, readBufferSize);
                            break;
                        }
                    }
                }
            }
            success = true;
        } finally {
            if (this.referencedSegmentReader != null) {
                // this reader shares resources with another SegmentReader,
                // so we increment the other reader's refCount. We don't
                // increment the refCount of the norms because we did
                // that already for the shared norms
                clone.referencedSegmentReader = this.referencedSegmentReader;
                referencedSegmentReader.IncRefReaderNotNorms();
            } else {
                // this reader wasn't reopened, so we increment this
                // reader's refCount
                clone.referencedSegmentReader = this;
                IncRefReaderNotNorms();
            }
            if (!success) {
                // An exception occurred during reopen; we have to decRef the norms
                // that we incRef'ed already and close singleNormStream and FieldsReader
                clone.DecRef();
            }
        }
        return clone;
    }
}
/// <summary>Returns true if this single info is optimized (has no
/// pending norms or deletes, is in the same dir as the
/// writer, and matches the current compound file setting).
/// </summary>
private bool IsOptimized(IndexWriter writer, SegmentInfo info) {
    return !info.HasDeletions() &&
        !info.HasSeparateNorms() &&
        info.dir == writer.GetDirectory() &&
        info.GetUseCompoundFile() == useCompoundFile;
}
/// <summary>Returns a {@link Status} instance detailing
/// the state of the index.
/// </summary>
/// <param name="onlySegments">list of specific segment names to check
///
/// <p/>As this method checks every byte in the specified
/// segments, on a large index it can take quite a long
/// time to run.
///
/// <p/><b>WARNING</b>: make sure
/// you only call this when the index is not opened by any
/// writer.
/// </param>
public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments) {
    System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();
    Status result = new Status();
    result.dir = dir;
    try {
        sis.Read(dir);
    } catch (System.Exception t) {
        Msg("ERROR: could not read any segments file in directory");
        result.missingSegments = true;
        if (infoStream != null) {
            infoStream.WriteLine(t.StackTrace);
        }
        return result;
    }
    int numSegments = sis.Count;
    System.String segmentsFileName = sis.GetCurrentSegmentFileName();
    IndexInput input = null;
    try {
        input = dir.OpenInput(segmentsFileName);
    } catch (System.Exception t) {
        Msg("ERROR: could not open segments file in directory");
        if (infoStream != null) {
            infoStream.WriteLine(t.StackTrace);
        }
        result.cantOpenSegments = true;
        return result;
    }
    int format = 0;
    try {
        format = input.ReadInt();
    } catch (System.Exception t) {
        Msg("ERROR: could not read segment file version in directory");
        if (infoStream != null) {
            infoStream.WriteLine(t.StackTrace);
        }
        result.missingSegmentVersion = true;
        return result;
    } finally {
        if (input != null) {
            input.Close();
        }
    }
    System.String sFormat = "";
    bool skip = false;
    if (format == SegmentInfos.FORMAT) {
        sFormat = "FORMAT [Lucene Pre-2.1]";
    }
    if (format == SegmentInfos.FORMAT_LOCKLESS) {
        sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
    } else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) {
        sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    } else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) {
        sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    } else {
        if (format == SegmentInfos.FORMAT_CHECKSUM) {
            sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
        } else if (format == SegmentInfos.FORMAT_DEL_COUNT) {
            sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
        } else if (format == SegmentInfos.FORMAT_HAS_PROX) {
            sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
        } else if (format == SegmentInfos.FORMAT_USER_DATA) {
            sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
        } else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) {
            sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
        } else if (format < SegmentInfos.CURRENT_FORMAT) {
            sFormat = "int=" + format + " [newer version of Lucene than this tool]";
            skip = true;
        } else {
            sFormat = format + " [Lucene 1.3 or prior]";
        }
    }
    result.segmentsFileName = segmentsFileName;
    result.numSegments = numSegments;
    result.segmentFormat = sFormat;
    result.userData = sis.GetUserData();
    System.String userDataString;
    if (sis.GetUserData().Count > 0) {
        userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
    } else {
        userDataString = "";
    }
    Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);
    if (onlySegments != null) {
        result.partial = true;
        if (infoStream != null) {
            infoStream.Write("\nChecking only these segments:");
        }
        System.Collections.IEnumerator it = onlySegments.GetEnumerator();
        while (it.MoveNext()) {
            if (infoStream != null) {
                infoStream.Write(" " + it.Current);
            }
        }
        System.Collections.IEnumerator e = onlySegments.GetEnumerator();
        while (e.MoveNext()) {
            result.segmentsChecked.Add(e.Current);
        }
        Msg(":");
    }
    if (skip) {
        Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        result.toolOutOfDate = true;
        return result;
    }
    result.newSegments = (SegmentInfos) sis.Clone();
    result.newSegments.Clear();
    for (int i = 0; i < numSegments; i++) {
        SegmentInfo info = sis.Info(i);
        if (onlySegments != null && !onlySegments.Contains(info.name)) {
            continue;
        }
        Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
        result.segmentInfos.Add(segInfoStat);
        Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
        segInfoStat.name = info.name;
        segInfoStat.docCount = info.docCount;
        int toLoseDocCount = info.docCount;
        SegmentReader reader = null;
        try {
            Msg(" compound=" + info.GetUseCompoundFile());
            segInfoStat.compound = info.GetUseCompoundFile();
            Msg(" hasProx=" + info.GetHasProx());
            segInfoStat.hasProx = info.GetHasProx();
            Msg(" numFiles=" + info.Files().Count);
            segInfoStat.numFiles = info.Files().Count;
            Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
            segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
            System.Collections.Generic.IDictionary<string, string> diagnostics = info.GetDiagnostics();
            segInfoStat.diagnostics = diagnostics;
            if (diagnostics.Count > 0) {
                Msg(" diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
            }
            int docStoreOffset = info.GetDocStoreOffset();
            if (docStoreOffset != -1) {
                Msg(" docStoreOffset=" + docStoreOffset);
                segInfoStat.docStoreOffset = docStoreOffset;
                Msg(" docStoreSegment=" + info.GetDocStoreSegment());
                segInfoStat.docStoreSegment = info.GetDocStoreSegment();
                Msg(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
            }
            System.String delFileName = info.GetDelFileName();
            if (delFileName == null) {
                Msg(" no deletions");
                segInfoStat.hasDeletions = false;
            } else {
                Msg(" has deletions [delFileName=" + delFileName + "]");
                segInfoStat.hasDeletions = true;
                segInfoStat.deletionsFileName = delFileName;
            }
            if (infoStream != null) {
                infoStream.Write(" test: open reader.........");
            }
            reader = SegmentReader.Get(info);
            segInfoStat.openReaderPassed = true;
            int numDocs = reader.NumDocs();
            toLoseDocCount = numDocs;
            if (reader.HasDeletions()) {
                if (reader.deletedDocs.Count() != info.GetDelCount()) {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                }
                if (reader.deletedDocs.Count() > reader.MaxDoc()) {
                    throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                }
                if (info.docCount - numDocs != info.GetDelCount()) {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                }
                segInfoStat.numDeleted = info.docCount - numDocs;
                Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
            } else {
                if (info.GetDelCount() != 0) {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                }
                Msg("OK");
            }
            if (reader.MaxDoc() != info.docCount) {
                throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
            }
            // Test getFieldNames()
            if (infoStream != null) {
                infoStream.Write(" test: fields..............");
            }
            System.Collections.Generic.ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
            Msg("OK [" + fieldNames.Count + " fields]");
            segInfoStat.numFields = fieldNames.Count;
            // Test Field Norms
            segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);
            // Test the Term Index
            segInfoStat.termIndexStatus = TestTermIndex(info, reader);
            // Test Stored Fields
            segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);
            // Test Term Vectors
            segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);
            // Rethrow the first exception we encountered
            // This will cause stats for failed segments to be incremented properly
            if (segInfoStat.fieldNormStatus.error != null) {
                throw new System.SystemException("Field Norm test failed");
            } else if (segInfoStat.termIndexStatus.error != null) {
                throw new System.SystemException("Term Index test failed");
            } else if (segInfoStat.storedFieldStatus.error != null) {
                throw new System.SystemException("Stored Field test failed");
            } else if (segInfoStat.termVectorStatus.error != null) {
                throw new System.SystemException("Term Vector test failed");
            }
            Msg("");
        } catch (System.Exception t) {
            Msg("FAILED");
            System.String comment;
            comment = "fixIndex() would remove reference to this segment";
            Msg(" WARNING: " + comment + "; full exception:");
            if (infoStream != null) {
                infoStream.WriteLine(t.StackTrace);
            }
            Msg("");
            result.totLoseDocCount += toLoseDocCount;
            result.numBadSegments++;
            continue;
        } finally {
            if (reader != null) {
                reader.Close();
            }
        }
        // Keeper
        result.newSegments.Add(info.Clone());
    }
    if (0 == result.numBadSegments) {
        result.clean = true;
        Msg("No problems were detected with this index.\n");
    } else {
        Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
    }
    return result;
}
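// A hedged sketch of driving the checker above from a small console tool,
// assuming the Lucene.NET 2.9-era surface shown in this file
// (CheckIndex_Renamed_Method mirrors Java's checkIndex, and Status exposes
// the clean/numBadSegments fields assigned above). The index path argument
// and the example class itself are illustrative.
public static class CheckIndexExample {
    public static void Main(System.String[] args) {
        Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(args[0]));
        CheckIndex checker = new CheckIndex(dir);
        // Pass null to check every segment rather than a named subset.
        CheckIndex.Status status = checker.CheckIndex_Renamed_Method(null);
        System.Console.WriteLine(status.clean
            ? "Index is clean."
            : "Index has " + status.numBadSegments + " broken segment(s).");
        dir.Close();
    }
}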
/// <summary>Returns true if index is clean, else false.</summary>
public static bool Check(Directory dir, bool doFix) {
    System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();
    try {
        sis.Read(dir);
    } catch (System.Exception t) {
        out_Renamed.WriteLine("ERROR: could not read any segments file in directory");
        out_Renamed.Write(t.StackTrace);
        out_Renamed.Flush();
        return false;
    }
    int numSegments = sis.Count;
    System.String segmentsFileName = sis.GetCurrentSegmentFileName();
    IndexInput input = null;
    try {
        input = dir.OpenInput(segmentsFileName);
    } catch (System.Exception t) {
        out_Renamed.WriteLine("ERROR: could not open segments file in directory");
        out_Renamed.Write(t.StackTrace);
        out_Renamed.Flush();
        return false;
    }
    int format = 0;
    try {
        format = input.ReadInt();
    } catch (System.Exception t) {
        out_Renamed.WriteLine("ERROR: could not read segment file version in directory");
        out_Renamed.Write(t.StackTrace);
        out_Renamed.Flush();
        return false;
    } finally {
        if (input != null) {
            input.Close();
        }
    }
    System.String sFormat = "";
    bool skip = false;
    if (format == SegmentInfos.FORMAT) {
        sFormat = "FORMAT [Lucene Pre-2.1]";
    }
    if (format == SegmentInfos.FORMAT_LOCKLESS) {
        sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
    } else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) {
        sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    } else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) {
        sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    } else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE) {
        sFormat = "int=" + format + " [newer version of Lucene than this tool]";
        skip = true;
    } else {
        sFormat = format + " [Lucene 1.3 or prior]";
    }
    out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);
    if (skip) {
        out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        return false;
    }
    SegmentInfos newSIS = (SegmentInfos) sis.Clone();
    newSIS.Clear();
    bool changed = false;
    int totLoseDocCount = 0;
    int numBadSegments = 0;
    for (int i = 0; i < numSegments; i++) {
        SegmentInfo info = sis.Info(i);
        out_Renamed.WriteLine(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
        int toLoseDocCount = info.docCount;
        SegmentReader reader = null;
        try {
            out_Renamed.WriteLine(" compound=" + info.GetUseCompoundFile());
            out_Renamed.WriteLine(" numFiles=" + info.Files().Count);
            out_Renamed.WriteLine(String.Format(nf, " size (MB)={0:f}", new Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
            int docStoreOffset = info.GetDocStoreOffset();
            if (docStoreOffset != -1) {
                out_Renamed.WriteLine(" docStoreOffset=" + docStoreOffset);
                out_Renamed.WriteLine(" docStoreSegment=" + info.GetDocStoreSegment());
                out_Renamed.WriteLine(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
            }
            System.String delFileName = info.GetDelFileName();
            if (delFileName == null) {
                out_Renamed.WriteLine(" no deletions");
            } else {
                out_Renamed.WriteLine(" has deletions [delFileName=" + delFileName + "]");
            }
            out_Renamed.Write(" test: open reader.........");
            reader = SegmentReader.Get(info);
            int numDocs = reader.NumDocs();
            toLoseDocCount = numDocs;
            if (reader.HasDeletions()) {
                out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]");
            } else {
                out_Renamed.WriteLine("OK");
            }
            out_Renamed.Write(" test: fields, norms.......");
            System.Collections.IDictionary fieldNames = (System.Collections.IDictionary) reader.GetFieldNames(IndexReader.FieldOption.ALL);
            System.Collections.IEnumerator it = fieldNames.Keys.GetEnumerator();
            while (it.MoveNext()) {
                System.String fieldName = (System.String) it.Current;
                byte[] b = reader.Norms(fieldName);
                if (b.Length != info.docCount) {
                    throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount);
                }
            }
            out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]");
            out_Renamed.Write(" test: terms, freq, prox...");
            TermEnum termEnum = reader.Terms();
            TermPositions termPositions = reader.TermPositions();
            // Used only to count up # deleted docs for this term
            MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
            long termCount = 0;
            long totFreq = 0;
            long totPos = 0;
            while (termEnum.Next()) {
                termCount++;
                Term term = termEnum.Term();
                int docFreq = termEnum.DocFreq();
                termPositions.Seek(term);
                int lastDoc = -1;
                int freq0 = 0;
                totFreq += docFreq;
                while (termPositions.Next()) {
                    freq0++;
                    int doc = termPositions.Doc();
                    int freq = termPositions.Freq();
                    if (doc <= lastDoc) {
                        throw new System.SystemException("term " + term + ": doc " + doc + " < lastDoc " + lastDoc);
                    }
                    lastDoc = doc;
                    if (freq <= 0) {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                    }
                    int lastPos = -1;
                    totPos += freq;
                    for (int j = 0; j < freq; j++) {
                        int pos = termPositions.NextPosition();
                        if (pos < 0) {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                        }
                        if (pos <= lastPos) {
                            throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                        }
                    }
                }
                // Now count how many deleted docs occurred in this term:
                int delCount;
                if (reader.HasDeletions()) {
                    myTermDocs.Seek(term);
                    while (myTermDocs.Next()) {
                    }
                    delCount = myTermDocs.delCount;
                } else {
                    delCount = 0;
                }
                if (freq0 + delCount != docFreq) {
                    throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                }
            }
            out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
            out_Renamed.Write(" test: stored fields.......");
            int docCount = 0;
            long totFields = 0;
            for (int j = 0; j < info.docCount; j++) {
                if (!reader.IsDeleted(j)) {
                    docCount++;
                    Document doc = reader.Document(j);
                    totFields += doc.GetFields().Count;
                }
            }
            if (docCount != reader.NumDocs()) {
                throw new System.SystemException("docCount=" + docCount + " but saw " + reader.NumDocs() + " undeleted docs");
            }
            out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new Object[] { totFields, (((float) totFields) / docCount) }));
            out_Renamed.Write(" test: term vectors........");
            int totVectors = 0;
            for (int j = 0; j < info.docCount; j++) {
                if (!reader.IsDeleted(j)) {
                    TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                    if (tfv != null) {
                        totVectors += tfv.Length;
                    }
                }
            }
            out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new Object[] { totVectors, (((float) totVectors) / docCount) }));
            out_Renamed.WriteLine("");
        } catch (System.Exception t) {
            out_Renamed.WriteLine("FAILED");
            System.String comment;
            if (doFix) {
                comment = "will remove reference to this segment (-fix is specified)";
            } else {
                comment = "would remove reference to this segment (-fix was not specified)";
            }
            out_Renamed.WriteLine(" WARNING: " + comment + "; full exception:");
            out_Renamed.Write(t.StackTrace);
            out_Renamed.Flush();
            out_Renamed.WriteLine("");
            totLoseDocCount += toLoseDocCount;
            numBadSegments++;
            changed = true;
            continue;
        } finally {
            if (reader != null) {
                reader.Close();
            }
        }
        // Keeper
        newSIS.Add(info.Clone());
    }
    if (!changed) {
        out_Renamed.WriteLine("No problems were detected with this index.\n");
        return true;
    } else {
        out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected");
        if (doFix) {
            out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost");
        } else {
            out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified");
        }
        out_Renamed.WriteLine();
    }
    if (doFix) {
        out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
        for (int i = 0; i < 5; i++) {
            try {
                System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
            } catch (System.Threading.ThreadInterruptedException) {
                SupportClass.ThreadClass.Current().Interrupt();
                i--;
                continue;
            }
            out_Renamed.WriteLine(" " + (5 - i) + "...");
        }
        out_Renamed.Write("Writing...");
        try {
            newSIS.Write(dir);
        } catch (System.Exception t) {
            out_Renamed.WriteLine("FAILED; exiting");
            out_Renamed.Write(t.StackTrace);
            out_Renamed.Flush();
            return false;
        }
        out_Renamed.WriteLine("OK");
        out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\"");
    } else {
        out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]");
    }
    out_Renamed.WriteLine("");
    return false;
}
// NOTE: only called from IndexWriter when a near
// real-time reader is opened, or applyDeletes is run,
// sharing a segment that's still being merged. This
// method is not fully thread safe, and relies on the
// synchronization in IndexWriter
internal void LoadTermsIndex(SegmentInfo si, int termsIndexDivisor) {
    lock (this) {
        if (tis == null) {
            Directory dir0;
            if (si.GetUseCompoundFile()) {
                // In some cases, we were originally opened when CFS
                // was not used, but then we are asked to open the
                // terms reader with index after the segment has
                // switched to CFS
                if (cfsReader == null) {
                    cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
                }
                dir0 = cfsReader;
            } else {
                dir0 = dir;
            }
            tis = new TermInfosReader(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor);
        }
    }
}
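// A minimal sketch of the near-real-time path that reaches LoadTermsIndex,
// assuming the Lucene.NET 2.9-era IndexWriter.GetReader() API: readers opened
// this way can share segments that are still being merged, so their terms
// index is loaded lazily as described in the NOTE above. The example class
// and method names are illustrative only.
public static class NrtExample {
    public static void SearchWhileIndexing(Lucene.Net.Store.Directory dir, Lucene.Net.Analysis.Analyzer analyzer) {
        Lucene.Net.Index.IndexWriter writer = new Lucene.Net.Index.IndexWriter(dir, analyzer, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
        // ... AddDocument calls ...
        // GetReader() exposes uncommitted changes, including still-merging segments.
        Lucene.Net.Index.IndexReader reader = writer.GetReader();
        // ... search against the near-real-time reader ...
        reader.Close();
        writer.Close();
    }
}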
/// <summary>Returns true if this single info is optimized (has no
/// pending norms or deletes, is in the same dir as the
/// writer, and matches the current compound file setting).
/// </summary>
private bool IsOptimized(SegmentInfo info) {
    bool hasDeletions = writer.NumDeletedDocs(info) > 0;
    return !hasDeletions &&
        !info.HasSeparateNorms() &&
        info.dir == writer.GetDirectory() &&
        (info.GetUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
}
internal virtual SegmentReader ReopenSegment(SegmentInfo si, bool doClone, bool openReadOnly) {
    lock (this) {
        bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) &&
            (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
        bool normsUpToDate = true;
        bool[] fieldNormsChanged = new bool[core.fieldInfos.Size()];
        int fieldCount = core.fieldInfos.Size();
        for (int i = 0; i < fieldCount; i++) {
            if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i))) {
                normsUpToDate = false;
                fieldNormsChanged[i] = true;
            }
        }
        // if we're cloning we need to run through the reopenSegment logic
        // also if both old and new readers aren't readonly, we clone to avoid sharing modifications
        if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly) {
            return this;
        }
        // When cloning, the incoming SegmentInfos should not
        // have any changes in it:
        System.Diagnostics.Debug.Assert(!doClone || (normsUpToDate && deletionsUpToDate));
        // clone reader
        SegmentReader clone;
        try {
            if (openReadOnly) {
                clone = (SegmentReader) System.Activator.CreateInstance(READONLY_IMPL);
            } else {
                clone = (SegmentReader) System.Activator.CreateInstance(IMPL);
            }
        } catch (System.Exception e) {
            throw new System.SystemException("cannot load SegmentReader class: " + e, e);
        }
        bool success = false;
        try {
            core.IncRef();
            clone.core = core;
            clone.readOnly = openReadOnly;
            clone.si = si;
            clone.readBufferSize = readBufferSize;
            if (!openReadOnly && hasChanges) {
                // My pending changes transfer to the new reader
                clone.pendingDeleteCount = pendingDeleteCount;
                clone.deletedDocsDirty = deletedDocsDirty;
                clone.normsDirty = normsDirty;
                clone.hasChanges = hasChanges;
                hasChanges = false;
            }
            if (doClone) {
                if (deletedDocs != null) {
                    deletedDocsRef.IncRef();
                    clone.deletedDocs = deletedDocs;
                    clone.deletedDocsRef = deletedDocsRef;
                }
            } else {
                if (!deletionsUpToDate) {
                    // load deleted docs
                    System.Diagnostics.Debug.Assert(clone.deletedDocs == null);
                    clone.LoadDeletedDocs();
                } else if (deletedDocs != null) {
                    deletedDocsRef.IncRef();
                    clone.deletedDocs = deletedDocs;
                    clone.deletedDocsRef = deletedDocsRef;
                }
            }
            clone.SetDisableFakeNorms(GetDisableFakeNorms());
            clone.norms = new System.Collections.Hashtable();
            // Clone norms
            for (int i = 0; i < fieldNormsChanged.Length; i++) {
                // Clone unchanged norms to the cloned reader
                if (doClone || !fieldNormsChanged[i]) {
                    System.String curField = core.fieldInfos.FieldInfo(i).name;
                    Norm norm = (Norm) this.norms[curField];
                    if (norm != null) {
                        clone.norms[curField] = norm.Clone();
                    }
                }
            }
            // If we are not cloning, then this will open anew
            // any norms that have changed:
            clone.OpenNorms(si.GetUseCompoundFile() ? core.GetCFSReader() : Directory(), readBufferSize);
            success = true;
        } finally {
            if (!success) {
                // An exception occurred during reopen; we have to decRef the norms
                // that we incRef'ed already and close singleNormStream and FieldsReader
                clone.DecRef();
            }
        }
        return clone;
    }
}
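// A hedged sketch of the public entry points that funnel into the
// ReopenSegment(si, doClone, openReadOnly) overload above, assuming the
// Lucene.NET 2.9-era IndexReader.Reopen(bool) and Clone(bool) overloads.
// Illustrative only, not part of the SegmentReader class.
public static class CloneExample {
    public static void Example(Lucene.Net.Index.IndexReader reader) {
        // Reopen(true) requests a read-only view (openReadOnly == true);
        // Clone(true) forces the doClone path even when nothing changed.
        Lucene.Net.Index.IndexReader readOnlyView = reader.Reopen(true);
        Lucene.Net.Index.IndexReader clone = reader.Clone(true);
        clone.Close();
        if (readOnlyView != reader) {
            readOnlyView.Close();
        }
    }
}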
private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores) {
    segment = si.name;
    this.si = si;
    this.readBufferSize = readBufferSize;
    bool success = false;
    try {
        // Use compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile()) {
            cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            cfsDir = cfsReader;
        }
        Directory storeDir;
        if (doOpenStores) {
            if (si.GetDocStoreOffset() != -1) {
                if (si.GetDocStoreIsCompoundFile()) {
                    storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                    storeDir = storeCFSReader;
                } else {
                    storeDir = Directory();
                }
            } else {
                storeDir = cfsDir;
            }
        } else {
            storeDir = null;
        }
        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
        bool anyProx = false;
        int numFields = fieldInfos.Size();
        for (int i = 0; !anyProx && i < numFields; i++) {
            if (!fieldInfos.FieldInfo(i).omitTf) {
                anyProx = true;
            }
        }
        System.String fieldsSegment;
        if (si.GetDocStoreOffset() != -1) {
            fieldsSegment = si.GetDocStoreSegment();
        } else {
            fieldsSegment = segment;
        }
        if (doOpenStores) {
            fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
            // Verify two sources of "maxDoc" agree:
            if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount) {
                throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
            }
        }
        tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);
        LoadDeletedDocs();
        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
        if (anyProx) {
            proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
        }
        OpenNorms(cfsDir, readBufferSize);
        if (doOpenStores && fieldInfos.HasVectors()) {
            // open term vector files only as needed
            System.String vectorsSegment;
            if (si.GetDocStoreOffset() != -1) {
                vectorsSegment = si.GetDocStoreSegment();
            } else {
                vectorsSegment = segment;
            }
            termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
        }
        success = true;
    } finally {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success) {
            DoClose();
        }
    }
}
internal static bool UsesCompoundFile(SegmentInfo si) { return si.GetUseCompoundFile(); }
/// <summary>Determine index files that are no longer referenced
/// and therefore should be deleted. This is called once
/// (by the writer), and then subsequently we add onto
/// deletable any files that are no longer needed at the
/// point that we create the unused file (eg when merging
/// segments), and we only remove from deletable when a
/// file is successfully deleted.
/// </summary>
public void FindDeletableFiles() {
    // Gather all "current" segments:
    System.Collections.Hashtable current = new System.Collections.Hashtable();
    for (int j = 0; j < segmentInfos.Count; j++) {
        SegmentInfo segmentInfo = (SegmentInfo) segmentInfos[j];
        current[segmentInfo.name] = segmentInfo;
    }
    // Then go through all files in the Directory that are
    // Lucene index files, and add to deletable if they are
    // not referenced by the current segments info:
    System.String segmentsInfosFileName = segmentInfos.GetCurrentSegmentFileName();
    IndexFileNameFilter filter = IndexFileNameFilter.GetFilter();
    System.String[] files = directory.List();
    for (int i = 0; i < files.Length; i++) {
        if (filter.Accept(null, files[i]) && !files[i].Equals(segmentsInfosFileName) && !files[i].Equals(IndexFileNames.SEGMENTS_GEN)) {
            System.String segmentName;
            System.String extension;
            // First remove any extension:
            int loc = files[i].IndexOf('.');
            if (loc != -1) {
                extension = files[i].Substring(1 + loc);
                segmentName = files[i].Substring(0, loc);
            } else {
                extension = null;
                segmentName = files[i];
            }
            // Then, remove any generation count:
            loc = segmentName.IndexOf('_', 1);
            if (loc != -1) {
                segmentName = segmentName.Substring(0, loc);
            }
            // Delete this file if it's not a "current" segment,
            // or, it is a single index file but there is now a
            // corresponding compound file:
            bool doDelete = false;
            if (!current.ContainsKey(segmentName)) {
                // Delete if segment is not referenced:
                doDelete = true;
            } else {
                // OK, segment is referenced, but file may still
                // be orphaned:
                SegmentInfo info = (SegmentInfo) current[segmentName];
                if (filter.IsCFSFile(files[i]) && info.GetUseCompoundFile()) {
                    // This file is in fact stored in a CFS file for
                    // this segment:
                    doDelete = true;
                } else {
                    System.Text.RegularExpressions.Regex p = new System.Text.RegularExpressions.Regex("s\\d+");
                    if ("del".Equals(extension)) {
                        // This is a _segmentName_N.del file:
                        if (!files[i].Equals(info.GetDelFileName())) {
                            // If this is a separate .del file, but it
                            // doesn't match the current del filename for
                            // this segment, then delete it:
                            doDelete = true;
                        }
                    } else if (extension != null && extension.StartsWith("s") && p.Match(extension).Success) {
                        int field = System.Int32.Parse(extension.Substring(1));
                        // This is a _segmentName_N.sX file:
                        if (!files[i].Equals(info.GetNormFileName(field))) {
                            // This is an orphaned separate norms file:
                            doDelete = true;
                        }
                    } else if ("cfs".Equals(extension) && !info.GetUseCompoundFile()) {
                        // This is a partially written
                        // _segmentName.cfs:
                        doDelete = true;
                    }
                }
            }
            if (doDelete) {
                AddDeletableFile(files[i]);
                if (infoStream != null) {
                    infoStream.WriteLine("IndexFileDeleter: file \"" + files[i] + "\" is unreferenced in index and will be deleted on next commit");
                }
            }
        }
    }
}
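// A self-contained sketch of the filename parsing FindDeletableFiles relies
// on: strip the extension, then strip any "_N" generation suffix to recover
// the owning segment name. Pure string handling; the class and method names
// here are illustrative, not part of IndexFileDeleter.
public static class SegmentFileNameExample {
    public static void Parse(System.String fileName, out System.String segmentName, out System.String extension) {
        // First remove any extension:
        int loc = fileName.IndexOf('.');
        if (loc != -1) {
            extension = fileName.Substring(1 + loc);
            segmentName = fileName.Substring(0, loc);
        } else {
            extension = null;
            segmentName = fileName;
        }
        // Separate norms/deletes carry a generation suffix, e.g. "_3_2.del"
        // belongs to segment "_3" (starting the search at 1 skips the
        // segment name's own leading underscore).
        loc = segmentName.IndexOf('_', 1);
        if (loc != -1) {
            segmentName = segmentName.Substring(0, loc);
        }
    }
}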