/// <summary>
/// Writer calls this when it has hit an error and had to roll back, to tell
/// us that there may now be unreferenced files in the filesystem. The
/// directory is re-listed and every index file that has no entry in
/// <c>refCounts</c> is deleted.
/// </summary>
/// <param name="segmentName">
/// When non-null, only files whose name starts with <c>segmentName + "."</c>
/// or <c>segmentName + "_"</c> are considered; when null, every file accepted
/// by the index file name filter is considered.
/// </param>
public void Refresh(System.String segmentName)
{
    System.String[] files = directory.ListAll();
    IndexFileNameFilter filter = IndexFileNameFilter.GetFilter();

    // Both prefixes stay null when no segment restriction was requested;
    // they are only dereferenced behind the segmentName null check below.
    System.String segmentPrefix1 = null;
    System.String segmentPrefix2 = null;
    if (segmentName != null)
    {
        segmentPrefix1 = segmentName + ".";
        segmentPrefix2 = segmentName + "_";
    }

    for (int i = 0; i < files.Length; i++)
    {
        System.String fileName = files[i];

        // File names are opaque identifiers: compare them ordinally so the
        // result does not depend on the current culture's collation rules.
        if (filter.Accept(null, fileName)
            && (segmentName == null
                || fileName.StartsWith(segmentPrefix1, System.StringComparison.Ordinal)
                || fileName.StartsWith(segmentPrefix2, System.StringComparison.Ordinal))
            && !refCounts.ContainsKey(fileName)
            && !fileName.Equals(IndexFileNames.SEGMENTS_GEN))
        {
            // Unreferenced file, so remove it
            if (infoStream != null)
            {
                Message("refresh [prefix=" + segmentName + "]: removing newly created unreferenced file \"" + fileName + "\"");
            }
            DeleteFile(fileName);
        }
    }
}
/// <summary>
/// Collects the names of the files located directly inside a directory that
/// are accepted by the supplied filter.
/// </summary>
/// <param name="fullName">The full path name to the directory.</param>
/// <param name="indexFileNameFilter">Filter whose <c>Accept</c> method decides which files are returned.</param>
/// <returns>An array containing the names of the accepted files.</returns>
public static System.String[] GetLuceneIndexFiles(System.String fullName, Lucene.Net.Index.IndexFileNameFilter indexFileNameFilter)
{
    System.IO.FileInfo[] candidates = new System.IO.DirectoryInfo(fullName).GetFiles();
    System.Collections.Generic.List<System.String> accepted = new System.Collections.Generic.List<System.String>();

    for (int i = 0; i < candidates.Length; i++)
    {
        System.IO.FileInfo candidate = candidates[i];
        if (indexFileNameFilter.Accept(candidate, candidate.Name))
        {
            accepted.Add(candidate.Name);
        }
    }

    return accepted.ToArray();
}
/// <summary>
/// Returns all files referenced by this SegmentInfo. The result is computed
/// once, stored in <c>files</c> and handed out directly on later calls, so
/// callers should not modify the returned list.
/// </summary>
/// <returns>The cached list of file names belonging to this segment.</returns>
public IList<string> Files()
{
    if (files != null)
    {
        // Already cached:
        return files;
    }

    var fileList = new System.Collections.Generic.List<string>();
    bool useCompoundFile = GetUseCompoundFile();

    if (useCompoundFile)
    {
        fileList.Add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
    }
    else
    {
        System.String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS;
        for (int i = 0; i < exts.Length; i++)
        {
            AddIfExists(fileList, name + "." + exts[i]);
        }
    }

    if (docStoreOffset != -1)
    {
        // We are sharing doc stores (stored fields, term
        // vectors) with other segments
        System.Diagnostics.Debug.Assert(docStoreSegment != null);
        if (docStoreIsCompoundFile)
        {
            fileList.Add(docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
        }
        else
        {
            System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
            for (int i = 0; i < exts.Length; i++)
            {
                AddIfExists(fileList, docStoreSegment + "." + exts[i]);
            }
        }
    }
    else if (!useCompoundFile)
    {
        // We are not sharing, and, these files were not
        // included in the compound file
        System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
        for (int i = 0; i < exts.Length; i++)
        {
            AddIfExists(fileList, name + "." + exts[i]);
        }
    }

    System.String delFileName = IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
    if (delFileName != null && (delGen >= YES || dir.FileExists(delFileName)))
    {
        fileList.Add(delFileName);
    }

    // Careful logic for norms files
    if (normGen != null)
    {
        for (int i = 0; i < normGen.Length; i++)
        {
            long gen = normGen[i];
            if (gen >= YES)
            {
                // Definitely a separate norm file, with generation:
                fileList.Add(IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
            }
            else if (NO == gen)
            {
                // No separate norms but maybe plain norms
                // in the non compound file case:
                if (!hasSingleNormFile && !useCompoundFile)
                {
                    System.String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
                    if (dir.FileExists(fileName))
                    {
                        fileList.Add(fileName);
                    }
                }
            }
            else if (CHECK_DIR == gen)
            {
                // Pre-2.1: we have to check file existence
                System.String fileName = null;
                if (useCompoundFile)
                {
                    fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i;
                }
                else if (!hasSingleNormFile)
                {
                    fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
                }

                if (fileName != null && dir.FileExists(fileName))
                {
                    fileList.Add(fileName);
                }
            }
        }
    }
    else if (preLockless || (!hasSingleNormFile && !useCompoundFile))
    {
        // Pre-2.1: we have to scan the dir to find all
        // matching _X.sN/_X.fN files for our segment:
        System.String prefix;
        if (useCompoundFile)
        {
            prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION;
        }
        else
        {
            prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION;
        }

        int prefixLength = prefix.Length;
        System.String[] allFiles = dir.ListAll();
        IndexFileNameFilter filter = IndexFileNameFilter.Filter;
        for (int i = 0; i < allFiles.Length; i++)
        {
            System.String fileName = allFiles[i];

            // The digit test indexes at a fixed character offset, so the
            // prefix test must be ordinal too: a culture-sensitive match is
            // not guaranteed to cover exactly prefixLength characters.
            if (filter.Accept(null, fileName)
                && fileName.Length > prefixLength
                && System.Char.IsDigit(fileName[prefixLength])
                && fileName.StartsWith(prefix, System.StringComparison.Ordinal))
            {
                fileList.Add(fileName);
            }
        }
    }

    files = fileList;
    return files;
}
/// <summary>
/// Returns true if any fields in this segment have separate norms.
/// </summary>
/// <returns>
/// <c>true</c> when at least one separate norms file is recorded in
/// <c>normGen</c> or, for segments without <c>normGen</c> that are flagged
/// <c>preLockless</c>, found by listing the directory; otherwise <c>false</c>.
/// </returns>
/// <exception cref="System.IO.IOException">
/// The directory listing returned null while checking a pre-lockless segment.
/// </exception>
public bool HasSeparateNorms()
{
    if (normGen == null)
    {
        if (!preLockless)
        {
            // This means we were created w/ LOCKLESS code and no
            // norms are written yet:
            return false;
        }

        // This means this segment was saved with pre-LOCKLESS
        // code.  So we must fallback to the original
        // directory list check:
        System.String[] result = dir.ListAll();
        if (result == null)
        {
            throw new System.IO.IOException("cannot read directory " + dir + ": ListAll() returned null");
        }

        IndexFileNameFilter filter = IndexFileNameFilter.Filter;
        System.String pattern = name + ".s";
        int patternLength = pattern.Length;
        for (int i = 0; i < result.Length; i++)
        {
            string fileName = result[i];

            // The length guard keeps fileName[patternLength] in range when a
            // file is named exactly like the pattern; the ordinal comparison
            // keeps the prefix match aligned with that fixed index.
            if (filter.Accept(null, fileName)
                && fileName.Length > patternLength
                && fileName.StartsWith(pattern, System.StringComparison.Ordinal)
                && char.IsDigit(fileName[patternLength]))
            {
                return true;
            }
        }

        return false;
    }

    // This means this segment was saved with LOCKLESS
    // code so we first check whether any normGen's are >= 1
    // (meaning they definitely have separate norms):
    for (int i = 0; i < normGen.Length; i++)
    {
        if (normGen[i] >= YES)
        {
            return true;
        }
    }

    // Next we look for any == 0.  These cases were
    // pre-LOCKLESS and must be checked in directory:
    for (int i = 0; i < normGen.Length; i++)
    {
        if (normGen[i] == CHECK_DIR && HasSeparateNorms(i))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Initialize the deleter: find all previous commits in the Directory,
/// incref the files they reference, call the policy to let it delete
/// commits. The incoming segmentInfos must have been loaded from a commit
/// point and not yet modified. This will remove any files not referenced
/// by any of the commits.
/// </summary>
/// <param name="directory">Directory whose index files are managed.</param>
/// <param name="policy">Deletion policy that is given the commit list via <c>OnInit</c>.</param>
/// <param name="segmentInfos">The segment infos of the current commit.</param>
/// <param name="infoStream">Optional diagnostics sink; no messages are produced when null.</param>
/// <param name="docWriter">Stored on the instance; not otherwise used by this constructor.</param>
/// <exception cref="CorruptIndexException">The current segments_N file could not be read.</exception>
/// <exception cref="System.IO.IOException">The directory listing returned null, or a low-level IO error occurred.</exception>
public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.TextWriter infoStream, DocumentsWriter docWriter)
{
    this.docWriter = docWriter;
    this.infoStream = infoStream;

    if (infoStream != null)
    {
        Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy);
    }

    this.policy = policy;
    this.directory = directory;

    // First pass: walk the files and initialize our ref
    // counts:
    long currentGen = segmentInfos.GetGeneration();
    IndexFileNameFilter filter = IndexFileNameFilter.GetFilter();

    System.String[] files = directory.List();
    if (files == null)
    {
        throw new System.IO.IOException("cannot read directory " + directory + ": list() returned null");
    }

    CommitPoint currentCommitPoint = null;

    for (int i = 0; i < files.Length; i++)
    {
        System.String fileName = files[i];

        if (filter.Accept(null, fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN))
        {
            // Add this file to refCounts with initial count 0:
            GetRefCount(fileName);

            // Ordinal comparison: file names are identifiers, not
            // culture-dependent text.
            if (fileName.StartsWith(IndexFileNames.SEGMENTS, System.StringComparison.Ordinal))
            {
                // This is a commit (segments or segments_N), and
                // it's valid (<= the max gen).  Load it, then
                // incref all files it refers to:
                if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen)
                {
                    if (infoStream != null)
                    {
                        Message("init: load commit \"" + fileName + "\"");
                    }

                    SegmentInfos sis = new SegmentInfos();
                    try
                    {
                        sis.Read(directory, fileName);
                    }
                    catch (System.IO.FileNotFoundException)
                    {
                        // LUCENE-948: on NFS (and maybe others), if
                        // you have writers switching back and forth
                        // between machines, it's very likely that the
                        // dir listing will be stale and will claim a
                        // file segments_X exists when in fact it
                        // doesn't.  So, we catch this and handle it
                        // as if the file does not exist
                        if (infoStream != null)
                        {
                            Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
                        }
                        sis = null;
                    }

                    if (sis != null)
                    {
                        CommitPoint commitPoint = new CommitPoint(this, sis);
                        if (sis.GetGeneration() == segmentInfos.GetGeneration())
                        {
                            currentCommitPoint = commitPoint;
                        }
                        commits.Add(commitPoint);
                        IncRef(sis, true);
                    }
                }
            }
        }
    }

    if (currentCommitPoint == null)
    {
        // We did not in fact see the segments_N file
        // corresponding to the segmentInfos that was passed
        // in.  Yet, it must exist, because our caller holds
        // the write lock.  This can happen when the directory
        // listing was stale (eg when index accessed via NFS
        // client with stale directory listing cache).  So we
        // try now to explicitly open this commit point:
        SegmentInfos sis = new SegmentInfos();
        try
        {
            sis.Read(directory, segmentInfos.GetCurrentSegmentFileName());
        }
        catch (System.IO.IOException)
        {
            throw new CorruptIndexException("failed to locate current segments_N file");
        }

        if (infoStream != null)
        {
            Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName());
        }

        currentCommitPoint = new CommitPoint(this, sis);
        commits.Add(currentCommitPoint);
        IncRef(sis, true);
    }

    // We keep commits list in sorted order (oldest to newest):
    commits.Sort();

    // Now delete anything with ref count at 0.  These are
    // presumably abandoned files eg due to crash of
    // IndexWriter.
    foreach (System.String fileName in refCounts.Keys)
    {
        RefCount rc = (RefCount)refCounts[fileName];
        if (0 == rc.count)
        {
            if (infoStream != null)
            {
                Message("init: removing unreferenced file \"" + fileName + "\"");
            }
            DeleteFile(fileName);
        }
    }

    // Finally, give policy a chance to remove things on
    // startup:
    policy.OnInit(commits);

    // It's OK for the onInit to remove the current commit
    // point; we just have to checkpoint our in-memory
    // SegmentInfos to protect those files that it uses:
    if (currentCommitPoint.deleted)
    {
        Checkpoint(segmentInfos, false);
    }

    DeleteCommits();
}
/// <summary>
/// Initialize the deleter: find all previous commits in the Directory,
/// incref the files they reference, call the policy to let it delete
/// commits. This will remove any files not referenced by any of the commits.
/// </summary>
/// <param name="directory">Directory whose index files are managed.</param>
/// <param name="policy">Deletion policy that is given the commit list via <c>OnInit</c>.</param>
/// <param name="segmentInfos">The segment infos to protect; always checkpointed before commits are deleted.</param>
/// <param name="infoStream">Optional diagnostics sink; no messages are produced when null.</param>
/// <param name="docWriter">Stored on the instance; not otherwise used by this constructor.</param>
/// <param name="synced">Stored on the instance; not otherwise used by this constructor.</param>
/// <exception cref="CorruptIndexException">The current segments_N file could not be read.</exception>
/// <exception cref="System.IO.IOException">A commit at or below the current generation could not be read.</exception>
public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.StreamWriter infoStream, DocumentsWriter docWriter, HashSet<string> synced)
{
    this.docWriter = docWriter;
    this.infoStream = infoStream;
    this.synced = synced;

    if (infoStream != null)
    {
        Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy);
    }

    this.policy = policy;
    this.directory = directory;

    // First pass: walk the files and initialize our ref
    // counts:
    long currentGen = segmentInfos.Generation;
    IndexFileNameFilter filter = IndexFileNameFilter.Filter;

    System.String[] files = directory.ListAll();

    CommitPoint currentCommitPoint = null;

    for (int i = 0; i < files.Length; i++)
    {
        System.String fileName = files[i];

        if (filter.Accept(null, fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN))
        {
            // Add this file to refCounts with initial count 0:
            GetRefCount(fileName);

            // Ordinal comparison: file names are identifiers, not
            // culture-dependent text.
            if (fileName.StartsWith(IndexFileNames.SEGMENTS, System.StringComparison.Ordinal))
            {
                // This is a commit (segments or segments_N).  Try to
                // load it, then incref all files it refers to.  The
                // generation is only consulted below, when loading
                // fails with an IOException.
                if (infoStream != null)
                {
                    Message("init: load commit \"" + fileName + "\"");
                }

                SegmentInfos sis = new SegmentInfos();
                try
                {
                    sis.Read(directory, fileName);
                }
                catch (System.IO.FileNotFoundException)
                {
                    // LUCENE-948: on NFS (and maybe others), if
                    // you have writers switching back and forth
                    // between machines, it's very likely that the
                    // dir listing will be stale and will claim a
                    // file segments_X exists when in fact it
                    // doesn't.  So, we catch this and handle it
                    // as if the file does not exist
                    if (infoStream != null)
                    {
                        Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
                    }
                    sis = null;
                }
                catch (System.IO.IOException)
                {
                    if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen)
                    {
                        throw;
                    }
                    else
                    {
                        // Most likely we are opening an index that
                        // has an aborted "future" commit, so suppress
                        // exc in this case
                        sis = null;
                    }
                }

                if (sis != null)
                {
                    CommitPoint commitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
                    if (sis.Generation == segmentInfos.Generation)
                    {
                        currentCommitPoint = commitPoint;
                    }
                    commits.Add(commitPoint);
                    IncRef(sis, true);

                    // Track the loaded commit with the highest generation.
                    if (lastSegmentInfos == null || sis.Generation > lastSegmentInfos.Generation)
                    {
                        lastSegmentInfos = sis;
                    }
                }
            }
        }
    }

    if (currentCommitPoint == null)
    {
        // We did not in fact see the segments_N file
        // corresponding to the segmentInfos that was passed
        // in.  Yet, it must exist, because our caller holds
        // the write lock.  This can happen when the directory
        // listing was stale (eg when index accessed via NFS
        // client with stale directory listing cache).  So we
        // try now to explicitly open this commit point:
        SegmentInfos sis = new SegmentInfos();
        try
        {
            sis.Read(directory, segmentInfos.GetCurrentSegmentFileName());
        }
        catch (System.IO.IOException)
        {
            throw new CorruptIndexException("failed to locate current segments_N file");
        }

        if (infoStream != null)
        {
            Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName());
        }

        currentCommitPoint = new CommitPoint(this, commitsToDelete, directory, sis);
        commits.Add(currentCommitPoint);
        IncRef(sis, true);
    }

    // We keep commits list in sorted order (oldest to newest):
    commits.Sort();

    // Now delete anything with ref count at 0.  These are
    // presumably abandoned files eg due to crash of
    // IndexWriter.
    foreach (KeyValuePair<string, RefCount> entry in refCounts)
    {
        // The enumerated pair already carries the value; no second lookup needed.
        string fileName = entry.Key;
        RefCount rc = entry.Value;
        if (0 == rc.count)
        {
            if (infoStream != null)
            {
                Message("init: removing unreferenced file \"" + fileName + "\"");
            }
            DeleteFile(fileName);
        }
    }

    // Finally, give policy a chance to remove things on
    // startup:
    policy.OnInit(commits);

    // Always protect the incoming segmentInfos since
    // sometime it may not be the most recent commit
    Checkpoint(segmentInfos, false);

    startingCommitDeleted = currentCommitPoint.IsDeleted;

    DeleteCommits();
}
/// <summary>
/// Determine index files that are no longer referenced and therefore should
/// be deleted. This is called once (by the writer), and then subsequently
/// we add onto deletable any files that are no longer needed at the point
/// that we create the unused file (eg when merging segments), and we only
/// remove from deletable when a file is successfully deleted.
/// </summary>
/// <exception cref="System.IO.IOException">The directory listing returned null.</exception>
public void FindDeletableFiles()
{
    // Gather all "current" segments:
    System.Collections.Hashtable current = new System.Collections.Hashtable();
    for (int j = 0; j < segmentInfos.Count; j++)
    {
        SegmentInfo segmentInfo = (SegmentInfo)segmentInfos[j];
        current[segmentInfo.name] = segmentInfo;
    }

    // Then go through all files in the Directory that are
    // Lucene index files, and add to deletable if they are
    // not referenced by the current segments info:
    System.String segmentsInfosFileName = segmentInfos.GetCurrentSegmentFileName();
    IndexFileNameFilter filter = IndexFileNameFilter.GetFilter();

    System.String[] files = directory.List();
    if (files == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException.
        throw new System.IO.IOException("cannot read directory " + directory + ": list() returned null");
    }

    for (int i = 0; i < files.Length; i++)
    {
        System.String file = files[i];

        if (!filter.Accept(null, file)
            || file.Equals(segmentsInfosFileName)
            || file.Equals(IndexFileNames.SEGMENTS_GEN))
        {
            continue;
        }

        System.String segmentName;
        System.String extension;

        // First remove any extension:
        int loc = file.IndexOf('.');
        if (loc != -1)
        {
            extension = file.Substring(1 + loc);
            segmentName = file.Substring(0, loc);
        }
        else
        {
            extension = null;
            segmentName = file;
        }

        // Then, remove any generation count.  IndexOf(char, 1) throws
        // for an empty string (a file name starting with '.'), so guard it:
        loc = segmentName.Length > 0 ? segmentName.IndexOf('_', 1) : -1;
        if (loc != -1)
        {
            segmentName = segmentName.Substring(0, loc);
        }

        // A separate norms extension is exactly "s" followed by one or more
        // decimal digits.  NumberStyles.None rejects signs, whitespace and
        // anything else that is not a digit, and TryParse also rejects
        // values that do not fit in an Int32 instead of throwing.
        int field = -1;
        bool isSeparateNorms = extension != null
            && extension.StartsWith("s", System.StringComparison.Ordinal)
            && System.Int32.TryParse(
                extension.Substring(1),
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out field);

        // Delete this file if it's not a "current" segment,
        // or, it is a single index file but there is now a
        // corresponding compound file:
        bool doDelete = false;

        if (!current.ContainsKey(segmentName))
        {
            // Delete if segment is not referenced:
            doDelete = true;
        }
        else
        {
            // OK, segment is referenced, but file may still
            // be orphan'd:
            SegmentInfo info = (SegmentInfo)current[segmentName];

            if (filter.IsCFSFile(file) && info.GetUseCompoundFile())
            {
                // This file is in fact stored in a CFS file for
                // this segment:
                doDelete = true;
            }
            else if ("del".Equals(extension))
            {
                // This is a _segmentName_N.del file:
                if (!file.Equals(info.GetDelFileName()))
                {
                    // If this is a separate .del file, but it
                    // doesn't match the current del filename for
                    // this segment, then delete it:
                    doDelete = true;
                }
            }
            else if (isSeparateNorms)
            {
                // This is a _segmentName_N.sX file:
                if (!file.Equals(info.GetNormFileName(field)))
                {
                    // This is an orphan'd separate norms file:
                    doDelete = true;
                }
            }
            else if ("cfs".Equals(extension) && !info.GetUseCompoundFile())
            {
                // This is a partially written
                // _segmentName.cfs:
                doDelete = true;
            }
        }

        if (doDelete)
        {
            AddDeletableFile(file);
            if (infoStream != null)
            {
                infoStream.WriteLine("IndexFileDeleter: file \"" + file + "\" is unreferenced in index and will be deleted on next commit");
            }
        }
    }
}