public static void ReadLegacyInfos(SegmentInfos infos, Directory directory, IndexInput input, int format) { infos.Version = input.ReadLong(); // read version infos.Counter = input.ReadInt(); // read counter Lucene3xSegmentInfoReader reader = new Lucene3xSegmentInfoReader(); for (int i = input.ReadInt(); i > 0; i--) // read segmentInfos { SegmentCommitInfo siPerCommit = reader.ReadLegacySegmentInfo(directory, format, input); SegmentInfo si = siPerCommit.Info; if (si.Version == null) { // Could be a 3.0 - try to open the doc stores - if it fails, it's a // 2.x segment, and an IndexFormatTooOldException will be thrown, // which is what we want. Directory dir = directory; if (Lucene3xSegmentInfoFormat.GetDocStoreOffset(si) != -1) { if (Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si)) { dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(Lucene3xSegmentInfoFormat.GetDocStoreSegment(si), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), IOContext.READONCE, false); } } else if (si.UseCompoundFile) { dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READONCE, false); } try { Lucene3xStoredFieldsReader.CheckCodeVersion(dir, Lucene3xSegmentInfoFormat.GetDocStoreSegment(si)); } finally { // If we opened the directory, close it if (dir != directory) { dir.Dispose(); } } // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next // time the segment is read, its version won't be null and we won't // need to open FieldsReader every time for each such segment. si.Version = "3.0"; } else if (si.Version.Equals("2.x")) { // If it's a 3x index touched by 3.1+ code, then segments record their // version, whether they are 2.x ones or not. We detect that and throw // appropriate exception. throw new IndexFormatTooOldException("segment " + si.Name + " in resource " + input, si.Version); } infos.Add(siPerCommit); } infos.UserData = input.ReadStringStringMap(); }
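// Hedged sketch (illustration only, not part of the codebase): the on-disk order of the
// legacy segments_N header that ReadLegacyInfos above consumes. "DescribeLegacyHeader" is
// a hypothetical helper name; the read order is taken directly from the method above.
private static void DescribeLegacyHeader(IndexInput input)
{
    long version = input.ReadLong();   // index version stamp
    int counter = input.ReadInt();     // counter used to allocate new segment names
    int segCount = input.ReadInt();    // number of per-segment records that follow
    // segCount legacy per-segment records come next (parsed by Lucene3xSegmentInfoReader),
    // followed by a string->string user-data map (input.ReadStringStringMap()).
}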
public /*protected internal*/ override System.Object DoBody(System.String segmentFileName) { var infos = new SegmentInfos(); infos.Read(directory, segmentFileName); if (readOnly) return new ReadOnlyDirectoryReader(directory, infos, deletionPolicy, termInfosIndexDivisor); else return new DirectoryReader(directory, infos, deletionPolicy, false, termInfosIndexDivisor); }
internal int[] starts; // used for binary search of mapped docID #endregion Fields #region Constructors public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) { this.docMaps = docMaps; SegmentInfo firstSegment = merge.segments.Info(0); int i = 0; while (true) { SegmentInfo info = infos.Info(i); if (info.Equals(firstSegment)) break; minDocID += info.docCount; i++; } int numDocs = 0; for (int j = 0; j < docMaps.Length; i++, j++) { numDocs += infos.Info(i).docCount; System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j))); } maxDocID = minDocID + numDocs; starts = new int[docMaps.Length]; newStarts = new int[docMaps.Length]; starts[0] = minDocID; newStarts[0] = minDocID; for (i = 1; i < docMaps.Length; i++) { int lastDocCount = merge.segments.Info(i - 1).docCount; starts[i] = starts[i - 1] + lastDocCount; newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1]; } docShift = numDocs - mergedDocCount; // There are rare cases when docShift is 0. It happens // if you try to delete a docID that's out of bounds, // because the SegmentReader still allocates deletedDocs // and pretends it has deletions ... so we can't make // this assert here // assert docShift > 0; // Make sure it all adds up: System.Diagnostics.Debug.Assert(docShift == maxDocID - (newStarts[docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts[docMaps.Length - 1])); }
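// Worked example (illustrative numbers only) of the bookkeeping above, for a merge of
// three segments with docCounts {10, 20, 30} and delCounts {1, 2, 3} whose first
// segment begins at docID 100:
int[] exampleDocCounts = { 10, 20, 30 };
int[] exampleDelCounts = { 1, 2, 3 };
int exampleMinDocID = 100;
int exampleNumDocs = 60;                    // 10 + 20 + 30; maxDocID = 100 + 60 = 160
int exampleMergedDocCount = 54;             // 60 docs minus the 6 deletions squeezed out
int[] exampleStarts = { 100, 110, 130 };    // old docID starts, deletions ignored
int[] exampleNewStarts = { 100, 109, 127 }; // new starts, shifted left by prior delCounts
int exampleDocShift = 6;                    // numDocs - mergedDocCount; also equals
                                            // 160 - (127 + 30 - 3), matching the assert above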
/// <summary>Walk through all files referenced by the current /// segmentInfos and ask the Directory to sync each file, /// if it wasn't already. If that succeeds, then we /// prepare a new segments_N file but do not fully commit /// it. /// </summary> private void StartCommit(long sizeInBytes, IDictionary<string, string> commitUserData) { System.Diagnostics.Debug.Assert(TestPoint("startStartCommit")); // TODO: as of LUCENE-2095, we can simplify this method, // since only 1 thread can be in here at once if (hitOOM) { throw new System.SystemException("this writer hit an OutOfMemoryError; cannot commit"); } try { if (infoStream != null) Message("startCommit(): start sizeInBytes=" + sizeInBytes); SegmentInfos toSync = null; long myChangeCount; lock (this) { // Wait for any running addIndexes to complete // first, then block any from running until we've // copied the segmentInfos we intend to sync: BlockAddIndexes(false); // On commit the segmentInfos must never // reference a segment in another directory: System.Diagnostics.Debug.Assert(!HasExternalSegments()); try { System.Diagnostics.Debug.Assert(lastCommitChangeCount <= changeCount); myChangeCount = changeCount; if (changeCount == lastCommitChangeCount) { if (infoStream != null) Message(" skip startCommit(): no changes pending"); return ; } // First, we clone & incref the segmentInfos we intend // to sync, then, without locking, we sync() each file // referenced by toSync, in the background. Multiple // threads can be doing this at once, if say a large // merge and a small merge finish at the same time: if (infoStream != null) Message("startCommit index=" + SegString(segmentInfos) + " changeCount=" + changeCount); readerPool.Commit(); // It's possible another flush (that did not close // the open doc stores) snuck in after the flush we // just did, so we remove any tail segments // referencing the open doc store from the // SegmentInfos we are about to sync (the main // SegmentInfos will keep them): toSync = (SegmentInfos) segmentInfos.Clone(); string dss = docWriter.DocStoreSegment; if (dss != null) { while (true) { String dss2 = toSync.Info(toSync.Count - 1).DocStoreSegment; if (dss2 == null || !dss2.Equals(dss)) { break; } toSync.RemoveAt(toSync.Count - 1); changeCount++; } } if (commitUserData != null) toSync.UserData = commitUserData; deleter.IncRef(toSync, false); ICollection<string> files = toSync.Files(directory, false); foreach(string fileName in files) { System.Diagnostics.Debug.Assert(directory.FileExists(fileName), "file " + fileName + " does not exist"); // If this trips it means we are missing a call to // .checkpoint somewhere, because by the time we // are called, deleter should know about every // file referenced by the current head // segmentInfos: System.Diagnostics.Debug.Assert(deleter.Exists(fileName)); } } finally { ResumeAddIndexes(); } } System.Diagnostics.Debug.Assert(TestPoint("midStartCommit")); bool setPending = false; try { // Loop until all files toSync references are sync'd: while (true) { ICollection<string> pending = new List<string>(); IEnumerator<string> it = toSync.Files(directory, false).GetEnumerator(); while (it.MoveNext()) { string fileName = it.Current; if (StartSync(fileName, pending)) { bool success = false; try { // Because we incRef'd this commit point, above, // the file had better exist: System.Diagnostics.Debug.Assert(directory.FileExists(fileName), "file '" + fileName + "' does not exist dir=" + directory); if (infoStream != null) Message("now sync " + fileName); directory.Sync(fileName);
success = true; } finally { FinishSync(fileName, success); } } } // All files that I require are either synced or being // synced by other threads. If they are being synced, // we must at this point block until they are done. // If this returns false, that means an error in // another thread resulted in failing to actually // sync one of our files, so we repeat: if (WaitForAllSynced(pending)) break; } System.Diagnostics.Debug.Assert(TestPoint("midStartCommit2")); lock (this) { // If someone saved a newer version of segments file // since I first started syncing my version, I can // safely skip saving myself since I've been // superseded: while (true) { if (myChangeCount <= lastCommitChangeCount) { if (infoStream != null) { Message("sync superseded by newer infos"); } break; } else if (pendingCommit == null) { // My turn to commit if (segmentInfos.Generation > toSync.Generation) toSync.UpdateGeneration(segmentInfos); bool success = false; try { // Exception here means nothing is prepared // (this method unwinds everything it did on // an exception) try { toSync.PrepareCommit(directory); } finally { // Have our master segmentInfos record the // generations we just prepared. We do this // on error or success so we don't // double-write a segments_N file. segmentInfos.UpdateGeneration(toSync); } System.Diagnostics.Debug.Assert(pendingCommit == null); setPending = true; pendingCommit = toSync; pendingCommitChangeCount = (uint) myChangeCount; success = true; } finally { if (!success && infoStream != null) Message("hit exception committing segments file"); } break; } else { // Must wait for other commit to complete DoWait(); } } } if (infoStream != null) Message("done all syncs"); System.Diagnostics.Debug.Assert(TestPoint("midStartCommitSuccess")); } finally { lock (this) { if (!setPending) deleter.DecRef(toSync); } } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "startCommit"); } System.Diagnostics.Debug.Assert(TestPoint("finishStartCommit")); }
/// <summary>Forcefully clear changes for the specified segments, /// and remove from the pool. This is called on successful merge. /// </summary> internal virtual void Clear(SegmentInfos infos) { lock (this) { if (infos == null) { foreach(KeyValuePair<SegmentInfo, SegmentReader> ent in readerMap) { ent.Value.hasChanges = false; } } else { foreach(SegmentInfo info in infos) { if (readerMap.ContainsKey(info)) { readerMap[info].hasChanges = false; } } } } }
/// <summary> Merges all segments from an array of indexes into this /// index. /// /// <p/>This may be used to parallelize batch indexing. A large document /// collection can be broken into sub-collections. Each sub-collection can be /// indexed in parallel, on a different thread, process or machine. The /// complete index can then be created by merging sub-collection indexes /// with this method. /// /// <p/><b>NOTE:</b> the index in each Directory must not be /// changed (opened by a writer) while this method is /// running. This method does not acquire a write lock in /// each input Directory, so it is up to the caller to /// enforce this. /// /// <p/><b>NOTE:</b> while this is running, any attempts to /// add or delete documents (with another thread) will be /// paused until this method completes. /// /// <p/>This method is transactional in how Exceptions are /// handled: it does not commit a new segments_N file until /// all indexes are added. This means if an Exception /// occurs (for example disk full), then either no indexes /// will have been added or they all will have been.<p/> /// /// <p/>Note that this requires temporary free space in the /// Directory up to 2X the sum of all input indexes /// (including the starting index). If readers/searchers /// are open against the starting index, then temporary /// free space required will be higher by the size of the /// starting index (see <see cref="Optimize()" /> for details). /// <p/> /// /// <p/>Once this completes, the final size of the index /// will be less than the sum of all input index sizes /// (including the starting index). It could be quite a /// bit smaller (if there were many pending deletes) or /// just slightly smaller.<p/> /// /// <p/> /// This requires this index not be among those to be added. /// /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError /// you should immediately close the writer. See <a /// href="#OOME">above</a> for details.<p/> /// /// </summary> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public virtual void AddIndexesNoOptimize(params Directory[] dirs) { EnsureOpen(); NoDupDirs(dirs); // Do not allow add docs or deletes while we are running: docWriter.PauseAllThreads(); try { if (infoStream != null) Message("flush at addIndexesNoOptimize"); Flush(true, false, true); bool success = false; StartTransaction(false); try { int docCount = 0; lock (this) { EnsureOpen(); for (int i = 0; i < dirs.Length; i++) { if (directory == dirs[i]) { // cannot add this index: segments may be deleted in merge before added throw new System.ArgumentException("Cannot add this index to itself"); } SegmentInfos sis = new SegmentInfos(); // read infos from dir sis.Read(dirs[i]); for (int j = 0; j < sis.Count; j++) { SegmentInfo info = sis.Info(j); System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info), "dup info dir=" + info.dir + " name=" + info.name); docCount += info.docCount; segmentInfos.Add(info); // add each info } } } // Notify DocumentsWriter that the flushed count just increased docWriter.UpdateFlushedDocCount(docCount); MaybeMerge(); EnsureOpen(); // If after merging there remain segments in the index // that are in a different directory, just copy these // over into our index. This is necessary (before // finishing the transaction) to avoid leaving the // index in an unusable (inconsistent) state. 
ResolveExternalSegments(); EnsureOpen(); success = true; } finally { if (success) { CommitTransaction(); } else { RollbackTransaction(); } } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "addIndexesNoOptimize"); } finally { if (docWriter != null) { docWriter.ResumeAllThreads(); } } }
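// Hedged usage sketch: merging two independently built sub-indexes into a main index.
// Directory and Analyzer setup (mainDir, subDir1, subDir2, analyzer) is assumed; the
// constructor overload follows the 2.9-era Lucene.NET API used elsewhere in this code.
IndexWriter writer = new IndexWriter(mainDir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
try
{
    // Transactional: either both sub-indexes are added, or neither is.
    writer.AddIndexesNoOptimize(subDir1, subDir2);
    writer.Commit();
}
finally
{
    writer.Close();
}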
/* * Commits the transaction. This will write the new * segments file and remove any pending deletions we have * accumulated during the transaction */ private void CommitTransaction() { lock (this) { if (infoStream != null) Message("now commit transaction"); // Give deleter a chance to remove files now: Checkpoint(); // Remove the incRef we did in startTransaction. deleter.DecRef(localRollbackSegmentInfos); localRollbackSegmentInfos = null; System.Diagnostics.Debug.Assert(!HasExternalSegments()); FinishAddIndexes(); } }
/* * Begin a transaction. During a transaction, any segment * merges that happen (or ram segments flushed) will not * write a new segments file and will not remove any files * that were present at the start of the transaction. You * must make a matched (try/finally) call to * commitTransaction() or rollbackTransaction() to finish * the transaction. * * Note that buffered documents and delete terms are not handled * within the transactions, so they must be flushed before the * transaction is started. */ private void StartTransaction(bool haveReadLock) { lock (this) { bool success = false; try { if (infoStream != null) Message("now start transaction"); System.Diagnostics.Debug.Assert(docWriter.GetNumBufferedDeleteTerms() == 0 , "calling startTransaction with buffered delete terms not supported: numBufferedDeleteTerms=" + docWriter.GetNumBufferedDeleteTerms()); System.Diagnostics.Debug.Assert(docWriter.NumDocsInRAM == 0 , "calling startTransaction with buffered documents not supported: numDocsInRAM=" + docWriter.NumDocsInRAM); EnsureOpen(); // If a transaction is trying to roll back (because // addIndexes hit an exception) then wait here until // that's done: lock (this) { while (stopMerges) DoWait(); } success = true; } finally { // Release the read lock if our caller held it, on // hitting an exception if (!success && haveReadLock) ReleaseRead(); } if (haveReadLock) { UpgradeReadToWrite(); } else { AcquireWrite(); } success = false; try { localRollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone(); System.Diagnostics.Debug.Assert(!HasExternalSegments()); localFlushedDocCount = docWriter.GetFlushedDocCount(); // IncRef all files referenced by the current segmentInfos to "protect" them // from deletion in case we need to roll back; commitTransaction or // rollbackTransaction will DecRef this: deleter.IncRef(segmentInfos, false); success = true; } finally { if (!success) FinishAddIndexes(); } } }
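// Illustrative sketch of the matched-call pattern the comment above requires; this is
// how AddIndexesNoOptimize (shown earlier) drives the transaction internally.
StartTransaction(false);
bool txSuccess = false;
try
{
    // ... mutate segmentInfos, e.g. add segments read from other directories ...
    txSuccess = true;
}
finally
{
    if (txSuccess)
        CommitTransaction();   // publishes the new segments file
    else
        RollbackTransaction(); // restores the segments present at StartTransaction
}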
private void Init(Directory d, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy, int maxFieldLength, DocumentsWriter.IndexingChain indexingChain, IndexCommit commit) { directory = d; analyzer = a; SetMessageID(defaultInfoStream); this.maxFieldLength = maxFieldLength; if (indexingChain == null) indexingChain = DocumentsWriter.DefaultIndexingChain; if (create) { // Clear the write lock in case it's leftover: directory.ClearLock(WRITE_LOCK_NAME); } Lock writeLock = directory.MakeLock(WRITE_LOCK_NAME); if (!writeLock.Obtain(writeLockTimeout)) // obtain write lock { throw new LockObtainFailedException("Index locked for write: " + writeLock); } this.writeLock = writeLock; // save it bool success = false; try { if (create) { // Try to read first. This is to allow create // against an index that's currently open for // searching. In this case we write the next // segments_N file with no segments: bool doCommit; try { segmentInfos.Read(directory); segmentInfos.Clear(); doCommit = false; } catch (System.IO.IOException) { // Likely this means it's a fresh directory doCommit = true; } if (doCommit) { // Only commit if there is no segments file // in this dir already. segmentInfos.Commit(directory); synced.UnionWith(segmentInfos.Files(directory, true)); } else { // Record that we have a change (zero out all // segments) pending: changeCount++; } } else { segmentInfos.Read(directory); if (commit != null) { // Swap out all segments, but, keep metadata in // SegmentInfos, like version & generation, to // preserve write-once. This is important if // readers are open against the future commit // points. if (commit.Directory != directory) throw new System.ArgumentException("IndexCommit's directory doesn't match my directory"); SegmentInfos oldInfos = new SegmentInfos(); oldInfos.Read(directory, commit.SegmentsFileName); segmentInfos.Replace(oldInfos); changeCount++; if (infoStream != null) Message("init: loaded commit \"" + commit.SegmentsFileName + "\""); } // We assume that this segments_N was previously // properly sync'd: synced.UnionWith(segmentInfos.Files(directory, true)); } SetRollbackSegmentInfos(segmentInfos); docWriter = new DocumentsWriter(directory, this, indexingChain); docWriter.SetInfoStream(infoStream); docWriter.SetMaxFieldLength(maxFieldLength); // Default deleter (for backwards compatibility) is // KeepOnlyLastCommitDeleter: deleter = new IndexFileDeleter(directory, deletionPolicy == null?new KeepOnlyLastCommitDeletionPolicy():deletionPolicy, segmentInfos, infoStream, docWriter, synced); if (deleter.startingCommitDeleted) // Deletion policy deleted the "head" commit point. // We have to mark ourself as changed so that if we // are closed w/o any further changes we write a new // segments_N file. changeCount++; PushMaxBufferedDocs(); if (infoStream != null) { Message("init: create=" + create); MessageState(); } success = true; } finally { if (!success) { if (infoStream != null) { Message("init: hit exception on init; releasing write lock"); } try { writeLock.Release(); } catch (Exception) { // don't mask the original exception } writeLock = null; } } }
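// Hedged usage sketch of the initialization path above: opening a writer with an explicit
// deletion policy. Passing null instead falls back to KeepOnlyLastCommitDeletionPolicy,
// as the default-deleter comment above notes. Identifiers dir and analyzer are assumed.
IndexWriter writer = new IndexWriter(dir, analyzer, false /* create */,
    new KeepOnlyLastCommitDeletionPolicy(), IndexWriter.MaxFieldLength.UNLIMITED);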
/// <summary>Called whenever a merge has completed and the merged segments had deletions </summary> internal void RemapDeletes(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount) { lock (this) { if (docMaps == null) // The merged segments had no deletes so docIDs did not change and we have nothing to do return ; MergeDocIDRemapper mapper = new MergeDocIDRemapper(infos, docMaps, delCounts, merge, mergeDocCount); deletesInRAM.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); deletesFlushed.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); flushedDocCount -= mapper.docShift; } }
/// <summary>Returns a <see cref="Status" /> instance detailing /// the state of the index. /// /// </summary> /// <param name="onlySegments">list of specific segment names to check /// /// <p/>As this method checks every byte in the specified /// segments, on a large index it can take quite a long /// time to run. /// /// <p/><b>WARNING</b>: make sure /// you only call this when the index is not opened by any /// writer. /// </param> public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments) { System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { sis.Read(dir); } catch (System.Exception t) { Msg("ERROR: could not read any segments file in directory"); result.missingSegments = true; if (infoStream != null) infoStream.WriteLine(t.StackTrace); return result; } int numSegments = sis.Count; var segmentsFileName = sis.GetCurrentSegmentFileName(); IndexInput input = null; try { input = dir.OpenInput(segmentsFileName); } catch (System.Exception t) { Msg("ERROR: could not open segments file in directory"); if (infoStream != null) infoStream.WriteLine(t.StackTrace); result.cantOpenSegments = true; return result; } int format = 0; try { format = input.ReadInt(); } catch (System.Exception t) { Msg("ERROR: could not read segment file version in directory"); if (infoStream != null) infoStream.WriteLine(t.StackTrace); result.missingSegmentVersion = true; return result; } finally { if (input != null) input.Close(); } System.String sFormat = ""; bool skip = false; if (format == SegmentInfos.FORMAT) sFormat = "FORMAT [Lucene Pre-2.1]"; if (format == SegmentInfos.FORMAT_LOCKLESS) sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; else { if (format == SegmentInfos.FORMAT_CHECKSUM) sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_DEL_COUNT) sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_HAS_PROX) sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_USER_DATA) sFormat = "FORMAT_USER_DATA [Lucene 2.9]"; else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; else if (format < SegmentInfos.CURRENT_FORMAT) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } } result.segmentsFileName = segmentsFileName; result.numSegments = numSegments; result.segmentFormat = sFormat; result.userData = sis.UserData; System.String userDataString; if (sis.UserData.Count > 0) { userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData); } else { userDataString = ""; } Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString); if (onlySegments != null) { result.partial = true; if (infoStream != null) infoStream.Write("\nChecking only these segments:"); foreach(string s in onlySegments) { if (infoStream != null) { infoStream.Write(" " + s); } } result.segmentsChecked.AddRange(onlySegments); Msg(":"); } if (skip) { Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of 
Lucene; exiting"); result.toolOutOfDate = true; return result; } result.newSegments = (SegmentInfos) sis.Clone(); result.newSegments.Clear(); for (int i = 0; i < numSegments; i++) { SegmentInfo info = sis.Info(i); if (onlySegments != null && !onlySegments.Contains(info.name)) continue; var segInfoStat = new Status.SegmentInfoStatus(); result.segmentInfos.Add(segInfoStat); Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); segInfoStat.name = info.name; segInfoStat.docCount = info.docCount; int toLoseDocCount = info.docCount; SegmentReader reader = null; try { Msg(" compound=" + info.GetUseCompoundFile()); segInfoStat.compound = info.GetUseCompoundFile(); Msg(" hasProx=" + info.HasProx); segInfoStat.hasProx = info.HasProx; Msg(" numFiles=" + info.Files().Count); segInfoStat.numFiles = info.Files().Count; Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) })); segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0); IDictionary<string, string> diagnostics = info.Diagnostics; segInfoStat.diagnostics = diagnostics; if (diagnostics.Count > 0) { Msg(" diagnostics = " + CollectionsHelper.CollectionToString(diagnostics)); } int docStoreOffset = info.DocStoreOffset; if (docStoreOffset != - 1) { Msg(" docStoreOffset=" + docStoreOffset); segInfoStat.docStoreOffset = docStoreOffset; Msg(" docStoreSegment=" + info.DocStoreSegment); segInfoStat.docStoreSegment = info.DocStoreSegment; Msg(" docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile); segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile; } System.String delFileName = info.GetDelFileName(); if (delFileName == null) { Msg(" no deletions"); segInfoStat.hasDeletions = false; } else { Msg(" has deletions [delFileName=" + delFileName + "]"); segInfoStat.hasDeletions = true; segInfoStat.deletionsFileName = delFileName; } if (infoStream != null) infoStream.Write(" test: open reader........."); reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); segInfoStat.openReaderPassed = true; int numDocs = reader.NumDocs(); toLoseDocCount = numDocs; if (reader.HasDeletions) { if (reader.deletedDocs.Count() != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (reader.deletedDocs.Count() > reader.MaxDoc) { throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (info.docCount - numDocs != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } segInfoStat.numDeleted = info.docCount - numDocs; Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); } else { if (info.GetDelCount() != 0) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } Msg("OK"); } if (reader.MaxDoc != info.docCount) throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount); // Test getFieldNames() if (infoStream != null) { infoStream.Write(" test: fields.............."); } ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL); Msg("OK [" + fieldNames.Count + " fields]"); segInfoStat.numFields = fieldNames.Count; // Test Field Norms segInfoStat.fieldNormStatus = 
TestFieldNorms(fieldNames, reader); // Test the Term Index segInfoStat.termIndexStatus = TestTermIndex(info, reader); // Test Stored Fields segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf); // Test Term Vectors segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf); // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly if (segInfoStat.fieldNormStatus.error != null) { throw new SystemException("Field Norm test failed"); } else if (segInfoStat.termIndexStatus.error != null) { throw new SystemException("Term Index test failed"); } else if (segInfoStat.storedFieldStatus.error != null) { throw new SystemException("Stored Field test failed"); } else if (segInfoStat.termVectorStatus.error != null) { throw new System.SystemException("Term Vector test failed"); } Msg(""); } catch (System.Exception t) { Msg("FAILED"); const string comment = "fixIndex() would remove reference to this segment"; Msg(" WARNING: " + comment + "; full exception:"); if (infoStream != null) infoStream.WriteLine(t.StackTrace); Msg(""); result.totLoseDocCount += toLoseDocCount; result.numBadSegments++; continue; } finally { if (reader != null) reader.Close(); } // Keeper result.newSegments.Add((SegmentInfo)info.Clone()); } if (0 == result.numBadSegments) { result.clean = true; Msg("No problems were detected with this index.\n"); } else Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); return result; }
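// Hedged usage sketch: running the full check above and inspecting the returned Status.
// Passing null for onlySegments checks every segment; "dir" is an assumed Directory.
CheckIndex checker = new CheckIndex(dir);
CheckIndex.Status status = checker.CheckIndex_Renamed_Method(null);
if (!status.clean)
{
    Console.WriteLine(status.numBadSegments + " broken segments; fixing would lose " +
        status.totLoseDocCount + " documents");
}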
// Remaps all buffered deletes based on a completed // merge internal virtual void Remap(MergeDocIDRemapper mapper, SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount) { lock (this) { IDictionary<Term, Num> newDeleteTerms; // Remap delete-by-term if (terms.Count > 0) { if (doTermSort) { newDeleteTerms = new SortedDictionary<Term, Num>(); } else { newDeleteTerms = new HashMap<Term, Num>(); } foreach(var entry in terms) { Num num = entry.Value; newDeleteTerms[entry.Key] = new Num(mapper.Remap(num.GetNum())); } } else newDeleteTerms = null; // Remap delete-by-docID List<int> newDeleteDocIDs; if (docIDs.Count > 0) { newDeleteDocIDs = new List<int>(docIDs.Count); foreach(int num in docIDs) { newDeleteDocIDs.Add(mapper.Remap(num)); } } else newDeleteDocIDs = null; // Remap delete-by-query HashMap<Query, int> newDeleteQueries; if (queries.Count > 0) { newDeleteQueries = new HashMap<Query, int>(queries.Count); foreach(var entry in queries) { int num = entry.Value; newDeleteQueries[entry.Key] = mapper.Remap(num); } } else newDeleteQueries = null; if (newDeleteTerms != null) terms = newDeleteTerms; if (newDeleteDocIDs != null) docIDs = newDeleteDocIDs; if (newDeleteQueries != null) queries = newDeleteQueries; } }
internal ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor):base(writer, infos, termInfosIndexDivisor) { }
internal ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, System.Collections.Generic.IDictionary<string, byte[]> oldNormsCache, bool doClone, int termInfosIndexDivisor) : base(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor) { }
internal ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor):base(directory, sis, deletionPolicy, true, termInfosIndexDivisor) { }
private long getEpoch(Directory taxoDir) { SegmentInfos infos = new SegmentInfos(); infos.Read(taxoDir); return Convert.ToInt64(infos.UserData[DirectoryTaxonomyWriter.INDEX_EPOCH]); }
/// <summary> /// Reads the commit data from a Directory. </summary> private static IDictionary<string, string> ReadCommitData(Directory dir) { SegmentInfos infos = new SegmentInfos(); infos.Read(dir); return infos.UserData; }
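// Hedged round-trip sketch: user data supplied at commit time is exactly what
// ReadCommitData recovers later. The Commit(IDictionary) overload is the 2.9-era API;
// "writer" is assumed to be an open IndexWriter over the same Directory "dir".
var commitData = new Dictionary<string, string> { { "index.epoch", "42" } };
writer.Commit(commitData);
IDictionary<string, string> readBack = ReadCommitData(dir); // contains {"index.epoch": "42"}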
internal bool ApplyDeletes(SegmentInfos infos) { lock (this) { if (!HasDeletes()) return false; if (infoStream != null) Message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " + deletesFlushed.docIDs.Count + " deleted docIDs and " + deletesFlushed.queries.Count + " deleted queries on " + infos.Count + " segments."); int infosEnd = infos.Count; int docStart = 0; bool any = false; for (int i = 0; i < infosEnd; i++) { // Make sure we never attempt to apply deletes to a // segment in an external dir System.Diagnostics.Debug.Assert(infos.Info(i).dir == directory); SegmentReader reader = writer.readerPool.Get(infos.Info(i), false); try { any |= ApplyDeletes(reader, docStart); docStart += reader.MaxDoc; } finally { writer.readerPool.Release(reader); } } deletesFlushed.Clear(); return any; } }
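// Illustrative walk-through of the docStart bookkeeping above: buffered deletes record
// absolute docIDs, so each segment's reader is handed the base offset where its docIDs
// begin. The MaxDoc values below are made up for the example.
int exampleDocStart = 0;
int[] exampleMaxDocs = { 5, 7, 4 };
foreach (int maxDoc in exampleMaxDocs)
{
    // ApplyDeletes(reader, docStart) is called with bases 0, then 5, then 12
    exampleDocStart += maxDoc;
}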
private void SetRollbackSegmentInfos(SegmentInfos infos) { lock (this) { rollbackSegmentInfos = (SegmentInfos) infos.Clone(); System.Diagnostics.Debug.Assert(!rollbackSegmentInfos.HasExternalSegments(directory)); rollbackSegments = new HashMap<SegmentInfo, int?>(); int size = rollbackSegmentInfos.Count; for (int i = 0; i < size; i++) rollbackSegments[rollbackSegmentInfos.Info(i)] = i; } }
/// <summary> Determine what set of merge operations is now necessary on the index. /// <see cref="IndexWriter" /> calls this whenever there is a change to the segments. /// This call is always synchronized on the <see cref="IndexWriter" /> instance so /// only one thread at a time will call this method. /// /// </summary> /// <param name="segmentInfos">the total set of segments in the index /// </param> public abstract MergeSpecification FindMerges(SegmentInfos segmentInfos);
/* * Rolls back the transaction and restores state to where * we were at the start. */ private void RollbackTransaction() { lock (this) { if (infoStream != null) Message("now rollback transaction"); if (docWriter != null) { docWriter.SetFlushedDocCount(localFlushedDocCount); } // Must finish merges before rolling back segmentInfos // so merges don't hit exceptions on trying to commit // themselves, don't get files deleted out from under // them, etc: FinishMerges(false); // Keep the same segmentInfos instance but replace all // of its SegmentInfo instances. This is so the next // attempt to commit using this instance of IndexWriter // will always write to a new generation ("write once"). segmentInfos.Clear(); segmentInfos.AddRange(localRollbackSegmentInfos); localRollbackSegmentInfos = null; // This must come after we rollback segmentInfos, so // that if a commit() kicks off it does not see the // segmentInfos with external segments FinishAddIndexes(); // Ask deleter to locate unreferenced files we had // created & remove them: deleter.Checkpoint(segmentInfos, false); // Remove the incRef we did in startTransaction: deleter.DecRef(segmentInfos); // Also ask deleter to remove any newly created files // that were never incref'd; this "garbage" is created // when a merge kicks off but aborts part way through // before it had a chance to incRef the files it had // partially created deleter.Refresh(); System.Threading.Monitor.PulseAll(this); System.Diagnostics.Debug.Assert(!HasExternalSegments()); } }
/// <summary> Determine what set of merge operations is necessary in order to optimize /// the index. <see cref="IndexWriter" /> calls this when its /// <see cref="IndexWriter.Optimize()" /> method is called. This call is always /// synchronized on the <see cref="IndexWriter" /> instance so only one thread at a /// time will call this method. /// /// </summary> /// <param name="segmentInfos">the total set of segments in the index /// </param> /// <param name="maxSegmentCount">requested maximum number of segments in the index (currently this /// is always 1) /// </param> /// <param name="segmentsToOptimize">contains the specific SegmentInfo instances that must be merged /// away. This may be a subset of all SegmentInfos. /// </param> public abstract MergeSpecification FindMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, ISet<SegmentInfo> segmentsToOptimize);
private void RollbackInternal() { bool success = false; if (infoStream != null) { Message("rollback"); } docWriter.PauseAllThreads(); try { FinishMerges(false); // Must pre-close these two, in case they increment // changeCount so that we can then set it to false // before calling closeInternal mergePolicy.Close(); mergeScheduler.Close(); lock (this) { if (pendingCommit != null) { pendingCommit.RollbackCommit(directory); deleter.DecRef(pendingCommit); pendingCommit = null; System.Threading.Monitor.PulseAll(this); } // Keep the same segmentInfos instance but replace all // of its SegmentInfo instances. This is so the next // attempt to commit using this instance of IndexWriter // will always write to a new generation ("write // once"). segmentInfos.Clear(); segmentInfos.AddRange(rollbackSegmentInfos); System.Diagnostics.Debug.Assert(!HasExternalSegments()); docWriter.Abort(); System.Diagnostics.Debug.Assert(TestPoint("rollback before checkpoint")); // Ask deleter to locate unreferenced files & remove // them: deleter.Checkpoint(segmentInfos, false); deleter.Refresh(); } // Don't bother saving any changes in our segmentInfos readerPool.Clear(null); lastCommitChangeCount = changeCount; success = true; } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "rollbackInternal"); } finally { lock (this) { if (!success) { docWriter.ResumeAllThreads(); closing = false; System.Threading.Monitor.PulseAll(this); if (infoStream != null) Message("hit exception during rollback"); } } } CloseInternal(false); }
/// <summary> Determine what set of merge operations is necessary in order to expunge all /// deletes from the index. /// /// </summary> /// <param name="segmentInfos">the total set of segments in the index /// </param> public abstract MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos);
private void FinishCommit() { lock (this) { if (pendingCommit != null) { try { if (infoStream != null) Message("commit: pendingCommit != null"); pendingCommit.FinishCommit(directory); if (infoStream != null) Message("commit: wrote segments file \"" + pendingCommit.GetCurrentSegmentFileName() + "\""); lastCommitChangeCount = pendingCommitChangeCount; segmentInfos.UpdateGeneration(pendingCommit); segmentInfos.UserData = pendingCommit.UserData; SetRollbackSegmentInfos(pendingCommit); deleter.Checkpoint(pendingCommit, true); } finally { deleter.DecRef(pendingCommit); pendingCommit = null; System.Threading.Monitor.PulseAll(this); } } else if (infoStream != null) { Message("commit: pendingCommit == null; skip"); } if (infoStream != null) { Message("commit: done"); } } }
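// Hedged sketch of the two-phase commit the methods above implement, seen from the
// public API of this era: PrepareCommit writes (but does not publish) the next
// segments_N file, and Commit finishes the pending commit via FinishCommit.
writer.PrepareCommit();
try
{
    // ... make any application-side state durable first ...
    writer.Commit();   // publishes the segments_N prepared above
}
catch (System.Exception)
{
    writer.Rollback(); // discards the prepared-but-unpublished commit
    throw;
}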
/// <summary> Returns true if a newly flushed (not from merge) /// segment should use the compound file format. /// </summary> public abstract bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
private System.String SegString(SegmentInfos infos) { lock (this) { System.Text.StringBuilder buffer = new System.Text.StringBuilder(); int count = infos.Count; for (int i = 0; i < count; i++) { if (i > 0) { buffer.Append(' '); } SegmentInfo info = infos.Info(i); buffer.Append(info.SegString(directory)); if (info.dir != directory) buffer.Append("**"); } return buffer.ToString(); } }
/// <summary> Returns true if the doc store files should use the /// compound file format. /// </summary> public abstract bool UseCompoundDocStore(SegmentInfos segments);
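// Hedged sketch: a minimal MergePolicy that never merges, wiring up the five abstract
// hooks declared above. The base-class constructor and Close signatures are assumed
// from the 2.9-era MergePolicy and may differ slightly across versions.
internal class NoOpMergePolicy : MergePolicy
{
    public NoOpMergePolicy(IndexWriter writer) : base(writer) { }

    public override MergeSpecification FindMerges(SegmentInfos segmentInfos)
    { return null; } // null means "no merges are necessary"

    public override MergeSpecification FindMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, ISet<SegmentInfo> segmentsToOptimize)
    { return null; }

    public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
    { return null; }

    public override bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment)
    { return false; }

    public override bool UseCompoundDocStore(SegmentInfos segments)
    { return false; }

    public override void Close() { }
}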
internal virtual bool NrtIsCurrent(SegmentInfos infos) { lock (this) { if (!infos.Equals(segmentInfos)) { // if any structural changes (new segments), we are // stale return false; } else if (infos.Generation != segmentInfos.Generation) { // if any commit took place since we were opened, we // are stale return false; } else { return !docWriter.AnyChanges; } } }
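// Hedged usage sketch of the staleness check above from the reader side: a near-real-time
// reader obtained from the writer answers IsCurrent() through NrtIsCurrent, and Reopen()
// returns the same instance while nothing has changed.
IndexReader r = writer.GetReader();  // near-real-time reader
// ... later ...
if (!r.IsCurrent())
{
    IndexReader r2 = r.Reopen();
    if (r2 != r)
    {
        r.Close();  // a newer view exists; switch to it
        r = r2;
    }
}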
public OneMerge(SegmentInfos segments, bool useCompoundFile) { if (0 == segments.Count) throw new ArgumentException("segments must include at least one segment", "segments"); this.segments = segments; this.useCompoundFile = useCompoundFile; }