/// <summary> /// Does the actual (time-consuming) work of the merge, /// but without holding synchronized lock on IndexWriter /// instance /// </summary> private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); string mergedName = merge.Info_Renamed.Info.Name; IList<SegmentCommitInfo> sourceSegments = merge.Segments; IOContext context = new IOContext(merge.MergeInfo); MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, directory); TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory); if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "merging " + SegString(merge.Segments)); } merge.Readers = new List<SegmentReader>(); // this is try/finally to make sure merger's readers are // closed: bool success = false; try { int segUpto = 0; while (segUpto < sourceSegments.Count) { SegmentCommitInfo info = sourceSegments[segUpto]; // Hold onto the "live" reader; we will use this to // commit merged deletes ReadersAndUpdates rld = readerPool.Get(info, true); // Carefully pull the most recent live docs and reader SegmentReader reader; Bits liveDocs; int delCount; lock (this) { // Must sync to ensure BufferedDeletesStream cannot change liveDocs, // pendingDeleteCount and field updates while we pull a copy: reader = rld.GetReaderForMerge(context); liveDocs = rld.ReadOnlyLiveDocs; delCount = rld.PendingDeleteCount + info.DelCount; Debug.Assert(reader != null); Debug.Assert(rld.VerifyDocCounts()); if (infoStream.IsEnabled("IW")) { if (rld.PendingDeleteCount != 0) { infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount + " pendingDelCount=" + rld.PendingDeleteCount); } else if (info.DelCount != 0) { infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount); } else { infoStream.Message("IW", "seg=" + SegString(info) + " no deletes"); } } } // Deletes might have happened after we pulled the merge reader and // before we got a read-only copy of the segment's actual live docs // (taking pending deletes into account). In that case we need to // make a new reader with updated live docs and del count. if (reader.NumDeletedDocs() != delCount) { // fix the reader's live docs and del count Debug.Assert(delCount > reader.NumDeletedDocs()); // beware of zombies SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.Info.DocCount - delCount); bool released = false; try { rld.Release(reader); released = true; } finally { if (!released) { newReader.DecRef(); } } reader = newReader; } merge.Readers.Add(reader); Debug.Assert(delCount <= info.Info.DocCount, "delCount=" + delCount + " info.docCount=" + info.Info.DocCount + " rld.pendingDeleteCount=" + rld.PendingDeleteCount + " info.getDelCount()=" + info.DelCount); segUpto++; } // System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders()); // we pass merge.getMergeReaders() instead of merge.readers to allow the // OneMerge to return a view over the actual segments to merge SegmentMerger merger = new SegmentMerger(merge.MergeReaders, merge.Info_Renamed.Info, infoStream, dirWrapper, Config_Renamed.TermIndexInterval, checkAbort, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge); merge.CheckAborted(directory); // this is where all the work happens: MergeState mergeState; bool success3 = false; try { if (!merger.ShouldMerge()) { // would result in a 0 document segment: nothing to merge! mergeState = new MergeState(new List<AtomicReader>(), merge.Info_Renamed.Info, infoStream, checkAbort); } else { mergeState = merger.Merge(); } success3 = true; } finally { if (!success3) { lock (this) { Deleter.Refresh(merge.Info_Renamed.Info.Name); } } } Debug.Assert(mergeState.SegmentInfo == merge.Info_Renamed.Info); merge.Info_Renamed.Info.Files = new HashSet<string>(dirWrapper.CreatedFiles); // Record which codec was used to write the segment if (infoStream.IsEnabled("IW")) { if (merge.Info_Renamed.Info.DocCount == 0) { infoStream.Message("IW", "merge away fully deleted segments"); } else { infoStream.Message("IW", "merge codec=" + Codec + " docCount=" + merge.Info_Renamed.Info.DocCount + "; merged segment has " + (mergeState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (mergeState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (mergeState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (mergeState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (mergeState.FieldInfos.HasProx() ? "freqs" : "no freqs")); } } // Very important to do this before opening the reader // because codec must know if prox was written for // this segment: //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name); bool useCompoundFile; lock (this) // Guard segmentInfos { useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, merge.Info_Renamed); } if (useCompoundFile) { success = false; ICollection<string> filesToRemove = merge.Info_Renamed.Files(); try { filesToRemove = CreateCompoundFile(infoStream, directory, checkAbort, merge.Info_Renamed.Info, context); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (merge.Aborted) { // this can happen if rollback or close(false) // is called -- fall through to logic below to // remove the partially created CFS: } else { HandleMergeException(ioe, merge); } } } catch (Exception t) { HandleMergeException(t, merge); } finally { if (!success) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "hit exception creating compound file during merge"); } lock (this) { Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION)); Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); Deleter.DeleteNewFiles(merge.Info_Renamed.Files()); } } } // So that, if we hit exc in deleteNewFiles (next) // or in commitMerge (later), we close the // per-segment readers in the finally clause below: success = false; lock (this) { // delete new non cfs files directly: they were never // registered with IFD Deleter.DeleteNewFiles(filesToRemove); if (merge.Aborted) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "abort merge after building CFS"); } Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION)); Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); return 0; } } merge.Info_Renamed.Info.UseCompoundFile = true; } else { // So that, if we hit exc in commitMerge (later), // we close the per-segment readers in the finally // clause below: success = false; } // Have codec write SegmentInfo. Must do this after // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: bool success2 = false; try { Codec.SegmentInfoFormat().SegmentInfoWriter.Write(directory, merge.Info_Renamed.Info, mergeState.FieldInfos, context); success2 = true; } finally { if (!success2) { lock (this) { Deleter.DeleteNewFiles(merge.Info_Renamed.Files()); } } } // TODO: ideally we would freeze merge.info here!! // because any changes after writing the .si will be // lost... if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", string.Format(CultureInfo.InvariantCulture, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.Info_Renamed.SizeInBytes() / 1024.0 / 1024.0, merge.EstimatedMergeBytes / 1024 / 1024.0)); } IndexReaderWarmer mergedSegmentWarmer = Config_Renamed.MergedSegmentWarmer; if (PoolReaders && mergedSegmentWarmer != null && merge.Info_Renamed.Info.DocCount != 0) { ReadersAndUpdates rld = readerPool.Get(merge.Info_Renamed, true); SegmentReader sr = rld.GetReader(IOContext.READ); try { mergedSegmentWarmer.Warm(sr); } finally { lock (this) { rld.Release(sr); readerPool.Release(rld); } } } // Force READ context because we merge deletes onto // this reader: if (!CommitMerge(merge, mergeState)) { // commitMerge will return false if this merge was // aborted return 0; } success = true; } finally { // Readers are already closed in commitMerge if we didn't hit // an exc: if (!success) { CloseMergeReaders(merge, true); } } return merge.Info_Renamed.Info.DocCount; }
/// <summary> /// Merges the provided indexes into this index. /// /// <p> /// The provided IndexReaders are not closed. /// /// <p> /// See <seealso cref="#addIndexes"/> for details on transactional semantics, temporary /// free space required in the Directory, and non-CFS segments on an Exception. /// /// <p> /// <b>NOTE</b>: if this method hits an OutOfMemoryError you should immediately /// close the writer. See <a href="#OOME">above</a> for details. /// /// <p> /// <b>NOTE:</b> empty segments are dropped by this method and not added to this /// index. /// /// <p> /// <b>NOTE:</b> this method merges all given <seealso cref="IndexReader"/>s in one /// merge. If you intend to merge a large number of readers, it may be better /// to call this method multiple times, each time with a small set of readers. /// In principle, if you use a merge policy with a {@code mergeFactor} or /// {@code maxMergeAtOnce} parameter, you should pass that many readers in one /// call. Also, if the given readers are <seealso cref="DirectoryReader"/>s, they can be /// opened with {@code termIndexInterval=-1} to save RAM, since during merge /// the in-memory structure is not used. See /// <seealso cref="DirectoryReader#open(Directory, int)"/>. /// /// <p> /// <b>NOTE</b>: if you call <seealso cref="#close(boolean)"/> with <tt>false</tt>, which /// aborts all running merges, then any thread still running this method might /// hit a <seealso cref="MergePolicy.MergeAbortedException"/>. /// </summary> /// <exception cref="CorruptIndexException"> /// if the index is corrupt </exception> /// <exception cref="IOException"> /// if there is a low-level IO error </exception> public virtual void AddIndexes(params IndexReader[] readers) { EnsureOpen(); int numDocs = 0; try { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "flush at addIndexes(IndexReader...)"); } Flush(false, true); string mergedName = NewSegmentName(); IList<AtomicReader> mergeReaders = new List<AtomicReader>(); foreach (IndexReader indexReader in readers) { numDocs += indexReader.NumDocs(); foreach (AtomicReaderContext ctx in indexReader.Leaves()) { mergeReaders.Add(ctx.AtomicReader); } } IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1)); // TODO: somehow we should fix this merge so it's // abortable so that IW.close(false) is able to stop it TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1, false, Codec, null); SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, Config_Renamed.TermIndexInterval, MergeState.CheckAbort.NONE, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge); if (!merger.ShouldMerge()) { return; } MergeState mergeState; bool success = false; try { mergeState = merger.Merge(); // merge 'em success = true; } finally { if (!success) { lock (this) { Deleter.Refresh(info.Name); } } } SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L); info.Files = new HashSet<string>(trackingDir.CreatedFiles); trackingDir.CreatedFiles.Clear(); SetDiagnostics(info, SOURCE_ADDINDEXES_READERS); bool useCompoundFile; lock (this) // Guard segmentInfos { if (StopMerges) { Deleter.DeleteNewFiles(infoPerCommit.Files()); return; } EnsureOpen(); useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, infoPerCommit); } // Now create the compound file if needed if (useCompoundFile) { ICollection<string> filesToDelete = infoPerCommit.Files(); try { CreateCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, info, context); } finally { // delete new non cfs files directly: they were never // registered with IFD lock (this) { Deleter.DeleteNewFiles(filesToDelete); } } info.UseCompoundFile = true; } // Have codec write SegmentInfo. Must do this after // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: success = false; try { Codec.SegmentInfoFormat().SegmentInfoWriter.Write(trackingDir, info, mergeState.FieldInfos, context); success = true; } finally { if (!success) { lock (this) { Deleter.Refresh(info.Name); } } } info.AddFiles(trackingDir.CreatedFiles); // Register the new segment lock (this) { if (StopMerges) { Deleter.DeleteNewFiles(info.Files); return; } EnsureOpen(); segmentInfos.Add(infoPerCommit); Checkpoint(); } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "addIndexes(IndexReader...)"); } }