/// <summary>
/// Does the actual (time-consuming) work of the merge,
/// but without holding synchronized lock on IndexWriter
/// instance
/// </summary>
/// <param name="merge">The merge specification (segments to merge and target segment info).</param>
/// <returns>The document count of the merged segment, or 0 if the merge was aborted.</returns>
private int MergeMiddle(MergePolicy.OneMerge merge)
{
    merge.CheckAborted(directory);

    string mergedName = merge.Info_Renamed.Info.Name;

    IList<SegmentCommitInfo> sourceSegments = merge.Segments;

    IOContext context = new IOContext(merge.MergeInfo);

    MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, directory);
    // Tracks every file the merger creates so we can record them on the new SegmentInfo below.
    TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory);

    if (infoStream.IsEnabled("IW"))
    {
        infoStream.Message("IW", "merging " + SegString(merge.Segments));
    }

    merge.Readers = new List<SegmentReader>();

    // this is try/finally to make sure merger's readers are
    // closed:
    bool success = false;
    try
    {
        int segUpto = 0;
        while (segUpto < sourceSegments.Count)
        {
            SegmentCommitInfo info = sourceSegments[segUpto];

            // Hold onto the "live" reader; we will use this to
            // commit merged deletes
            ReadersAndUpdates rld = readerPool.Get(info, true);

            // Carefully pull the most recent live docs and reader
            SegmentReader reader;
            Bits liveDocs;
            int delCount;

            lock (this)
            {
                // Must sync to ensure BufferedDeletesStream cannot change liveDocs,
                // pendingDeleteCount and field updates while we pull a copy:
                reader = rld.GetReaderForMerge(context);
                liveDocs = rld.ReadOnlyLiveDocs;
                delCount = rld.PendingDeleteCount + info.DelCount;

                Debug.Assert(reader != null);
                Debug.Assert(rld.VerifyDocCounts());

                if (infoStream.IsEnabled("IW"))
                {
                    if (rld.PendingDeleteCount != 0)
                    {
                        infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount + " pendingDelCount=" + rld.PendingDeleteCount);
                    }
                    else if (info.DelCount != 0)
                    {
                        infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount);
                    }
                    else
                    {
                        infoStream.Message("IW", "seg=" + SegString(info) + " no deletes");
                    }
                }
            }

            // Deletes might have happened after we pulled the merge reader and
            // before we got a read-only copy of the segment's actual live docs
            // (taking pending deletes into account). In that case we need to
            // make a new reader with updated live docs and del count.
            if (reader.NumDeletedDocs() != delCount)
            {
                // fix the reader's live docs and del count
                Debug.Assert(delCount > reader.NumDeletedDocs()); // beware of zombies

                SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.Info.DocCount - delCount);
                // Release the stale reader; if that release throws, drop our ref on the
                // replacement so it is not leaked.
                bool released = false;
                try
                {
                    rld.Release(reader);
                    released = true;
                }
                finally
                {
                    if (!released)
                    {
                        newReader.DecRef();
                    }
                }

                reader = newReader;
            }

            merge.Readers.Add(reader);
            Debug.Assert(delCount <= info.Info.DocCount, "delCount=" + delCount + " info.docCount=" + info.Info.DocCount + " rld.pendingDeleteCount=" + rld.PendingDeleteCount + " info.getDelCount()=" + info.DelCount);
            segUpto++;
        }

        // System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders());

        // we pass merge.getMergeReaders() instead of merge.readers to allow the
        // OneMerge to return a view over the actual segments to merge
        SegmentMerger merger = new SegmentMerger(merge.MergeReaders, merge.Info_Renamed.Info, infoStream, dirWrapper, Config_Renamed.TermIndexInterval, checkAbort, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge);

        merge.CheckAborted(directory);

        // this is where all the work happens:
        MergeState mergeState;
        bool success3 = false;
        try
        {
            if (!merger.ShouldMerge())
            {
                // would result in a 0 document segment: nothing to merge!
                mergeState = new MergeState(new List<AtomicReader>(), merge.Info_Renamed.Info, infoStream, checkAbort);
            }
            else
            {
                mergeState = merger.Merge();
            }
            success3 = true;
        }
        finally
        {
            if (!success3)
            {
                // Merge failed: forget any files it may have partially written.
                lock (this)
                {
                    Deleter.Refresh(merge.Info_Renamed.Info.Name);
                }
            }
        }
        Debug.Assert(mergeState.SegmentInfo == merge.Info_Renamed.Info);
        merge.Info_Renamed.Info.Files = new HashSet<string>(dirWrapper.CreatedFiles);

        // Record which codec was used to write the segment

        if (infoStream.IsEnabled("IW"))
        {
            if (merge.Info_Renamed.Info.DocCount == 0)
            {
                infoStream.Message("IW", "merge away fully deleted segments");
            }
            else
            {
                // NOTE(review): the second HasProx() below looks like it should be a
                // freq-specific check (the upstream Java code has the same quirk) --
                // the "freqs"/"no freqs" text is keyed off prox, not freqs. Confirm
                // against FieldInfos before changing; left as-is since it only
                // affects this log line.
                infoStream.Message("IW", "merge codec=" + Codec + " docCount=" + merge.Info_Renamed.Info.DocCount + "; merged segment has " + (mergeState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (mergeState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (mergeState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (mergeState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (mergeState.FieldInfos.HasProx() ? "freqs" : "no freqs"));
            }
        }

        // Very important to do this before opening the reader
        // because codec must know if prox was written for
        // this segment:
        //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
        bool useCompoundFile;
        lock (this) // Guard segmentInfos
        {
            useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, merge.Info_Renamed);
        }

        if (useCompoundFile)
        {
            success = false;

            ICollection<string> filesToRemove = merge.Info_Renamed.Files();

            try
            {
                filesToRemove = CreateCompoundFile(infoStream, directory, checkAbort, merge.Info_Renamed.Info, context);
                success = true;
            }
            catch (System.IO.IOException ioe)
            {
                lock (this)
                {
                    if (merge.Aborted)
                    {
                        // this can happen if rollback or close(false)
                        // is called -- fall through to logic below to
                        // remove the partially created CFS:
                    }
                    else
                    {
                        HandleMergeException(ioe, merge);
                    }
                }
            }
            catch (Exception t)
            {
                HandleMergeException(t, merge);
            }
            finally
            {
                if (!success)
                {
                    if (infoStream.IsEnabled("IW"))
                    {
                        infoStream.Message("IW", "hit exception creating compound file during merge");
                    }

                    lock (this)
                    {
                        Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION));
                        Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
                        Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                    }
                }
            }

            // So that, if we hit exc in deleteNewFiles (next)
            // or in commitMerge (later), we close the
            // per-segment readers in the finally clause below:
            success = false;

            lock (this)
            {
                // delete new non cfs files directly: they were never
                // registered with IFD
                Deleter.DeleteNewFiles(filesToRemove);

                if (merge.Aborted)
                {
                    if (infoStream.IsEnabled("IW"))
                    {
                        infoStream.Message("IW", "abort merge after building CFS");
                    }
                    Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION));
                    Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
                    return 0;
                }
            }

            merge.Info_Renamed.Info.UseCompoundFile = true;
        }
        else
        {
            // So that, if we hit exc in commitMerge (later),
            // we close the per-segment readers in the finally
            // clause below:
            success = false;
        }

        // Have codec write SegmentInfo. Must do this after
        // creating CFS so that 1) .si isn't slurped into CFS,
        // and 2) .si reflects useCompoundFile=true change
        // above:
        bool success2 = false;
        try
        {
            Codec.SegmentInfoFormat().SegmentInfoWriter.Write(directory, merge.Info_Renamed.Info, mergeState.FieldInfos, context);
            success2 = true;
        }
        finally
        {
            if (!success2)
            {
                lock (this)
                {
                    Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                }
            }
        }

        // TODO: ideally we would freeze merge.info here!!
        // because any changes after writing the .si will be
        // lost...

        if (infoStream.IsEnabled("IW"))
        {
            // BUGFIX: the original used Java-style "%.3f" placeholders, which
            // string.Format does not substitute -- the log printed the literal
            // "%.3f MB" and ignored both size arguments. Use .NET composite
            // format specifiers instead.
            infoStream.Message("IW", string.Format(CultureInfo.InvariantCulture, "merged segment size={0:0.000} MB vs estimate={1:0.000} MB", merge.Info_Renamed.SizeInBytes() / 1024.0 / 1024.0, merge.EstimatedMergeBytes / 1024 / 1024.0));
        }

        IndexReaderWarmer mergedSegmentWarmer = Config_Renamed.MergedSegmentWarmer;
        if (PoolReaders && mergedSegmentWarmer != null && merge.Info_Renamed.Info.DocCount != 0)
        {
            ReadersAndUpdates rld = readerPool.Get(merge.Info_Renamed, true);
            SegmentReader sr = rld.GetReader(IOContext.READ);
            try
            {
                mergedSegmentWarmer.Warm(sr);
            }
            finally
            {
                lock (this)
                {
                    rld.Release(sr);
                    readerPool.Release(rld);
                }
            }
        }

        // Force READ context because we merge deletes onto
        // this reader:
        if (!CommitMerge(merge, mergeState))
        {
            // commitMerge will return false if this merge was
            // aborted
            return 0;
        }

        success = true;
    }
    finally
    {
        // Readers are already closed in commitMerge if we didn't hit
        // an exc:
        if (!success)
        {
            CloseMergeReaders(merge, true);
        }
    }

    return merge.Info_Renamed.Info.DocCount;
}
// note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
public SegmentMerger(IList<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, bool validate)
{
    // Optionally verify each source reader's integrity up front so that a
    // corrupt input segment fails fast, before any merge work begins.
    if (validate)
    {
        for (int i = 0; i < readers.Count; i++)
        {
            readers[i].CheckIntegrity();
        }
    }

    // Capture the merge inputs and the destination segment description.
    MergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);

    // Plain per-merge configuration.
    Directory = dir;
    this.Context = context;
    this.TermIndexInterval = termIndexInterval;
    this.Codec = segmentInfo.Codec;
    this.FieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);

    // Build the doc-ID remapping tables and record the merged segment's
    // resulting document count (must run after MergeState is assigned).
    MergeState.SegmentInfo.DocCount = SetDocMaps();
}