/// <summary>Does the actual (time-consuming) work of the merge,
/// but without holding the synchronized lock on the IndexWriter
/// instance.
/// </summary>
private int MergeMiddle(MergePolicy.OneMerge merge)
{
    merge.CheckAborted(directory);

    System.String mergedName = merge.info.name;

    SegmentMerger merger = null;

    int mergedDocCount = 0;

    SegmentInfos sourceSegments = merge.segments;
    int numSegments = sourceSegments.Count;

    if (infoStream != null)
        Message("merging " + merge.SegString(directory));

    merger = new SegmentMerger(this, mergedName, merge);

    merge.readers = new SegmentReader[numSegments];
    merge.readersClone = new SegmentReader[numSegments];

    bool mergeDocStores = false;

    System.String currentDocStoreSegment;
    lock (this)
    {
        currentDocStoreSegment = docWriter.GetDocStoreSegment();
    }
    bool currentDSSMerged = false;

    // This is try/finally to make sure merger's readers are
    // closed:
    bool success = false;
    try
    {
        int totDocCount = 0;

        for (int i = 0; i < numSegments; i++)
        {
            SegmentInfo info = sourceSegments.Info(i);

            // Hold onto the "live" reader; we will use this to
            // commit merged deletes
            SegmentReader reader = merge.readers[i] = readerPool.Get(info, merge.mergeDocStores, MERGE_READ_BUFFER_SIZE, -1);

            // We clone the segment readers because other
            // deletes may come in while we're merging, so we
            // need readers that will not change
            SegmentReader clone = merge.readersClone[i] = (SegmentReader) reader.Clone(true);
            merger.Add(clone);

            if (clone.HasDeletions())
            {
                mergeDocStores = true;
            }

            if (info.GetDocStoreOffset() != -1 && currentDocStoreSegment != null)
            {
                currentDSSMerged |= currentDocStoreSegment.Equals(info.GetDocStoreSegment());
            }

            totDocCount += clone.NumDocs();
        }

        if (infoStream != null)
        {
            Message("merge: total " + totDocCount + " docs");
        }

        merge.CheckAborted(directory);

        // If deletions have arrived and it has now become
        // necessary to merge doc stores, go and open them:
        if (mergeDocStores && !merge.mergeDocStores)
        {
            merge.mergeDocStores = true;
            lock (this)
            {
                if (currentDSSMerged)
                {
                    if (infoStream != null)
                    {
                        Message("now flush at mergeMiddle");
                    }
                    DoFlush(true, false);
                }
            }

            for (int i = 0; i < numSegments; i++)
            {
                merge.readersClone[i].OpenDocStores();
            }

            // Clear DSS
            merge.info.SetDocStore(-1, null, false);
        }

        // This is where all the work happens:
        mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores);

        System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount);

        if (merge.useCompoundFile)
        {
            success = false;
            string compoundFileName = IndexFileNames.SegmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION);

            try
            {
                if (infoStream != null)
                {
                    Message("create compound file " + compoundFileName);
                }
                merger.CreateCompoundFile(compoundFileName);
                success = true;
            }
            catch (System.IO.IOException ioe)
            {
                lock (this)
                {
                    if (merge.IsAborted())
                    {
                        // This can happen if rollback or close(false)
                        // is called -- fall through to logic below to
                        // remove the partially created CFS:
                    }
                    else
                    {
                        HandleMergeException(ioe, merge);
                    }
                }
            }
            catch (Exception t)
            {
                HandleMergeException(t, merge);
            }
            finally
            {
                if (!success)
                {
                    if (infoStream != null)
                    {
                        Message("hit exception creating compound file during merge");
                    }
                    lock (this)
                    {
                        deleter.DeleteFile(compoundFileName);
                        deleter.DeleteNewFiles(merger.GetMergedFiles());
                    }
                }
            }

            success = false;

            lock (this)
            {
                // Delete new non-CFS files directly; they were never
                // registered with the IFD:
                deleter.DeleteNewFiles(merger.GetMergedFiles());

                if (merge.IsAborted())
                {
                    if (infoStream != null)
                    {
                        Message("abort merge after building CFS");
                    }
                    deleter.DeleteFile(compoundFileName);
                    return 0;
                }
            }

            merge.info.SetUseCompoundFile(true);
        }

        int termsIndexDivisor;
        bool loadDocStores;

        // If the merged segment warmer was not installed when
        // this merge started, causing us to not force the
        // docStores to close, we can't warm it now:
        bool canWarm = merge.info.GetDocStoreSegment() == null || currentDocStoreSegment == null || !merge.info.GetDocStoreSegment().Equals(currentDocStoreSegment);

        if (poolReaders && mergedSegmentWarmer != null && canWarm)
        {
            // Load terms index & doc stores so the segment
            // warmer can run searches, load documents/term
            // vectors
            termsIndexDivisor = readerTermsIndexDivisor;
            loadDocStores = true;
        }
        else
        {
            termsIndexDivisor = -1;
            loadDocStores = false;
        }

        // TODO: in the non-realtime case, we may want to only
        // keep deletes (it's costly to open entire reader
        // when we just need deletes)
        SegmentReader mergedReader = readerPool.Get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor);
        try
        {
            if (poolReaders && mergedSegmentWarmer != null)
            {
                mergedSegmentWarmer.Warm(mergedReader);
            }
            if (!CommitMerge(merge, merger, mergedDocCount, mergedReader))
            {
                // CommitMerge will return false if this merge was aborted
                return 0;
            }
        }
        finally
        {
            lock (this)
            {
                readerPool.Release(mergedReader);
            }
        }

        success = true;
    }
    finally
    {
        // Readers are already closed in CommitMerge if we didn't hit
        // an exc:
        if (!success)
        {
            CloseMergeReaders(merge, true);
        }
    }

    merge.mergeDone = true;

    lock (mergeScheduler)
    {
        System.Threading.Monitor.PulseAll(mergeScheduler);
    }

    // Force a sync after committing the merge.  Once this
    // sync completes then all index files referenced by the
    // current segmentInfos are on stable storage, so if the
    // OS/machine crashes, or the power cord is yanked, the
    // index will be intact.  Note that this is just one
    // (somewhat arbitrary) policy; we could try other
    // policies like only sync if it's been > X minutes or
    // more than Y bytes have been written, etc.
    if (autoCommit)
    {
        long size;
        lock (this)
        {
            size = merge.info.SizeInBytes();
        }
        Commit(size);
    }

    return mergedDocCount;
}
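// Hedged example: the warming step in MergeMiddle only runs when a merged
// segment warmer has been installed on the writer.  A minimal sketch of how
// an application might install one -- "WarmNewSegments" is a hypothetical
// class, not part of this file, and the query it runs is only illustrative:
//
//   public class WarmNewSegments : IndexWriter.IndexReaderWarmer
//   {
//       public override void Warm(IndexReader reader)
//       {
//           // Run a cheap query so the terms index, norms, and FieldCache
//           // entries are loaded before the new reader is published:
//           new IndexSearcher(reader).Search(new TermQuery(new Term("id", "0")), 1);
//       }
//   }
//
//   writer.SetMergedSegmentWarmer(new WarmNewSegments());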
/// <summary> Merges the indicated segments, replacing them in the stack with a
/// single segment.
/// </summary>
internal void Merge(MergePolicy.OneMerge merge)
{
    bool success = false;

    try
    {
        try
        {
            try
            {
                MergeInit(merge);

                if (infoStream != null)
                {
                    Message("now merge\n merge=" + merge.SegString(directory) + "\n merge=" + merge + "\n index=" + SegString());
                }

                MergeMiddle(merge);
                MergeSuccess(merge);
                success = true;
            }
            catch (System.Exception t)
            {
                HandleMergeException(t, merge);
            }
        }
        finally
        {
            lock (this)
            {
                MergeFinish(merge);

                if (!success)
                {
                    if (infoStream != null)
                        Message("hit exception during merge");
                    if (merge.info != null && !segmentInfos.Contains(merge.info))
                        deleter.Refresh(merge.info.name);
                }

                // This merge (and, generally, any change to the
                // segments) may now enable new merges, so we call
                // merge policy & update pending merges.
                if (success && !merge.IsAborted() && !closed && !closing)
                    UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
            }
        }
    }
    catch (System.OutOfMemoryException oom)
    {
        HandleOOM(oom, "merge");
    }
}
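// Hedged sketch: Merge(OneMerge) above is the entry point a MergeScheduler
// drives.  Reduced to its essentials, a serial scheduler looks roughly like
// the following (assumes GetNextMerge() is accessible to the scheduler; the
// bundled SerialMergeScheduler uses it, but it may not be visible to
// external code):
//
//   public class MySerialScheduler : MergeScheduler
//   {
//       public override void Merge(IndexWriter writer)
//       {
//           lock (this)
//           {
//               MergePolicy.OneMerge merge;
//               while ((merge = writer.GetNextMerge()) != null)
//                   writer.Merge(merge); // runs MergeInit/MergeMiddle above
//           }
//       }
//
//       public override void Close()
//       {
//       }
//   }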
private void _MergeInit(MergePolicy.OneMerge merge)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startMergeInit"));

        System.Diagnostics.Debug.Assert(merge.registerDone);
        System.Diagnostics.Debug.Assert(!merge.optimize || merge.maxNumSegmentsOptimize > 0);

        if (hitOOM)
        {
            throw new System.SystemException("this writer hit an OutOfMemoryError; cannot merge");
        }

        if (merge.info != null)
            // mergeInit already done
            return;

        if (merge.IsAborted())
            return;

        bool changed = ApplyDeletes();

        // If autoCommit == true then all deletes should have
        // been flushed when we flushed the last segment
        System.Diagnostics.Debug.Assert(!changed || !autoCommit);

        SegmentInfos sourceSegments = merge.segments;
        int end = sourceSegments.Count;

        // Check whether this merge will allow us to skip
        // merging the doc stores (stored field & vectors).
        // This is a very substantial optimization (saves tons
        // of IO) that can only be applied with
        // autoCommit=false.

        Directory lastDir = directory;
        System.String lastDocStoreSegment = null;
        int next = -1;

        bool mergeDocStores = false;
        bool doFlushDocStore = false;
        System.String currentDocStoreSegment = docWriter.GetDocStoreSegment();

        // Test each segment to be merged: check if we need to
        // flush/merge doc stores
        for (int i = 0; i < end; i++)
        {
            SegmentInfo si = sourceSegments.Info(i);

            // If it has deletions we must merge the doc stores
            if (si.HasDeletions())
                mergeDocStores = true;

            // If it has its own (private) doc stores we must
            // merge the doc stores
            if (-1 == si.GetDocStoreOffset())
                mergeDocStores = true;

            // If it has a different doc store segment than
            // previous segments, we must merge the doc stores
            System.String docStoreSegment = si.GetDocStoreSegment();
            if (docStoreSegment == null)
                mergeDocStores = true;
            else if (lastDocStoreSegment == null)
                lastDocStoreSegment = docStoreSegment;
            else if (!lastDocStoreSegment.Equals(docStoreSegment))
                mergeDocStores = true;

            // Segments' docStoreOffsets must be in-order,
            // contiguous.  For the default merge policy now
            // this will always be the case, but for an
            // arbitrary merge policy it may not be
            if (-1 == next)
                next = si.GetDocStoreOffset() + si.docCount;
            else if (next != si.GetDocStoreOffset())
                mergeDocStores = true;
            else
                next = si.GetDocStoreOffset() + si.docCount;

            // If the segment comes from a different directory
            // we must merge
            if (lastDir != si.dir)
                mergeDocStores = true;

            // If the segment is referencing the current "live"
            // doc store outputs then we must merge
            if (si.GetDocStoreOffset() != -1 && currentDocStoreSegment != null && si.GetDocStoreSegment().Equals(currentDocStoreSegment))
            {
                doFlushDocStore = true;
            }
        }

        // If a mergedSegmentWarmer is installed, we must merge
        // the doc stores because we will open a full
        // SegmentReader on the merged segment:
        if (!mergeDocStores && mergedSegmentWarmer != null && currentDocStoreSegment != null && lastDocStoreSegment != null && lastDocStoreSegment.Equals(currentDocStoreSegment))
        {
            mergeDocStores = true;
        }

        int docStoreOffset;
        System.String docStoreSegment2;
        bool docStoreIsCompoundFile;

        if (mergeDocStores)
        {
            docStoreOffset = -1;
            docStoreSegment2 = null;
            docStoreIsCompoundFile = false;
        }
        else
        {
            SegmentInfo si = sourceSegments.Info(0);
            docStoreOffset = si.GetDocStoreOffset();
            docStoreSegment2 = si.GetDocStoreSegment();
            docStoreIsCompoundFile = si.GetDocStoreIsCompoundFile();
        }

        if (mergeDocStores && doFlushDocStore)
        {
            // SegmentMerger intends to merge the doc stores
            // (stored fields, vectors), and at least one of the
            // segments to be merged refers to the currently
            // live doc stores.

            // TODO: if we know we are about to merge away these
            // newly flushed doc store files then we should not
            // make compound file out of them...
            if (infoStream != null)
                Message("now flush at merge");
            DoFlush(true, false);
        }

        merge.mergeDocStores = mergeDocStores;

        // Bind a new segment name here so even with
        // ConcurrentMergePolicy we keep deterministic segment
        // names.
        merge.info = new SegmentInfo(NewSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment2, docStoreIsCompoundFile, false);

        System.Collections.Generic.IDictionary<string, string> details = new System.Collections.Generic.Dictionary<string, string>();
        details["optimize"] = merge.optimize + "";
        details["mergeFactor"] = end + "";
        details["mergeDocStores"] = mergeDocStores + "";
        SetDiagnostics(merge.info, "merge", details);

        // Also enroll the merged segment into mergingSegments;
        // this prevents it from getting selected for a merge
        // after our merge is done but while we are building the
        // CFS:
        mergingSegments[merge.info] = merge.info;
    }
}
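// Worked example for the contiguity test in _MergeInit (illustrative
// numbers only): suppose two segments share doc store "_0" and have
// docCounts 10 and 20.
//
//   seg A: GetDocStoreOffset() = 0,  docCount = 10  -> next = 10
//   seg B: GetDocStoreOffset() = 10, docCount = 20  -> equals next, next = 30
//
// The shared doc store can then be carried over to the merged segment.
// If seg B instead reported GetDocStoreOffset() = 15, next (10) would not
// match it, forcing mergeDocStores to true.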
/* FIXME if we want to support non-contiguous segment merges */
private bool CommitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount, SegmentReader mergedReader)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startCommitMerge"));

        if (hitOOM)
        {
            throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete merge");
        }

        if (infoStream != null)
            Message("commitMerge: " + merge.SegString(directory) + " index=" + SegString());

        System.Diagnostics.Debug.Assert(merge.registerDone);

        // If merge was explicitly aborted, or, if rollback() or
        // rollbackTransaction() had been called since our merge
        // started (which results in an unqualified
        // deleter.refresh() call that will remove any index
        // file that current segments does not reference), we
        // abort this merge
        if (merge.IsAborted())
        {
            if (infoStream != null)
                Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");
            return false;
        }

        int start = EnsureContiguousMerge(merge);

        CommitMergedDeletes(merge, mergedReader);
        docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount);

        // If the doc store we are using has been closed and
        // is now in compound format (but wasn't when we
        // started), then we will switch to the compound
        // format as well:
        SetMergeDocStoreIsCompoundFile(merge);

        merge.info.SetHasProx(merger.HasProx());

        // Remove the merged segments from segmentInfos and
        // insert the new merged segment in their place.
        // ArrayList.GetRange returns a view, so clearing it
        // removes that range from segmentInfos itself:
        ((System.Collections.ArrayList) segmentInfos).GetRange(start, merge.segments.Count).Clear();
        System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info));
        segmentInfos.Insert(start, merge.info);

        CloseMergeReaders(merge, false);

        // Must note the change to segmentInfos so any commits
        // in-flight don't lose it:
        Checkpoint();

        // If the merged segments had pending changes, clear
        // them so that we don't bother writing them to
        // disk, updating SegmentInfo, etc.:
        readerPool.Clear(merge.segments);

        if (merge.optimize)
        {
            // cascade the optimize:
            segmentsToOptimize[merge.info] = merge.info;
        }
        return true;
    }
}
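// Hedged standalone demo of the ArrayList.GetRange "view" semantics that
// CommitMerge relies on when splicing segmentInfos: clearing the returned
// range removes those elements from the source list, mirroring Java's
// subList(start, end).clear().  Sketch only; the segment names are made up:
//
//   using System;
//   using System.Collections;
//
//   public static class GetRangeDemo
//   {
//       public static void Main()
//       {
//           ArrayList segs = new ArrayList(new string[] { "_0", "_1", "_2", "_3", "_4" });
//           int start = 1;                    // merge away _1, _2, _3
//           segs.GetRange(start, 3).Clear();  // view: removes them from segs
//           segs.Insert(start, "_5");         // merged segment takes their place
//           // prints: _0, _5, _4
//           Console.WriteLine(string.Join(", ", (string[]) segs.ToArray(typeof(string))));
//       }
//   }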