/// <summary>Checks whether this merge involves any segments
/// already participating in a merge. If not, this merge is
/// "registered", meaning we record that its segments are now
/// participating in a merge, and true is returned. Otherwise
/// (the merge conflicts) false is returned.
/// </summary>
internal bool RegisterMerge(MergePolicy.OneMerge merge)
{
    lock (this)
    {
        if (merge.registerDone)
            return true;

        if (stopMerges)
        {
            merge.Abort();
            throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.SegString(directory));
        }

        int count = merge.segments.Count;
        bool isExternal = false;
        for (int i = 0; i < count; i++)
        {
            SegmentInfo info = merge.segments.Info(i);
            if (mergingSegments.Contains(info))
                return false;
            if (segmentInfos.IndexOf(info) == -1)
                return false;
            if (info.dir != directory)
                isExternal = true;
            if (segmentsToOptimize.Contains(info))
            {
                merge.optimize = true;
                merge.maxNumSegmentsOptimize = optimizeMaxNumSegments;
            }
        }

        EnsureContiguousMerge(merge);

        pendingMerges.AddLast(merge);

        if (infoStream != null)
            Message("add merge to pendingMerges: " + merge.SegString(directory) + " [total " + pendingMerges.Count + " pending]");

        merge.mergeGen = mergeGen;
        merge.isExternal = isExternal;

        // OK, it does not conflict; now record that this merge is
        // running (while synchronized) to avoid the race condition
        // where two conflicting merges from different threads start:
        for (int i = 0; i < count; i++)
        {
            SegmentInfo si = merge.segments.Info(i);
            mergingSegments[si] = si;
        }

        // Merge is now registered
        merge.registerDone = true;
        return true;
    }
}
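// Aside: the register-or-reject pattern above is easier to see in
// isolation. Below is a minimal, self-contained sketch; the
// MergeRegistry type is hypothetical (not Lucene.Net API). A merge is
// accepted only if none of its segments is already merging, and the
// conflict check plus the bookkeeping happen under a single lock, so
// two conflicting merges started from different threads cannot both
// register.

using System.Collections.Generic;

internal sealed class MergeRegistry
{
    private readonly object syncRoot = new object();
    private readonly HashSet<string> mergingSegments = new HashSet<string>();

    /// <summary>Returns true and records the segments if none is
    /// already participating in a merge; returns false on conflict.
    /// </summary>
    public bool TryRegister(IList<string> segmentNames)
    {
        lock (syncRoot)
        {
            foreach (string name in segmentNames)
                if (mergingSegments.Contains(name))
                    return false; // conflict: segment already merging

            foreach (string name in segmentNames)
                mergingSegments.Add(name);
            return true;
        }
    }

    /// <summary>Undoes TryRegister once the merge finishes or aborts,
    /// mirroring what MergeFinish does for mergingSegments.
    /// </summary>
    public void Unregister(IList<string> segmentNames)
    {
        lock (syncRoot)
        {
            foreach (string name in segmentNames)
                mergingSegments.Remove(name);
        }
    }
}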
/// <summary>Does the actual (time-consuming) work of the merge,
/// but without holding the synchronized lock on the IndexWriter
/// instance.
/// </summary>
private int MergeMiddle(MergePolicy.OneMerge merge)
{
    merge.CheckAborted(directory);

    System.String mergedName = merge.info.name;

    SegmentMerger merger = null;

    int mergedDocCount = 0;

    SegmentInfos sourceSegments = merge.segments;
    int numSegments = sourceSegments.Count;

    if (infoStream != null)
        Message("merging " + merge.SegString(directory));

    merger = new SegmentMerger(this, mergedName, merge);

    merge.readers = new SegmentReader[numSegments];
    merge.readersClone = new SegmentReader[numSegments];

    bool mergeDocStores = false;

    System.String currentDocStoreSegment;
    lock (this)
    {
        currentDocStoreSegment = docWriter.GetDocStoreSegment();
    }
    bool currentDSSMerged = false;

    // This is try/finally to make sure merger's readers are closed:
    bool success = false;
    try
    {
        int totDocCount = 0;

        for (int i = 0; i < numSegments; i++)
        {
            SegmentInfo info = sourceSegments.Info(i);

            // Hold onto the "live" reader; we will use this to
            // commit merged deletes
            SegmentReader reader = merge.readers[i] = readerPool.Get(info, merge.mergeDocStores, MERGE_READ_BUFFER_SIZE, -1);

            // We clone the segment readers because other deletes may
            // come in while we're merging, so we need readers that
            // will not change
            SegmentReader clone = merge.readersClone[i] = (SegmentReader) reader.Clone(true);
            merger.Add(clone);

            if (clone.HasDeletions())
                mergeDocStores = true;

            if (info.GetDocStoreOffset() != -1 && currentDocStoreSegment != null)
                currentDSSMerged |= currentDocStoreSegment.Equals(info.GetDocStoreSegment());

            totDocCount += clone.NumDocs();
        }

        if (infoStream != null)
            Message("merge: total " + totDocCount + " docs");

        merge.CheckAborted(directory);

        // If deletions have arrived and it has now become necessary
        // to merge doc stores, go and open them:
        if (mergeDocStores && !merge.mergeDocStores)
        {
            merge.mergeDocStores = true;
            lock (this)
            {
                if (currentDSSMerged)
                {
                    if (infoStream != null)
                        Message("now flush at mergeMiddle");
                    DoFlush(true, false);
                }
            }

            for (int i = 0; i < numSegments; i++)
                merge.readersClone[i].OpenDocStores();

            // Clear DSS
            merge.info.SetDocStore(-1, null, false);
        }

        // This is where all the work happens:
        mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores);

        System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount);

        if (merge.useCompoundFile)
        {
            success = false;
            string compoundFileName = IndexFileNames.SegmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION);

            try
            {
                if (infoStream != null)
                    Message("create compound file " + compoundFileName);
                merger.CreateCompoundFile(compoundFileName);
                success = true;
            }
            catch (System.IO.IOException ioe)
            {
                lock (this)
                {
                    if (merge.IsAborted())
                    {
                        // This can happen if rollback or close(false)
                        // is called -- fall through to the logic below
                        // to remove the partially created CFS:
                    }
                    else
                    {
                        HandleMergeException(ioe, merge);
                    }
                }
            }
            catch (System.Exception t)
            {
                HandleMergeException(t, merge);
            }
            finally
            {
                if (!success)
                {
                    if (infoStream != null)
                        Message("hit exception creating compound file during merge");
                    lock (this)
                    {
                        deleter.DeleteFile(compoundFileName);
                        deleter.DeleteNewFiles(merger.GetMergedFiles());
                    }
                }
            }

            success = false;

            lock (this)
            {
                // Delete new non-CFS files directly: they were never
                // registered with IFD
                deleter.DeleteNewFiles(merger.GetMergedFiles());

                if (merge.IsAborted())
                {
                    if (infoStream != null)
                        Message("abort merge after building CFS");
                    deleter.DeleteFile(compoundFileName);
                    return 0;
                }
            }

            merge.info.SetUseCompoundFile(true);
        }

        int termsIndexDivisor;
        bool loadDocStores;

        // If the merged segment warmer was not installed when this
        // merge started, causing us to not force the doc stores to
        // close, we can't warm it now:
        bool canWarm = merge.info.GetDocStoreSegment() == null || currentDocStoreSegment == null || !merge.info.GetDocStoreSegment().Equals(currentDocStoreSegment);

        if (poolReaders && mergedSegmentWarmer != null && canWarm)
        {
            // Load the terms index & doc stores so the segment warmer
            // can run searches and load documents/term vectors
            termsIndexDivisor = readerTermsIndexDivisor;
            loadDocStores = true;
        }
        else
        {
            termsIndexDivisor = -1;
            loadDocStores = false;
        }

        // TODO: in the non-realtime case, we may want to only keep
        // deletes (it's costly to open the entire reader when we just
        // need deletes)
        SegmentReader mergedReader = readerPool.Get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor);

        try
        {
            if (poolReaders && mergedSegmentWarmer != null)
                mergedSegmentWarmer.Warm(mergedReader);

            if (!CommitMerge(merge, merger, mergedDocCount, mergedReader))
                // commitMerge will return false if this merge was aborted
                return 0;
        }
        finally
        {
            lock (this)
            {
                readerPool.Release(mergedReader);
            }
        }

        success = true;
    }
    finally
    {
        // Readers are already closed in commitMerge if we didn't hit
        // an exception:
        if (!success)
            CloseMergeReaders(merge, true);
    }

    merge.mergeDone = true;

    lock (mergeScheduler)
    {
        System.Threading.Monitor.PulseAll(mergeScheduler);
    }

    // Force a sync after committing the merge. Once this sync
    // completes, all index files referenced by the current
    // segmentInfos are on stable storage, so if the OS/machine
    // crashes, or the power cord is yanked, the index will be
    // intact. Note that this is just one (somewhat arbitrary)
    // policy; we could try other policies, like only syncing if
    // it's been > X minutes or more than Y bytes have been written.
    if (autoCommit)
    {
        long size;
        lock (this)
        {
            size = merge.info.SizeInBytes();
        }
        Commit(size);
    }

    return mergedDocCount;
}
private void HandleMergeException(System.Exception t, MergePolicy.OneMerge merge)
{
    if (infoStream != null)
        Message("handleMergeException: merge=" + merge.SegString(directory) + " exc=" + t);

    // Set the exception on the merge, so if optimize() is waiting on
    // us it sees the root cause exception:
    merge.SetException(t);
    AddMergeException(merge);

    if (t is MergePolicy.MergeAbortedException)
    {
        // We can ignore this exception (it happens when close(false)
        // or rollback is called), unless the merge involves segments
        // from external directories, in which case we must throw it
        // so, for example, the rollbackTransaction code in
        // addIndexes* is executed.
        if (merge.isExternal)
            throw (MergePolicy.MergeAbortedException) t;
    }
    else if (t is System.IO.IOException)
        throw (System.IO.IOException) t;
    else if (t is System.SystemException)
        throw (System.SystemException) t;
    else if (t is System.ApplicationException)
        throw (System.ApplicationException) t;
    // Should not get here
    else
        throw new System.SystemException(null, t);
}
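// Aside: each `throw (T) t` above rethrows with the original type but
// resets the stack trace. The original port targeted runtimes without
// a better option; on .NET 4.5+ the same "rethrow preserving the root
// cause" intent can be expressed with ExceptionDispatchInfo. A
// minimal sketch (the Rethrow helper is hypothetical):
using System.Runtime.ExceptionServices;

internal static class Rethrow
{
    /// <summary>Rethrows t with its original type and its original
    /// stack trace intact.</summary>
    public static void Original(System.Exception t)
    {
        ExceptionDispatchInfo.Capture(t).Throw();
    }
}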
/// <summary>Merges the indicated segments, replacing them in the
/// stack with a single segment.
/// </summary>
internal void Merge(MergePolicy.OneMerge merge)
{
    bool success = false;

    try
    {
        try
        {
            try
            {
                MergeInit(merge);

                if (infoStream != null)
                    Message("now merge\n merge=" + merge.SegString(directory) + "\n merge=" + merge + "\n index=" + SegString());

                MergeMiddle(merge);
                MergeSuccess(merge);
                success = true;
            }
            catch (System.Exception t)
            {
                HandleMergeException(t, merge);
            }
        }
        finally
        {
            lock (this)
            {
                MergeFinish(merge);

                if (!success)
                {
                    if (infoStream != null)
                        Message("hit exception during merge");
                    if (merge.info != null && !segmentInfos.Contains(merge.info))
                        deleter.Refresh(merge.info.name);
                }

                // This merge (and, generally, any change to the
                // segments) may now enable new merges, so we call the
                // merge policy & update pending merges.
                if (success && !merge.IsAborted() && !closed && !closing)
                    UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
            }
        }
    }
    catch (System.OutOfMemoryException oom)
    {
        HandleOOM(oom, "merge");
    }
}
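// Aside: Merge() is normally driven by a MergeScheduler. A sketch of
// the simplest possible driver loop is below; it is hypothetical and
// heavily simplified relative to ConcurrentMergeScheduler, which runs
// merges on background threads, and it assumes the internal
// GetNextMerge() accessor on IndexWriter.
internal sealed class SerialMergeDriver
{
    /// <summary>Runs every pending merge on the calling thread until
    /// the writer reports no more work.</summary>
    public void MergeAll(IndexWriter writer)
    {
        MergePolicy.OneMerge merge;
        while ((merge = writer.GetNextMerge()) != null)
            writer.Merge(merge);
    }
}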
/// <summary>Carefully merges deletes for the segments we just
/// merged. This is tricky because, although merging will clear all
/// deletes (it compacts the documents), new deletes may have been
/// flushed to the segments since the merge was started. This method
/// "carries over" such new deletes onto the newly merged segment,
/// and saves the resulting deletes file (incrementing the delete
/// generation for merge.info). If no deletes were flushed, no new
/// deletes file is saved.
/// </summary>
private void CommitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergeReader)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startCommitMergeDeletes"));

        SegmentInfos sourceSegments = merge.segments;

        if (infoStream != null)
            Message("commitMergeDeletes " + merge.SegString(directory));

        // Carefully merge deletes that occurred after we started
        // merging:
        int docUpto = 0;
        int delCount = 0;

        for (int i = 0; i < sourceSegments.Count; i++)
        {
            SegmentInfo info = sourceSegments.Info(i);
            int docCount = info.docCount;
            SegmentReader previousReader = merge.readersClone[i];
            SegmentReader currentReader = merge.readers[i];
            if (previousReader.HasDeletions())
            {
                // There were deletes on this segment when the merge
                // started. The merge has collapsed away those
                // deletes, but, if new deletes were flushed since the
                // merge started, we must now carefully keep any newly
                // flushed deletes, mapping them to the new docIDs.
                if (currentReader.NumDeletedDocs() > previousReader.NumDeletedDocs())
                {
                    // This means this segment has had new deletes
                    // committed since we started the merge, so we
                    // must merge them:
                    for (int j = 0; j < docCount; j++)
                    {
                        if (previousReader.IsDeleted(j))
                            System.Diagnostics.Debug.Assert(currentReader.IsDeleted(j));
                        else
                        {
                            if (currentReader.IsDeleted(j))
                            {
                                mergeReader.DoDelete(docUpto);
                                delCount++;
                            }
                            docUpto++;
                        }
                    }
                }
                else
                {
                    docUpto += docCount - previousReader.NumDeletedDocs();
                }
            }
            else if (currentReader.HasDeletions())
            {
                // This segment had no deletes before but now it does:
                for (int j = 0; j < docCount; j++)
                {
                    if (currentReader.IsDeleted(j))
                    {
                        mergeReader.DoDelete(docUpto);
                        delCount++;
                    }
                    docUpto++;
                }
            }
            else
            {
                // No deletes before or after
                docUpto += info.docCount;
            }
        }

        System.Diagnostics.Debug.Assert(mergeReader.NumDeletedDocs() == delCount);

        mergeReader.hasChanges = delCount > 0;
    }
}
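// Aside: the carry-over loop above is subtle, so here it is in
// isolation for a single segment. This standalone sketch is
// hypothetical (plain bool arrays stand in for SegmentReaders): docs
// already deleted when the merge started were compacted away and get
// no merged docID, so only surviving docs advance docUpto, and a doc
// deleted after the merge started deletes the merged slot it was
// compacted into. In the real method, docUpto keeps counting across
// all segments of the merge.

using System.Collections.Generic;

internal static class DeleteCarryOver
{
    /// <summary>Returns the merged-segment docIDs that must be
    /// deleted, given the segment's deletes at merge start and now.
    /// </summary>
    public static List<int> CarryOver(bool[] deletesAtMergeStart, bool[] deletesNow)
    {
        List<int> carried = new List<int>();
        int docUpto = 0; // next docID in the merged segment
        for (int j = 0; j < deletesNow.Length; j++)
        {
            if (deletesAtMergeStart[j])
                continue; // compacted away by the merge; no merged docID

            if (deletesNow[j])
                carried.Add(docUpto); // newly deleted: delete merged doc

            docUpto++;
        }
        return carried;
    }
}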
/* FIXME if we want to support non-contiguous segment merges */
private bool CommitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount, SegmentReader mergedReader)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startCommitMerge"));

        if (hitOOM)
            throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete merge");

        if (infoStream != null)
            Message("commitMerge: " + merge.SegString(directory) + " index=" + SegString());

        System.Diagnostics.Debug.Assert(merge.registerDone);

        // If the merge was explicitly aborted, or, if rollback() or
        // rollbackTransaction() has been called since our merge
        // started (which results in an unqualified deleter.Refresh()
        // call that will remove any index file that the current
        // segments do not reference), we abort this merge:
        if (merge.IsAborted())
        {
            if (infoStream != null)
                Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");
            return false;
        }

        int start = EnsureContiguousMerge(merge);

        CommitMergedDeletes(merge, mergedReader);
        docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount);

        // If the doc store we are using has been closed and is now in
        // compound format (but wasn't when we started), then we will
        // switch to the compound format as well:
        SetMergeDocStoreIsCompoundFile(merge);

        merge.info.SetHasProx(merger.HasProx());

        ((System.Collections.IList) ((System.Collections.ArrayList) segmentInfos).GetRange(start, merge.segments.Count)).Clear();
        System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info));
        segmentInfos.Insert(start, merge.info);

        CloseMergeReaders(merge, false);

        // Must note the change to segmentInfos so any commits
        // in-flight don't lose it:
        Checkpoint();

        // If the merged segments had pending changes, clear them so
        // that we don't bother writing them to disk, updating
        // SegmentInfo, etc.:
        readerPool.Clear(merge.segments);

        if (merge.optimize)
        {
            // cascade the optimize:
            segmentsToOptimize[merge.info] = merge.info;
        }
        return true;
    }
}
/// <summary>Does the actual (time-consuming) work of the merge,
/// but without holding the synchronized lock on the IndexWriter
/// instance.
/// </summary>
private int MergeMiddle(MergePolicy.OneMerge merge)
{
    merge.CheckAborted(directory);

    string mergedName = merge.info.name;

    SegmentMerger merger = null;

    int mergedDocCount = 0;

    SegmentInfos sourceSegments = merge.segments;
    SegmentInfos sourceSegmentsClone = merge.segmentsClone;
    int numSegments = sourceSegments.Count;

    if (infoStream != null)
        Message("merging " + merge.SegString(directory));

    merger = new SegmentMerger(this, mergedName, merge);

    bool success = false;

    // This is try/finally to make sure merger's readers are closed:
    try
    {
        int totDocCount = 0;

        for (int i = 0; i < numSegments; i++)
        {
            SegmentInfo si = sourceSegmentsClone.Info(i);
            IndexReader reader = SegmentReader.Get(true, si, MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet)
            merger.Add(reader);
            totDocCount += reader.NumDocs();
        }

        if (infoStream != null)
            Message("merge: total " + totDocCount + " docs");

        merge.CheckAborted(directory);

        // This is where all the work happens:
        mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores);

        System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount);

        success = true;
    }
    finally
    {
        // Close readers before we attempt to delete now-obsolete
        // segments
        if (merger != null)
            merger.CloseReaders();
    }

    if (!CommitMerge(merge, merger, mergedDocCount))
        // commitMerge will return false if this merge was aborted
        return 0;

    if (merge.useCompoundFile)
    {
        // Maybe force a sync here to allow reclaiming of the disk
        // space used by the segments we just merged:
        if (autoCommit && DoCommitBeforeMergeCFS(merge))
        {
            long size;
            lock (this)
            {
                size = merge.info.SizeInBytes();
            }
            Commit(size);
        }

        success = false;
        string compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION;

        try
        {
            merger.CreateCompoundFile(compoundFileName);
            success = true;
        }
        catch (System.IO.IOException ioe)
        {
            lock (this)
            {
                if (merge.IsAborted())
                {
                    // This can happen if rollback or close(false) is
                    // called -- fall through to the logic below to
                    // remove the partially created CFS:
                    success = true;
                }
                else
                    HandleMergeException(ioe, merge);
            }
        }
        catch (System.Exception t)
        {
            HandleMergeException(t, merge);
        }
        finally
        {
            if (!success)
            {
                if (infoStream != null)
                    Message("hit exception creating compound file during merge");
                lock (this)
                {
                    deleter.DeleteFile(compoundFileName);
                }
            }
        }

        if (merge.IsAborted())
        {
            if (infoStream != null)
                Message("abort merge after building CFS");
            deleter.DeleteFile(compoundFileName);
            return 0;
        }

        lock (this)
        {
            if (segmentInfos.IndexOf(merge.info) == -1 || merge.IsAborted())
            {
                // Our segment (committed in non-compound format) got
                // merged away while we were building the compound
                // format.
                deleter.DeleteFile(compoundFileName);
            }
            else
            {
                merge.info.SetUseCompoundFile(true);
                Checkpoint();
            }
        }
    }

    // Force a sync after committing the merge. Once this sync
    // completes, all index files referenced by the current
    // segmentInfos are on stable storage, so if the OS/machine
    // crashes, or the power cord is yanked, the index will be
    // intact. Note that this is just one (somewhat arbitrary)
    // policy; we could try other policies, like only syncing if
    // it's been > X minutes or more than Y bytes have been written.
    if (autoCommit)
    {
        long size;
        lock (this)
        {
            size = merge.info.SizeInBytes();
        }
        Commit(size);
    }

    return mergedDocCount;
}
private int EnsureContiguousMerge(MergePolicy.OneMerge merge)
{
    int first = segmentInfos.IndexOf(merge.segments.Info(0));
    if (first == -1)
        throw new MergePolicy.MergeException("could not find segment " + merge.segments.Info(0).name + " in current index " + SegString(), directory);

    int numSegments = segmentInfos.Count;

    int numSegmentsToMerge = merge.segments.Count;
    for (int i = 0; i < numSegmentsToMerge; i++)
    {
        SegmentInfo info = merge.segments.Info(i);
        if (first + i >= numSegments || !segmentInfos.Info(first + i).Equals(info))
        {
            if (segmentInfos.IndexOf(info) == -1)
                throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + SegString(), directory);
            else
                throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.SegString(directory) + " vs " + SegString() + "), which IndexWriter (currently) cannot handle", directory);
        }
    }

    return first;
}
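// Aside: the contiguity requirement enforced above reduces to "the
// merge's segments must appear as one uninterrupted run, in order,
// inside the index's segment list". A minimal standalone check over
// segment names (hypothetical helper, not Lucene.Net API):

using System.Collections.Generic;

internal static class ContiguityCheck
{
    /// <summary>Returns the start position of the run, or -1 if the
    /// segments are missing, out of order, or interleaved.</summary>
    public static int FindContiguousRun(IList<string> index, IList<string> toMerge)
    {
        int first = index.IndexOf(toMerge[0]);
        if (first == -1)
            return -1; // first segment is not in the index at all

        for (int i = 1; i < toMerge.Count; i++)
            if (first + i >= index.Count || !index[first + i].Equals(toMerge[i]))
                return -1;

        return first;
    }
}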
/* FIXME if we want to support non-contiguous segment merges */
private bool CommitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startCommitMerge"));

        if (hitOOM)
            return false;

        if (infoStream != null)
            Message("commitMerge: " + merge.SegString(directory) + " index=" + SegString());

        System.Diagnostics.Debug.Assert(merge.registerDone);

        // If the merge was explicitly aborted, or, if rollback() or
        // rollbackTransaction() has been called since our merge
        // started (which results in an unqualified deleter.Refresh()
        // call that will remove any index file that the current
        // segments do not reference), we abort this merge:
        if (merge.IsAborted())
        {
            if (infoStream != null)
                Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");

            deleter.Refresh(merge.info.name);
            return false;
        }

        int start = EnsureContiguousMerge(merge);

        CommitMergedDeletes(merge);
        docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount);

        // Simple optimization: if the doc store we are using has been
        // closed and is now in compound format (but wasn't when we
        // started), then we will switch to the compound format as
        // well:
        string mergeDocStoreSegment = merge.info.GetDocStoreSegment();
        if (mergeDocStoreSegment != null && !merge.info.GetDocStoreIsCompoundFile())
        {
            int size = segmentInfos.Count;
            for (int i = 0; i < size; i++)
            {
                SegmentInfo info = segmentInfos.Info(i);
                string docStoreSegment = info.GetDocStoreSegment();
                if (docStoreSegment != null && docStoreSegment.Equals(mergeDocStoreSegment) && info.GetDocStoreIsCompoundFile())
                {
                    merge.info.SetDocStoreIsCompoundFile(true);
                    break;
                }
            }
        }

        merge.info.SetHasProx(merger.HasProx());

        segmentInfos.RemoveRange(start, merge.segments.Count);
        System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info));
        segmentInfos.Insert(start, merge.info);

        // Must checkpoint before decref'ing so any newly referenced
        // files in the new merge.info are incref'd first:
        Checkpoint();
        DecrefMergeSegments(merge);

        if (merge.optimize)
            segmentsToOptimize[merge.info] = merge.info;

        return true;
    }
}
/// <summary>Carefully merges deletes for the segments we just
/// merged. This is tricky because, although merging will clear all
/// deletes (it compacts the documents), new deletes may have been
/// flushed to the segments since the merge was started. This method
/// "carries over" such new deletes onto the newly merged segment,
/// and saves the resulting deletes file (incrementing the delete
/// generation for merge.info). If no deletes were flushed, no new
/// deletes file is saved.
/// </summary>
private void CommitMergedDeletes(MergePolicy.OneMerge merge)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startCommitMergeDeletes"));

        SegmentInfos sourceSegmentsClone = merge.segmentsClone;
        SegmentInfos sourceSegments = merge.segments;

        if (infoStream != null)
            Message("commitMergeDeletes " + merge.SegString(directory));

        // Carefully merge deletes that occurred after we started
        // merging:
        BitVector deletes = null;
        int docUpto = 0;
        int delCount = 0;

        int numSegmentsToMerge = sourceSegments.Count;
        for (int i = 0; i < numSegmentsToMerge; i++)
        {
            SegmentInfo previousInfo = sourceSegmentsClone.Info(i);
            SegmentInfo currentInfo = sourceSegments.Info(i);

            System.Diagnostics.Debug.Assert(currentInfo.docCount == previousInfo.docCount);

            int docCount = currentInfo.docCount;

            if (previousInfo.HasDeletions())
            {
                // There were deletes on this segment when the merge
                // started. The merge has collapsed away those
                // deletes, but, if new deletes were flushed since the
                // merge started, we must now carefully keep any newly
                // flushed deletes, mapping them to the new docIDs.

                System.Diagnostics.Debug.Assert(currentInfo.HasDeletions());

                // Load deletes present @ start of merge, for this segment:
                BitVector previousDeletes = new BitVector(previousInfo.dir, previousInfo.GetDelFileName());

                if (!currentInfo.GetDelFileName().Equals(previousInfo.GetDelFileName()))
                {
                    // This means this segment has had new deletes
                    // committed since we started the merge, so we
                    // must merge them:
                    if (deletes == null)
                        deletes = new BitVector(merge.info.docCount);

                    BitVector currentDeletes = new BitVector(currentInfo.dir, currentInfo.GetDelFileName());
                    for (int j = 0; j < docCount; j++)
                    {
                        if (previousDeletes.Get(j))
                            System.Diagnostics.Debug.Assert(currentDeletes.Get(j));
                        else
                        {
                            if (currentDeletes.Get(j))
                            {
                                deletes.Set(docUpto);
                                delCount++;
                            }
                            docUpto++;
                        }
                    }
                }
                else
                    docUpto += docCount - previousDeletes.Count();
            }
            else if (currentInfo.HasDeletions())
            {
                // This segment had no deletes before but now it does:
                if (deletes == null)
                    deletes = new BitVector(merge.info.docCount);

                BitVector currentDeletes = new BitVector(directory, currentInfo.GetDelFileName());
                for (int j = 0; j < docCount; j++)
                {
                    if (currentDeletes.Get(j))
                    {
                        deletes.Set(docUpto);
                        delCount++;
                    }
                    docUpto++;
                }
            }
            else
            {
                // No deletes before or after
                docUpto += currentInfo.docCount;
            }
        }

        if (deletes != null)
        {
            merge.info.AdvanceDelGen();
            if (infoStream != null)
                Message("commit merge deletes to " + merge.info.GetDelFileName());
            deletes.Write(directory, merge.info.GetDelFileName());
            merge.info.SetDelCount(delCount);
            System.Diagnostics.Debug.Assert(delCount == deletes.Count());
        }
    }
}
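// Aside: BitVector above is Lucene's bitset with an on-disk form,
// used here to represent per-segment deletes. For readers unfamiliar
// with it, a minimal in-memory sketch of just the operations this
// method relies on (Set, Get, Count) follows; SimpleBitVector is
// hypothetical and omits the file I/O that BitVector.Write performs.
internal sealed class SimpleBitVector
{
    private readonly byte[] bits;
    private int count;

    public SimpleBitVector(int n)
    {
        bits = new byte[(n + 7) / 8];
    }

    public void Set(int bit)
    {
        if (!Get(bit))
            count++;
        bits[bit >> 3] |= (byte) (1 << (bit & 7));
    }

    public bool Get(int bit)
    {
        return (bits[bit >> 3] & (1 << (bit & 7))) != 0;
    }

    /// <summary>Number of set bits. Lucene's BitVector also tracks
    /// this eagerly so Count() is cheap.</summary>
    public int Count()
    {
        return count;
    }
}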
/// <summary>Does the actual (time-consuming) work of the merge,
/// but without holding the synchronized lock on the IndexWriter
/// instance.
/// </summary>
private int MergeMiddle(MergePolicy.OneMerge merge)
{
    merge.CheckAborted(directory);

    System.String mergedName = merge.info.name;

    SegmentMerger merger = null;

    int mergedDocCount = 0;

    SegmentInfos sourceSegments = merge.segments;
    int numSegments = sourceSegments.Count;

    if (infoStream != null)
        Message("merging " + merge.SegString(directory));

    merger = new SegmentMerger(this, mergedName, merge);

    merge.readers = new SegmentReader[numSegments];
    merge.readersClone = new SegmentReader[numSegments];

    bool mergeDocStores = false;

    System.Collections.Hashtable dss = new System.Collections.Hashtable();

    // This is try/finally to make sure merger's readers are closed:
    bool success = false;
    try
    {
        int totDocCount = 0;

        for (int i = 0; i < numSegments; i++)
        {
            SegmentInfo info = sourceSegments.Info(i);

            // Hold onto the "live" reader; we will use this to
            // commit merged deletes
            SegmentReader reader = merge.readers[i] = readerPool.Get(info, merge.mergeDocStores, MERGE_READ_BUFFER_SIZE, -1);

            // We clone the segment readers because other deletes may
            // come in while we're merging, so we need readers that
            // will not change
            SegmentReader clone = merge.readersClone[i] = (SegmentReader) reader.Clone(true);
            merger.Add(clone);

            if (clone.HasDeletions())
                mergeDocStores = true;

            if (info.GetDocStoreOffset() != -1)
                dss[info.GetDocStoreSegment()] = info.GetDocStoreSegment();

            totDocCount += clone.NumDocs();
        }

        if (infoStream != null)
            Message("merge: total " + totDocCount + " docs");

        merge.CheckAborted(directory);

        // If deletions have arrived and it has now become necessary
        // to merge doc stores, go and open them:
        if (mergeDocStores && !merge.mergeDocStores)
        {
            merge.mergeDocStores = true;
            lock (this)
            {
                System.String key = docWriter.GetDocStoreSegment();
                if (key != null && dss.Contains(key))
                {
                    if (infoStream != null)
                        Message("now flush at mergeMiddle");
                    DoFlush(true, false);
                }
            }

            for (int i = 0; i < numSegments; i++)
                merge.readersClone[i].OpenDocStores();

            // Clear DSS
            lock (this)
            {
                merge.info.SetDocStore(-1, null, false);
            }
        }

        // This is where all the work happens:
        mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores);

        System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount);

        // TODO: in the non-realtime case, we may want to only keep
        // deletes (it's costly to open the entire reader when we just
        // need deletes)
        SegmentReader mergedReader = readerPool.Get(merge.info, false, BufferedIndexInput.BUFFER_SIZE, -1);
        try
        {
            if (poolReaders && mergedSegmentWarmer != null)
                mergedSegmentWarmer.Warm(mergedReader);

            if (!CommitMerge(merge, merger, mergedDocCount, mergedReader))
                // commitMerge will return false if this merge was aborted
                return 0;
        }
        finally
        {
            lock (this)
            {
                readerPool.Release(mergedReader);
            }
        }

        success = true;
    }
    finally
    {
        lock (this)
        {
            if (!success)
            {
                // Suppress any new exceptions so we throw the
                // original cause
                for (int i = 0; i < numSegments; i++)
                {
                    if (merge.readers[i] != null)
                    {
                        try
                        {
                            readerPool.Release(merge.readers[i], true);
                        }
                        catch (System.Exception)
                        {
                        }
                    }

                    if (merge.readersClone[i] != null)
                    {
                        try
                        {
                            merge.readersClone[i].Close();
                        }
                        catch (System.Exception)
                        {
                        }
                        // This was a private clone and we had the only reference
                        System.Diagnostics.Debug.Assert(merge.readersClone[i].GetRefCount() == 0);
                    }
                }
            }
            else
            {
                for (int i = 0; i < numSegments; i++)
                {
                    if (merge.readers[i] != null)
                        readerPool.Release(merge.readers[i], true);

                    if (merge.readersClone[i] != null)
                    {
                        merge.readersClone[i].Close();
                        // This was a private clone and we had the only reference
                        System.Diagnostics.Debug.Assert(merge.readersClone[i].GetRefCount() == 0);
                    }
                }
            }
        }
    }

    // Must checkpoint before decref'ing so any newly referenced
    // files in the new merge.info are incref'd first:
    lock (this)
    {
        deleter.Checkpoint(segmentInfos, false);
    }
    DecrefMergeSegments(merge);

    if (merge.useCompoundFile)
    {
        // Maybe force a sync here to allow reclaiming of the disk
        // space used by the segments we just merged:
        if (autoCommit && DoCommitBeforeMergeCFS(merge))
        {
            long size;
            lock (this)
            {
                size = merge.info.SizeInBytes();
            }
            Commit(size);
        }

        success = false;
        System.String compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION;

        try
        {
            merger.CreateCompoundFile(compoundFileName);
            success = true;
        }
        catch (System.IO.IOException ioe)
        {
            lock (this)
            {
                if (merge.IsAborted())
                {
                    // This can happen if rollback or close(false) is
                    // called -- fall through to the logic below to
                    // remove the partially created CFS:
                    success = true;
                }
                else
                    HandleMergeException(ioe, merge);
            }
        }
        catch (System.Exception t)
        {
            HandleMergeException(t, merge);
        }
        finally
        {
            if (!success)
            {
                if (infoStream != null)
                    Message("hit exception creating compound file during merge");
                lock (this)
                {
                    deleter.DeleteFile(compoundFileName);
                }
            }
        }

        if (merge.IsAborted())
        {
            if (infoStream != null)
                Message("abort merge after building CFS");
            deleter.DeleteFile(compoundFileName);
            return 0;
        }

        lock (this)
        {
            if (segmentInfos.IndexOf(merge.info) == -1 || merge.IsAborted())
            {
                // Our segment (committed in non-compound format) got
                // merged away while we were building the compound
                // format.
                deleter.DeleteFile(compoundFileName);
            }
            else
            {
                merge.info.SetUseCompoundFile(true);
                Checkpoint();
            }
        }
    }

    // Force a sync after committing the merge. Once this sync
    // completes, all index files referenced by the current
    // segmentInfos are on stable storage, so if the OS/machine
    // crashes, or the power cord is yanked, the index will be
    // intact. Note that this is just one (somewhat arbitrary)
    // policy; we could try other policies, like only syncing if
    // it's been > X minutes or more than Y bytes have been written.
    if (autoCommit)
    {
        long size;
        lock (this)
        {
            size = merge.info.SizeInBytes();
        }
        Commit(size);
    }

    return mergedDocCount;
}
/// <summary>Does the actual (time-consuming) work of the merge,
/// but without holding the synchronized lock on the IndexWriter
/// instance.
/// </summary>
private int MergeMiddle(MergePolicy.OneMerge merge)
{
    merge.CheckAborted(directory);

    System.String mergedName = merge.info.name;

    SegmentMerger merger = null;

    int mergedDocCount = 0;

    SegmentInfos sourceSegments = merge.segments;
    SegmentInfos sourceSegmentsClone = merge.segmentsClone;
    int numSegments = sourceSegments.Count;

    if (infoStream != null)
        Message("merging " + merge.SegString(directory));

    merger = new SegmentMerger(this, mergedName, merge);

    // This is try/finally to make sure merger's readers are closed:
    bool success = false;
    try
    {
        int totDocCount = 0;

        for (int i = 0; i < numSegments; i++)
        {
            SegmentInfo si = sourceSegmentsClone.Info(i);
            IndexReader reader = SegmentReader.Get(si, MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet)
            merger.Add(reader);
            totDocCount += reader.NumDocs();
        }

        if (infoStream != null)
            Message("merge: total " + totDocCount + " docs");

        merge.CheckAborted(directory);

        mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores);

        System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount);

        success = true;
    }
    finally
    {
        // Close readers before we attempt to delete now-obsolete
        // segments
        if (merger != null)
            merger.CloseReaders();

        if (!success)
        {
            if (infoStream != null)
                Message("hit exception during merge; now refresh deleter on segment " + mergedName);
            lock (this)
            {
                AddMergeException(merge);
                deleter.Refresh(mergedName);
            }
        }
    }

    if (!CommitMerge(merge))
        // commitMerge will return false if this merge was aborted
        return 0;

    if (merge.useCompoundFile)
    {
        success = false;
        bool skip = false;
        System.String compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION;

        try
        {
            try
            {
                merger.CreateCompoundFile(compoundFileName);
                success = true;
            }
            catch (System.IO.IOException ioe)
            {
                lock (this)
                {
                    if (segmentInfos.IndexOf(merge.info) == -1)
                    {
                        // If another merge kicked in and merged our
                        // new segment away while we were trying to
                        // build the compound file, we can hit a
                        // FileNotFoundException and possibly an
                        // IOException over NFS. We can tell this has
                        // happened because our SegmentInfo is no
                        // longer in the segments; if this has
                        // happened, it is safe to ignore the
                        // exception & skip finishing/committing our
                        // compound file creation.
                        if (infoStream != null)
                            Message("hit exception creating compound file; ignoring it because our info (segment " + merge.info.name + ") has been merged away");
                        skip = true;
                    }
                    else
                        throw ioe;
                }
            }
        }
        finally
        {
            if (!success)
            {
                if (infoStream != null)
                    Message("hit exception creating compound file during merge: skip=" + skip);
                lock (this)
                {
                    if (!skip)
                        AddMergeException(merge);
                    deleter.DeleteFile(compoundFileName);
                }
            }
        }

        if (!skip)
        {
            lock (this)
            {
                if (skip || segmentInfos.IndexOf(merge.info) == -1 || merge.IsAborted())
                {
                    // Our segment (committed in non-compound format)
                    // got merged away while we were building the
                    // compound format.
                    deleter.DeleteFile(compoundFileName);
                }
                else
                {
                    success = false;
                    try
                    {
                        merge.info.SetUseCompoundFile(true);
                        Checkpoint();
                        success = true;
                    }
                    finally
                    {
                        if (!success)
                        {
                            if (infoStream != null)
                                Message("hit exception checkpointing compound file during merge");

                            // Must rollback:
                            AddMergeException(merge);
                            merge.info.SetUseCompoundFile(false);
                            DeletePartialSegmentsFile();
                            deleter.DeleteFile(compoundFileName);
                        }
                    }

                    // Give the deleter a chance to remove files now:
                    deleter.Checkpoint(segmentInfos, autoCommit);
                }
            }
        }
    }

    return mergedDocCount;
}
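// Aside: the compound-file handling above follows a general pattern:
// build a new file, and if anything goes wrong, delete the partial
// output before letting the error propagate. A minimal standalone
// sketch of that pattern (SafeFileBuilder is a hypothetical helper,
// not Lucene.Net API):

using System.IO;

internal static class SafeFileBuilder
{
    /// <summary>Runs writer against a newly created file; on any
    /// failure the partially written file is removed before the
    /// exception propagates.</summary>
    public static void Build(string path, System.Action<Stream> writer)
    {
        bool success = false;
        try
        {
            using (FileStream output = new FileStream(path, FileMode.CreateNew))
                writer(output);
            success = true;
        }
        finally
        {
            if (!success && File.Exists(path))
                File.Delete(path); // remove the partially created file
        }
    }
}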
/// <summary>Checks whether this merge involves any segments
/// already participating in a merge. If not, this merge is
/// "registered", meaning we record that its segments are now
/// participating in a merge, and true is returned. Otherwise
/// (the merge conflicts) false is returned.
/// </summary>
internal bool RegisterMerge(MergePolicy.OneMerge merge)
{
    lock (this)
    {
        if (merge.registerDone)
            return true;

        int count = merge.segments.Count;
        bool isExternal = false;
        for (int i = 0; i < count; i++)
        {
            SegmentInfo info = merge.segments.Info(i);
            if (mergingSegments.Contains(info))
                return false;
            if (segmentInfos.IndexOf(info) == -1)
                return false;
            if (info.dir != directory)
                isExternal = true;
        }

        pendingMerges.Add(merge);

        if (infoStream != null)
            Message("add merge to pendingMerges: " + merge.SegString(directory) + " [total " + pendingMerges.Count + " pending]");

        merge.mergeGen = mergeGen;
        merge.isExternal = isExternal;

        // OK, it does not conflict; now record that this merge is
        // running (while synchronized) to avoid the race condition
        // where two conflicting merges from different threads start:
        for (int i = 0; i < count; i++)
            if (!mergingSegments.Contains(merge.segments.Info(i)))
                mergingSegments.Add(merge.segments.Info(i), merge.segments.Info(i));

        // Merge is now registered
        merge.registerDone = true;
        return true;
    }
}
/// <summary>Merges the indicated segments, replacing them in the
/// stack with a single segment.
/// </summary>
public /*internal*/ void Merge(MergePolicy.OneMerge merge)
{
    System.Diagnostics.Debug.Assert(merge.registerDone);
    System.Diagnostics.Debug.Assert(!merge.optimize || merge.maxNumSegmentsOptimize > 0);

    bool success = false;

    try
    {
        try
        {
            try
            {
                if (merge.info == null)
                    MergeInit(merge);

                if (infoStream != null)
                    Message("now merge\n merge=" + merge.SegString(directory) + "\n index=" + SegString());

                MergeMiddle(merge);
                success = true;
            }
            catch (MergePolicy.MergeAbortedException e)
            {
                merge.SetException(e);
                AddMergeException(merge);
                // We can ignore this exception, unless the merge
                // involves segments from external directories, in
                // which case we must throw it so, for example, the
                // rollbackTransaction code in addIndexes* is
                // executed.
                if (merge.isExternal)
                    throw e;
            }
        }
        finally
        {
            lock (this)
            {
                try
                {
                    MergeFinish(merge);

                    if (!success)
                    {
                        if (infoStream != null)
                            Message("hit exception during merge");
                        AddMergeException(merge);
                        if (merge.info != null && !segmentInfos.Contains(merge.info))
                            deleter.Refresh(merge.info.name);
                    }

                    // This merge (and, generally, any change to the
                    // segments) may now enable new merges, so we call
                    // the merge policy & update pending merges.
                    if (success && !merge.IsAborted() && !closed && !closing)
                        UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
                }
                finally
                {
                    runningMerges.Remove(merge);
                    // Optimize may be waiting on the final optimize
                    // merge to finish; and finishMerges() may be
                    // waiting for all merges to finish:
                    System.Threading.Monitor.PulseAll(this);
                }
            }
        }
    }
    catch (OutOfMemoryException oom)
    {
        hitOOM = true;
        throw oom;
    }
}
/* FIXME if we want to support non-contiguous segment merges */
private bool CommitMerge(MergePolicy.OneMerge merge)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(merge.registerDone);

        if (hitOOM)
            return false;

        if (infoStream != null)
            Message("commitMerge: " + merge.SegString(directory));

        // If the merge was explicitly aborted, or, if abort() or
        // rollbackTransaction() has been called since our merge
        // started (which results in an unqualified deleter.Refresh()
        // call that will remove any index file that the current
        // segments do not reference), we abort this merge:
        if (merge.IsAborted())
        {
            if (infoStream != null)
                Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");

            System.Diagnostics.Debug.Assert(merge.increfDone);
            DecrefMergeSegments(merge);
            deleter.Refresh(merge.info.name);
            return false;
        }

        bool success = false;

        int start;

        try
        {
            SegmentInfos sourceSegmentsClone = merge.segmentsClone;
            SegmentInfos sourceSegments = merge.segments;

            start = EnsureContiguousMerge(merge);
            if (infoStream != null)
                Message("commitMerge " + merge.SegString(directory));

            // Carefully merge deletes that occurred after we started
            // merging:
            BitVector deletes = null;
            int docUpto = 0;

            int numSegmentsToMerge = sourceSegments.Count;
            for (int i = 0; i < numSegmentsToMerge; i++)
            {
                SegmentInfo previousInfo = sourceSegmentsClone.Info(i);
                SegmentInfo currentInfo = sourceSegments.Info(i);

                System.Diagnostics.Debug.Assert(currentInfo.docCount == previousInfo.docCount);

                int docCount = currentInfo.docCount;

                if (previousInfo.HasDeletions())
                {
                    // There were deletes on this segment when the
                    // merge started. The merge has collapsed away
                    // those deletes, but, if new deletes were flushed
                    // since the merge started, we must now carefully
                    // keep any newly flushed deletes, mapping them to
                    // the new docIDs.

                    System.Diagnostics.Debug.Assert(currentInfo.HasDeletions());

                    // Load deletes present @ start of merge, for this segment:
                    BitVector previousDeletes = new BitVector(previousInfo.dir, previousInfo.GetDelFileName());

                    if (!currentInfo.GetDelFileName().Equals(previousInfo.GetDelFileName()))
                    {
                        // This means this segment has had new deletes
                        // committed since we started the merge, so we
                        // must merge them:
                        if (deletes == null)
                            deletes = new BitVector(merge.info.docCount);

                        BitVector currentDeletes = new BitVector(currentInfo.dir, currentInfo.GetDelFileName());
                        for (int j = 0; j < docCount; j++)
                        {
                            if (previousDeletes.Get(j))
                                System.Diagnostics.Debug.Assert(currentDeletes.Get(j));
                            else
                            {
                                if (currentDeletes.Get(j))
                                    deletes.Set(docUpto);
                                docUpto++;
                            }
                        }
                    }
                    else
                        docUpto += docCount - previousDeletes.Count();
                }
                else if (currentInfo.HasDeletions())
                {
                    // This segment had no deletes before but now it
                    // does:
                    if (deletes == null)
                        deletes = new BitVector(merge.info.docCount);

                    BitVector currentDeletes = new BitVector(directory, currentInfo.GetDelFileName());
                    for (int j = 0; j < docCount; j++)
                    {
                        if (currentDeletes.Get(j))
                            deletes.Set(docUpto);
                        docUpto++;
                    }
                }
                else
                {
                    // No deletes before or after
                    docUpto += currentInfo.docCount;
                }

                merge.CheckAborted(directory);
            }

            if (deletes != null)
            {
                merge.info.AdvanceDelGen();
                deletes.Write(directory, merge.info.GetDelFileName());
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                if (infoStream != null)
                    Message("hit exception creating merged deletes file");
                deleter.Refresh(merge.info.name);
            }
        }

        // Simple optimization: if the doc store we are using has been
        // closed and is now in compound format (but wasn't when we
        // started), then we will switch to the compound format as
        // well:
        System.String mergeDocStoreSegment = merge.info.GetDocStoreSegment();
        if (mergeDocStoreSegment != null && !merge.info.GetDocStoreIsCompoundFile())
        {
            int size = segmentInfos.Count;
            for (int i = 0; i < size; i++)
            {
                SegmentInfo info = segmentInfos.Info(i);
                System.String docStoreSegment = info.GetDocStoreSegment();
                if (docStoreSegment != null && docStoreSegment.Equals(mergeDocStoreSegment) && info.GetDocStoreIsCompoundFile())
                {
                    merge.info.SetDocStoreIsCompoundFile(true);
                    break;
                }
            }
        }

        success = false;
        SegmentInfos rollback = null;
        try
        {
            rollback = (SegmentInfos) segmentInfos.Clone();

            ((System.Collections.IList) ((System.Collections.ArrayList) segmentInfos).GetRange(start, merge.segments.Count)).Clear();
            segmentInfos.Insert(start, merge.info);
            Checkpoint();
            success = true;
        }
        finally
        {
            if (!success && rollback != null)
            {
                if (infoStream != null)
                    Message("hit exception when checkpointing after merge");

                segmentInfos.Clear();
                segmentInfos.AddRange(rollback);
                DeletePartialSegmentsFile();
                deleter.Refresh(merge.info.name);
            }
        }

        if (merge.optimize)
            segmentsToOptimize.Add(merge.info, merge.info);

        // Must checkpoint before decref'ing so any newly referenced
        // files in the new merge.info are incref'd first:
        deleter.Checkpoint(segmentInfos, autoCommit);

        DecrefMergeSegments(merge);

        return true;
    }
}