private void _MergeInit(MergePolicy.OneMerge merge)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startMergeInit"));
        System.Diagnostics.Debug.Assert(merge.registerDone);
        System.Diagnostics.Debug.Assert(!merge.optimize || merge.maxNumSegmentsOptimize > 0);

        if (hitOOM)
        {
            throw new System.SystemException("this writer hit an OutOfMemoryError; cannot merge");
        }

        if (merge.info != null)
            // mergeInit already done
            return;

        if (merge.IsAborted())
            return;

        bool changed = ApplyDeletes();

        // If autoCommit == true then all deletes should have
        // been flushed when we flushed the last segment
        System.Diagnostics.Debug.Assert(!changed || !autoCommit);

        SegmentInfos sourceSegments = merge.segments;
        int end = sourceSegments.Count;

        // Check whether this merge will allow us to skip
        // merging the doc stores (stored field & vectors).
        // This is a very substantial optimization (saves tons
        // of IO) that can only be applied with
        // autoCommit=false.

        Directory lastDir = directory;
        System.String lastDocStoreSegment = null;
        int next = -1;

        bool mergeDocStores = false;
        bool doFlushDocStore = false;
        System.String currentDocStoreSegment = docWriter.GetDocStoreSegment();

        // Test each segment to be merged: check if we need to
        // flush/merge doc stores
        for (int i = 0; i < end; i++)
        {
            SegmentInfo si = sourceSegments.Info(i);

            // If it has deletions we must merge the doc stores
            if (si.HasDeletions())
                mergeDocStores = true;

            // If it has its own (private) doc stores we must
            // merge the doc stores
            if (-1 == si.GetDocStoreOffset())
                mergeDocStores = true;

            // If it has a different doc store segment than
            // previous segments, we must merge the doc stores
            System.String docStoreSegment = si.GetDocStoreSegment();
            if (docStoreSegment == null)
                mergeDocStores = true;
            else if (lastDocStoreSegment == null)
                lastDocStoreSegment = docStoreSegment;
            else if (!lastDocStoreSegment.Equals(docStoreSegment))
                mergeDocStores = true;

            // Segments' docStoreOffsets must be in-order,
            // contiguous.  For the default merge policy now
            // this will always be the case but for an arbitrary
            // merge policy this may not be the case
            if (-1 == next)
                next = si.GetDocStoreOffset() + si.docCount;
            else if (next != si.GetDocStoreOffset())
                mergeDocStores = true;
            else
                next = si.GetDocStoreOffset() + si.docCount;

            // If the segment comes from a different directory
            // we must merge
            if (lastDir != si.dir)
                mergeDocStores = true;

            // If the segment is referencing the current "live"
            // doc store outputs then we must merge
            if (si.GetDocStoreOffset() != -1 && currentDocStoreSegment != null && si.GetDocStoreSegment().Equals(currentDocStoreSegment))
            {
                doFlushDocStore = true;
            }
        }

        // If a mergedSegmentWarmer is installed, we must merge
        // the doc stores because we will open a full
        // SegmentReader on the merged segment:
        if (!mergeDocStores && mergedSegmentWarmer != null && currentDocStoreSegment != null && lastDocStoreSegment != null && lastDocStoreSegment.Equals(currentDocStoreSegment))
        {
            mergeDocStores = true;
        }

        int docStoreOffset;
        System.String docStoreSegment2;
        bool docStoreIsCompoundFile;

        if (mergeDocStores)
        {
            docStoreOffset = -1;
            docStoreSegment2 = null;
            docStoreIsCompoundFile = false;
        }
        else
        {
            SegmentInfo si = sourceSegments.Info(0);
            docStoreOffset = si.GetDocStoreOffset();
            docStoreSegment2 = si.GetDocStoreSegment();
            docStoreIsCompoundFile = si.GetDocStoreIsCompoundFile();
        }

        if (mergeDocStores && doFlushDocStore)
        {
            // SegmentMerger intends to merge the doc stores
            // (stored fields, vectors), and at least one of the
            // segments to be merged refers to the currently
            // live doc stores.

            // TODO: if we know we are about to merge away these
            // newly flushed doc store files then we should not
            // make compound file out of them...
            if (infoStream != null)
                Message("now flush at merge");
            DoFlush(true, false);
        }

        merge.mergeDocStores = mergeDocStores;

        // Bind a new segment name here so even with
        // ConcurrentMergePolicy we keep deterministic segment
        // names.
        merge.info = new SegmentInfo(NewSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment2, docStoreIsCompoundFile, false);

        System.Collections.Generic.IDictionary<string, string> details = new System.Collections.Generic.Dictionary<string, string>();
        details["optimize"] = merge.optimize + "";
        details["mergeFactor"] = end + "";
        details["mergeDocStores"] = mergeDocStores + "";
        SetDiagnostics(merge.info, "merge", details);

        // Also enroll the merged segment into mergingSegments;
        // this prevents it from getting selected for a merge
        // after our merge is done but while we are building the
        // CFS:
        mergingSegments[merge.info] = merge.info;
    }
}
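// Illustrative sketch only (not part of the original source): the per-segment
// doc-store test above boils down to a contiguity check. Two adjacent segments
// can keep sharing a doc store only if they name the same doc store segment and
// the second one starts exactly where the first one ends. The helper name below
// is hypothetical; the accessors are the ones used in _MergeInit.
private static bool DocStoresAreContiguous(SegmentInfo first, SegmentInfo second)
{
    return first.GetDocStoreSegment() != null
        && first.GetDocStoreSegment().Equals(second.GetDocStoreSegment())
        && second.GetDocStoreOffset() == first.GetDocStoreOffset() + first.docCount;
}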
/// <summary>Does the actual (time-consuming) work of the merge,
/// but without holding synchronized lock on IndexWriter
/// instance
/// </summary>
private int MergeMiddle(MergePolicy.OneMerge merge)
{
    merge.CheckAborted(directory);

    System.String mergedName = merge.info.name;

    SegmentMerger merger = null;

    int mergedDocCount = 0;

    SegmentInfos sourceSegments = merge.segments;
    int numSegments = sourceSegments.Count;

    if (infoStream != null)
        Message("merging " + merge.SegString(directory));

    merger = new SegmentMerger(this, mergedName, merge);

    merge.readers = new SegmentReader[numSegments];
    merge.readersClone = new SegmentReader[numSegments];

    bool mergeDocStores = false;

    System.Collections.Hashtable dss = new System.Collections.Hashtable();

    String currentDocStoreSegment;
    lock (this)
    {
        currentDocStoreSegment = docWriter.GetDocStoreSegment();
    }
    bool currentDSSMerged = false;

    // This is try/finally to make sure merger's readers are
    // closed:
    bool success = false;
    try
    {
        int totDocCount = 0;

        for (int i = 0; i < numSegments; i++)
        {
            SegmentInfo info = sourceSegments.Info(i);

            // Hold onto the "live" reader; we will use this to
            // commit merged deletes
            SegmentReader reader = merge.readers[i] = readerPool.Get(info, merge.mergeDocStores, MERGE_READ_BUFFER_SIZE, -1);

            // We clone the segment readers because other
            // deletes may come in while we're merging so we
            // need readers that will not change
            SegmentReader clone = merge.readersClone[i] = (SegmentReader)reader.Clone(true);
            merger.Add(clone);

            if (clone.HasDeletions())
            {
                mergeDocStores = true;
            }

            if (info.GetDocStoreOffset() != -1 && currentDocStoreSegment != null)
            {
                currentDSSMerged |= currentDocStoreSegment.Equals(info.GetDocStoreSegment());
            }

            totDocCount += clone.NumDocs();
        }

        if (infoStream != null)
        {
            Message("merge: total " + totDocCount + " docs");
        }

        merge.CheckAborted(directory);

        // If deletions have arrived and it has now become
        // necessary to merge doc stores, go and open them:
        if (mergeDocStores && !merge.mergeDocStores)
        {
            merge.mergeDocStores = true;
            lock (this)
            {
                if (currentDSSMerged)
                {
                    if (infoStream != null)
                    {
                        Message("now flush at mergeMiddle");
                    }
                    DoFlush(true, false);
                }
            }

            for (int i = 0; i < numSegments; i++)
            {
                merge.readersClone[i].OpenDocStores();
            }

            // Clear DSS
            merge.info.SetDocStore(-1, null, false);
        }

        // This is where all the work happens:
        mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores);

        System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount);

        if (merge.useCompoundFile)
        {
            success = false;
            string compoundFileName = IndexFileNames.SegmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION);

            try
            {
                if (infoStream != null)
                {
                    Message("create compound file " + compoundFileName);
                }
                merger.CreateCompoundFile(compoundFileName);
                success = true;
            }
            catch (System.IO.IOException ioe)
            {
                lock (this)
                {
                    if (merge.IsAborted())
                    {
                        // This can happen if rollback or close(false)
                        // is called -- fall through to logic below to
                        // remove the partially created CFS:
                    }
                    else
                    {
                        HandleMergeException(ioe, merge);
                    }
                }
            }
            catch (Exception t)
            {
                HandleMergeException(t, merge);
            }
            finally
            {
                if (!success)
                {
                    if (infoStream != null)
                    {
                        Message("hit exception creating compound file during merge");
                    }
                    lock (this)
                    {
                        deleter.DeleteFile(compoundFileName);
                        deleter.DeleteNewFiles(merger.GetMergedFiles());
                    }
                }
            }

            success = false;

            lock (this)
            {
                // Delete new non-CFS files directly: they were never
                // registered with IFD
                deleter.DeleteNewFiles(merger.GetMergedFiles());

                if (merge.IsAborted())
                {
                    if (infoStream != null)
                    {
                        Message("abort merge after building CFS");
                    }
                    deleter.DeleteFile(compoundFileName);
                    return 0;
                }
            }

            merge.info.SetUseCompoundFile(true);
        }

        int termsIndexDivisor;
        bool loadDocStores;

        // If the merged segment warmer was not installed when
        // this merge was started, causing us to not force
        // the docStores to close, we can't warm it now
        bool canWarm = merge.info.GetDocStoreSegment() == null || currentDocStoreSegment == null || !merge.info.GetDocStoreSegment().Equals(currentDocStoreSegment);

        if (poolReaders && mergedSegmentWarmer != null && canWarm)
        {
            // Load terms index & doc stores so the segment
            // warmer can run searches, load documents/term
            // vectors
            termsIndexDivisor = readerTermsIndexDivisor;
            loadDocStores = true;
        }
        else
        {
            termsIndexDivisor = -1;
            loadDocStores = false;
        }

        // TODO: in the non-realtime case, we may want to only
        // keep deletes (it's costly to open entire reader
        // when we just need deletes)
        SegmentReader mergedReader = readerPool.Get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor);
        try
        {
            if (poolReaders && mergedSegmentWarmer != null)
            {
                mergedSegmentWarmer.Warm(mergedReader);
            }
            if (!CommitMerge(merge, merger, mergedDocCount, mergedReader))
            {
                // commitMerge will return false if this merge was aborted
                return 0;
            }
        }
        finally
        {
            lock (this)
            {
                readerPool.Release(mergedReader);
            }
        }

        success = true;
    }
    finally
    {
        // Readers are already closed in commitMerge if we didn't hit
        // an exc:
        if (!success)
        {
            CloseMergeReaders(merge, true);
        }
    }

    merge.mergeDone = true;

    lock (mergeScheduler)
    {
        System.Threading.Monitor.PulseAll(mergeScheduler);
    }

    // Force a sync after committing the merge. Once this
    // sync completes then all index files referenced by the
    // current segmentInfos are on stable storage so if the
    // OS/machine crashes, or power cord is yanked, the
    // index will be intact. Note that this is just one
    // (somewhat arbitrary) policy; we could try other
    // policies like only sync if it's been > X minutes or
    // more than Y bytes have been written, etc.
    if (autoCommit)
    {
        long size;
        lock (this)
        {
            size = merge.info.SizeInBytes();
        }
        Commit(size);
    }

    return mergedDocCount;
}
/* FIXME if we want to support non-contiguous segment merges */
private bool CommitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount, SegmentReader mergedReader)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startCommitMerge"));

        if (hitOOM)
        {
            throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete merge");
        }

        if (infoStream != null)
            Message("commitMerge: " + merge.SegString(directory) + " index=" + SegString());

        System.Diagnostics.Debug.Assert(merge.registerDone);

        // If merge was explicitly aborted, or, if rollback() or
        // rollbackTransaction() had been called since our merge
        // started (which results in an unqualified
        // deleter.refresh() call that will remove any index
        // file that current segments does not reference), we
        // abort this merge
        if (merge.IsAborted())
        {
            if (infoStream != null)
                Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");
            return false;
        }

        int start = EnsureContiguousMerge(merge);

        CommitMergedDeletes(merge, mergedReader);
        docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount);

        // If the doc store we are using has been closed and
        // is now in compound format (but wasn't when we
        // started), then we will switch to the compound
        // format as well:
        SetMergeDocStoreIsCompoundFile(merge);

        merge.info.SetHasProx(merger.HasProx());

        ((System.Collections.IList) ((System.Collections.ArrayList) segmentInfos).GetRange(start, start + merge.segments.Count - start)).Clear();
        System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info));
        segmentInfos.Insert(start, merge.info);

        CloseMergeReaders(merge, false);

        // Must note the change to segmentInfos so any commits
        // in-flight don't lose it:
        Checkpoint();

        // If the merged segments had pending changes, clear
        // them so that they don't bother writing them to
        // disk, updating SegmentInfo, etc.:
        readerPool.Clear(merge.segments);

        if (merge.optimize)
        {
            // cascade the optimize:
            segmentsToOptimize[merge.info] = merge.info;
        }
        return true;
    }
}
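// Illustrative sketch only (not part of the original source): the doc-ID
// arithmetic behind docWriter.RemapDeletes(...) above. Surviving documents are
// renumbered densely by the merge, so a buffered delete recorded against an old
// doc ID must be shifted by the deletes the merge collapsed away. The numbers
// here are a made-up example; the helper exists only to show the arithmetic.
private static int RemapDeleteExample()
{
    // Segment 0 held 5 docs, one of which was already deleted when the merge
    // started, so it contributes only 4 docs to the merged segment; segment 1
    // therefore begins at merged doc ID 4 rather than 5.
    int seg0DocCount = 5;
    int seg0DelCount = 1;
    int newBaseOfSegment1 = seg0DocCount - seg0DelCount; // == 4
    // A buffered delete against doc 3 of segment 1 must now be applied to
    // merged doc ID 4 + 3 = 7.
    return newBaseOfSegment1 + 3;
}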
/// <summary> Merges the indicated segments, replacing them in the stack with a
/// single segment.
/// </summary>
internal void Merge(MergePolicy.OneMerge merge)
{
    bool success = false;

    try
    {
        try
        {
            try
            {
                MergeInit(merge);

                if (infoStream != null)
                {
                    Message("now merge\n merge=" + merge.SegString(directory) + "\n merge=" + merge + "\n index=" + SegString());
                }

                MergeMiddle(merge);
                MergeSuccess(merge);
                success = true;
            }
            catch (System.Exception t)
            {
                HandleMergeException(t, merge);
            }
        }
        finally
        {
            lock (this)
            {
                MergeFinish(merge);

                if (!success)
                {
                    if (infoStream != null)
                        Message("hit exception during merge");
                    if (merge.info != null && !segmentInfos.Contains(merge.info))
                        deleter.Refresh(merge.info.name);
                }

                // This merge (and, generally, any change to the
                // segments) may now enable new merges, so we call
                // merge policy & update pending merges.
                if (success && !merge.IsAborted() && !closed && !closing)
                    UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
            }
        }
    }
    catch (System.OutOfMemoryException oom)
    {
        HandleOOM(oom, "merge");
    }
}
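// Illustrative sketch only (not part of the original source): roughly how a
// serial merge scheduler drives the Merge(OneMerge) entry point above -- keep
// pulling registered merges and run each to completion on the calling thread.
// GetNextMerge() is assumed to be this writer's accessor for pending merges;
// the loop is a sketch, not the shipped SerialMergeScheduler.
private void RunPendingMergesSequentially()
{
    while (true)
    {
        MergePolicy.OneMerge merge = GetNextMerge();
        if (merge == null)
            break;                 // no more merges are pending
        Merge(merge);              // runs MergeInit / MergeMiddle / MergeFinish
    }
}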
/* FIXME if we want to support non-contiguous segment merges */
private bool CommitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startCommitMerge"));

        if (hitOOM)
            return false;

        if (infoStream != null)
            Message("commitMerge: " + merge.SegString(directory) + " index=" + SegString());

        System.Diagnostics.Debug.Assert(merge.registerDone);

        // If merge was explicitly aborted, or, if rollback() or
        // RollbackTransaction() had been called since our merge
        // started (which results in an unqualified
        // deleter.Refresh() call that will remove any index
        // file that current segments does not reference), we
        // abort this merge
        if (merge.IsAborted())
        {
            if (infoStream != null)
                Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");
            deleter.Refresh(merge.info.name);
            return false;
        }

        int start = EnsureContiguousMerge(merge);

        CommitMergedDeletes(merge);
        docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount);

        // Simple optimization: if the doc store we are using
        // has been closed and is now in compound format (but
        // wasn't when we started), then we will switch to the
        // compound format as well:
        string mergeDocStoreSegment = merge.info.GetDocStoreSegment();
        if (mergeDocStoreSegment != null && !merge.info.GetDocStoreIsCompoundFile())
        {
            int size = segmentInfos.Count;
            for (int i = 0; i < size; i++)
            {
                SegmentInfo info = segmentInfos.Info(i);
                string docStoreSegment = info.GetDocStoreSegment();
                if (docStoreSegment != null && docStoreSegment.Equals(mergeDocStoreSegment) && info.GetDocStoreIsCompoundFile())
                {
                    merge.info.SetDocStoreIsCompoundFile(true);
                    break;
                }
            }
        }

        merge.info.SetHasProx(merger.HasProx());

        //segmentInfos.RemoveRange(start, start + merge.segments.Count);
        segmentInfos.RemoveRange(start, merge.segments.Count);
        System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info));
        segmentInfos.Insert(start, merge.info);

        // Must checkpoint before decrefing so any newly
        // referenced files in the new merge.info are incref'd
        // first:
        Checkpoint();

        DecrefMergeSegments(merge);

        if (merge.optimize)
            segmentsToOptimize[merge.info] = merge.info;

        return true;
    }
}
/** Does the actual (time-consuming) work of the merge, * but without holding synchronized lock on IndexWriter * instance */ private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); string mergedName = merge.info.name; SegmentMerger merger = null; int mergedDocCount = 0; SegmentInfos sourceSegments = merge.segments; SegmentInfos sourceSegmentsClone = merge.segmentsClone; int numSegments = sourceSegments.Count; if (infoStream != null) Message("merging " + merge.SegString(directory)); merger = new SegmentMerger(this, mergedName, merge); bool success = false; // This is try/finally to make sure merger's readers are // closed: try { int totDocCount = 0; for (int i = 0; i < numSegments; i++) { SegmentInfo si = sourceSegmentsClone.Info(i); IndexReader reader = SegmentReader.Get(true, si, MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet) merger.Add(reader); totDocCount += reader.NumDocs(); } if (infoStream != null) { Message("merge: total " + totDocCount + " docs"); } merge.CheckAborted(directory); // This is where all the work happens: mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores); System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount); success = true; } finally { // close readers before we attempt to delete // now-obsolete segments if (merger != null) { merger.CloseReaders(); } } if (!CommitMerge(merge, merger, mergedDocCount)) // commitMerge will return false if this merge was aborted return 0; if (merge.useCompoundFile) { // Maybe force a sync here to allow reclaiming of the // disk space used by the segments we just merged: if (autoCommit && DoCommitBeforeMergeCFS(merge)) { long size; lock (this) { size = merge.info.SizeInBytes(); } Commit(size); } success = false; string compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION; try { merger.CreateCompoundFile(compoundFileName); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (merge.IsAborted()) { // This can happen if rollback or close(false) // is called -- fall through to logic below to // remove the partially created CFS: success = true; } else HandleMergeException(ioe, merge); } } catch (System.Exception t) { HandleMergeException(t, merge); } finally { if (!success) { if (infoStream != null) Message("hit exception creating compound file during merge"); lock (this) { deleter.DeleteFile(compoundFileName); } } } if (merge.IsAborted()) { if (infoStream != null) Message("abort merge after building CFS"); deleter.DeleteFile(compoundFileName); return 0; } lock (this) { if (segmentInfos.IndexOf(merge.info) == -1 || merge.IsAborted()) { // Our segment (committed in non-compound // format) got merged away while we were // building the compound format. deleter.DeleteFile(compoundFileName); } else { merge.info.SetUseCompoundFile(true); Checkpoint(); } } } // Force a sync after commiting the merge. Once this // sync completes then all index files referenced by the // current segmentInfos are on stable storage so if the // OS/machine crashes, or power cord is yanked, the // index will be intact. Note that this is just one // (somewhat arbitrary) policy; we could try other // policies like only sync if it's been > X minutes or // more than Y bytes have been written, etc. if (autoCommit) { long size; lock (this) { size = merge.info.SizeInBytes(); } Commit(size); } return mergedDocCount; }
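// Illustrative note (not part of the original source): the compound file name
// built in MergeMiddle above is just the merged segment name plus the standard
// compound-file extension, e.g. a merged segment named "_c" yields "_c.cfs".
// The helper below is hypothetical and only restates that concatenation.
private static string ExampleCompoundFileName(string mergedName)
{
    return mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION; // "_c" -> "_c.cfs"
}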
/// <summary>Does the actual (time-consuming) work of the merge, /// but without holding synchronized lock on IndexWriter /// instance /// </summary> private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); System.String mergedName = merge.info.name; SegmentMerger merger = null; int mergedDocCount = 0; SegmentInfos sourceSegments = merge.segments; int numSegments = sourceSegments.Count; if (infoStream != null) Message("merging " + merge.SegString(directory)); merger = new SegmentMerger(this, mergedName, merge); merge.readers = new SegmentReader[numSegments]; merge.readersClone = new SegmentReader[numSegments]; bool mergeDocStores = false; System.Collections.Hashtable dss = new System.Collections.Hashtable(); // This is try/finally to make sure merger's readers are // closed: bool success = false; try { int totDocCount = 0; for (int i = 0; i < numSegments; i++) { SegmentInfo info = sourceSegments.Info(i); // Hold onto the "live" reader; we will use this to // commit merged deletes SegmentReader reader = merge.readers[i] = readerPool.Get(info, merge.mergeDocStores, MERGE_READ_BUFFER_SIZE, - 1); // We clone the segment readers because other // deletes may come in while we're merging so we // need readers that will not change SegmentReader clone = merge.readersClone[i] = (SegmentReader) reader.Clone(true); merger.Add(clone); if (clone.HasDeletions()) { mergeDocStores = true; } if (info.GetDocStoreOffset() != - 1) { dss[info.GetDocStoreSegment()] = info.GetDocStoreSegment(); } totDocCount += clone.NumDocs(); } if (infoStream != null) { Message("merge: total " + totDocCount + " docs"); } merge.CheckAborted(directory); // If deletions have arrived and it has now become // necessary to merge doc stores, go and open them: if (mergeDocStores && !merge.mergeDocStores) { merge.mergeDocStores = true; lock (this) { String key = docWriter.GetDocStoreSegment(); if (key!=null && dss.Contains(key)) { if (infoStream != null) Message("now flush at mergeMiddle"); DoFlush(true, false); } } for (int i = 0; i < numSegments; i++) { merge.readersClone[i].OpenDocStores(); } // Clear DSS lock (this) { merge.info.SetDocStore(- 1, null, false); } } // This is where all the work happens: mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores); System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount); // TODO: in the non-realtime case, we may want to only // keep deletes (it's costly to open entire reader // when we just need deletes) SegmentReader mergedReader = readerPool.Get(merge.info, false, BufferedIndexInput.BUFFER_SIZE, - 1); try { if (poolReaders && mergedSegmentWarmer != null) { mergedSegmentWarmer.Warm(mergedReader); } if (!CommitMerge(merge, merger, mergedDocCount, mergedReader)) // commitMerge will return false if this merge was aborted return 0; } finally { lock (this) { readerPool.Release(mergedReader); } } success = true; } finally { lock (this) { if (!success) { // Suppress any new exceptions so we throw the // original cause for (int i = 0; i < numSegments; i++) { if (merge.readers[i] != null) { try { readerPool.Release(merge.readers[i], true); } catch (System.Exception t) { } } if (merge.readersClone[i] != null) { try { merge.readersClone[i].Close(); } catch (System.Exception t) { } // This was a private clone and we had the only reference System.Diagnostics.Debug.Assert(merge.readersClone[i].GetRefCount() == 0); } } } else { for (int i = 0; i < numSegments; i++) { if (merge.readers[i] != null) { readerPool.Release(merge.readers[i], true); } if 
(merge.readersClone[i] != null) { merge.readersClone[i].Close(); // This was a private clone and we had the only reference System.Diagnostics.Debug.Assert(merge.readersClone[i].GetRefCount() == 0); } } } } } // Must checkpoint before decrefing so any newly // referenced files in the new merge.info are incref'd // first: lock (this) { deleter.Checkpoint(segmentInfos, false); } DecrefMergeSegments(merge); if (merge.useCompoundFile) { // Maybe force a sync here to allow reclaiming of the // disk space used by the segments we just merged: if (autoCommit && DoCommitBeforeMergeCFS(merge)) { long size; lock (this) { size = merge.info.SizeInBytes(); } Commit(size); } success = false; System.String compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION; try { merger.CreateCompoundFile(compoundFileName); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (merge.IsAborted()) { // This can happen if rollback or close(false) // is called -- fall through to logic below to // remove the partially created CFS: success = true; } else HandleMergeException(ioe, merge); } } catch (System.Exception t) { HandleMergeException(t, merge); } finally { if (!success) { if (infoStream != null) Message("hit exception creating compound file during merge"); lock (this) { deleter.DeleteFile(compoundFileName); } } } if (merge.IsAborted()) { if (infoStream != null) Message("abort merge after building CFS"); deleter.DeleteFile(compoundFileName); return 0; } lock (this) { if (segmentInfos.IndexOf(merge.info) == - 1 || merge.IsAborted()) { // Our segment (committed in non-compound // format) got merged away while we were // building the compound format. deleter.DeleteFile(compoundFileName); } else { merge.info.SetUseCompoundFile(true); Checkpoint(); } } } // Force a sync after commiting the merge. Once this // sync completes then all index files referenced by the // current segmentInfos are on stable storage so if the // OS/machine crashes, or power cord is yanked, the // index will be intact. Note that this is just one // (somewhat arbitrary) policy; we could try other // policies like only sync if it's been > X minutes or // more than Y bytes have been written, etc. if (autoCommit) { long size; lock (this) { size = merge.info.SizeInBytes(); } Commit(size); } return mergedDocCount; }
/// <summary>Does the actual (time-consuming) work of the merge, /// but without holding synchronized lock on IndexWriter /// instance /// </summary> private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); System.String mergedName = merge.info.name; SegmentMerger merger = null; int mergedDocCount = 0; SegmentInfos sourceSegments = merge.segments; SegmentInfos sourceSegmentsClone = merge.segmentsClone; int numSegments = sourceSegments.Count; if (infoStream != null) Message("merging " + merge.SegString(directory)); merger = new SegmentMerger(this, mergedName, merge); // This is try/finally to make sure merger's readers are // closed: bool success = false; try { int totDocCount = 0; for (int i = 0; i < numSegments; i++) { SegmentInfo si = sourceSegmentsClone.Info(i); IndexReader reader = SegmentReader.Get(si, MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet) merger.Add(reader); totDocCount += reader.NumDocs(); } if (infoStream != null) { Message("merge: total " + totDocCount + " docs"); } merge.CheckAborted(directory); mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores); System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount); success = true; } finally { // close readers before we attempt to delete // now-obsolete segments if (merger != null) { merger.CloseReaders(); } if (!success) { if (infoStream != null) Message("hit exception during merge; now refresh deleter on segment " + mergedName); lock (this) { AddMergeException(merge); deleter.Refresh(mergedName); } } } if (!CommitMerge(merge)) // commitMerge will return false if this merge was aborted return 0; if (merge.useCompoundFile) { success = false; bool skip = false; System.String compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION; try { try { merger.CreateCompoundFile(compoundFileName); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (segmentInfos.IndexOf(merge.info) == - 1) { // If another merge kicked in and merged our // new segment away while we were trying to // build the compound file, we can hit a // FileNotFoundException and possibly // IOException over NFS. We can tell this has // happened because our SegmentInfo is no // longer in the segments; if this has // happened it is safe to ignore the exception // & skip finishing/committing our compound // file creating. if (infoStream != null) Message("hit exception creating compound file; ignoring it because our info (segment " + merge.info.name + ") has been merged away"); skip = true; } else throw ioe; } } } finally { if (!success) { if (infoStream != null) Message("hit exception creating compound file during merge: skip=" + skip); lock (this) { if (!skip) AddMergeException(merge); deleter.DeleteFile(compoundFileName); } } } if (!skip) { lock (this) { if (skip || segmentInfos.IndexOf(merge.info) == - 1 || merge.IsAborted()) { // Our segment (committed in non-compound // format) got merged away while we were // building the compound format. deleter.DeleteFile(compoundFileName); } else { success = false; try { merge.info.SetUseCompoundFile(true); Checkpoint(); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception checkpointing compound file during merge"); // Must rollback: AddMergeException(merge); merge.info.SetUseCompoundFile(false); DeletePartialSegmentsFile(); deleter.DeleteFile(compoundFileName); } } // Give deleter a chance to remove files now. 
deleter.Checkpoint(segmentInfos, autoCommit); } } } } return mergedDocCount; }
internal void _MergeInit(MergePolicy.OneMerge merge) { lock (this) { System.Diagnostics.Debug.Assert(TestPoint("startMergeInit")); System.Diagnostics.Debug.Assert(merge.registerDone); if (merge.info != null) // mergeInit already done return; if (merge.IsAborted()) return ; SegmentInfos sourceSegments = merge.segments; int end = sourceSegments.Count; EnsureContiguousMerge(merge); // Check whether this merge will allow us to skip // merging the doc stores (stored field & vectors). // This is a very substantial optimization (saves tons // of IO) that can only be applied with // autoCommit=false. Directory lastDir = directory; System.String lastDocStoreSegment = null; int next = - 1; bool mergeDocStores = false; bool doFlushDocStore = false; System.String currentDocStoreSegment = docWriter.GetDocStoreSegment(); // Test each segment to be merged: check if we need to // flush/merge doc stores for (int i = 0; i < end; i++) { SegmentInfo si = sourceSegments.Info(i); // If it has deletions we must merge the doc stores if (si.HasDeletions()) mergeDocStores = true; // If it has its own (private) doc stores we must // merge the doc stores if (- 1 == si.GetDocStoreOffset()) mergeDocStores = true; // If it has a different doc store segment than // previous segments, we must merge the doc stores System.String docStoreSegment = si.GetDocStoreSegment(); if (docStoreSegment == null) mergeDocStores = true; else if (lastDocStoreSegment == null) lastDocStoreSegment = docStoreSegment; else if (!lastDocStoreSegment.Equals(docStoreSegment)) mergeDocStores = true; // Segments' docScoreOffsets must be in-order, // contiguous. For the default merge policy now // this will always be the case but for an arbitrary // merge policy this may not be the case if (- 1 == next) next = si.GetDocStoreOffset() + si.docCount; else if (next != si.GetDocStoreOffset()) mergeDocStores = true; else next = si.GetDocStoreOffset() + si.docCount; // If the segment comes from a different directory // we must merge if (lastDir != si.dir) mergeDocStores = true; // If the segment is referencing the current "live" // doc store outputs then we must merge if (si.GetDocStoreOffset() != - 1 && currentDocStoreSegment != null && si.GetDocStoreSegment().Equals(currentDocStoreSegment)) doFlushDocStore = true; } int docStoreOffset; System.String docStoreSegment2; bool docStoreIsCompoundFile; if (mergeDocStores) { docStoreOffset = - 1; docStoreSegment2 = null; docStoreIsCompoundFile = false; } else { SegmentInfo si = sourceSegments.Info(0); docStoreOffset = si.GetDocStoreOffset(); docStoreSegment2 = si.GetDocStoreSegment(); docStoreIsCompoundFile = si.GetDocStoreIsCompoundFile(); } if (mergeDocStores && doFlushDocStore) { // SegmentMerger intends to merge the doc stores // (stored fields, vectors), and at least one of the // segments to be merged refers to the currently // live doc stores. // TODO: if we know we are about to merge away these // newly flushed doc store files then we should not // make compound file out of them... if (infoStream != null) Message("flush at merge"); Flush(false, true); } // We must take a full copy at this point so that we can // properly merge deletes in commitMerge() merge.segmentsClone = (SegmentInfos) merge.segments.Clone(); for (int i = 0; i < end; i++) { SegmentInfo si = merge.segmentsClone.Info(i); // IncRef all files for this segment info to make sure // they are not removed while we are trying to merge. 
if (si.dir == directory) deleter.IncRef(si.Files()); } merge.increfDone = true; merge.mergeDocStores = mergeDocStores; // Bind a new segment name here so even with // ConcurrentMergePolicy we keep deterministic segment // names. merge.info = new SegmentInfo(NewSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment2, docStoreIsCompoundFile); // Also enroll the merged segment into mergingSegments; // this prevents it from getting selected for a merge // after our merge is done but while we are building the // CFS: mergingSegments.Add(merge.info, merge.info); } }
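// Illustrative sketch only (not part of the original source): the IncRef calls
// made at the end of _MergeInit above are paired with a decref when the merge
// commits or aborts. This sketch shows the shape of that pairing, assuming the
// deleter exposes a matching DecRef for the same file lists; it is not the
// shipped DecrefMergeSegments body.
private void DecrefMergeSegmentsSketch(MergePolicy.OneMerge merge)
{
    System.Diagnostics.Debug.Assert(merge.increfDone);
    int numSegmentsToMerge = merge.segmentsClone.Count;
    for (int i = 0; i < numSegmentsToMerge; i++)
    {
        SegmentInfo previousInfo = merge.segmentsClone.Info(i);
        // Only files in this writer's directory were incref'd in _MergeInit.
        if (previousInfo.dir == directory)
            deleter.DecRef(previousInfo.Files());
    }
    merge.increfDone = false;
}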
/// <summary> Merges the indicated segments, replacing them in the stack with a
/// single segment.
/// </summary>
public /*internal*/ void Merge(MergePolicy.OneMerge merge)
{
    System.Diagnostics.Debug.Assert(merge.registerDone);
    System.Diagnostics.Debug.Assert(!merge.optimize || merge.maxNumSegmentsOptimize > 0);

    bool success = false;

    try
    {
        try
        {
            try
            {
                if (merge.info == null)
                    MergeInit(merge);

                if (infoStream != null)
                    Message("now merge\n merge=" + merge.SegString(directory) + "\n index=" + SegString());

                MergeMiddle(merge);
                success = true;
            }
            catch (MergePolicy.MergeAbortedException e)
            {
                merge.SetException(e);
                AddMergeException(merge);
                // We can ignore this exception, unless the merge
                // involves segments from external directories, in
                // which case we must throw it so, for example, the
                // rollbackTransaction code in addIndexes* is
                // executed.
                if (merge.isExternal)
                    throw e;
            }
        }
        finally
        {
            lock (this)
            {
                try
                {
                    MergeFinish(merge);

                    if (!success)
                    {
                        if (infoStream != null)
                            Message("hit exception during merge");
                        AddMergeException(merge);
                        if (merge.info != null && !segmentInfos.Contains(merge.info))
                            deleter.Refresh(merge.info.name);
                    }

                    // This merge (and, generally, any change to the
                    // segments) may now enable new merges, so we call
                    // merge policy & update pending merges.
                    if (success && !merge.IsAborted() && !closed && !closing)
                        UpdatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
                }
                finally
                {
                    runningMerges.Remove(merge);
                    // Optimize may be waiting on the final optimize
                    // merge to finish; and finishMerges() may be
                    // waiting for all merges to finish:
                    System.Threading.Monitor.PulseAll(this);
                }
            }
        }
    }
    catch (OutOfMemoryException oom)
    {
        hitOOM = true;
        throw oom;
    }
}
/* FIXME if we want to support non-contiguous segment merges */ private bool CommitMerge(MergePolicy.OneMerge merge) { lock (this) { System.Diagnostics.Debug.Assert(merge.registerDone); if (hitOOM) return false; if (infoStream != null) Message("CommitMerge: " + merge.SegString(directory)); // If merge was explicitly aborted, or, if abort() or // rollbackTransaction() had been called since our merge // started (which results in an unqualified // deleter.refresh() call that will remove any index // file that current segments does not reference), we // abort this merge if (merge.IsAborted()) { if (infoStream != null) Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted"); System.Diagnostics.Debug.Assert(merge.increfDone); DecrefMergeSegments(merge); deleter.Refresh(merge.info.name); return false; } bool success = false; int start; try { SegmentInfos sourceSegmentsClone = merge.segmentsClone; SegmentInfos sourceSegments = merge.segments; start = EnsureContiguousMerge(merge); if (infoStream != null) Message("commitMerge " + merge.SegString(directory)); // Carefully merge deletes that occurred after we // started merging: BitVector deletes = null; int docUpto = 0; int numSegmentsToMerge = sourceSegments.Count; for (int i = 0; i < numSegmentsToMerge; i++) { SegmentInfo previousInfo = sourceSegmentsClone.Info(i); SegmentInfo currentInfo = sourceSegments.Info(i); System.Diagnostics.Debug.Assert(currentInfo.docCount == previousInfo.docCount); int docCount = currentInfo.docCount; if (previousInfo.HasDeletions()) { // There were deletes on this segment when the merge // started. The merge has collapsed away those // deletes, but, if new deletes were flushed since // the merge started, we must now carefully keep any // newly flushed deletes but mapping them to the new // docIDs. 
System.Diagnostics.Debug.Assert(currentInfo.HasDeletions()); // Load deletes present @ start of merge, for this segment: BitVector previousDeletes = new BitVector(previousInfo.dir, previousInfo.GetDelFileName()); if (!currentInfo.GetDelFileName().Equals(previousInfo.GetDelFileName())) { // This means this segment has had new deletes // committed since we started the merge, so we // must merge them: if (deletes == null) deletes = new BitVector(merge.info.docCount); BitVector currentDeletes = new BitVector(currentInfo.dir, currentInfo.GetDelFileName()); for (int j = 0; j < docCount; j++) { if (previousDeletes.Get(j)) System.Diagnostics.Debug.Assert(currentDeletes.Get(j)); else { if (currentDeletes.Get(j)) deletes.Set(docUpto); docUpto++; } } } else docUpto += docCount - previousDeletes.Count(); } else if (currentInfo.HasDeletions()) { // This segment had no deletes before but now it // does: if (deletes == null) deletes = new BitVector(merge.info.docCount); BitVector currentDeletes = new BitVector(directory, currentInfo.GetDelFileName()); for (int j = 0; j < docCount; j++) { if (currentDeletes.Get(j)) deletes.Set(docUpto); docUpto++; } } // No deletes before or after else docUpto += currentInfo.docCount; merge.CheckAborted(directory); } if (deletes != null) { merge.info.AdvanceDelGen(); deletes.Write(directory, merge.info.GetDelFileName()); } success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception creating merged deletes file"); deleter.Refresh(merge.info.name); } } // Simple optimization: if the doc store we are using // has been closed and is in now compound format (but // wasn't when we started), then we will switch to the // compound format as well: System.String mergeDocStoreSegment = merge.info.GetDocStoreSegment(); if (mergeDocStoreSegment != null && !merge.info.GetDocStoreIsCompoundFile()) { int size = segmentInfos.Count; for (int i = 0; i < size; i++) { SegmentInfo info = segmentInfos.Info(i); System.String docStoreSegment = info.GetDocStoreSegment(); if (docStoreSegment != null && docStoreSegment.Equals(mergeDocStoreSegment) && info.GetDocStoreIsCompoundFile()) { merge.info.SetDocStoreIsCompoundFile(true); break; } } } success = false; SegmentInfos rollback = null; try { rollback = (SegmentInfos) segmentInfos.Clone(); ((System.Collections.IList) ((System.Collections.ArrayList) segmentInfos).GetRange(start, start + merge.segments.Count - start)).Clear(); segmentInfos.Insert(start, merge.info); Checkpoint(); success = true; } finally { if (!success && rollback != null) { if (infoStream != null) Message("hit exception when checkpointing after merge"); segmentInfos.Clear(); segmentInfos.AddRange(rollback); DeletePartialSegmentsFile(); deleter.Refresh(merge.info.name); } } if (merge.optimize) segmentsToOptimize.Add(merge.info, merge.info); // Must checkpoint before decrefing so any newly // referenced files in the new merge.info are incref'd // first: deleter.Checkpoint(segmentInfos, autoCommit); DecrefMergeSegments(merge); return true; } }
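// Illustrative sketch only (not part of the original source): a worked example
// of the delete-remapping loop above. Suppose a 4-doc source segment had doc 1
// deleted when the merge started, and doc 3 was deleted while the merge ran.
// Only the 3 surviving docs were copied, so the late delete of old doc 3 must
// land on merged doc ID docUpto + 2 -- which is exactly what the docUpto walk
// over the old and new BitVectors computes. The helper is hypothetical.
private static int RemapLateDeleteExample(int docUpto)
{
    bool[] deletedAtMergeStart = { false, true, false, false }; // doc 1 already gone
    bool[] deletedNow          = { false, true, false, true  }; // doc 3 deleted during merge
    int newDocId = docUpto;
    int remapped = -1;
    for (int j = 0; j < deletedAtMergeStart.Length; j++)
    {
        if (deletedAtMergeStart[j])
            continue;            // this doc was not copied into the merged segment
        if (deletedNow[j])
            remapped = newDocId; // a late delete maps onto the merged doc ID
        newDocId++;
    }
    return remapped;             // == docUpto + 2 for this example
}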