// Note: just like in the codec APIs, Directory 'dir' is NOT the same as segmentInfo.dir!!
public SegmentMerger(IList<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, bool validate)
{
    // validate incoming readers
    if (validate)
    {
        foreach (AtomicReader reader in readers)
        {
            reader.CheckIntegrity();
        }
    }

    MergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
    Directory = dir;
    this.TermIndexInterval = termIndexInterval;
    this.Codec = segmentInfo.Codec;
    this.Context = context;
    this.FieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
    MergeState.SegmentInfo.DocCount = SetDocMaps();
}
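// Hypothetical sketch only -- SetDocMaps() is not part of this excerpt. The field
// names (DocMaps, DocBase) and the MergeState.DocMap.Build(...) factory are
// assumptions based on how MergeState is used elsewhere in this listing; the real
// implementation may differ.
private int SetDocMaps()
{
    int numReaders = MergeState.Readers.Count;

    // One doc map per incoming reader: squeezes deleted docs out of the merged
    // doc ID space and records each reader's base in the new segment.
    MergeState.DocMaps = new MergeState.DocMap[numReaders];
    MergeState.DocBase = new int[numReaders];

    int docBase = 0;
    for (int i = 0; i < numReaders; i++)
    {
        AtomicReader reader = MergeState.Readers[i];
        MergeState.DocBase[i] = docBase;
        MergeState.DocMap docMap = MergeState.DocMap.Build(reader);
        MergeState.DocMaps[i] = docMap;
        docBase += docMap.NumDocs; // live (non-deleted) docs only
    }

    // The total live doc count becomes the merged segment's DocCount.
    return docBase;
}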
public override int Merge(MergeState mergeState)
{
    int docCount = 0;
    int idx = 0;

    foreach (AtomicReader reader in mergeState.Readers)
    {
        SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
        CompressingStoredFieldsReader matchingFieldsReader = null;
        if (matchingSegmentReader != null)
        {
            StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader;
            // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
            if (fieldsReader != null && fieldsReader is CompressingStoredFieldsReader)
            {
                matchingFieldsReader = (CompressingStoredFieldsReader)fieldsReader;
            }
        }

        int maxDoc = reader.MaxDoc();
        Bits liveDocs = reader.LiveDocs;

        if (matchingFieldsReader == null
            || matchingFieldsReader.Version != VERSION_CURRENT // means reader version is not the same as the writer version
            || matchingFieldsReader.CompressionMode != CompressionMode
            || matchingFieldsReader.ChunkSize != ChunkSize) // the way data is decompressed depends on the chunk size
        {
            // naive merge...
            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
            {
                Document doc = reader.Document(i);
                AddDocument(doc, mergeState.FieldInfos);
                ++docCount;
                mergeState.checkAbort.Work(300);
            }
        }
        else
        {
            int docID = NextLiveDoc(0, liveDocs, maxDoc);
            if (docID < maxDoc)
            {
                // not all docs were deleted
                CompressingStoredFieldsReader.ChunkIterator it = matchingFieldsReader.GetChunkIterator(docID);
                int[] startOffsets = new int[0];
                do
                {
                    // go to the next chunk that contains docID
                    it.Next(docID);

                    // transform lengths into offsets
                    if (startOffsets.Length < it.ChunkDocs)
                    {
                        startOffsets = new int[ArrayUtil.Oversize(it.ChunkDocs, 4)];
                    }
                    for (int i = 1; i < it.ChunkDocs; ++i)
                    {
                        startOffsets[i] = startOffsets[i - 1] + it.Lengths[i - 1];
                    }

                    if (NumBufferedDocs == 0 // starting a new chunk
                        && startOffsets[it.ChunkDocs - 1] < ChunkSize // chunk is small enough
                        && startOffsets[it.ChunkDocs - 1] + it.Lengths[it.ChunkDocs - 1] >= ChunkSize // chunk is large enough
                        && NextDeletedDoc(it.DocBase, liveDocs, it.DocBase + it.ChunkDocs) == it.DocBase + it.ChunkDocs) // no deletion in the chunk
                    {
                        Debug.Assert(docID == it.DocBase);

                        // no need to decompress, just copy data
                        IndexWriter.WriteIndex(it.ChunkDocs, FieldsStream.FilePointer);
                        WriteHeader(this.DocBase, it.ChunkDocs, it.NumStoredFields, it.Lengths);
                        it.CopyCompressedData(FieldsStream);
                        this.DocBase += it.ChunkDocs;
                        docID = NextLiveDoc(it.DocBase + it.ChunkDocs, liveDocs, maxDoc);
                        docCount += it.ChunkDocs;
                        mergeState.checkAbort.Work(300 * it.ChunkDocs);
                    }
                    else
                    {
                        // decompress
                        it.Decompress();
                        if (startOffsets[it.ChunkDocs - 1] + it.Lengths[it.ChunkDocs - 1] != it.Bytes.Length)
                        {
                            throw new CorruptIndexException("Corrupted: expected chunk size=" + (startOffsets[it.ChunkDocs - 1] + it.Lengths[it.ChunkDocs - 1]) + ", got " + it.Bytes.Length);
                        }
                        // copy non-deleted docs
                        for (; docID < it.DocBase + it.ChunkDocs; docID = NextLiveDoc(docID + 1, liveDocs, maxDoc))
                        {
                            int diff = docID - it.DocBase;
                            StartDocument(it.NumStoredFields[diff]);
                            BufferedDocs.WriteBytes(it.Bytes.Bytes, it.Bytes.Offset + startOffsets[diff], it.Lengths[diff]);
                            FinishDocument();
                            ++docCount;
                            mergeState.checkAbort.Work(300);
                        }
                    }
                } while (docID < maxDoc);

                it.CheckIntegrity();
            }
        }
    }

    Finish(mergeState.FieldInfos, docCount);
    return docCount;
}
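// The merge loop above relies on two helpers, NextLiveDoc and NextDeletedDoc, which
// are not included in this excerpt. A minimal sketch, assuming the usual Bits
// live-docs convention (null means "no deletions"; Get(doc) == true means the doc is
// live). Under this reading, the "no deletion in the chunk" test in the bulk-copy
// branch holds exactly when NextDeletedDoc finds nothing before the end of the chunk.
private static int NextLiveDoc(int doc, Bits liveDocs, int maxDoc)
{
    if (liveDocs == null)
    {
        return doc; // no deletions: every doc is live
    }
    while (doc < maxDoc && !liveDocs.Get(doc))
    {
        ++doc;
    }
    return doc;
}

private static int NextDeletedDoc(int doc, Bits liveDocs, int maxDoc)
{
    if (liveDocs == null)
    {
        return maxDoc; // no deletions at all
    }
    while (doc < maxDoc && liveDocs.Get(doc))
    {
        ++doc;
    }
    return doc;
}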
/// <summary>
/// Default merge impl: append documents, mapping around
/// deletes.
/// </summary>
public virtual TermStats Merge(MergeState mergeState, FieldInfo.IndexOptions? indexOptions, DocsEnum postings, FixedBitSet visitedDocs)
{
    int df = 0;
    long totTF = 0;

    if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
    {
        while (true)
        {
            int doc = postings.NextDoc();
            if (doc == DocIdSetIterator.NO_MORE_DOCS)
            {
                break;
            }
            visitedDocs.Set(doc);
            this.StartDoc(doc, -1);
            this.FinishDoc();
            df++;
        }
        totTF = -1;
    }
    else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS)
    {
        while (true)
        {
            int doc = postings.NextDoc();
            if (doc == DocIdSetIterator.NO_MORE_DOCS)
            {
                break;
            }
            visitedDocs.Set(doc);
            int freq = postings.Freq();
            this.StartDoc(doc, freq);
            this.FinishDoc();
            df++;
            totTF += freq;
        }
    }
    else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
    {
        var postingsEnum = (DocsAndPositionsEnum)postings;
        while (true)
        {
            int doc = postingsEnum.NextDoc();
            if (doc == DocIdSetIterator.NO_MORE_DOCS)
            {
                break;
            }
            visitedDocs.Set(doc);
            int freq = postingsEnum.Freq();
            this.StartDoc(doc, freq);
            totTF += freq;
            for (int i = 0; i < freq; i++)
            {
                int position = postingsEnum.NextPosition();
                BytesRef payload = postingsEnum.Payload;
                this.AddPosition(position, payload, -1, -1);
            }
            this.FinishDoc();
            df++;
        }
    }
    else
    {
        Debug.Assert(indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        var postingsEnum = (DocsAndPositionsEnum)postings;
        while (true)
        {
            int doc = postingsEnum.NextDoc();
            if (doc == DocIdSetIterator.NO_MORE_DOCS)
            {
                break;
            }
            visitedDocs.Set(doc);
            int freq = postingsEnum.Freq();
            this.StartDoc(doc, freq);
            totTF += freq;
            for (int i = 0; i < freq; i++)
            {
                int position = postingsEnum.NextPosition();
                BytesRef payload = postingsEnum.Payload;
                this.AddPosition(position, payload, postingsEnum.StartOffset(), postingsEnum.EndOffset());
            }
            this.FinishDoc();
            df++;
        }
    }

    return new TermStats(df, indexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : totTF);
}
private bool CommitMerge(MergePolicy.OneMerge merge, MergeState mergeState)
{
    lock (this)
    {
        Debug.Assert(TestPoint("startCommitMerge"));

        if (HitOOM)
        {
            throw new InvalidOperationException("this writer hit an OutOfMemoryError; cannot complete merge");
        }

        if (infoStream.IsEnabled("IW"))
        {
            infoStream.Message("IW", "commitMerge: " + SegString(merge.Segments) + " index=" + SegString());
        }

        Debug.Assert(merge.RegisterDone);

        // If the merge was explicitly aborted, or if rollback() or
        // rollbackTransaction() has been called since our merge
        // started (which results in an unqualified
        // deleter.refresh() call that will remove any index
        // file that the current segments does not reference), we
        // abort this merge:
        if (merge.Aborted)
        {
            if (infoStream.IsEnabled("IW"))
            {
                infoStream.Message("IW", "commitMerge: skip: it was aborted");
            }
            // In case we opened and pooled a reader for this
            // segment, drop it now. This ensures that we close
            // the reader before trying to delete any of its
            // files. This is not a very big deal, since this
            // reader will never be used by any NRT reader, and
            // another thread is currently running close(false),
            // so it will be dropped shortly anyway; but not
            // doing this makes MockDirWrapper angry in
            // TestNRTThreads (LUCENE-5434):
            readerPool.Drop(merge.Info_Renamed);
            Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
            return false;
        }

        ReadersAndUpdates mergedUpdates = merge.Info_Renamed.Info.DocCount == 0 ? null : CommitMergedDeletesAndUpdates(merge, mergeState);
        // System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMerge: mergedDeletes=" + mergedDeletes);

        // If the doc store we are using has been closed and
        // is now in compound format (but wasn't when we
        // started), then we will switch to the compound
        // format as well:
        Debug.Assert(!segmentInfos.Contains(merge.Info_Renamed));

        bool allDeleted = merge.Segments.Count == 0 || merge.Info_Renamed.Info.DocCount == 0 || (mergedUpdates != null && mergedUpdates.PendingDeleteCount == merge.Info_Renamed.Info.DocCount);

        if (infoStream.IsEnabled("IW"))
        {
            if (allDeleted)
            {
                infoStream.Message("IW", "merged segment " + merge.Info_Renamed + " is 100% deleted" + (KeepFullyDeletedSegments_Renamed ? "" : "; skipping insert"));
            }
        }

        bool dropSegment = allDeleted && !KeepFullyDeletedSegments_Renamed;

        // If we merged no segments then we better be dropping
        // the new segment:
        Debug.Assert(merge.Segments.Count > 0 || dropSegment);

        Debug.Assert(merge.Info_Renamed.Info.DocCount != 0 || KeepFullyDeletedSegments_Renamed || dropSegment);

        if (mergedUpdates != null)
        {
            bool success = false;
            try
            {
                if (dropSegment)
                {
                    mergedUpdates.DropChanges();
                }
                // Pass false for assertInfoLive because the merged
                // segment is not yet live (only below do we commit it
                // to the segmentInfos):
                readerPool.Release(mergedUpdates, false);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    mergedUpdates.DropChanges();
                    readerPool.Drop(merge.Info_Renamed);
                }
            }
        }

        // Must do this after readerPool.release, in case an
        // exception is hit e.g. writing the live docs for the
        // merged segment, in which case we need to abort the
        // merge:
        segmentInfos.ApplyMergeChanges(merge, dropSegment);

        if (dropSegment)
        {
            Debug.Assert(!segmentInfos.Contains(merge.Info_Renamed));
            readerPool.Drop(merge.Info_Renamed);
            Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
        }

        bool success_ = false;
        try
        {
            // Must close before checkpoint, otherwise IFD won't be
            // able to delete the held-open files from the merge
            // readers:
            CloseMergeReaders(merge, false);
            success_ = true;
        }
        finally
        {
            // Must note the change to segmentInfos so any commits
            // in-flight don't lose it (IFD will incRef/protect the
            // new files we created):
            if (success_)
            {
                Checkpoint();
            }
            else
            {
                try
                {
                    Checkpoint();
                }
                catch (Exception)
                {
                    // Ignore so we keep throwing the original exception.
                }
            }
        }

        Deleter.DeletePendingFiles();

        if (infoStream.IsEnabled("IW"))
        {
            infoStream.Message("IW", "after commitMerge: " + SegString());
        }

        if (merge.MaxNumSegments != -1 && !dropSegment)
        {
            // cascade the forceMerge:
            if (!SegmentsToMerge.ContainsKey(merge.Info_Renamed))
            {
                SegmentsToMerge[merge.Info_Renamed] = false;
            }
        }

        return true;
    }
}
/// <summary>
/// Does the actual (time-consuming) work of the merge,
/// but without holding the synchronized lock on the
/// IndexWriter instance.
/// </summary>
private int MergeMiddle(MergePolicy.OneMerge merge)
{
    merge.CheckAborted(directory);

    string mergedName = merge.Info_Renamed.Info.Name;

    IList<SegmentCommitInfo> sourceSegments = merge.Segments;

    IOContext context = new IOContext(merge.MergeInfo);

    MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, directory);
    TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory);

    if (infoStream.IsEnabled("IW"))
    {
        infoStream.Message("IW", "merging " + SegString(merge.Segments));
    }

    merge.Readers = new List<SegmentReader>();

    // this is try/finally to make sure merger's readers are
    // closed:
    bool success = false;
    try
    {
        int segUpto = 0;
        while (segUpto < sourceSegments.Count)
        {
            SegmentCommitInfo info = sourceSegments[segUpto];

            // Hold onto the "live" reader; we will use this to
            // commit merged deletes
            ReadersAndUpdates rld = readerPool.Get(info, true);

            // Carefully pull the most recent live docs and reader
            SegmentReader reader;
            Bits liveDocs;
            int delCount;

            lock (this)
            {
                // Must sync to ensure BufferedDeletesStream cannot change liveDocs,
                // pendingDeleteCount and field updates while we pull a copy:
                reader = rld.GetReaderForMerge(context);
                liveDocs = rld.ReadOnlyLiveDocs;
                delCount = rld.PendingDeleteCount + info.DelCount;

                Debug.Assert(reader != null);
                Debug.Assert(rld.VerifyDocCounts());

                if (infoStream.IsEnabled("IW"))
                {
                    if (rld.PendingDeleteCount != 0)
                    {
                        infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount + " pendingDelCount=" + rld.PendingDeleteCount);
                    }
                    else if (info.DelCount != 0)
                    {
                        infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount);
                    }
                    else
                    {
                        infoStream.Message("IW", "seg=" + SegString(info) + " no deletes");
                    }
                }
            }

            // Deletes might have happened after we pulled the merge reader and
            // before we got a read-only copy of the segment's actual live docs
            // (taking pending deletes into account). In that case we need to
            // make a new reader with updated live docs and del count.
            if (reader.NumDeletedDocs() != delCount)
            {
                // fix the reader's live docs and del count
                Debug.Assert(delCount > reader.NumDeletedDocs()); // beware of zombies

                SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.Info.DocCount - delCount);
                bool released = false;
                try
                {
                    rld.Release(reader);
                    released = true;
                }
                finally
                {
                    if (!released)
                    {
                        newReader.DecRef();
                    }
                }

                reader = newReader;
            }

            merge.Readers.Add(reader);
            Debug.Assert(delCount <= info.Info.DocCount, "delCount=" + delCount + " info.docCount=" + info.Info.DocCount + " rld.pendingDeleteCount=" + rld.PendingDeleteCount + " info.getDelCount()=" + info.DelCount);
            segUpto++;
        }

        // System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders());

        // we pass merge.getMergeReaders() instead of merge.readers to allow the
        // OneMerge to return a view over the actual segments to merge
        SegmentMerger merger = new SegmentMerger(merge.MergeReaders, merge.Info_Renamed.Info, infoStream, dirWrapper, Config_Renamed.TermIndexInterval, checkAbort, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge);

        merge.CheckAborted(directory);

        // this is where all the work happens:
        MergeState mergeState;
        bool success3 = false;
        try
        {
            if (!merger.ShouldMerge())
            {
                // would result in a 0 document segment: nothing to merge!
                mergeState = new MergeState(new List<AtomicReader>(), merge.Info_Renamed.Info, infoStream, checkAbort);
            }
            else
            {
                mergeState = merger.Merge();
            }
            success3 = true;
        }
        finally
        {
            if (!success3)
            {
                lock (this)
                {
                    Deleter.Refresh(merge.Info_Renamed.Info.Name);
                }
            }
        }

        Debug.Assert(mergeState.SegmentInfo == merge.Info_Renamed.Info);
        merge.Info_Renamed.Info.Files = new HashSet<string>(dirWrapper.CreatedFiles);

        // Record which codec was used to write the segment
        if (infoStream.IsEnabled("IW"))
        {
            if (merge.Info_Renamed.Info.DocCount == 0)
            {
                infoStream.Message("IW", "merge away fully deleted segments");
            }
            else
            {
                infoStream.Message("IW", "merge codec=" + Codec + " docCount=" + merge.Info_Renamed.Info.DocCount + "; merged segment has " +
                    (mergeState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " +
                    (mergeState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " +
                    (mergeState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " +
                    (mergeState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " +
                    (mergeState.FieldInfos.HasFreq() ? "freqs" : "no freqs"));
            }
        }

        // Very important to do this before opening the reader
        // because codec must know if prox was written for
        // this segment:
        //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
        bool useCompoundFile;
        lock (this) // Guard segmentInfos
        {
            useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, merge.Info_Renamed);
        }

        if (useCompoundFile)
        {
            success = false;

            ICollection<string> filesToRemove = merge.Info_Renamed.Files();

            try
            {
                filesToRemove = CreateCompoundFile(infoStream, directory, checkAbort, merge.Info_Renamed.Info, context);
                success = true;
            }
            catch (System.IO.IOException ioe)
            {
                lock (this)
                {
                    if (merge.Aborted)
                    {
                        // this can happen if rollback or close(false)
                        // is called -- fall through to logic below to
                        // remove the partially created CFS:
                    }
                    else
                    {
                        HandleMergeException(ioe, merge);
                    }
                }
            }
            catch (Exception t)
            {
                HandleMergeException(t, merge);
            }
            finally
            {
                if (!success)
                {
                    if (infoStream.IsEnabled("IW"))
                    {
                        infoStream.Message("IW", "hit exception creating compound file during merge");
                    }

                    lock (this)
                    {
                        Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION));
                        Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
                        Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                    }
                }
            }

            // So that, if we hit exc in deleteNewFiles (next)
            // or in commitMerge (later), we close the
            // per-segment readers in the finally clause below:
            success = false;

            lock (this)
            {
                // delete new non cfs files directly: they were never
                // registered with IFD
                Deleter.DeleteNewFiles(filesToRemove);

                if (merge.Aborted)
                {
                    if (infoStream.IsEnabled("IW"))
                    {
                        infoStream.Message("IW", "abort merge after building CFS");
                    }
                    Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION));
                    Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
                    return 0;
                }
            }

            merge.Info_Renamed.Info.UseCompoundFile = true;
        }
        else
        {
            // So that, if we hit exc in commitMerge (later),
            // we close the per-segment readers in the finally
            // clause below:
            success = false;
        }

        // Have codec write SegmentInfo. Must do this after
        // creating CFS so that 1) .si isn't slurped into CFS,
        // and 2) .si reflects the useCompoundFile=true change
        // above:
        bool success2 = false;
        try
        {
            Codec.SegmentInfoFormat().SegmentInfoWriter.Write(directory, merge.Info_Renamed.Info, mergeState.FieldInfos, context);
            success2 = true;
        }
        finally
        {
            if (!success2)
            {
                lock (this)
                {
                    Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                }
            }
        }

        // TODO: ideally we would freeze merge.info here!!
        // because any changes after writing the .si will be
        // lost...

        if (infoStream.IsEnabled("IW"))
        {
            infoStream.Message("IW", string.Format(CultureInfo.InvariantCulture, "merged segment size={0:0.000} MB vs estimate={1:0.000} MB", merge.Info_Renamed.SizeInBytes() / 1024.0 / 1024.0, merge.EstimatedMergeBytes / 1024 / 1024.0));
        }

        IndexReaderWarmer mergedSegmentWarmer = Config_Renamed.MergedSegmentWarmer;
        if (PoolReaders && mergedSegmentWarmer != null && merge.Info_Renamed.Info.DocCount != 0)
        {
            ReadersAndUpdates rld = readerPool.Get(merge.Info_Renamed, true);

            // Force READ context because we merge deletes onto
            // this reader:
            SegmentReader sr = rld.GetReader(IOContext.READ);
            try
            {
                mergedSegmentWarmer.Warm(sr);
            }
            finally
            {
                lock (this)
                {
                    rld.Release(sr);
                    readerPool.Release(rld);
                }
            }
        }

        if (!CommitMerge(merge, mergeState))
        {
            // commitMerge will return false if this merge was
            // aborted
            return 0;
        }

        success = true;
    }
    finally
    {
        // Readers are already closed in commitMerge if we didn't hit
        // an exc:
        if (!success)
        {
            CloseMergeReaders(merge, true);
        }
    }

    return merge.Info_Renamed.Info.DocCount;
}
private void MaybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState, int docUpto, MergedDeletesAndUpdates holder, string[] mergingFields, DocValuesFieldUpdates[] dvFieldUpdates, DocValuesFieldUpdates.Iterator[] updatesIters, int curDoc)
{
    int newDoc = -1;
    for (int idx = 0; idx < mergingFields.Length; idx++)
    {
        DocValuesFieldUpdates.Iterator updatesIter = updatesIters[idx];
        if (updatesIter.Doc() == curDoc) // document has an update
        {
            if (holder.MergedDeletesAndUpdates_Renamed == null)
            {
                holder.Init(readerPool, merge, mergeState, false);
            }
            if (newDoc == -1) // map once per all field updates, but only if there are any updates
            {
                newDoc = holder.DocMap.Map(docUpto);
            }
            DocValuesFieldUpdates dvUpdates = dvFieldUpdates[idx];
            dvUpdates.Add(newDoc, updatesIter.Value());
            updatesIter.NextDoc(); // advance to next document
        }
        else
        {
            Debug.Assert(updatesIter.Doc() > curDoc, "field=" + mergingFields[idx] + " updateDoc=" + updatesIter.Doc() + " curDoc=" + curDoc);
        }
    }
}
/// <summary>
/// Carefully merges deletes and updates for the segments we just merged. This
/// is tricky because, although merging will clear all deletes (compacts the
/// documents) and compact all the updates, new deletes and updates may have
/// been flushed to the segments since the merge was started. This method
/// "carries over" such new deletes and updates onto the newly merged segment,
/// and saves the resulting deletes and updates files (incrementing the delete
/// and DV generations for merge.info). If no deletes were flushed, no new
/// deletes file is saved.
/// </summary>
private ReadersAndUpdates CommitMergedDeletesAndUpdates(MergePolicy.OneMerge merge, MergeState mergeState)
{
    lock (this)
    {
        Debug.Assert(TestPoint("startCommitMergeDeletes"));

        IList<SegmentCommitInfo> sourceSegments = merge.Segments;

        if (infoStream.IsEnabled("IW"))
        {
            infoStream.Message("IW", "commitMergeDeletes " + SegString(merge.Segments));
        }

        // Carefully merge deletes that occurred after we
        // started merging:
        int docUpto = 0;
        long minGen = long.MaxValue;

        // Lazy init (only when we find a delete to carry over):
        MergedDeletesAndUpdates holder = new MergedDeletesAndUpdates();
        DocValuesFieldUpdates.Container mergedDVUpdates = new DocValuesFieldUpdates.Container();

        for (int i = 0; i < sourceSegments.Count; i++)
        {
            SegmentCommitInfo info = sourceSegments[i];
            minGen = Math.Min(info.BufferedDeletesGen, minGen);
            int docCount = info.Info.DocCount;
            Bits prevLiveDocs = merge.Readers[i].LiveDocs;
            ReadersAndUpdates rld = readerPool.Get(info, false);
            // We hold a ref so it should still be in the pool:
            Debug.Assert(rld != null, "seg=" + info.Info.Name);
            Bits currentLiveDocs = rld.LiveDocs;
            IDictionary<string, DocValuesFieldUpdates> mergingFieldUpdates = rld.MergingFieldUpdates;
            string[] mergingFields;
            DocValuesFieldUpdates[] dvFieldUpdates;
            DocValuesFieldUpdates.Iterator[] updatesIters;
            if (mergingFieldUpdates.Count == 0)
            {
                mergingFields = null;
                updatesIters = null;
                dvFieldUpdates = null;
            }
            else
            {
                mergingFields = new string[mergingFieldUpdates.Count];
                dvFieldUpdates = new DocValuesFieldUpdates[mergingFieldUpdates.Count];
                updatesIters = new DocValuesFieldUpdates.Iterator[mergingFieldUpdates.Count];
                int idx = 0;
                foreach (KeyValuePair<string, DocValuesFieldUpdates> e in mergingFieldUpdates)
                {
                    string field = e.Key;
                    DocValuesFieldUpdates updates = e.Value;
                    mergingFields[idx] = field;
                    dvFieldUpdates[idx] = mergedDVUpdates.GetUpdates(field, updates.Type);
                    if (dvFieldUpdates[idx] == null)
                    {
                        dvFieldUpdates[idx] = mergedDVUpdates.NewUpdates(field, updates.Type, mergeState.SegmentInfo.DocCount);
                    }
                    updatesIters[idx] = updates.GetIterator();
                    updatesIters[idx].NextDoc(); // advance to first update doc
                    ++idx;
                }
            }
            // System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: info=" + info + ", mergingUpdates=" + mergingUpdates);

            if (prevLiveDocs != null)
            {
                // If we had deletions on starting the merge we must
                // still have deletions now:
                Debug.Assert(currentLiveDocs != null);
                Debug.Assert(prevLiveDocs.Length() == docCount);
                Debug.Assert(currentLiveDocs.Length() == docCount);

                // There were deletes on this segment when the merge
                // started. The merge has collapsed away those
                // deletes, but, if new deletes were flushed since
                // the merge started, we must now carefully keep any
                // newly flushed deletes but map them to the new
                // docIDs.

                // Since we copy-on-write, if any new deletes were
                // applied after merging has started, we can just
                // check if the before/after liveDocs have changed.
                // If so, we must carefully merge the liveDocs one
                // doc at a time:
                if (currentLiveDocs != prevLiveDocs)
                {
                    // this means this segment received new deletes
                    // since we started the merge, so we
                    // must merge them:
                    for (int j = 0; j < docCount; j++)
                    {
                        if (!prevLiveDocs.Get(j))
                        {
                            Debug.Assert(!currentLiveDocs.Get(j));
                        }
                        else
                        {
                            if (!currentLiveDocs.Get(j))
                            {
                                if (holder.MergedDeletesAndUpdates_Renamed == null || !holder.InitializedWritableLiveDocs)
                                {
                                    holder.Init(readerPool, merge, mergeState, true);
                                }
                                holder.MergedDeletesAndUpdates_Renamed.Delete(holder.DocMap.Map(docUpto));
                                if (mergingFields != null) // advance all iters beyond the deleted document
                                {
                                    SkipDeletedDoc(updatesIters, j);
                                }
                            }
                            else if (mergingFields != null)
                            {
                                MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                            }
                            docUpto++;
                        }
                    }
                }
                else if (mergingFields != null)
                {
                    // need to check each non-deleted document if it has any updates
                    for (int j = 0; j < docCount; j++)
                    {
                        if (prevLiveDocs.Get(j))
                        {
                            // document isn't deleted, check if any of the fields have an update to it
                            MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                            // advance docUpto for every non-deleted document
                            docUpto++;
                        }
                        else
                        {
                            // advance all iters beyond the deleted document
                            SkipDeletedDoc(updatesIters, j);
                        }
                    }
                }
                else
                {
                    docUpto += info.Info.DocCount - info.DelCount - rld.PendingDeleteCount;
                }
            }
            else if (currentLiveDocs != null)
            {
                Debug.Assert(currentLiveDocs.Length() == docCount);
                // this segment had no deletes before but now it
                // does:
                for (int j = 0; j < docCount; j++)
                {
                    if (!currentLiveDocs.Get(j))
                    {
                        if (holder.MergedDeletesAndUpdates_Renamed == null || !holder.InitializedWritableLiveDocs)
                        {
                            holder.Init(readerPool, merge, mergeState, true);
                        }
                        holder.MergedDeletesAndUpdates_Renamed.Delete(holder.DocMap.Map(docUpto));
                        if (mergingFields != null) // advance all iters beyond the deleted document
                        {
                            SkipDeletedDoc(updatesIters, j);
                        }
                    }
                    else if (mergingFields != null)
                    {
                        MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                    }
                    docUpto++;
                }
            }
            else if (mergingFields != null)
            {
                // no deletions before or after, but there were updates
                for (int j = 0; j < docCount; j++)
                {
                    MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                    // advance docUpto for every non-deleted document
                    docUpto++;
                }
            }
            else
            {
                // No deletes or updates before or after
                docUpto += info.Info.DocCount;
            }
        }

        Debug.Assert(docUpto == merge.Info_Renamed.Info.DocCount);

        if (mergedDVUpdates.Any())
        {
            // System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: mergedDeletes.info=" + mergedDeletes.info + ", mergedFieldUpdates=" + mergedFieldUpdates);
            bool success = false;
            try
            {
                // if any error occurs while writing the field updates we should release
                // the info, otherwise it stays in the pool but is considered not "live"
                // which later causes false exceptions in pool.dropAll().
                // NOTE: currently this is the only place which throws a true
                // IOException. If this ever changes, we need to extend that try/finally
                // block to the rest of the method too.
                holder.MergedDeletesAndUpdates_Renamed.WriteFieldUpdates(directory, mergedDVUpdates);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    holder.MergedDeletesAndUpdates_Renamed.DropChanges();
                    readerPool.Drop(merge.Info_Renamed);
                }
            }
        }

        if (infoStream.IsEnabled("IW"))
        {
            if (holder.MergedDeletesAndUpdates_Renamed == null)
            {
                infoStream.Message("IW", "no new deletes or field updates since merge started");
            }
            else
            {
                string msg = holder.MergedDeletesAndUpdates_Renamed.PendingDeleteCount + " new deletes";
                if (mergedDVUpdates.Any())
                {
                    msg += " and " + mergedDVUpdates.Size() + " new field updates";
                }
                msg += " since merge started";
                infoStream.Message("IW", msg);
            }
        }

        merge.Info_Renamed.BufferedDeletesGen = minGen;

        return holder.MergedDeletesAndUpdates_Renamed;
    }
}
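// SkipDeletedDoc, used above to advance the per-field update iterators past a
// deleted document, is not part of this excerpt. A minimal sketch, assuming the
// Doc()/NextDoc() iterator contract visible in the surrounding code:
private static void SkipDeletedDoc(DocValuesFieldUpdates.Iterator[] updatesIters, int deletedDoc)
{
    foreach (DocValuesFieldUpdates.Iterator iter in updatesIters)
    {
        if (iter.Doc() == deletedDoc)
        {
            // this iterator still points at the deleted doc: drop its update
            iter.NextDoc();
        }
        Debug.Assert(iter.Doc() > deletedDoc, "updateDoc=" + iter.Doc() + " deletedDoc=" + deletedDoc);
    }
}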
internal void Init(ReaderPool readerPool, MergePolicy.OneMerge merge, MergeState mergeState, bool initWritableLiveDocs)
{
    if (MergedDeletesAndUpdates_Renamed == null)
    {
        MergedDeletesAndUpdates_Renamed = readerPool.Get(merge.Info_Renamed, true);
        DocMap = merge.GetDocMap(mergeState);
        Debug.Assert(DocMap.IsConsistent(merge.Info_Renamed.Info.DocCount));
    }
    if (initWritableLiveDocs && !InitializedWritableLiveDocs)
    {
        MergedDeletesAndUpdates_Renamed.InitWritableLiveDocs();
        this.InitializedWritableLiveDocs = true;
    }
}
/// <summary>
/// Expert: If <seealso cref="GetMergeReaders()"/> reorders document IDs, this method
/// must be overridden to return a mapping from the <i>natural</i> doc ID
/// (the doc ID that would result from a natural merge) to the actual doc
/// ID. This mapping is used to apply deletions that happened during the
/// merge to the new segment.
/// </summary>
public virtual DocMap GetDocMap(MergeState mergeState)
{
    return new DocMapAnonymousInnerClassHelper(this);
}
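// DocMapAnonymousInnerClassHelper is not shown in this excerpt. Because a natural
// merge does not reorder documents, the default doc map is presumably just the
// identity mapping; a sketch under that assumption:
private class DocMapAnonymousInnerClassHelper : DocMap
{
    private readonly OneMerge outerInstance;

    public DocMapAnonymousInnerClassHelper(OneMerge outerInstance)
    {
        this.outerInstance = outerInstance;
    }

    public override int Map(int docID)
    {
        // natural merge order: the "natural" doc ID is the actual doc ID
        return docID;
    }
}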
private static bool Equals(MergeState.DocMap map1, MergeState.DocMap map2)
{
    if (map1.MaxDoc != map2.MaxDoc)
    {
        return false;
    }
    for (int i = 0; i < map1.MaxDoc; ++i)
    {
        if (map1.Get(i) != map2.Get(i))
        {
            return false;
        }
    }
    return true;
}
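// Usage sketch only: the Equals helper above can back an assertion that a custom
// doc map (e.g. from an overridden GetDocMap) agrees with a reference map for
// every document. 'naturalDocMap' and 'customDocMap' are placeholder names.
// Debug.Assert(Equals(naturalDocMap, customDocMap),
//     "custom doc map disagrees with the natural merge mapping");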
public virtual void TestMerge()
{
    Codec codec = Codec.Default;
    SegmentInfo si = new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, -1, false, codec, null);

    SegmentMerger merger = new SegmentMerger(Arrays.AsList<AtomicReader>(Reader1, Reader2), si, InfoStream.Default, MergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, CheckAbort.NONE, new FieldInfos.FieldNumbers(), NewIOContext(Random()), true);
    MergeState mergeState = merger.Merge();
    int docsMerged = mergeState.SegmentInfo.DocCount;
    Assert.IsTrue(docsMerged == 2);

    // Should be able to open a new SegmentReader against the new directory
    SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, docsMerged, false, codec, null), 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));
    Assert.IsTrue(mergedReader != null);
    Assert.IsTrue(mergedReader.NumDocs == 2);

    Document newDoc1 = mergedReader.Document(0);
    Assert.IsTrue(newDoc1 != null);
    // There are 2 unstored fields on the document
    Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(Doc1) - DocHelper.Unstored.Count);

    Document newDoc2 = mergedReader.Document(1);
    Assert.IsTrue(newDoc2 != null);
    Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(Doc2) - DocHelper.Unstored.Count);

    DocsEnum termDocs = TestUtil.Docs(Random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(mergedReader), null, 0);
    Assert.IsTrue(termDocs != null);
    Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    int tvCount = 0;
    foreach (FieldInfo fieldInfo in mergedReader.FieldInfos)
    {
        if (fieldInfo.HasVectors)
        {
            tvCount++;
        }
    }

    //System.out.println("stored size: " + stored.Size());
    Assert.AreEqual(3, tvCount, "We do not have 3 fields that were indexed with term vector");

    Terms vector = mergedReader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY);
    Assert.IsNotNull(vector);
    Assert.AreEqual(3, vector.Count);
    TermsEnum termsEnum = vector.GetIterator(null);
    int i = 0;
    while (termsEnum.Next() != null)
    {
        string term = termsEnum.Term.Utf8ToString();
        int freq = (int)termsEnum.TotalTermFreq;
        //System.out.println("Term: " + term + " Freq: " + freq);
        Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term, StringComparison.Ordinal) != -1);
        Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq);
        i++;
    }

    TestSegmentReader.CheckNorms(mergedReader);
    mergedReader.Dispose();
}