public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        if (Debugging.AssertsEnabled) Debugging.Assert(UninterruptableMonitor.IsEntered(writer));
        //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

        if (Debugging.AssertsEnabled) Debugging.Assert(dvUpdates.Any());

        // Do this so we can delete any created files on
        // exception; this saves all codecs from having to do
        // it:
        TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

        FieldInfos fieldInfos = null;
        bool success = false;
        try
        {
            Codec codec = Info.Info.Codec;

            // reader could be null e.g. for a just merged segment (from
            // IndexWriter.commitMergedDeletes).
            SegmentReader reader = this.reader ?? new SegmentReader(Info, writer.Config.ReaderTermsIndexDivisor, IOContext.READ_ONCE);
            try
            {
                // clone FieldInfos so that we can update their dvGen separately from
                // the reader's infos and write them to a new fieldInfos_gen file
                FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
                // cannot use builder.add(reader.getFieldInfos()) because it does not
                // clone FI.attributes as well FI.dvGen
                foreach (FieldInfo fi in reader.FieldInfos)
                {
                    FieldInfo clone = builder.Add(fi);
                    // copy the stuff FieldInfos.Builder doesn't copy
                    if (fi.Attributes != null)
                    {
                        foreach (KeyValuePair<string, string> e in fi.Attributes)
                        {
                            clone.PutAttribute(e.Key, e.Value);
                        }
                    }
                    clone.DocValuesGen = fi.DocValuesGen;
                }

                // create new fields or update existing ones to have NumericDV type
                foreach (string f in dvUpdates.numericDVUpdates.Keys)
                {
                    builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                }
                // create new fields or update existing ones to have BinaryDV type
                foreach (string f in dvUpdates.binaryDVUpdates.Keys)
                {
                    builder.AddOrUpdate(f, BinaryDocValuesField.TYPE);
                }

                fieldInfos = builder.Finish();
                long nextFieldInfosGen = Info.NextFieldInfosGen;
                // LUCENENET specific: We created the segments names wrong in 4.8.0-beta00001 - 4.8.0-beta00015,
                // so we added a switch to be able to read these indexes in later versions. This logic as well as an
                // optimization on the first 100 segment values is implemented in SegmentInfos.SegmentNumberToString().
                string segmentSuffix = SegmentInfos.SegmentNumberToString(nextFieldInfosGen);
                SegmentWriteState state = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                DocValuesFormat docValuesFormat = codec.DocValuesFormat;
                DocValuesConsumer fieldsConsumer = docValuesFormat.FieldsConsumer(state);
                bool fieldsConsumerSuccess = false;
                try
                {
                    //System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                    foreach (KeyValuePair<string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                    {
                        string field = e.Key;
                        NumericDocValuesFieldUpdates fieldUpdates = e.Value;
                        FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                        if (Debugging.AssertsEnabled) Debugging.Assert(fieldInfo != null);

                        fieldInfo.DocValuesGen = nextFieldInfosGen;
                        // write the numeric updates to a new gen'd docvalues file
                        fieldsConsumer.AddNumericField(fieldInfo, GetInt64Enumerable(reader, field, fieldUpdates));
                    }

                    //System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                    foreach (KeyValuePair<string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                    {
                        string field = e.Key;
                        BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;
                        FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                        if (Debugging.AssertsEnabled) Debugging.Assert(fieldInfo != null);

                        //System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);
                        fieldInfo.DocValuesGen = nextFieldInfosGen;
                        // write the binary updates to a new gen'd docvalues file
                        fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));
                    }

                    codec.FieldInfosFormat.FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                    fieldsConsumerSuccess = true;
                }
                finally
                {
                    if (fieldsConsumerSuccess)
                    {
                        fieldsConsumer.Dispose();
                    }
                    else
                    {
                        IOUtils.DisposeWhileHandlingException(fieldsConsumer);
                    }
                }
            }
            finally
            {
                if (reader != this.reader)
                {
                    //System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
                    reader.Dispose();
                }
            }

            success = true;
        }
        finally
        {
            if (!success)
            {
                // Advance only the nextWriteDocValuesGen so that a 2nd
                // attempt to write will write to a new file
                Info.AdvanceNextWriteFieldInfosGen();

                // Delete any partially created file(s):
                foreach (string fileName in trackingDir.CreatedFiles)
                {
                    try
                    {
                        dir.DeleteFile(fileName);
                    }
                    catch (Exception t) when (t.IsThrowable())
                    {
                        // Ignore so we throw only the first exc
                    }
                }
            }
        }

        Info.AdvanceFieldInfosGen();
        // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
        if (isMerging)
        {
            foreach (KeyValuePair<string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
            {
                if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                {
                    mergingDVUpdates[e.Key] = e.Value;
                }
                else
                {
                    updates.Merge(e.Value);
                }
            }
            foreach (KeyValuePair<string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
            {
                if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                {
                    mergingDVUpdates[e.Key] = e.Value;
                }
                else
                {
                    updates.Merge(e.Value);
                }
            }
        }

        // create a new map, keeping only the gens that are in use
        IDictionary<long, ISet<string>> genUpdatesFiles = Info.UpdatesFiles;
        IDictionary<long, ISet<string>> newGenUpdatesFiles = new Dictionary<long, ISet<string>>();
        long fieldInfosGen = Info.FieldInfosGen;
        foreach (FieldInfo fi in fieldInfos)
        {
            long dvGen = fi.DocValuesGen;
            if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
            {
                if (dvGen == fieldInfosGen)
                {
                    newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                }
                else
                {
                    newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];
                }
            }
        }

        Info.SetGenUpdatesFiles(newGenUpdatesFiles);

        // wrote new files, should checkpoint()
        writer.Checkpoint();

        // if there is a reader open, reopen it to reflect the updates
        if (reader != null)
        {
            SegmentReader newReader = new SegmentReader(Info, reader, liveDocs, Info.Info.DocCount - Info.DelCount - pendingDeleteCount);
            bool reopened = false;
            try
            {
                reader.DecRef();
                reader = newReader;
                reopened = true;
            }
            finally
            {
                if (!reopened)
                {
                    newReader.DecRef();
                }
            }
        }
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
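For context, here is a minimal caller-side sketch of how these gen'd DocValues update files come into being. It is an illustration, not part of the code above: the "id"/"price" field names and the demo class are hypothetical, and it assumes the Lucene.NET 4.8 public IndexWriter API (UpdateNumericDocValue) rather than this internal method.

using Lucene.Net.Documents;
using Lucene.Net.Index;

// Hypothetical demo: update a NumericDocValues field in place, without
// re-indexing the document. Committing routes the buffered update through
// ReadersAndUpdates.WriteFieldUpdates above, which writes new fieldInfos
// and docvalues files under the next fieldInfosGen suffix.
public static class DocValuesUpdateDemo
{
    public static void Run(IndexWriter writer)
    {
        var doc = new Document();
        doc.Add(new StringField("id", "doc-1", Field.Store.NO));
        doc.Add(new NumericDocValuesField("price", 100L));
        writer.AddDocument(doc);
        writer.Commit();

        // In-place DocValues update: every doc matching the term gets the new value.
        writer.UpdateNumericDocValue(new Term("id", "doc-1"), "price", 150L);
        writer.Commit(); // flushes the update and writes the new gen'd files
    }
}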
/// <summary>
/// Resolves the buffered deleted Term/Query/docIDs into
/// actual deleted docIDs in the liveDocs MutableBits for
/// each SegmentReader.
/// </summary>
public virtual ApplyDeletesResult ApplyDeletesAndUpdates(IndexWriter.ReaderPool readerPool, IList<SegmentCommitInfo> infos)
{
    lock (this)
    {
        long t0 = Environment.TickCount; // msec timestamp for the "applyDeletes took" log below

        if (infos.Count == 0)
        {
            return new ApplyDeletesResult(false, NextGen_Renamed++, null);
        }

        Debug.Assert(CheckDeleteStats());

        if (!Any())
        {
            if (InfoStream.IsEnabled("BD"))
            {
                InfoStream.Message("BD", "applyDeletes: no deletes; skipping");
            }
            return new ApplyDeletesResult(false, NextGen_Renamed++, null);
        }

        if (InfoStream.IsEnabled("BD"))
        {
            InfoStream.Message("BD", "applyDeletes: infos=" + infos + " packetCount=" + Updates.Count);
        }

        long gen = NextGen_Renamed++;

        List<SegmentCommitInfo> infos2 = new List<SegmentCommitInfo>();
        infos2.AddRange(infos);
        infos2.Sort(sortSegInfoByDelGen);

        CoalescedUpdates coalescedUpdates = null;
        bool anyNewDeletes = false;

        int infosIDX = infos2.Count - 1;
        int delIDX = Updates.Count - 1;

        IList<SegmentCommitInfo> allDeleted = null;

        while (infosIDX >= 0)
        {
            //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);

            FrozenBufferedUpdates packet = delIDX >= 0 ? Updates[delIDX] : null;
            SegmentCommitInfo info = infos2[infosIDX];
            long segGen = info.BufferedDeletesGen;

            if (packet != null && segGen < packet.DelGen)
            {
                //System.out.println("  coalesce");
                if (coalescedUpdates == null)
                {
                    coalescedUpdates = new CoalescedUpdates();
                }
                if (!packet.IsSegmentPrivate)
                {
                    /*
                     * Only coalesce if we are NOT on a segment private del packet: the segment private del packet
                     * must only be applied to segments with the same delGen. Yet, if a segment is already deleted
                     * from the SI since it had no more documents remaining after some del packets younger than
                     * its segPrivate packet (higher delGen) have been applied, the segPrivate packet has not been
                     * removed.
                     */
                    coalescedUpdates.Update(packet);
                }

                delIDX--;
            }
            else if (packet != null && segGen == packet.DelGen)
            {
                Debug.Assert(packet.IsSegmentPrivate, "Packet and Segments deletegen can only match on a segment private del packet gen=" + segGen);
                //System.out.println("  eq");

                // Lock order: IW -> BD -> RP
                Debug.Assert(readerPool.InfoIsLive(info));
                ReadersAndUpdates rld = readerPool.Get(info, true);
                SegmentReader reader = rld.GetReader(IOContext.READ);
                int delCount = 0;
                bool segAllDeletes;
                try
                {
                    DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
                    if (coalescedUpdates != null)
                    {
                        //System.out.println("    del coalesced");
                        delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader);
                        delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader);
                        ApplyDocValuesUpdates(coalescedUpdates.NumericDVUpdates, rld, reader, dvUpdates);
                        ApplyDocValuesUpdates(coalescedUpdates.BinaryDVUpdates, rld, reader, dvUpdates);
                    }
                    //System.out.println("    del exact");
                    // Don't delete by Term here; DocumentsWriterPerThread
                    // already did that on flush:
                    delCount += (int)ApplyQueryDeletes(packet.QueriesIterable(), rld, reader);
                    ApplyDocValuesUpdates(Arrays.AsList(packet.NumericDVUpdates), rld, reader, dvUpdates);
                    ApplyDocValuesUpdates(Arrays.AsList(packet.BinaryDVUpdates), rld, reader, dvUpdates);
                    if (dvUpdates.Any())
                    {
                        rld.WriteFieldUpdates(info.Info.Dir, dvUpdates);
                    }
                    int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount;
                    Debug.Assert(fullDelCount <= rld.Info.Info.DocCount);
                    segAllDeletes = fullDelCount == rld.Info.Info.DocCount;
                }
                finally
                {
                    rld.Release(reader);
                    readerPool.Release(rld);
                }
                anyNewDeletes |= delCount > 0;

                if (segAllDeletes)
                {
                    if (allDeleted == null)
                    {
                        allDeleted = new List<SegmentCommitInfo>();
                    }
                    allDeleted.Add(info);
                }

                if (InfoStream.IsEnabled("BD"))
                {
                    InfoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedUpdates == null ? "null" : coalescedUpdates.ToString()) + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
                }

                if (coalescedUpdates == null)
                {
                    coalescedUpdates = new CoalescedUpdates();
                }

                /*
                 * Since we are on a segment private del packet we must not
                 * update the coalescedDeletes here! We can simply advance to the
                 * next packet and seginfo.
                 */
                delIDX--;
                infosIDX--;
                info.BufferedDeletesGen = gen;
            }
            else
            {
                //System.out.println("  gt");

                if (coalescedUpdates != null)
                {
                    // Lock order: IW -> BD -> RP
                    Debug.Assert(readerPool.InfoIsLive(info));
                    ReadersAndUpdates rld = readerPool.Get(info, true);
                    SegmentReader reader = rld.GetReader(IOContext.READ);
                    int delCount = 0;
                    bool segAllDeletes;
                    try
                    {
                        delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader);
                        delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader);
                        DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
                        ApplyDocValuesUpdates(coalescedUpdates.NumericDVUpdates, rld, reader, dvUpdates);
                        ApplyDocValuesUpdates(coalescedUpdates.BinaryDVUpdates, rld, reader, dvUpdates);
                        if (dvUpdates.Any())
                        {
                            rld.WriteFieldUpdates(info.Info.Dir, dvUpdates);
                        }
                        int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount;
                        Debug.Assert(fullDelCount <= rld.Info.Info.DocCount);
                        segAllDeletes = fullDelCount == rld.Info.Info.DocCount;
                    }
                    finally
                    {
                        rld.Release(reader);
                        readerPool.Release(rld);
                    }
                    anyNewDeletes |= delCount > 0;

                    if (segAllDeletes)
                    {
                        if (allDeleted == null)
                        {
                            allDeleted = new List<SegmentCommitInfo>();
                        }
                        allDeleted.Add(info);
                    }

                    if (InfoStream.IsEnabled("BD"))
                    {
                        InfoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + coalescedUpdates + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
                    }
                }
                info.BufferedDeletesGen = gen;

                infosIDX--;
            }
        }

        Debug.Assert(CheckDeleteStats());
        if (InfoStream.IsEnabled("BD"))
        {
            InfoStream.Message("BD", "applyDeletes took " + (Environment.TickCount - t0) + " msec");
        }
        // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;

        return new ApplyDeletesResult(anyNewDeletes, gen, allDeleted);
    }
}
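The buffered delete packets this method replays originate from the public IndexWriter API; deletes are only buffered at call time and resolved to per-segment docIDs here. A hedged sketch, where the field and term names are illustrative assumptions:

using Lucene.Net.Index;
using Lucene.Net.Search;

// Hypothetical demo: neither call touches segment files immediately; the
// docIDs are resolved lazily, per segment, by ApplyDeletesAndUpdates above.
public static class BufferedDeletesDemo
{
    public static void Run(IndexWriter writer)
    {
        // Delete by term: resolved via a TermsEnum/DocsEnum walk per segment.
        writer.DeleteDocuments(new Term("id", "doc-1"));

        // Delete by query: resolved by running the query against each segment.
        writer.DeleteDocuments(new TermQuery(new Term("category", "obsolete")));

        writer.Commit(); // forces the buffered packets to be applied
    }
}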
// DocValues updates
private void ApplyDocValuesUpdates<T1>(IEnumerable<T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer)
    where T1 : DocValuesUpdate
{
    lock (this)
    {
        Fields fields = reader.Fields;
        if (fields == null)
        {
            // this reader has no postings
            return;
        }

        // TODO: we can process the updates per DV field, from last to first so that
        // if multiple terms affect same document for the same field, we add an update
        // only once (that of the last term). To do that, we can keep a bitset which
        // marks which documents have already been updated. So e.g. if term T1
        // updates doc 7, and then we process term T2 and it updates doc 7 as well,
        // we don't apply the update since we know T1 came last and therefore wins
        // the update.
        // We can also use that bitset as 'liveDocs' to pass to TermEnum.docs(), so
        // that these documents aren't even returned.

        string currentField = null;
        TermsEnum termsEnum = null;
        DocsEnum docs = null;

        //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
        foreach (DocValuesUpdate update in updates)
        {
            Term term = update.Term;
            int limit = update.DocIDUpto;

            // TODO: we traverse the terms in update order (not term order) so that we
            // apply the updates in the correct order, i.e. if two terms update the
            // same document, the last one that came in wins, irrespective of the
            // terms lexical order.
            // we can apply the updates in terms order if we keep an updatesGen (and
            // increment it with every update) and attach it to each NumericUpdate. Note
            // that we cannot rely only on docIDUpto because an app may send two updates
            // which will get the same docIDUpto, yet will still need to respect the order
            // those updates arrived.

            if (!term.Field().Equals(currentField))
            {
                // if we change the code to process updates in terms order, enable this assert
                // assert currentField == null || currentField.compareTo(term.field()) < 0;
                currentField = term.Field();
                Terms terms = fields.Terms(currentField);
                if (terms != null)
                {
                    termsEnum = terms.Iterator(termsEnum);
                }
                else
                {
                    termsEnum = null;
                    continue; // no terms in that field
                }
            }

            if (termsEnum == null)
            {
                continue;
            }
            //System.out.println("  term=" + term);

            if (termsEnum.SeekExact(term.Bytes()))
            {
                // we don't need term frequencies for this
                DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsEnum.FLAG_NONE);
                //System.out.println("BDS: got docsEnum=" + docsEnum);

                DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.Field, update.Type);
                if (dvUpdates == null)
                {
                    dvUpdates = dvUpdatesContainer.NewUpdates(update.Field, update.Type, reader.MaxDoc);
                }
                int doc;
                while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID);
                    if (doc >= limit)
                    {
                        break; // no more docs that can be updated for this term
                    }
                    dvUpdates.Add(doc, update.Value);
                }
            }
        }
    }
}
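The arrival-order semantics spelled out in the TODO comments above (the last update to reach a document wins, regardless of term order) are observable through the public API. A hedged sketch with illustrative field names:

using Lucene.Net.Index;

// Hypothetical demo: two buffered updates target the same document. Because
// ApplyDocValuesUpdates traverses updates in arrival order and respects
// docIDUpto, the later value wins when the packets are replayed.
public static class UpdateOrderingDemo
{
    public static void Run(IndexWriter writer)
    {
        writer.UpdateNumericDocValue(new Term("id", "doc-1"), "price", 200L);
        writer.UpdateNumericDocValue(new Term("id", "doc-1"), "price", 300L);
        writer.Commit();
        // After reopening a reader, "price" for doc-1 reads 300, not 200.
    }
}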
/// <summary>
/// Carefully merges deletes and updates for the segments we just merged. This
/// is tricky because, although merging will clear all deletes (compacts the
/// documents) and compact all the updates, new deletes and updates may have
/// been flushed to the segments since the merge was started. This method
/// "carries over" such new deletes and updates onto the newly merged segment,
/// and saves the resulting deletes and updates files (incrementing the delete
/// and DV generations for merge.info). If no deletes were flushed, no new
/// deletes file is saved.
/// </summary>
private ReadersAndUpdates CommitMergedDeletesAndUpdates(MergePolicy.OneMerge merge, MergeState mergeState)
{
    lock (this)
    {
        Debug.Assert(TestPoint("startCommitMergeDeletes"));

        IList<SegmentCommitInfo> sourceSegments = merge.Segments;

        if (infoStream.IsEnabled("IW"))
        {
            infoStream.Message("IW", "commitMergeDeletes " + SegString(merge.Segments));
        }

        // Carefully merge deletes that occurred after we
        // started merging:
        int docUpto = 0;
        long minGen = long.MaxValue;

        // Lazy init (only when we find a delete to carry over):
        MergedDeletesAndUpdates holder = new MergedDeletesAndUpdates();
        DocValuesFieldUpdates.Container mergedDVUpdates = new DocValuesFieldUpdates.Container();

        for (int i = 0; i < sourceSegments.Count; i++)
        {
            SegmentCommitInfo info = sourceSegments[i];
            minGen = Math.Min(info.BufferedDeletesGen, minGen);
            int docCount = info.Info.DocCount;
            Bits prevLiveDocs = merge.Readers[i].LiveDocs;
            ReadersAndUpdates rld = readerPool.Get(info, false);
            // We hold a ref so it should still be in the pool:
            Debug.Assert(rld != null, "seg=" + info.Info.Name);
            Bits currentLiveDocs = rld.LiveDocs;
            IDictionary<string, DocValuesFieldUpdates> mergingFieldUpdates = rld.MergingFieldUpdates;
            string[] mergingFields;
            DocValuesFieldUpdates[] dvFieldUpdates;
            DocValuesFieldUpdates.Iterator[] updatesIters;
            if (mergingFieldUpdates.Count == 0)
            {
                mergingFields = null;
                updatesIters = null;
                dvFieldUpdates = null;
            }
            else
            {
                mergingFields = new string[mergingFieldUpdates.Count];
                dvFieldUpdates = new DocValuesFieldUpdates[mergingFieldUpdates.Count];
                updatesIters = new DocValuesFieldUpdates.Iterator[mergingFieldUpdates.Count];
                int idx = 0;
                foreach (KeyValuePair<string, DocValuesFieldUpdates> e in mergingFieldUpdates)
                {
                    string field = e.Key;
                    DocValuesFieldUpdates updates = e.Value;
                    mergingFields[idx] = field;
                    dvFieldUpdates[idx] = mergedDVUpdates.GetUpdates(field, updates.Type);
                    if (dvFieldUpdates[idx] == null)
                    {
                        dvFieldUpdates[idx] = mergedDVUpdates.NewUpdates(field, updates.Type, mergeState.SegmentInfo.DocCount);
                    }
                    updatesIters[idx] = updates.GetIterator();
                    updatesIters[idx].NextDoc(); // advance to first update doc
                    ++idx;
                }
            }
            //System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: info=" + info + ", mergingUpdates=" + mergingUpdates);

            if (prevLiveDocs != null)
            {
                // If we had deletions on starting the merge we must
                // still have deletions now:
                Debug.Assert(currentLiveDocs != null);
                Debug.Assert(prevLiveDocs.Length() == docCount);
                Debug.Assert(currentLiveDocs.Length() == docCount);

                // There were deletes on this segment when the merge
                // started. The merge has collapsed away those
                // deletes, but, if new deletes were flushed since
                // the merge started, we must now carefully keep any
                // newly flushed deletes but mapping them to the new
                // docIDs.

                // Since we copy-on-write, if any new deletes were
                // applied after merging has started, we can just
                // check if the before/after liveDocs have changed.
                // If so, we must carefully merge the liveDocs one
                // doc at a time:
                if (currentLiveDocs != prevLiveDocs)
                {
                    // this means this segment received new deletes
                    // since we started the merge, so we
                    // must merge them:
                    for (int j = 0; j < docCount; j++)
                    {
                        if (!prevLiveDocs.Get(j))
                        {
                            Debug.Assert(!currentLiveDocs.Get(j));
                        }
                        else
                        {
                            if (!currentLiveDocs.Get(j))
                            {
                                if (holder.MergedDeletesAndUpdates_Renamed == null || !holder.InitializedWritableLiveDocs)
                                {
                                    holder.Init(readerPool, merge, mergeState, true);
                                }
                                holder.MergedDeletesAndUpdates_Renamed.Delete(holder.DocMap.Map(docUpto));
                                if (mergingFields != null) // advance all iters beyond the deleted document
                                {
                                    SkipDeletedDoc(updatesIters, j);
                                }
                            }
                            else if (mergingFields != null)
                            {
                                MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                            }
                            docUpto++;
                        }
                    }
                }
                else if (mergingFields != null)
                {
                    // need to check each non-deleted document if it has any updates
                    for (int j = 0; j < docCount; j++)
                    {
                        if (prevLiveDocs.Get(j))
                        {
                            // document isn't deleted, check if any of the fields have an update to it
                            MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                            // advance docUpto for every non-deleted document
                            docUpto++;
                        }
                        else
                        {
                            // advance all iters beyond the deleted document
                            SkipDeletedDoc(updatesIters, j);
                        }
                    }
                }
                else
                {
                    docUpto += info.Info.DocCount - info.DelCount - rld.PendingDeleteCount;
                }
            }
            else if (currentLiveDocs != null)
            {
                Debug.Assert(currentLiveDocs.Length() == docCount);
                // this segment had no deletes before but now it
                // does:
                for (int j = 0; j < docCount; j++)
                {
                    if (!currentLiveDocs.Get(j))
                    {
                        if (holder.MergedDeletesAndUpdates_Renamed == null || !holder.InitializedWritableLiveDocs)
                        {
                            holder.Init(readerPool, merge, mergeState, true);
                        }
                        holder.MergedDeletesAndUpdates_Renamed.Delete(holder.DocMap.Map(docUpto));
                        if (mergingFields != null) // advance all iters beyond the deleted document
                        {
                            SkipDeletedDoc(updatesIters, j);
                        }
                    }
                    else if (mergingFields != null)
                    {
                        MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                    }
                    docUpto++;
                }
            }
            else if (mergingFields != null)
            {
                // no deletions before or after, but there were updates
                for (int j = 0; j < docCount; j++)
                {
                    MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                    // advance docUpto for every non-deleted document
                    docUpto++;
                }
            }
            else
            {
                // No deletes or updates before or after
                docUpto += info.Info.DocCount;
            }
        }

        Debug.Assert(docUpto == merge.Info_Renamed.Info.DocCount);

        if (mergedDVUpdates.Any())
        {
            //System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: mergedDeletes.info=" + mergedDeletes.info + ", mergedFieldUpdates=" + mergedFieldUpdates);
            bool success = false;
            try
            {
                // if any error occurs while writing the field updates we should release
                // the info, otherwise it stays in the pool but is considered not "live"
                // which later causes false exceptions in pool.dropAll().
                // NOTE: currently this is the only place which throws a true
                // IOException. If this ever changes, we need to extend that try/finally
                // block to the rest of the method too.
                holder.MergedDeletesAndUpdates_Renamed.WriteFieldUpdates(directory, mergedDVUpdates);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    holder.MergedDeletesAndUpdates_Renamed.DropChanges();
                    readerPool.Drop(merge.Info_Renamed);
                }
            }
        }

        if (infoStream.IsEnabled("IW"))
        {
            if (holder.MergedDeletesAndUpdates_Renamed == null)
            {
                infoStream.Message("IW", "no new deletes or field updates since merge started");
            }
            else
            {
                string msg = holder.MergedDeletesAndUpdates_Renamed.PendingDeleteCount + " new deletes";
                if (mergedDVUpdates.Any())
                {
                    msg += " and " + mergedDVUpdates.Size() + " new field updates";
                }
                msg += " since merge started";
                infoStream.Message("IW", msg);
            }
        }

        merge.Info_Renamed.BufferedDeletesGen = minGen;

        return holder.MergedDeletesAndUpdates_Renamed;
    }
}
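To make the docID remapping concrete, a small worked example (the numbers are illustrative, assuming a single source segment and an identity merge DocMap): say the segment had 5 docs, doc 2 was already deleted when the merge started (prevLiveDocs = 1 1 0 1 1), and doc 4 was deleted while the merge ran (currentLiveDocs = 1 1 0 1 0). The merge output contains the 4 docs that were live at merge start, renumbered 0..3. Walking j = 0..4, docUpto advances only on docs that were live at merge start, so source doc 4 is the 4th such doc (docUpto = 3); the loop detects prevLiveDocs.Get(4) && !currentLiveDocs.Get(4) and calls holder.MergedDeletesAndUpdates_Renamed.Delete(holder.DocMap.Map(3)), carrying the new delete onto merged doc 3.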