/// <summary> /// Seals the <seealso cref="SegmentInfo"/> for the new flushed segment and persists /// the deleted documents <seealso cref="MutableBits"/>. /// </summary> internal virtual void SealFlushedSegment(FlushedSegment flushedSegment) { Debug.Assert(flushedSegment != null); SegmentCommitInfo newSegment = flushedSegment.SegmentInfo; IndexWriter.SetDiagnostics(newSegment.Info, IndexWriter.SOURCE_FLUSH); IOContext context = new IOContext(new FlushInfo(newSegment.Info.DocCount, newSegment.SizeInBytes())); bool success = false; try { if (IndexWriterConfig.UseCompoundFile) { CollectionsHelper.AddAll(FilesToDelete, IndexWriter.CreateCompoundFile(InfoStream, Directory, MergeState.CheckAbort.NONE, newSegment.Info, context)); newSegment.Info.UseCompoundFile = true; } // Have codec write SegmentInfo. Must do this after // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: Codec.SegmentInfoFormat().SegmentInfoWriter.Write(Directory, newSegment.Info, flushedSegment.FieldInfos, context); // TODO: ideally we would freeze newSegment here!! // because any changes after writing the .si will be // lost... // Must write deleted docs after the CFS so we don't // slurp the del file into CFS: if (flushedSegment.LiveDocs != null) { int delCount = flushedSegment.DelCount; Debug.Assert(delCount > 0); if (InfoStream.IsEnabled("DWPT")) { InfoStream.Message("DWPT", "flush: write " + delCount + " deletes gen=" + flushedSegment.SegmentInfo.DelGen); } // TODO: we should prune the segment if it's 100% // deleted... but merge will also catch it. // TODO: in the NRT case it'd be better to hand // this del vector over to the // shortly-to-be-opened SegmentReader and let it // carry the changes; there's no reason to use // filesystem as intermediary here. SegmentCommitInfo info = flushedSegment.SegmentInfo; Codec codec = info.Info.Codec; codec.LiveDocsFormat().WriteLiveDocs(flushedSegment.LiveDocs, Directory, info, delCount, context); newSegment.DelCount = delCount; newSegment.AdvanceDelGen(); } success = true; } finally { if (!success) { if (InfoStream.IsEnabled("DWPT")) { InfoStream.Message("DWPT", "hit exception creating compound file for newly flushed segment " + newSegment.Info.Name); } } } }
/// <summary> /// Flush all pending docs to a new segment </summary> internal virtual FlushedSegment Flush() { Debug.Assert(numDocsInRAM > 0); Debug.Assert(DeleteSlice.Empty, "all deletes must be applied in prepareFlush"); SegmentInfo_Renamed.DocCount = numDocsInRAM; SegmentWriteState flushState = new SegmentWriteState(InfoStream, Directory, SegmentInfo_Renamed, FieldInfos.Finish(), IndexWriterConfig.TermIndexInterval, PendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed()))); double startMBUsed = BytesUsed() / 1024.0 / 1024.0; // Apply delete-by-docID now (delete-byDocID only // happens when an exception is hit processing that // doc, eg if analyzer has some problem w/ the text): if (PendingUpdates.DocIDs.Count > 0) { flushState.LiveDocs = Codec.LiveDocsFormat().NewLiveDocs(numDocsInRAM); foreach (int delDocID in PendingUpdates.DocIDs) { flushState.LiveDocs.Clear(delDocID); } flushState.DelCountOnFlush = PendingUpdates.DocIDs.Count; PendingUpdates.BytesUsed.AddAndGet(-PendingUpdates.DocIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID); PendingUpdates.DocIDs.Clear(); } if (Aborting) { if (InfoStream.IsEnabled("DWPT")) { InfoStream.Message("DWPT", "flush: skip because aborting is set"); } return null; } if (InfoStream.IsEnabled("DWPT")) { InfoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM); } bool success = false; try { Consumer.Flush(flushState); PendingUpdates.Terms.Clear(); SegmentInfo_Renamed.Files = new HashSet<string>(Directory.CreatedFiles); SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(SegmentInfo_Renamed, 0, -1L, -1L); if (InfoStream.IsEnabled("DWPT")) { InfoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : (flushState.SegmentInfo.DocCount - flushState.DelCountOnFlush)) + " deleted docs"); InfoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq() ? "freqs" : "no freqs")); InfoStream.Message("DWPT", "flushedFiles=" + segmentInfoPerCommit.Files()); InfoStream.Message("DWPT", "flushed codec=" + Codec); } BufferedUpdates segmentDeletes; if (PendingUpdates.Queries.Count == 0 && PendingUpdates.NumericUpdates.Count == 0 && PendingUpdates.BinaryUpdates.Count == 0) { PendingUpdates.Clear(); segmentDeletes = null; } else { segmentDeletes = PendingUpdates; } if (InfoStream.IsEnabled("DWPT")) { double newSegmentSize = segmentInfoPerCommit.SizeInBytes() / 1024.0 / 1024.0; InfoStream.Message("DWPT", "flushed: segment=" + SegmentInfo_Renamed.Name + " ramUsed=" + startMBUsed.ToString(Nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(Nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(Nf)); } Debug.Assert(SegmentInfo_Renamed != null); FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush); SealFlushedSegment(fs); success = true; return fs; } finally { if (!success) { Abort(FilesToDelete); } } }
private bool DoFlush(DocumentsWriterPerThread flushingDWPT) { bool hasEvents = false; while (flushingDWPT != null) { hasEvents = true; bool success = false; SegmentFlushTicket ticket = null; try { if (Debugging.AssertsEnabled) { Debugging.Assert(currentFullFlushDelQueue == null || flushingDWPT.deleteQueue == currentFullFlushDelQueue, () => "expected: " + currentFullFlushDelQueue + "but was: " + flushingDWPT.deleteQueue + " " + flushControl.IsFullFlush); } /* * Since with DWPT the flush process is concurrent and several DWPT * could flush at the same time we must maintain the order of the * flushes before we can apply the flushed segment and the frozen global * deletes it is buffering. The reason for this is that the global * deletes mark a certain point in time where we took a DWPT out of * rotation and freeze the global deletes. * * Example: A flush 'A' starts and freezes the global deletes, then * flush 'B' starts and freezes all deletes occurred since 'A' has * started. if 'B' finishes before 'A' we need to wait until 'A' is done * otherwise the deletes frozen by 'B' are not applied to 'A' and we * might miss to deletes documents in 'A'. */ try { // Each flush is assigned a ticket in the order they acquire the ticketQueue lock ticket = ticketQueue.AddFlushTicket(flushingDWPT); int flushingDocsInRam = flushingDWPT.NumDocsInRAM; bool dwptSuccess = false; try { // flush concurrently without locking FlushedSegment newSegment = flushingDWPT.Flush(); ticketQueue.AddSegment(ticket, newSegment); dwptSuccess = true; } finally { SubtractFlushedNumDocs(flushingDocsInRam); if (flushingDWPT.PendingFilesToDelete.Count > 0) { PutEvent(new DeleteNewFilesEvent(flushingDWPT.PendingFilesToDelete)); hasEvents = true; } if (!dwptSuccess) { PutEvent(new FlushFailedEvent(flushingDWPT.SegmentInfo)); hasEvents = true; } } // flush was successful once we reached this point - new seg. has been assigned to the ticket! success = true; } finally { if (!success && ticket != null) { // In the case of a failure make sure we are making progress and // apply all the deletes since the segment flush failed since the flush // ticket could hold global deletes see FlushTicket#canPublish() ticketQueue.MarkTicketFailed(ticket); } } /* * Now we are done and try to flush the ticket queue if the head of the * queue has already finished the flush. */ if (ticketQueue.TicketCount >= perThreadPool.NumThreadStatesActive) { // this means there is a backlog: the one // thread in innerPurge can't keep up with all // other threads flushing segments. In this case // we forcefully stall the producers. PutEvent(ForcedPurgeEvent.INSTANCE); break; } } finally { flushControl.DoAfterFlush(flushingDWPT); flushingDWPT.CheckAndResetHasAborted(); } flushingDWPT = flushControl.NextPendingFlush(); } if (hasEvents) { PutEvent(MergePendingEvent.INSTANCE); } // If deletes alone are consuming > 1/2 our RAM // buffer, force them all to apply now. this is to // prevent too-frequent flushing of a long tail of // tiny segments: double ramBufferSizeMB = config.RAMBufferSizeMB; if (ramBufferSizeMB != Index.IndexWriterConfig.DISABLE_AUTO_FLUSH && flushControl.DeleteBytesUsed > (1024 * 1024 * ramBufferSizeMB / 2)) { if (infoStream.IsEnabled("DW")) { infoStream.Message("DW", "force apply deletes bytesUsed=" + flushControl.DeleteBytesUsed + " vs ramBuffer=" + (1024 * 1024 * ramBufferSizeMB)); } hasEvents = true; if (!this.ApplyAllDeletes(deleteQueue)) { PutEvent(ApplyDeletesEvent.INSTANCE); } } return(hasEvents); }
internal virtual void SealFlushedSegment(FlushedSegment flushedSegment) { if (Debugging.AssertsEnabled) { Debugging.Assert(flushedSegment != null); } SegmentCommitInfo newSegment = flushedSegment.segmentInfo; IndexWriter.SetDiagnostics(newSegment.Info, IndexWriter.SOURCE_FLUSH); IOContext context = new IOContext(new FlushInfo(newSegment.Info.DocCount, newSegment.GetSizeInBytes())); bool success = false; try { if (indexWriterConfig.UseCompoundFile) { filesToDelete.UnionWith(IndexWriter.CreateCompoundFile(infoStream, directory, CheckAbort.NONE, newSegment.Info, context)); newSegment.Info.UseCompoundFile = true; } // Have codec write SegmentInfo. Must do this after // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: codec.SegmentInfoFormat.SegmentInfoWriter.Write(directory, newSegment.Info, flushedSegment.fieldInfos, context); // TODO: ideally we would freeze newSegment here!! // because any changes after writing the .si will be // lost... // Must write deleted docs after the CFS so we don't // slurp the del file into CFS: if (flushedSegment.liveDocs != null) { int delCount = flushedSegment.delCount; if (Debugging.AssertsEnabled) { Debugging.Assert(delCount > 0); } if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "flush: write " + delCount + " deletes gen=" + flushedSegment.segmentInfo.DelGen); } // TODO: we should prune the segment if it's 100% // deleted... but merge will also catch it. // TODO: in the NRT case it'd be better to hand // this del vector over to the // shortly-to-be-opened SegmentReader and let it // carry the changes; there's no reason to use // filesystem as intermediary here. SegmentCommitInfo info = flushedSegment.segmentInfo; Codec codec = info.Info.Codec; codec.LiveDocsFormat.WriteLiveDocs(flushedSegment.liveDocs, directory, info, delCount, context); newSegment.DelCount = delCount; newSegment.AdvanceDelGen(); } success = true; } finally { if (!success) { if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "hit exception creating compound file for newly flushed segment " + newSegment.Info.Name); } } } }
internal virtual FlushedSegment Flush() { if (Debugging.AssertsEnabled) { Debugging.Assert(numDocsInRAM > 0); Debugging.Assert(deleteSlice.IsEmpty, "all deletes must be applied in prepareFlush"); } segmentInfo.DocCount = numDocsInRAM; SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.Finish(), indexWriterConfig.TermIndexInterval, pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed))); double startMBUsed = BytesUsed / 1024.0 / 1024.0; // Apply delete-by-docID now (delete-byDocID only // happens when an exception is hit processing that // doc, eg if analyzer has some problem w/ the text): if (pendingUpdates.docIDs.Count > 0) { flushState.LiveDocs = codec.LiveDocsFormat.NewLiveDocs(numDocsInRAM); foreach (int delDocID in pendingUpdates.docIDs) { flushState.LiveDocs.Clear(delDocID); } flushState.DelCountOnFlush = pendingUpdates.docIDs.Count; pendingUpdates.bytesUsed.AddAndGet(-pendingUpdates.docIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID); pendingUpdates.docIDs.Clear(); } if (aborting) { if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "flush: skip because aborting is set"); } return(null); } if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM); } bool success = false; try { consumer.Flush(flushState); pendingUpdates.terms.Clear(); segmentInfo.SetFiles(new JCG.HashSet <string>(directory.CreatedFiles)); SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, -1L, -1L); if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : (flushState.SegmentInfo.DocCount - flushState.DelCountOnFlush)) + " deleted docs"); infoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq ? "freqs" : "no freqs")); infoStream.Message("DWPT", "flushedFiles=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", segmentInfoPerCommit.GetFiles())); infoStream.Message("DWPT", "flushed codec=" + codec); } BufferedUpdates segmentDeletes; if (pendingUpdates.queries.Count == 0 && pendingUpdates.numericUpdates.Count == 0 && pendingUpdates.binaryUpdates.Count == 0) { pendingUpdates.Clear(); segmentDeletes = null; } else { segmentDeletes = pendingUpdates; } if (infoStream.IsEnabled("DWPT")) { double newSegmentSize = segmentInfoPerCommit.GetSizeInBytes() / 1024.0 / 1024.0; infoStream.Message("DWPT", "flushed: segment=" + segmentInfo.Name + " ramUsed=" + startMBUsed.ToString(nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(nf)); } if (Debugging.AssertsEnabled) { Debugging.Assert(segmentInfo != null); } FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush); SealFlushedSegment(fs); success = true; return(fs); } finally { if (!success) { Abort(filesToDelete); } } }
internal void SetSegment(FlushedSegment segment) // LUCENENET NOTE: Made internal rather than protected because class is sealed { Debug.Assert(!failed); this.segment = segment; }