/// <summary>
/// Pumps a single operation from either the copy stream or the replication stream.
/// A null operation is the expected outcome; any real operation is reported as a transient fault.
/// </summary>
/// <param name="isCopy">True to pump the copy stream, false to pump the replication stream.</param>
/// <returns>A task that completes once the stream has been inspected. Exceptions are logged, never rethrown.</returns>
private async Task PumpOperationAsync(bool isCopy)
{
    // Human-readable stream label shared by every trace message below.
    string streamKind = isCopy ? "copy" : "replication";

    try
    {
        this.TraceInfo("PumpOperationAsync: Pump {0} stream started", streamKind);

        IOperationStream stream;
        if (isCopy)
        {
            stream = this.replicator.StateReplicator2.GetCopyStream();
        }
        else
        {
            stream = this.replicator.StateReplicator2.GetReplicationStream();
        }

        this.TraceInfo("PumpOperationAsync: obtained IOperationStream instance {0}", stream.GetType().Name);

        var received = await stream.GetOperationAsync(CancellationToken.None);
        if (received != null)
        {
            // No data is ever replicated, so receiving an operation is an error.
            string error = string.Format(
                "PumpOperationAsync: An operation was unexpectedly received while pumping {0} stream.",
                streamKind) + this.PartitionAndReplicaId();
            this.trace.WriteError(error);
            this.partition.ReportFault(FaultType.Transient);
            return;
        }

        // Null is the only expected result because nothing is replicated.
        this.TraceInfo("PumpOperationAsync: Reached end of {0} stream", streamKind);

        // Once the copy stream is finished, move on to the replication stream.
        if (isCopy)
        {
            this.StartSecondaryReplicationPump();
        }
    }
    catch (Exception ex)
    {
        // The caller does not await this method, so nothing may escape from here:
        // record the failure and swallow it.
        this.trace.Exception(ex);
    }
}
/// <summary>
/// Reads the next copy operation and, when it carries the false-progress stage, truncates the
/// log tail to the LSN it names before fetching the following operation.
/// </summary>
/// <param name="copyStream">The copy stream to read from.</param>
/// <returns>
/// The operation the caller should process next: the one just read when no truncation was needed,
/// the one following the false-progress operation otherwise, or null when the stream returned null.
/// </returns>
private async Task<IOperation> TruncateTailIfNecessary(IOperationStream copyStream)
{
    var current = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
    if (current == null)
    {
        return null;
    }

    var segments = current.Data;

    // The copy stage is encoded as an Int32 in the last segment of the operation data.
    var stageSegment = segments[segments.Count - 1];
    CopyStage stage;
    using (var reader = new BinaryReader(new MemoryStream(stageSegment.Array, stageSegment.Offset, stageSegment.Count)))
    {
        stage = (CopyStage)reader.ReadInt32();
    }

    Utility.Assert(
        (stage == CopyStage.CopyFalseProgress) || (stage == CopyStage.CopyLog),
        "(copyStage should be false progress or copy log. Copy stage:{0})",
        stage);

    if (stage != CopyStage.CopyFalseProgress)
    {
        // Already a copy-log operation: hand it straight back.
        return current;
    }

    // The first segment carries the LSN (Int64) the log tail has to be truncated to.
    var lsnSegment = segments[0];
    LogicalSequenceNumber sourceStartingLsn;
    using (var reader = new BinaryReader(new MemoryStream(lsnSegment.Array, lsnSegment.Offset, lsnSegment.Count)))
    {
        sourceStartingLsn = new LogicalSequenceNumber(reader.ReadInt64());
    }

    Utility.Assert(
        sourceStartingLsn < this.replicatedLogManager.CurrentLogTailLsn,
        "sourceStartingLsn < this.currentLogTailLsn");

    // Acknowledge first, then truncate, then fetch the operation that follows.
    current.Acknowledge();
    await this.TruncateTailAsync(sourceStartingLsn).ConfigureAwait(false);

    return await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
}
/// <summary>
/// Starts a background task that pumps the replication stream: each received operation has its
/// sequence number appended to <c>log</c> and is then acknowledged. The task runs until the
/// stream ends (null operation) or <c>processingTaskCts</c> is cancelled.
/// </summary>
void StartProcessingReplicationFromPrimary()
{
    LogMessage(nameof(StartProcessingReplicationFromPrimary));

    processingTaskCts = new CancellationTokenSource();

    // Capture the token once so the background loop keeps observing *this* source even if the
    // field is later replaced by another call to this method.
    var cancellationToken = processingTaskCts.Token;

    processingTask = Task.Run(async () =>
    {
        IOperationStream replicationStream = null;

        // NOTE(review): every node whose name does not end in "4" is slowed down by 30 seconds
        // per operation; this looks like deliberate test/demo scaffolding - confirm.
        var isSlow = !this.nodeContext.NodeName.EndsWith("4", StringComparison.Ordinal);

        while (!cancellationToken.IsCancellationRequested)
        {
            try
            {
                replicationStream = replicationStream ?? replicator.StateReplicator.GetReplicationStream();

                var operation = await replicationStream.GetOperationAsync(cancellationToken);
                if (operation == null)
                {
                    // A null operation is treated as the end of the stream. Without this check
                    // the loop would dereference null and retry every five seconds forever.
                    break;
                }

                if (isSlow)
                {
                    // Intentionally not cancellable so an operation that was already fetched is
                    // still recorded and acknowledged, as before.
                    await Task.Delay(TimeSpan.FromSeconds(30));
                }

                log.Append(operation.SequenceNumber);
                operation.Acknowledge();
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // Cancellation is the normal shutdown path, not an error.
                break;
            }
            catch (Exception ex)
            {
                LogMessage($"Error processing replication stream {ex.Message}. {ex.StackTrace}.");

                try
                {
                    // Back off before retrying, but wake up immediately on cancellation.
                    await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken);
                }
                catch (OperationCanceledException)
                {
                    break;
                }
            }
        }
    });
}
/// <summary>
/// Drains <paramref name="queue"/>: every operation is fetched, handed to <paramref name="apply"/>,
/// and acknowledged, in that order, until the queue yields null.
/// </summary>
/// <param name="queue">The operation stream to drain.</param>
/// <param name="apply">The delegate that applies a single operation.</param>
/// <param name="cancellationToken">Checked before each fetch and forwarded to the fetch and to <paramref name="apply"/>.</param>
/// <param name="initiated">
/// Optional completion source, set to 0 right after the first fetch has been issued.
/// </param>
/// <returns>A <see cref="Task"/> that completes when the queue has been drained.</returns>
private static async Task PumpOperations(
    IOperationStream queue,
    OperationApplier apply,
    CancellationToken cancellationToken,
    TaskCompletionSource<int> initiated = null)
{
    var drainAnnounced = false;

    while (true)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // Issue the fetch before announcing, so "initiated" means a read is really in flight.
        var pendingFetch = queue.GetOperationAsync(cancellationToken);

        if (!drainAnnounced)
        {
            if (initiated != null)
            {
                initiated.TrySetResult(0);
            }

            drainAnnounced = true;
        }

        var current = await pendingFetch.ConfigureAwait(false);
        if (current == null)
        {
            // Null marks a fully drained queue.
            break;
        }

        await apply(current, cancellationToken).ConfigureAwait(false);
        current.Acknowledge();
    }
}
/// <summary>
/// Obtains the operation stream from <c>operationStreamGetter</c> and starts draining it.
/// </summary>
/// <returns>
/// A task driven by the drain loop. It is faulted immediately when obtaining the stream throws.
/// </returns>
public Task DrainAsync()
{
    var completion = new TaskCompletionSource<object>();

    IOperationStream source = null;
    Exception getterFailure = null;

    try
    {
        source = this.operationStreamGetter();
    }
    catch (Exception ex)
    {
        getterFailure = ex;
    }

    if (getterFailure != null)
    {
        // The stream could not be obtained: surface the failure through the returned task.
        completion.SetException(getterFailure);
    }
    else
    {
        // The loop completes the task once the stream has been fully drained.
        this.DrainTaskLoop(completion, source);
    }

    return completion.Task;
}
/// <summary>
/// Synchronously reads one operation from the replication stream. A null operation ends the
/// pump; any other operation causes another pump to be scheduled. Failures are logged as warnings.
/// </summary>
private void PumpReplicationOperation()
{
    Trace.WriteInfo(TraceType, "PumpReplicationOperation started");

    try
    {
        // Blocks the calling thread until the stream produces a result.
        IOperation received = this.replicator.StateReplicator
            .GetReplicationStream()
            .GetOperationAsync(CancellationToken.None)
            .Result;

        if (received != null)
        {
            // Replication operations are not expected; keep pumping.
            this.SchedulePumpReplicationOperation();
            return;
        }

        Trace.WriteInfo(TraceType, "Reached end of replication stream");
    }
    catch (Exception e)
    {
        Trace.WriteWarning(TraceType, "PumpReplicationOperation: {0}", e);
    }
}
/// <summary>
/// Fetches the next operation from <paramref name="stream"/> and dispatches it to
/// <c>info.Callback</c>, re-entering itself until the stream yields null.
/// </summary>
/// <remarks>
/// When <c>info.SupportsConcurrentProcessing</c> is true the next fetch is issued as soon as the
/// callback has been invoked; otherwise it is issued only after the callback task has completed.
/// Every terminal outcome (end of stream, fetch failure, fetch cancellation, callback failure) is
/// reported through <paramref name="tcs"/> so that the task returned to the caller always completes.
/// </remarks>
/// <param name="tcs">Completion source backing the task handed out by the drain entry point.</param>
/// <param name="stream">The operation stream being drained.</param>
private void DrainTaskLoop(TaskCompletionSource<object> tcs, IOperationStream stream)
{
    Task<IOperation> operationTask;
    try
    {
        operationTask = stream.GetOperationAsync(CancellationToken.None);
    }
    catch (Exception ex)
    {
        // TrySet*: in concurrent mode the source may already have been completed by another path.
        tcs.TrySetException(ex);
        return;
    }

    operationTask.ContinueWith(
        t =>
        {
            if (t.IsFaulted)
            {
                tcs.TrySetException(t.Exception.InnerException);
                return;
            }

            if (t.IsCanceled)
            {
                // Reading Result of a cancelled task throws inside this continuation, which
                // would leave the drain task incomplete forever.
                tcs.TrySetCanceled();
                return;
            }

            if (t.Result == null)
            {
                // Null marks the end of the stream.
                tcs.TrySetResult(null);
                return;
            }

            Task callbackTask;
            try
            {
                callbackTask = this.info.Callback(t.Result);
            }
            catch (Exception ex)
            {
                AppTrace.TraceSource.WriteExceptionAsError("OperationQueue.DrainAsync", ex, "Queue {0} - callback task function invoke threw", this.Name);

                // Surface the failure to whoever awaits the drain task, then rethrow as before so
                // any unobserved-task-exception policy configured by the host still sees it.
                tcs.TrySetException(ex);
                throw;
            }

            // Continuation on the callback task.
            callbackTask.ContinueWith(
                continuation =>
                {
                    if (continuation.IsFaulted)
                    {
                        var callbackFailure = continuation.Exception.InnerException;

                        // Fault the drain task instead of silently stalling, then rethrow as before.
                        tcs.TrySetException(callbackFailure);
                        throw callbackFailure;
                    }

                    // Single-operation mode: the next fetch has not been requested yet, so
                    // request it now that this callback has finished.
                    if (!this.info.SupportsConcurrentProcessing)
                    {
                        this.DrainTaskLoop(tcs, stream);
                    }
                },
                TaskContinuationOptions.ExecuteSynchronously);

            // Concurrent mode: keep fetching without waiting for the callback to finish.
            if (this.info.SupportsConcurrentProcessing)
            {
                this.DrainTaskLoop(tcs, stream);
            }
        },
        TaskContinuationOptions.ExecuteSynchronously);
}
/// <summary>
/// Drains the state portion of the copy stream: every operation tagged <c>CopyState</c> is handed
/// to the state manager and acknowledged, until an operation with a different stage (asserted to
/// be <c>CopyProgressVector</c>) or a null operation is received.
/// </summary>
/// <param name="copyStateStream">The copy stream to read state operations from.</param>
/// <returns>
/// The first non-state operation (not acknowledged here), or null when the stream returned null.
/// </returns>
private async Task<IOperation> DrainStateStreamAsync(IOperationStream copyStateStream)
{
    FabricEvents.Events.DrainStart(this.tracer.Type, "State stream");

    long stateRecordNumber = 0;

    var current = await copyStateStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
    if (current == null)
    {
        return null;
    }

    this.roleContextDrainState.OnDrainState();
    this.stateManager.BeginSettingCurrentState();

    while (current != null)
    {
        var segments = current.Data;

        // The copy stage is stored as an Int32 in the last segment.
        var stageSegment = segments[segments.Count - 1];
        CopyStage stage;
        using (var reader = new BinaryReader(new MemoryStream(stageSegment.Array, stageSegment.Offset, stageSegment.Count)))
        {
            stage = (CopyStage)reader.ReadInt32();
        }

        if (stage != CopyStage.CopyState)
        {
            // Leave the loop holding this operation; it is returned to the caller below.
            Utility.Assert(
                stage == CopyStage.CopyProgressVector,
                "copyStage == CopyStage.CopyProgressVector");
            break;
        }

        // Everything except the trailing stage segment is the state payload.
        var payload = new List<ArraySegment<byte>>();
        for (var index = 0; index < segments.Count - 1; index++)
        {
            payload.Add(segments[index]);
        }

        var stateData = new OperationData(payload);

        FabricEvents.Events.DrainStateNoise(
            this.tracer.Type,
            "Received state record: " + stateRecordNumber,
            string.Empty);

        await this.stateManager.SetCurrentStateAsync(stateRecordNumber, stateData).ConfigureAwait(false);
        current.Acknowledge();

        FabricEvents.Events.DrainStateNoise(
            this.tracer.Type,
            "Acked state record: " + stateRecordNumber,
            string.Empty);

        stateRecordNumber++;

        current = await copyStateStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
    }

    // A non-null operation here means the loop ended on the progress-vector operation.
    bool reachedProgressVector = current != null;

    // RDBug#10479578: when the copy is aborted (stream returned null) the
    // EndSettingCurrentState API is not called.
    if (reachedProgressVector)
    {
        await this.stateManager.EndSettingCurrentStateAsync().ConfigureAwait(false);
    }

#if !DotNetCoreClr
    // This event is newly defined in System.Fabric. Existing CoreCLR apps carry their own copy of
    // System.Fabric and would break if it were referenced, because the event would not be found.
    // This is only a mitigation; the actual fix is tracked via bug# 11614507.
    FabricEvents.Events.DrainCompleted(
        this.tracer.Type,
        "State",
        reachedProgressVector ? "Completed" : "Incomplete",
        stateRecordNumber,
        (uint)LogRecordType.Invalid,
        0,
        0,
        0);
#endif

    return current;
}
/// <summary>
/// Drains the replication stream: each operation is converted to a logical log record, logged,
/// and acknowledged from a continuation that runs once the record's flush has completed. When the
/// stream yields null, a ReplicationFinished information record is flushed and the method waits
/// for all outstanding acknowledgements before returning.
/// </summary>
/// <param name="replicationStream">The replication stream to drain.</param>
/// <returns>A task that completes when the stream is drained and every operation has been accounted for.</returns>
private async Task DrainReplicationStreamAsync(IOperationStream replicationStream)
{
    FabricEvents.Events.DrainStart(this.tracer.Type, "Replication stream");

    var allOperationsAckedTcs = new TaskCompletionSource<object>();
    var lastReplicatedRecord = LogicalLogRecord.InvalidLogicalLogRecord;

    // acksOutstanding starts at 1: that extra count is held by the pump itself and released at
    // end of stream, so the counter cannot reach zero while operations are still arriving.
    long replicatedRecordNumber = 0;
    long acksOutstanding = 1;
    long bytesOutstanding = 0;

    this.roleContextDrainState.OnDrainReplication();

    while (true)
    {
        var pendingFetch = replicationStream.GetOperationAsync(CancellationToken.None);
        if (!pendingFetch.IsCompleted)
        {
            // Nothing is immediately available: start a flush while waiting for the next operation.
            this.replicatedLogManager.LogManager.FlushAsync("DrainReplicationStream.IsEmpty").IgnoreExceptionVoid();
            await pendingFetch.ConfigureAwait(false);
        }

        var operation = pendingFetch.GetAwaiter().GetResult();

        if (operation == null)
        {
            // End of stream: flush the marker record and wait for everything logged so far.
            await this.replicatedLogManager.FlushInformationRecordAsync(
                InformationEvent.ReplicationFinished,
                closeLog: false,
                flushInitiator: "DrainReplicationstream.IsFinished").ConfigureAwait(false);

            await this.replicatedLogManager.LastInformationRecord.AwaitProcessing().ConfigureAwait(false);
            await this.recordsProcessor.WaitForLogicalRecordsProcessingAsync().ConfigureAwait(false);

            // Release the pump's own count; wait only if flush continuations are still pending.
            var acksLeft = Interlocked.Decrement(ref acksOutstanding);
            Utility.Assert(acksLeft >= 0, "acksPending >= 0");
            if (acksLeft != 0)
            {
                await allOperationsAckedTcs.Task.ConfigureAwait(false);
            }

            Utility.Assert(acksOutstanding == 0, "acksOutstanding == 0");
            break;
        }

        var data = operation.Data;

#if DEBUG
        ReplicatedLogManager.ValidateOperationData(data, "DrainReplicationStream LSN: " + operation.SequenceNumber);
#endif

        lastReplicatedRecord = (LogicalLogRecord)LogRecord.FromOperationData(data);
        lastReplicatedRecord.Lsn = new LogicalSequenceNumber(operation.SequenceNumber);

        await this.LogLogicalRecordOnSecondaryAsync(lastReplicatedRecord).ConfigureAwait(false);

        var acksRemaining = Interlocked.Increment(ref acksOutstanding);

        FabricEvents.Events.DrainReplicationReceive(
            this.tracer.Type,
            replicatedRecordNumber,
            (uint)lastReplicatedRecord.RecordType,
            lastReplicatedRecord.Lsn.LSN,
            acksRemaining);

        ++replicatedRecordNumber;

        long operationSize = Utility.GetOperationSize(data);
        var bytesRemaining = Interlocked.Add(ref bytesOutstanding, operationSize);

        // Flush once the outstanding count or bytes reach half of either the secondary or the
        // primary replication queue limit. Memory limits take part only when greater than zero.
        var settings = this.replicatorSettings.PublicSettings;
        bool queueHalfFull =
            ((settings.MaxSecondaryReplicationQueueSize / 2 <= acksRemaining)
                || ((settings.MaxSecondaryReplicationQueueMemorySize > 0)
                    && (settings.MaxSecondaryReplicationQueueMemorySize / 2 <= bytesRemaining)))
            || ((settings.MaxPrimaryReplicationQueueSize / 2 <= acksRemaining)
                || ((settings.MaxPrimaryReplicationQueueMemorySize > 0)
                    && (settings.MaxPrimaryReplicationQueueMemorySize / 2 <= bytesRemaining)));

        if (queueHalfFull)
        {
            FabricEvents.Events.DrainReplicationFlush(
                this.tracer.Type,
                replicatedRecordNumber,
                lastReplicatedRecord.Lsn.LSN,
                acksRemaining,
                bytesRemaining);

            this.replicatedLogManager.LogManager.FlushAsync("DrainReplicationStream.IsFull").IgnoreExceptionVoid();
        }

        // Snapshot the values for the continuation; lastReplicatedRecord is overwritten on the
        // next iteration.
        var capturedOperation = operation;
        var capturedRecord = lastReplicatedRecord;

        lastReplicatedRecord.AwaitFlush().IgnoreException().ContinueWith(
            async flushTask =>
            {
                var acksPending = Interlocked.Decrement(ref acksOutstanding);

                if (flushTask.Exception != null)
                {
                    // Failed flush: do not acknowledge, but still signal drain completion if this
                    // was the last outstanding operation.
                    if (acksPending == 0)
                    {
                        allOperationsAckedTcs.TrySetResult(null);
                    }

                    return;
                }

                var bytesPending = Interlocked.Add(ref bytesOutstanding, -operationSize);

                Utility.Assert(
                    (acksPending >= 0) && (bytesPending >= 0),
                    "(acksPending >= 0) && (bytesPending >= 0)");

                if (acksPending == 0)
                {
                    allOperationsAckedTcs.TrySetResult(null);
                }

                capturedOperation.Acknowledge();

                FabricEvents.Events.DrainReplicationNoise(
                    this.tracer.Type,
                    capturedRecord.Lsn.LSN,
                    acksPending,
                    bytesPending);

                await capturedRecord.AwaitApply().ConfigureAwait(false);
            }).IgnoreExceptionVoid();
    }

#if !DotNetCoreClr
    // This event is newly defined in System.Fabric. Existing CoreCLR apps carry their own copy of
    // System.Fabric and would break if it were referenced, because the event would not be found.
    // This is only a mitigation; the actual fix is tracked via bug# 11614507.
    FabricEvents.Events.DrainCompleted(
        this.tracer.Type,
        "Replication",
        "Completed",
        replicatedRecordNumber,
        (uint)lastReplicatedRecord.RecordType,
        lastReplicatedRecord.Lsn.LSN,
        lastReplicatedRecord.Psn.PSN,
        lastReplicatedRecord.RecordPosition);
#endif
}
/// <summary>
/// Drains the log portion of the copy stream: each operation is converted to a logical log
/// record, logged, and acknowledged from a continuation that runs once the record's flush has
/// completed. During a full copy the copy log is renamed as soon as the record whose LSN equals
/// <c>copiedUptoLsn</c> has been logged. After the stream ends, a CopyFinished information record
/// is flushed and the method waits for all outstanding acknowledgements.
/// </summary>
/// <param name="copyStream">The copy stream to drain.</param>
/// <param name="operation">The first operation to process, or null when there is nothing to drain.</param>
/// <param name="copiedCheckpointRecord">
/// The begin-checkpoint record of the copy, null when it is not known yet, or
/// <c>BeginCheckpointLogRecord.InvalidBeginCheckpointLogRecord</c>, for which no rename or cancel is attempted.
/// </param>
/// <param name="renamedCopyLogSuccessfully">True when the caller has already renamed the copy log.</param>
/// <returns>A task that completes when the stream is drained and every operation has been accounted for.</returns>
private async Task DrainCopyStreamAsync(
    IOperationStream copyStream,
    IOperation operation,
    BeginCheckpointLogRecord copiedCheckpointRecord,
    bool renamedCopyLogSuccessfully)
{
    FabricEvents.Events.DrainStart(this.tracer.Type, "Copy stream: RenamedCopyLogSuccessfully: " + renamedCopyLogSuccessfully);

    var lastCopiedRecord = LogicalLogRecord.InvalidLogicalLogRecord;

    // acksOutstanding starts at 1: that extra count is held by the pump itself and released after
    // the stream ends, so the counter cannot reach zero while operations are still arriving.
    long copiedRecordNumber = 0, acksOutstanding = 1;

    TaskCompletionSource<object> allOperationsAckedTcs = new TaskCompletionSource<object>();

    try
    {
        if (operation != null)
        {
            this.roleContextDrainState.OnDrainCopy();

            do
            {
                var data = operation.Data;

#if DEBUG
                ReplicatedLogManager.ValidateOperationData(data, "DrainCopyStreamAsync LSN: " + operation.SequenceNumber);
#endif

                lastCopiedRecord = (LogicalLogRecord)LogRecord.FromOperationData(data);

                await this.LogLogicalRecordOnSecondaryAsync(lastCopiedRecord).ConfigureAwait(false);

                // After successfully appending the record into the buffer, increment the outstanding ack count
                var acksRemaining = Interlocked.Increment(ref acksOutstanding);

                FabricEvents.Events.DrainCopyReceive(
                    this.tracer.Type,
                    copiedRecordNumber,
                    lastCopiedRecord.RecordType.ToString(),
                    lastCopiedRecord.Lsn.LSN,
                    acksRemaining);

                ++copiedRecordNumber;

                // Flush once half of the copy queue is waiting for acknowledgement.
                if (this.replicatorSettings.PublicSettings.MaxCopyQueueSize / 2 <= acksRemaining)
                {
                    FabricEvents.Events.DrainCopyFlush(
                        this.tracer.Type,
                        copiedRecordNumber,
                        lastCopiedRecord.Lsn.LSN,
                        acksRemaining);

                    this.replicatedLogManager.LogManager.FlushAsync("DrainCopyStream.IsFull").IgnoreExceptionVoid();
                }

                // Snapshot the values for the flush continuation; both variables are overwritten
                // on the next iteration.
                var capturedOperation = operation;
                var capturedRecord = lastCopiedRecord;

                // Pick up the checkpoint record once the log manager reports one in progress.
                if (copiedCheckpointRecord == null)
                {
                    copiedCheckpointRecord = this.replicatedLogManager.LastInProgressCheckpointRecord;
                    if (copiedCheckpointRecord != null)
                    {
                        Utility.Assert(
                            copiedCheckpointRecord.Lsn == this.recoveredOrCopiedCheckpointLsn.Value,
                            "copiedCheckpointRecordLsn {0} == recoveredOrCopiedCheckpointLsn {1}",
                            copiedCheckpointRecord.Lsn,
                            this.recoveredOrCopiedCheckpointLsn.Value);
                    }
                }

                // If we pumped the last operation in the copy stream (indicated by copiedUptoLsn), rename the copy log if this was a full copy,
                // as we are guaranteed that the replica has all the data needed to be promoted to an active secondary and we could not have lost any state.
                // A copied UE record could have the same LSN, so the renamedCopyLogSuccessfully check is needed.
                if (copiedCheckpointRecord != null &&
                    copiedCheckpointRecord != BeginCheckpointLogRecord.InvalidBeginCheckpointLogRecord &&
                    lastCopiedRecord.Lsn == this.copiedUptoLsn &&
                    renamedCopyLogSuccessfully == false)
                {
                    await this.checkpointManager.CompleteFirstCheckpointOnIdleAndRenameLog(copiedCheckpointRecord, this.copiedUptoLsn.LSN).ConfigureAwait(false);
                    renamedCopyLogSuccessfully = true;
                }

                lastCopiedRecord.AwaitFlush().ContinueWith(
                    async task =>
                    {
                        var acksPending = Interlocked.Decrement(ref acksOutstanding);

                        if (task.Exception != null)
                        {
                            // Failed flush: do not acknowledge, but still signal the drain
                            // completion task if this was the last outstanding operation.
                            if (acksPending == 0)
                            {
                                allOperationsAckedTcs.TrySetResult(null);
                            }

                            return;
                        }

                        capturedOperation.Acknowledge();

                        Utility.Assert(acksPending >= 0, "acksPending {0} >= 0", acksPending);

                        if (acksPending == 0)
                        {
                            allOperationsAckedTcs.TrySetResult(null);
                        }

                        FabricEvents.Events.DrainCopyNoise(
                            this.tracer.Type,
                            capturedRecord.Lsn.LSN,
                            acksPending);

                        await capturedRecord.AwaitApply().ConfigureAwait(false);
                    }).IgnoreExceptionVoid();

                var drainTask = copyStream.GetOperationAsync(CancellationToken.None);
                if (drainTask.IsCompleted == false)
                {
                    // GopalK: Currently, we cannot wait for copy to finish because copy might get
                    // abandoned if the primary fails and the product waits for pending
                    // copy operations to get acknowledged before electing a new primary
                    this.replicatedLogManager.LogManager.FlushAsync("DrainCopyStream.IsEmpty").IgnoreExceptionVoid();
                    await drainTask.ConfigureAwait(false);
                }

                operation = drainTask.GetAwaiter().GetResult();
            }
            while (operation != null);
        }
    }

    // This finally block ensures that, before we continue, the first full-copy checkpoint is cancelled during a full build.
    // Without it, the code above could throw out of this method and a lifecycle API such as close might get stuck because
    // there is a pending checkpoint that is not yet fully processed.
    finally
    {
        // If the pump finishes prematurely for any reason, the copy log cannot be renamed.
        if (copiedCheckpointRecord != null &&
            copiedCheckpointRecord != BeginCheckpointLogRecord.InvalidBeginCheckpointLogRecord &&
            renamedCopyLogSuccessfully == false)
        {
            // ConfigureAwait(false) keeps this await consistent with every other await in the method.
            await this.checkpointManager.CancelFirstCheckpointOnIdleDueToIncompleteCopy(copiedCheckpointRecord, this.copiedUptoLsn.LSN).ConfigureAwait(false);
        }
    }

    await this.replicatedLogManager.FlushInformationRecordAsync(
        InformationEvent.CopyFinished,
        closeLog: false,
        flushInitiator: "DrainCopyStream.IsFinished").ConfigureAwait(false);

    // Awaiting processing of this record ensures that all operations in the copy stream have been
    // applied before the drain is reported as complete.
    await this.replicatedLogManager.LastInformationRecord.AwaitProcessing().ConfigureAwait(false);

    await this.recordsProcessor.WaitForLogicalRecordsProcessingAsync().ConfigureAwait(false);

    // Release the pump's own count; wait only if flush continuations are still pending.
    var acksOpen = Interlocked.Decrement(ref acksOutstanding);
    Utility.Assert(acksOpen >= 0, "acksOpen {0} >= 0", acksOpen);
    if (acksOpen != 0)
    {
        // Wait for all the callbacks above to finish running and acknowledging.
        await allOperationsAckedTcs.Task.ConfigureAwait(false);
    }

    Utility.Assert(acksOutstanding == 0, "acksOutstanding == 0");

#if !DotNetCoreClr
    // This event is newly defined in System.Fabric. Existing CoreCLR apps carry their own copy of
    // System.Fabric and would break if it were referenced, because the event would not be found.
    // This is only a mitigation; the actual fix is tracked via bug# 11614507.
    FabricEvents.Events.DrainCompleted(
        this.tracer.Type,
        "Copy",
        "Completed",
        copiedRecordNumber,
        (uint)lastCopiedRecord.RecordType,
        lastCopiedRecord.Lsn.LSN,
        lastCopiedRecord.Psn.PSN,
        lastCopiedRecord.RecordPosition);
#endif
}
/// <summary>
/// Brings an idle secondary up to date from the copy stream supplied by the primary. Depending on
/// the stage announced in the copy header, the replica is already current (nothing to do), is
/// fully copied (state stream followed by log records), or is built from log records only.
/// </summary>
/// <param name="copyStream">The copy stream populated by the primary.</param>
/// <returns>Task that represents the asynchronous operation.</returns>
private async Task CopyOrBuildReplicaAsync(IOperationStream copyStream)
{
    var operation = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
    if (operation == null)
    {
        return;
    }

    // The first operation is the copy header and consists of exactly one segment.
    Utility.Assert(operation.Data.Count == 1, "operation.Data.Count == 1");
    CopyHeader header = CopyHeader.ReadFromOperationData(operation.Data);
    operation.Acknowledge();

    string message;

    if (header.Stage == CopyStage.CopyNone)
    {
        // GopalK: The order of the following statements is significant
        Utility.Assert(
            this.roleContextDrainState.DrainingStream == DrainingStream.Invalid,
            "this.recordsProcessor.DrainingStream == DrainingStream.Invalid");

        // There is no false progress stage, so the recovery stream can be disposed.
        this.recoveryManager.DisposeRecoveryReadStreamIfNeeded();

        // Nothing else is expected in the stream.
        operation = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
        Utility.Assert(operation == null, "operation == null");

        message = string.Format(
            CultureInfo.InvariantCulture,
            "Idle replica is already current with primary replica: {0}",
            header.PrimaryReplicaId);
        FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, message);

        return;
    }

    BeginCheckpointLogRecord copiedCheckpointRecord;
    bool renamedCopyLogSuccessfully = false;

    if (header.Stage != CopyStage.CopyState)
    {
        // Build path: only log records are transferred.
        FabricEvents.Events.CopyOrBuildReplica(
            this.tracer.Type,
            "Idle replica is building from primary replica: " + header.PrimaryReplicaId);

        operation = await this.TruncateTailIfNecessary(copyStream).ConfigureAwait(false);
        copiedCheckpointRecord = BeginCheckpointLogRecord.InvalidBeginCheckpointLogRecord;

        // The false progress stage is complete, so the recovery stream can be disposed.
        this.recoveryManager.DisposeRecoveryReadStreamIfNeeded();
    }
    else
    {
        // Full copy path: state first, then the copy metadata, then log records.
        message = string.Format(
            CultureInfo.InvariantCulture,
            "Idle replica is copying from primary replica: {0}",
            header.PrimaryReplicaId);

        // There is no false progress stage, so the recovery stream can be disposed.
        this.recoveryManager.DisposeRecoveryReadStreamIfNeeded();

        FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, message);

        operation = await this.DrainStateStreamAsync(copyStream).ConfigureAwait(false);
        if (operation == null)
        {
            FabricEvents.Events.CopyOrBuildReplica(
                this.tracer.Type,
                "Returning null as copy pump has been aborted");
            return;
        }

        CopyMetadata metadata = CopyMetadata.ReadFromOperationData(operation.Data);
        this.ReadConsistentAfterLsn = metadata.HighestStateProviderCopiedLsn;

        message = string.Format(
            CultureInfo.InvariantCulture,
            "Copy started. StartingLSN: {0} StartingEpoch: {1},{2} CheckpointLSN: {3} UptoLSN: {4} Highest Checkpointed Lsn {5}"
            + Environment.NewLine + "Copied ProgressVector: {6}" + Environment.NewLine,
            metadata.StartingLogicalSequenceNumber,
            metadata.StartingEpoch.DataLossNumber,
            metadata.StartingEpoch.ConfigurationNumber,
            metadata.CheckpointLsn,
            metadata.UptoLsn,
            metadata.HighestStateProviderCopiedLsn,
            metadata.ProgressVector.ToString(Constants.ProgressVectorMaxStringSizeInKb));
        FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, message);

        // Reset local state so that it starts from the copied position.
        this.transactionManager.TransactionsMap.Reuse();
        this.checkpointManager.ResetStableLsn(metadata.CheckpointLsn);
        this.copiedUptoLsn = metadata.UptoLsn;

        var newLogHead = await this.replicatedLogManager.LogManager
            .CreateCopyLogAsync(metadata.StartingEpoch, metadata.StartingLogicalSequenceNumber)
            .ConfigureAwait(false);

        this.recoveredOrCopiedCheckpointLsn.Update(metadata.CheckpointLsn);

        this.replicatedLogManager.Reuse(
            metadata.ProgressVector,
            null,
            null,
            null,
            InformationLogRecord.InvalidInformationLogRecord,
            newLogHead,
            metadata.StartingEpoch,
            metadata.StartingLogicalSequenceNumber);

        // RD: RDBug 7475439: Utility.Assert(sourceEntry == targetEntry, "currentSourceVector == currentTargetVector");
        // When the UpdateEpoch LSN equals the starting LSN, an UpdateEpoch log record is inserted.
        if (metadata.StartingLogicalSequenceNumber == metadata.ProgressVector.LastProgressVectorEntry.Lsn)
        {
            var updateEpochRecord = new UpdateEpochLogRecord(
                metadata.ProgressVector.LastProgressVectorEntry.Epoch,
                metadata.ProgressVector.LastProgressVectorEntry.PrimaryReplicaId)
            {
                Lsn = metadata.StartingLogicalSequenceNumber
            };

            FabricEvents.Events.UpdateEpoch(
                this.tracer.Type,
                "UpdateEpochRecordDueToFullCopy",
                updateEpochRecord.Epoch.DataLossNumber,
                updateEpochRecord.Epoch.ConfigurationNumber,
                updateEpochRecord.Lsn.LSN,
                this.roleContextDrainState.ReplicaRole.ToString());

            // NOTE: Do not use the UpdateEpoch method on the log manager: it adds the entry to the
            // list of progress vectors, and the entry already exists there.
            this.replicatedLogManager.AppendWithoutReplication(updateEpochRecord, null);
        }

        copiedCheckpointRecord = null;

        if (this.recoveredOrCopiedCheckpointLsn.Value == metadata.StartingLogicalSequenceNumber)
        {
            this.checkpointManager.FirstBeginCheckpointOnIdleSecondary();
            copiedCheckpointRecord = this.replicatedLogManager.LastInProgressCheckpointRecord;

            Utility.Assert(
                copiedCheckpointRecord.Lsn == this.recoveredOrCopiedCheckpointLsn.Value,
                "copiedCheckpointRecordLsn {0} != recoveredOrCopiedCheckpointLsn {1}",
                copiedCheckpointRecord.Lsn,
                this.recoveredOrCopiedCheckpointLsn.Value);

            // If the checkpoint LSN is also the UptoLsn, the rename must be done before continuing.
            if (this.recoveredOrCopiedCheckpointLsn.Value == this.copiedUptoLsn)
            {
                // This ensures we do not get stuck waiting for the stable LSN.
                Utility.Assert(
                    this.checkpointManager.LastStableLsn == this.recoveredOrCopiedCheckpointLsn.Value,
                    "checkpointManager.LastStableLsn {0} == this.recoveredOrCopiedCheckpointLsn.Value {1}",
                    this.checkpointManager.LastStableLsn,
                    this.recoveredOrCopiedCheckpointLsn.Value);

                await this.checkpointManager
                    .CompleteFirstCheckpointOnIdleAndRenameLog(copiedCheckpointRecord, this.copiedUptoLsn.LSN)
                    .ConfigureAwait(false);
                renamedCopyLogSuccessfully = true;
            }
        }

        // The metadata operation is acknowledged only after the local state has been prepared.
        operation.Acknowledge();
        FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, "Acked progress ProgressVectorEntry operation");

        operation = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
    }

    await this.DrainCopyStreamAsync(
        copyStream,
        operation,
        copiedCheckpointRecord,
        renamedCopyLogSuccessfully).ConfigureAwait(false);
}
/// <summary>
/// Reads operations from <paramref name="queue"/> one at a time, applying each with
/// <paramref name="apply"/> and acknowledging it afterwards, until a null operation is returned.
/// </summary>
/// <param name="queue">The operation stream to read from.</param>
/// <param name="apply">The method used to apply each operation.</param>
/// <param name="cancellationToken">The cancellation token; observed before every fetch.</param>
/// <param name="initiated">
/// Optional completion source that receives 0 once the first fetch has been started.
/// </param>
/// <returns>A <see cref="Task"/> representing the work performed.</returns>
private static async Task PumpOperations(
    IOperationStream queue,
    OperationApplier apply,
    CancellationToken cancellationToken,
    TaskCompletionSource<int> initiated = null)
{
    // isFirstFetch is true for the first pass only; the loop has no exit condition of its own
    // and ends through the return below or through cancellation.
    for (var isFirstFetch = true; ; isFirstFetch = false)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // Start the fetch before signalling so the caller knows draining is under way.
        var fetch = queue.GetOperationAsync(cancellationToken);

        if (isFirstFetch && initiated != null)
        {
            initiated.TrySetResult(0);
        }

        var fetched = await fetch.ConfigureAwait(false);

        // A null operation signifies that the queue has been completely drained.
        if (fetched == null)
        {
            return;
        }

        await apply(fetched, cancellationToken).ConfigureAwait(false);
        fetched.Acknowledge();
    }
}