Exemplo n.º 1
0
        /// <summary>
        /// Pulls one operation from either the copy stream or the replication stream.
        /// No data is replicated by this service, so the only expected result is
        /// <c>null</c> (end of stream); any real operation is reported as a transient fault.
        /// </summary>
        /// <param name="isCopy">
        /// <c>true</c> to pump the copy stream, <c>false</c> to pump the replication stream.
        /// </param>
        /// <returns>A task that completes when the pump attempt has finished.</returns>
        private async Task PumpOperationAsync(bool isCopy)
        {
            try
            {
                // The same label is used by every trace message below.
                string streamName = isCopy ? "copy" : "replication";

                this.TraceInfo("PumpOperationAsync: Pump {0} stream started", streamName);

                IOperationStream stream;
                if (isCopy)
                {
                    stream = this.replicator.StateReplicator2.GetCopyStream();
                }
                else
                {
                    stream = this.replicator.StateReplicator2.GetReplicationStream();
                }

                this.TraceInfo("PumpOperationAsync: obtained IOperationStream instance {0}", stream.GetType().Name);

                var received = await stream.GetOperationAsync(CancellationToken.None);

                if (received != null)
                {
                    // Receiving an actual operation is an error for this service.
                    string message = string.Format(
                        "PumpOperationAsync: An operation was unexpectedly received while pumping {0} stream.",
                        streamName) + this.PartitionAndReplicaId();

                    this.trace.WriteError(message);
                    this.partition.ReportFault(FaultType.Transient);
                    return;
                }

                // Null is the expected outcome: the stream has ended.
                this.TraceInfo("PumpOperationAsync: Reached end of {0} stream", streamName);

                // Once the copy stream is exhausted, move on to the replication stream.
                if (isCopy)
                {
                    this.StartSecondaryReplicationPump();
                }
            }
            catch (Exception ex)
            {
                // The caller does not await this method, so the exception must not
                // escape from here: log it and swallow it.
                this.trace.Exception(ex);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Reads the next operation from the copy stream and inspects the copy stage stored in
        /// its last data segment. For a false-progress operation, the starting LSN is read from
        /// the first segment, the operation is acknowledged, the log tail is truncated to that
        /// LSN, and the following operation is fetched.
        /// </summary>
        /// <param name="copyStream">The copy stream to read from.</param>
        /// <returns>
        /// The operation the caller should process next, or <c>null</c> when the stream
        /// returned no operation.
        /// </returns>
        private async Task<IOperation> TruncateTailIfNecessary(IOperationStream copyStream)
        {
            var current = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
            if (current == null)
            {
                return null;
            }

            var payload = current.Data;

            // The copy stage is encoded as an Int32 at the start of the last segment.
            var stageSegment = payload[payload.Count - 1];

            CopyStage stage;
            using (var stageStream = new MemoryStream(stageSegment.Array, stageSegment.Offset, stageSegment.Count))
            using (var stageReader = new BinaryReader(stageStream))
            {
                stage = (CopyStage)stageReader.ReadInt32();
            }

            Utility.Assert(
                (stage == CopyStage.CopyFalseProgress) || (stage == CopyStage.CopyLog),
                "(copyStage should be false progress or copy log. Copy stage:{0})",
                stage);

            if (stage != CopyStage.CopyFalseProgress)
            {
                // Nothing to truncate; hand the operation back untouched.
                return current;
            }

            // The LSN to truncate to is encoded as an Int64 at the start of the first segment.
            var lsnSegment = payload[0];

            LogicalSequenceNumber sourceStartingLsn;
            using (var lsnStream = new MemoryStream(lsnSegment.Array, lsnSegment.Offset, lsnSegment.Count))
            using (var lsnReader = new BinaryReader(lsnStream))
            {
                sourceStartingLsn = new LogicalSequenceNumber(lsnReader.ReadInt64());
            }

            Utility.Assert(sourceStartingLsn < this.replicatedLogManager.CurrentLogTailLsn, "sourceStartingLsn < this.currentLogTailLsn");
            current.Acknowledge();

            await this.TruncateTailAsync(sourceStartingLsn).ConfigureAwait(false);

            return await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
        }
        /// <summary>
        /// Starts a background task that pumps the replication stream: each received
        /// operation has its sequence number appended to <c>log</c> and is then acknowledged.
        /// The loop ends when the stream returns <c>null</c> (end of stream) or when
        /// <c>processingTaskCts</c> is cancelled. Any other failure is logged and the loop
        /// retries after five seconds.
        /// </summary>
        void StartProcessingReplicationFromPrimary()
        {
            LogMessage(nameof(StartProcessingReplicationFromPrimary));

            processingTaskCts = new CancellationTokenSource();

            // Capture the token up front so this loop keeps observing its own source even if
            // the field is reassigned by a later call.
            var cancellationToken = processingTaskCts.Token;

            processingTask = Task.Run(async() =>
            {
                IOperationStream replicationStream = null;

                // Every node whose name does not end in "4" simulates slow processing.
                var isSlow = !this.nodeContext.NodeName.EndsWith("4", StringComparison.Ordinal);

                while (!cancellationToken.IsCancellationRequested)
                {
                    try
                    {
                        replicationStream = replicationStream ?? replicator.StateReplicator.GetReplicationStream();

                        var operation = await replicationStream.GetOperationAsync(cancellationToken);

                        if (operation == null)
                        {
                            // A null operation marks the end of the stream. Previously this was
                            // dereferenced, which produced an error/retry cycle every 5 seconds.
                            LogMessage("Reached end of replication stream.");
                            break;
                        }

                        if (isSlow)
                        {
                            // Simulated processing time. Deliberately not cancellable so that an
                            // operation that was already received is still applied and acknowledged.
                            await Task.Delay(TimeSpan.FromSeconds(30));
                        }

                        // The unused read of the first payload byte was dropped here.
                        log.Append(operation.SequenceNumber);

                        operation.Acknowledge();
                    }
                    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
                    {
                        // Cancellation is the normal shutdown path, not an error.
                        break;
                    }
                    catch (Exception ex)
                    {
                        LogMessage($"Error processing replication stream: {ex.Message}. {ex.StackTrace}.");

                        try
                        {
                            // Back off before retrying, but wake up immediately on cancellation.
                            await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken);
                        }
                        catch (OperationCanceledException)
                        {
                            break;
                        }
                    }
                }
            });
        }
        /// <summary>
        /// Drains <paramref name="queue"/>: each operation is fetched, passed to
        /// <paramref name="apply"/>, and then acknowledged, until the stream yields <c>null</c>.
        /// </summary>
        /// <param name="queue">The operation stream to drain.</param>
        /// <param name="apply">The delegate invoked to apply each operation.</param>
        /// <param name="cancellationToken">
        /// Token checked before every fetch and passed to both the fetch and the apply call.
        /// </param>
        /// <param name="initiated">
        /// Optional completion source that is signalled once the first fetch has been issued.
        /// </param>
        /// <returns>A <see cref="Task"/> that completes when the stream has been fully drained.</returns>
        private static async Task PumpOperations(
            IOperationStream queue,
            OperationApplier apply,
            CancellationToken cancellationToken,
            TaskCompletionSource<int> initiated = null)
        {
            // The loop variable is true only for the first pass; the loop exits via return.
            for (var isFirstFetch = true; ; isFirstFetch = false)
            {
                cancellationToken.ThrowIfCancellationRequested();

                // Issue the fetch before notifying, so the caller is told only once
                // draining has actually begun.
                var pendingFetch = queue.GetOperationAsync(cancellationToken);

                if (isFirstFetch)
                {
                    initiated?.TrySetResult(0);
                }

                var fetched = await pendingFetch.ConfigureAwait(false);

                // Null means there is nothing left in the stream.
                if (fetched == null)
                {
                    return;
                }

                await apply(fetched, cancellationToken).ConfigureAwait(false);

                fetched.Acknowledge();
            }
        }
Exemplo n.º 5
0
            /// <summary>
            /// Obtains the operation stream from <c>operationStreamGetter</c> and starts the
            /// drain loop on it.
            /// </summary>
            /// <returns>
            /// A task that is completed by the drain loop. If obtaining the stream throws,
            /// the returned task is faulted with that exception.
            /// </returns>
            public Task DrainAsync()
            {
                var drainCompletion = new TaskCompletionSource<object>();

                IOperationStream operationStream;
                try
                {
                    operationStream = this.operationStreamGetter();
                }
                catch (Exception getterFailure)
                {
                    // Surface the getter failure through the returned task rather than throwing.
                    drainCompletion.SetException(getterFailure);
                    return drainCompletion.Task;
                }

                this.DrainTaskLoop(drainCompletion, operationStream);
                return drainCompletion.Task;
            }
Exemplo n.º 6
0
        /// <summary>
        /// Synchronously waits for one operation from the replication stream. A <c>null</c>
        /// result (end of stream) is traced; any real operation is unexpected and causes the
        /// pump to be scheduled again. Exceptions are traced as warnings and swallowed.
        /// </summary>
        private void PumpReplicationOperation()
        {
            Trace.WriteInfo(TraceType, "PumpReplicationOperation started");
            try
            {
                // Blocks the calling thread until the stream yields a result.
                IOperation received = this.replicator.StateReplicator
                    .GetReplicationStream()
                    .GetOperationAsync(CancellationToken.None)
                    .Result;

                if (received != null)
                {
                    // Replication operations are not expected; keep pumping.
                    this.SchedulePumpReplicationOperation();
                    return;
                }

                Trace.WriteInfo(TraceType, "Reached end of replication stream");
            }
            catch (Exception e)
            {
                Trace.WriteWarning(TraceType, "PumpReplicationOperation: {0}", e);
            }
        }
Exemplo n.º 7
0
            /// <summary>
            /// Runs one step of the drain loop: requests the next operation from
            /// <paramref name="stream"/>, passes it to <c>info.Callback</c>, and re-enters itself
            /// for the following operation. In concurrent mode the next fetch is issued right
            /// after the callback has been started; otherwise it is issued only after the
            /// callback task has completed.
            /// </summary>
            /// <param name="tcs">
            /// Completion source behind the task returned by <c>DrainAsync</c>. It is completed
            /// with <c>null</c> when the stream ends, faulted when fetching fails, and cancelled
            /// when the fetch task is cancelled.
            /// </param>
            /// <param name="stream">The operation stream being drained.</param>
            private void DrainTaskLoop(TaskCompletionSource<object> tcs, IOperationStream stream)
            {
                Task<IOperation> operationTask;

                try
                {
                    operationTask = stream.GetOperationAsync(new CancellationToken());
                }
                catch (Exception ex)
                {
                    // The getter threw synchronously; surface it through the drain task.
                    tcs.SetException(ex);
                    return;
                }

                operationTask.ContinueWith(
                    t =>
                    {
                        if (t.IsCanceled)
                        {
                            // Without this branch, reading t.Result below would throw inside the
                            // continuation and the drain task would never complete.
                            tcs.TrySetCanceled();
                            return;
                        }

                        if (t.IsFaulted)
                        {
                            tcs.SetException(t.Exception.InnerException);
                            return;
                        }

                        if (t.Result == null)
                        {
                            // A null operation means the queue is complete.
                            tcs.SetResult(null);
                            return;
                        }

                        Task callbackTask;
                        try
                        {
                            callbackTask = this.info.Callback(t.Result);
                        }
                        catch (Exception ex)
                        {
                            // A callback that throws synchronously is treated as fatal: log and rethrow.
                            AppTrace.TraceSource.WriteExceptionAsError("OperationQueue.DrainAsync", ex, "Queue {0} - callback task function invoke threw", this.Name);
                            throw;
                        }

                        // Chain a continuation on the callback task.
                        callbackTask.ContinueWith(
                            (continuation) =>
                            {
                                if (continuation.IsFaulted)
                                {
                                    // A failed callback is treated as fatal.
                                    throw continuation.Exception.InnerException;
                                }

                                // In single-operation mode the outer continuation has not asked for
                                // another operation yet, so request it here.
                                if (!this.info.SupportsConcurrentProcessing)
                                {
                                    this.DrainTaskLoop(tcs, stream);
                                }
                            },
                            TaskContinuationOptions.ExecuteSynchronously);

                        // In concurrent mode, keep fetching while the callback is still running.
                        if (this.info.SupportsConcurrentProcessing)
                        {
                            this.DrainTaskLoop(tcs, stream);
                        }
                    },
                    TaskContinuationOptions.ExecuteSynchronously);
            }
Exemplo n.º 8
0
        /// <summary>
        /// Drains the state portion of the copy stream. Each operation whose last segment
        /// carries <c>CopyStage.CopyState</c> is passed (without that last segment) to the
        /// state manager and acknowledged. Draining stops at the first operation with a
        /// different stage, which is asserted to be <c>CopyStage.CopyProgressVector</c>, or
        /// when the stream returns <c>null</c>.
        /// </summary>
        /// <param name="copyStateStream">The copy stream to read state operations from.</param>
        /// <returns>
        /// The first non-state operation (not acknowledged here), or <c>null</c> if the stream
        /// ended before one was seen.
        /// </returns>
        private async Task<IOperation> DrainStateStreamAsync(IOperationStream copyStateStream)
        {
            FabricEvents.Events.DrainStart(this.tracer.Type, "State stream");

            var operation = await copyStateStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
            if (operation == null)
            {
                return null;
            }

            this.roleContextDrainState.OnDrainState();
            this.stateManager.BeginSettingCurrentState();

            long stateRecordNumber = 0;

            while (operation != null)
            {
                var data = operation.Data;

                // The copy stage is stored as an Int32 at the start of the last segment.
                var stageSegment = data[data.Count - 1];

                CopyStage copyStage;
                using (var stageStream = new MemoryStream(stageSegment.Array, stageSegment.Offset, stageSegment.Count))
                using (var stageReader = new BinaryReader(stageStream))
                {
                    copyStage = (CopyStage)stageReader.ReadInt32();
                }

                if (copyStage != CopyStage.CopyState)
                {
                    Utility.Assert(
                        copyStage == CopyStage.CopyProgressVector,
                        "copyStage == CopyStage.CopyProgressVector");
                    break;
                }

                // Every segment except the trailing stage marker is state payload.
                var payloadSegmentCount = data.Count - 1;
                var copiedBytes = new List<ArraySegment<byte>>();
                for (var index = 0; index < payloadSegmentCount; index++)
                {
                    copiedBytes.Add(data[index]);
                }

                var copiedData = new OperationData(copiedBytes);

                FabricEvents.Events.DrainStateNoise(
                    this.tracer.Type,
                    "Received state record: " + stateRecordNumber,
                    string.Empty);

                await this.stateManager.SetCurrentStateAsync(stateRecordNumber, copiedData).ConfigureAwait(false);

                operation.Acknowledge();

                FabricEvents.Events.DrainStateNoise(
                    this.tracer.Type,
                    "Acked state record: " + stateRecordNumber,
                    string.Empty);

                stateRecordNumber++;

                operation = await copyStateStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
            }

            // A non-null operation here means the loop stopped on the progress-vector
            // operation rather than on end of stream.
            bool copyCompleted = operation != null;

            // RDBug#10479578: when the copy is aborted (stream returned null),
            // EndSettingCurrentStateAsync is intentionally not called.
            if (copyCompleted)
            {
                await this.stateManager.EndSettingCurrentStateAsync().ConfigureAwait(false);
            }

#if !DotNetCoreClr
            // These events are new in System.Fabric. Existing CoreCLR apps carry their own
            // System.Fabric and would break if the events were referenced but not found.
            // This is a mitigation only; the actual fix is tracked by bug# 11614507.

            FabricEvents.Events.DrainCompleted(
                this.tracer.Type,
                "State",
                copyCompleted ? "Completed" : "Incomplete",
                stateRecordNumber,
                (uint)LogRecordType.Invalid,
                0,
                0,
                0);
#endif
            return operation;
        }
Exemplo n.º 9
0
        /// <summary>
        /// Drains the replication stream until it returns <c>null</c>. Each received operation
        /// is converted to a logical log record and logged; it is acknowledged from a
        /// continuation that runs after the record's flush task completes. When the stream
        /// ends, an information record is flushed and the method waits for processing and for
        /// every outstanding acknowledgement continuation before returning.
        /// </summary>
        /// <param name="replicationStream">The replication stream to drain.</param>
        /// <returns>A task that completes once the stream has been fully drained.</returns>
        private async Task DrainReplicationStreamAsync(IOperationStream replicationStream)
        {
            FabricEvents.Events.DrainStart(this.tracer.Type, "Replication stream");

            TaskCompletionSource <object> allOperationsAckedTcs = new TaskCompletionSource <object>();
            var  lastReplicatedRecord = LogicalLogRecord.InvalidLogicalLogRecord;

            // acksOutstanding starts at 1: the extra count belongs to the drain loop itself and
            // is released in the end-of-stream branch below, so the counter cannot reach zero
            // while operations are still being received.
            long replicatedRecordNumber = 0, acksOutstanding = 1, bytesOutstanding = 0;

            this.roleContextDrainState.OnDrainReplication();

            do
            {
                var drainTask = replicationStream.GetOperationAsync(CancellationToken.None);
                if (drainTask.IsCompleted == false)
                {
                    // No operation is immediately available: start a flush while waiting for the next one.
                    this.replicatedLogManager.LogManager.FlushAsync("DrainReplicationStream.IsEmpty").IgnoreExceptionVoid();
                    await drainTask.ConfigureAwait(false);
                }

                // drainTask has completed at this point, so this does not block.
                var operation = drainTask.GetAwaiter().GetResult();
                if (operation != null)
                {
                    var data = operation.Data;
#if DEBUG
                    ReplicatedLogManager.ValidateOperationData(data, "DrainReplicationStream LSN: " + operation.SequenceNumber);
#endif
                    lastReplicatedRecord     = (LogicalLogRecord)LogRecord.FromOperationData(data);
                    lastReplicatedRecord.Lsn = new LogicalSequenceNumber(operation.SequenceNumber);

                    await this.LogLogicalRecordOnSecondaryAsync(lastReplicatedRecord).ConfigureAwait(false);

                    // One more acknowledgement is now pending; the continuation below releases it.
                    var acksRemaining = Interlocked.Increment(ref acksOutstanding);

                    FabricEvents.Events.DrainReplicationReceive(
                        this.tracer.Type,
                        replicatedRecordNumber,
                        (uint)lastReplicatedRecord.RecordType,
                        lastReplicatedRecord.Lsn.LSN,
                        acksRemaining);

                    ++replicatedRecordNumber;

                    long operationSize = Utility.GetOperationSize(data);

                    var bytesRemaining = Interlocked.Add(ref bytesOutstanding, operationSize);

                    // Start a flush once the pending acknowledgement count or pending byte count
                    // reaches half of either the secondary or the primary replication queue limit.
                    // The memory-size limits are only considered when they are greater than zero.
                    if (((this.replicatorSettings.PublicSettings.MaxSecondaryReplicationQueueSize / 2 <= acksRemaining) ||
                         ((this.replicatorSettings.PublicSettings.MaxSecondaryReplicationQueueMemorySize > 0) &&
                          (this.replicatorSettings.PublicSettings.MaxSecondaryReplicationQueueMemorySize / 2 <= bytesRemaining))) ||
                        ((this.replicatorSettings.PublicSettings.MaxPrimaryReplicationQueueSize / 2 <= acksRemaining) ||
                         ((this.replicatorSettings.PublicSettings.MaxPrimaryReplicationQueueMemorySize > 0) &&
                          (this.replicatorSettings.PublicSettings.MaxPrimaryReplicationQueueMemorySize / 2 <= bytesRemaining))))
                    {
                        FabricEvents.Events.DrainReplicationFlush(
                            this.tracer.Type,
                            replicatedRecordNumber,
                            lastReplicatedRecord.Lsn.LSN,
                            acksRemaining,
                            bytesRemaining);

                        this.replicatedLogManager.LogManager.FlushAsync("DrainReplicationStream.IsFull").IgnoreExceptionVoid();
                    }

                    // Copy into fresh locals so the continuation does not observe values
                    // overwritten by later loop iterations.
                    var capturedOperation = operation;
                    var capturedRecord    = lastReplicatedRecord;
                    lastReplicatedRecord.AwaitFlush().IgnoreException().ContinueWith(
                        async task =>
                    {
                        var acksPending = Interlocked.Decrement(ref acksOutstanding);

                        if (task.Exception != null)
                        {
                            // On a failed flush the operation is not acknowledged and
                            // bytesOutstanding is not reduced.
                            // Signal the drain completion task if needed
                            if (acksPending == 0)
                            {
                                allOperationsAckedTcs.TrySetResult(null);
                            }

                            return;
                        }

                        var bytesPending = Interlocked.Add(ref bytesOutstanding, -operationSize);
                        Utility.Assert(
                            (acksPending >= 0) && (bytesPending >= 0),
                            "(acksPending >= 0) && (bytesPending >= 0)");

                        // NOTE(review): completion is signalled before the last operation is
                        // acknowledged, whereas DrainCopyStreamAsync acknowledges first.
                        // Confirm whether this ordering is intentional.
                        if (acksPending == 0)
                        {
                            allOperationsAckedTcs.TrySetResult(null);
                        }

                        capturedOperation.Acknowledge();

                        FabricEvents.Events.DrainReplicationNoise(
                            this.tracer.Type,
                            capturedRecord.Lsn.LSN,
                            acksPending,
                            bytesPending);

                        await capturedRecord.AwaitApply().ConfigureAwait(false);
                    }).IgnoreExceptionVoid();
                }
                else
                {
                    // End of stream: flush the "replication finished" information record and
                    // wait for it, and for all logical records, to be processed.
                    await this.replicatedLogManager.FlushInformationRecordAsync(
                        InformationEvent.ReplicationFinished,
                        closeLog : false,
                        flushInitiator : "DrainReplicationstream.IsFinished").ConfigureAwait(false);

                    await this.replicatedLogManager.LastInformationRecord.AwaitProcessing().ConfigureAwait(false);

                    await this.recordsProcessor.WaitForLogicalRecordsProcessingAsync().ConfigureAwait(false);

                    // Release the initial count taken at declaration. If continuations are still
                    // pending, wait for the last one to signal the completion source.
                    var acksPending = Interlocked.Decrement(ref acksOutstanding);
                    Utility.Assert(acksPending >= 0, "acksPending >= 0");
                    if (acksPending != 0)
                    {
                        await allOperationsAckedTcs.Task.ConfigureAwait(false);
                    }

                    Utility.Assert(acksOutstanding == 0, "acksOutstanding == 0");
                    break;
                }
            } while (true);

#if !DotNetCoreClr
            // These events are new in System.Fabric. Existing CoreCLR apps carry their own
            // System.Fabric and would break if the events were referenced but not found.
            // This is a mitigation only; the actual fix is tracked by bug# 11614507.

            FabricEvents.Events.DrainCompleted(
                this.tracer.Type,
                "Replication",
                "Completed",
                replicatedRecordNumber,
                (uint)lastReplicatedRecord.RecordType,
                lastReplicatedRecord.Lsn.LSN,
                lastReplicatedRecord.Psn.PSN,
                lastReplicatedRecord.RecordPosition);
#endif
        }
Exemplo n.º 10
0
        /// <summary>
        /// Drains the log portion of the copy stream, starting with an operation that the
        /// caller has already read. Each operation is converted to a logical log record and
        /// logged; it is acknowledged from a continuation that runs after the record's flush
        /// task completes. When the record with LSN <c>copiedUptoLsn</c> has been logged and
        /// a valid checkpoint record is known, the first checkpoint is completed and the copy
        /// log renamed. After the stream ends, an information record is flushed and the method
        /// waits for processing and for all outstanding acknowledgement continuations.
        /// </summary>
        /// <param name="copyStream">The copy stream to drain.</param>
        /// <param name="operation">
        /// The first operation to process; when <c>null</c> the receive loop is skipped.
        /// </param>
        /// <param name="copiedCheckpointRecord">
        /// The copied begin-checkpoint record, or <c>null</c> if not yet known. While it is
        /// <c>null</c>, it is re-read from <c>replicatedLogManager.LastInProgressCheckpointRecord</c>
        /// after each logged operation.
        /// </param>
        /// <param name="renamedCopyLogSuccessfully">
        /// Whether the copy log has already been renamed before this method was called.
        /// </param>
        /// <returns>A task that completes once the copy stream has been fully drained.</returns>
        private async Task DrainCopyStreamAsync(
            IOperationStream copyStream,
            IOperation operation,
            BeginCheckpointLogRecord copiedCheckpointRecord,
            bool renamedCopyLogSuccessfully)
        {
            FabricEvents.Events.DrainStart(this.tracer.Type, "Copy stream: RenamedCopyLogSuccessfully: " + renamedCopyLogSuccessfully);

            var  lastCopiedRecord = LogicalLogRecord.InvalidLogicalLogRecord;

            // acksOutstanding starts at 1: the extra count belongs to this method itself and is
            // released near the end, so the counter cannot reach zero while the loop is running.
            long copiedRecordNumber = 0, acksOutstanding = 1;

            TaskCompletionSource <object> allOperationsAckedTcs = new TaskCompletionSource <object>();

            try
            {
                if (operation != null)
                {
                    this.roleContextDrainState.OnDrainCopy();

                    do
                    {
                        var data = operation.Data;
#if DEBUG
                        ReplicatedLogManager.ValidateOperationData(data, "DrainCopyStreamAsync LSN: " + operation.SequenceNumber);
#endif
                        lastCopiedRecord = (LogicalLogRecord)LogRecord.FromOperationData(data);

                        await this.LogLogicalRecordOnSecondaryAsync(lastCopiedRecord).ConfigureAwait(false);

                        // After successfully appending the record into the buffer, increment the outstanding ack count
                        var acksRemaining = Interlocked.Increment(ref acksOutstanding);

                        FabricEvents.Events.DrainCopyReceive(
                            this.tracer.Type,
                            copiedRecordNumber,
                            lastCopiedRecord.RecordType.ToString(),
                            lastCopiedRecord.Lsn.LSN,
                            acksRemaining);

                        ++copiedRecordNumber;

                        // Start a flush once pending acknowledgements reach half of the copy queue size.
                        if (this.replicatorSettings.PublicSettings.MaxCopyQueueSize / 2 <= acksRemaining)
                        {
                            FabricEvents.Events.DrainCopyFlush(
                                this.tracer.Type,
                                copiedRecordNumber,
                                lastCopiedRecord.Lsn.LSN,
                                acksRemaining);

                            this.replicatedLogManager.LogManager.FlushAsync("DrainCopyStream.IsFull").IgnoreExceptionVoid();
                        }

                        // Copy into fresh locals so the continuation does not observe values
                        // overwritten by later loop iterations.
                        var capturedOperation = operation;
                        var capturedRecord    = lastCopiedRecord;
                        if (copiedCheckpointRecord == null)
                        {
                            copiedCheckpointRecord = this.replicatedLogManager.LastInProgressCheckpointRecord;
                            if (copiedCheckpointRecord != null)
                            {
                                Utility.Assert(
                                    copiedCheckpointRecord.Lsn == this.recoveredOrCopiedCheckpointLsn.Value,
                                    "copiedCheckpointRecordLsn {0} == recoveredOrCopiedCheckpointLsn {1}",
                                    copiedCheckpointRecord.Lsn,
                                    this.recoveredOrCopiedCheckpointLsn.Value);
                            }
                        }

                        // If we pumped the last operation in the copy stream (indicated by copiedUptoLsn), rename the copy log if this was a full copy,
                        // as we are guaranteed that the replica has all the data needed to be promoted to an active secondary and we could not have lost any state.
                        if (copiedCheckpointRecord != null &&
                            copiedCheckpointRecord != BeginCheckpointLogRecord.InvalidBeginCheckpointLogRecord &&
                            lastCopiedRecord.Lsn == this.copiedUptoLsn &&
                            renamedCopyLogSuccessfully == false) // Copied UE record could have same LSN, so this condition is needed
                        {
                            await this.checkpointManager.CompleteFirstCheckpointOnIdleAndRenameLog(copiedCheckpointRecord, this.copiedUptoLsn.LSN).ConfigureAwait(false);

                            renamedCopyLogSuccessfully = true;
                        }

                        lastCopiedRecord.AwaitFlush().ContinueWith(
                            async task =>
                        {
                            var acksPending = Interlocked.Decrement(ref acksOutstanding);

                            if (task.Exception != null)
                            {
                                // On a failed flush the operation is not acknowledged.
                                // Signal the drain completion task if needed
                                if (acksPending == 0)
                                {
                                    allOperationsAckedTcs.TrySetResult(null);
                                }

                                return;
                            }

                            capturedOperation.Acknowledge();

                            Utility.Assert(acksPending >= 0, "acksPending {0} >= 0", acksPending);

                            if (acksPending == 0)
                            {
                                allOperationsAckedTcs.TrySetResult(null);
                            }

                            FabricEvents.Events.DrainCopyNoise(
                                this.tracer.Type,
                                capturedRecord.Lsn.LSN,
                                acksPending);

                            await capturedRecord.AwaitApply().ConfigureAwait(false);
                        }).IgnoreExceptionVoid();

                        var drainTask = copyStream.GetOperationAsync(CancellationToken.None);
                        if (drainTask.IsCompleted == false)
                        {
                            // GopalK: Currently, we cannot wait for copy to finish because copy might get
                            // abandoned if the primary fails and the product waits for pending
                            // copy operations to get acknowledged before electing a new primary
                            this.replicatedLogManager.LogManager.FlushAsync("DrainCopyStream.IsEmpty").IgnoreExceptionVoid();
                            await drainTask.ConfigureAwait(false);
                        }

                        // drainTask has completed at this point, so this does not block.
                        operation = drainTask.GetAwaiter().GetResult();
                    } while (operation != null);
                }
            }

            // This finally block ensures that before we continue, we cancel the first full copy checkpoint during full build.
            // Without it, the code above could throw out of this method and a lifecycle API such as close might get stuck because
            // there is a pending checkpoint that is not yet fully processed.
            finally
            {
                // If the pump prematurely finishes for any reason, it means the copy log cannot be renamed
                if (copiedCheckpointRecord != null &&
                    copiedCheckpointRecord != BeginCheckpointLogRecord.InvalidBeginCheckpointLogRecord &&
                    renamedCopyLogSuccessfully == false)
                {
                    // ConfigureAwait(false) added for consistency with every other await in this method.
                    await this.checkpointManager.CancelFirstCheckpointOnIdleDueToIncompleteCopy(copiedCheckpointRecord, this.copiedUptoLsn.LSN).ConfigureAwait(false);
                }
            }

            await this.replicatedLogManager.FlushInformationRecordAsync(
                InformationEvent.CopyFinished,
                closeLog : false,
                flushInitiator : "DrainCopyStream.IsFinished").ConfigureAwait(false);

            // Awaiting processing of this record ensures that all operations in the copy
            // stream have been applied before the drain completes.
            await this.replicatedLogManager.LastInformationRecord.AwaitProcessing().ConfigureAwait(false);

            await this.recordsProcessor.WaitForLogicalRecordsProcessingAsync().ConfigureAwait(false);

            // Release the initial count taken at declaration.
            var acksOpen = Interlocked.Decrement(ref acksOutstanding);
            Utility.Assert(acksOpen >= 0, "acksOpen {0} >= 0", acksOpen);
            if (acksOpen != 0)
            {
                // Wait for all the callbacks above to finish running and acknowledging.
                await allOperationsAckedTcs.Task.ConfigureAwait(false);
            }

            Utility.Assert(acksOutstanding == 0, "acksOutstanding == 0");

#if !DotNetCoreClr
            // These events are new in System.Fabric. Existing CoreCLR apps carry their own
            // System.Fabric and would break if the events were referenced but not found.
            // This is a mitigation only; the actual fix is tracked by bug# 11614507.

            FabricEvents.Events.DrainCompleted(
                this.tracer.Type,
                "Copy",
                "Completed",
                copiedRecordNumber,
                (uint)lastCopiedRecord.RecordType,
                lastCopiedRecord.Lsn.LSN,
                lastCopiedRecord.Psn.PSN,
                lastCopiedRecord.RecordPosition);
#endif
        }
Exemplo n.º 11
0
        /// <summary>
        /// Copies or Builds Idle Secondary replica from copyStream populated by the Primary.
        /// </summary>
        /// <remarks>
        /// The first operation on the stream carries a <see cref="CopyHeader"/>; its stage selects the path:
        /// <list type="bullet">
        /// <item><description>
        /// <c>CopyNone</c>: the next read of the stream is asserted to return null and the method returns
        /// without draining anything.
        /// </description></item>
        /// <item><description>
        /// <c>CopyState</c>: <c>DrainStateStreamAsync</c> runs first, then the log is re-initialized from the
        /// received <c>CopyMetadata</c> before the rest of the stream is drained.
        /// </description></item>
        /// <item><description>
        /// Any other stage: <c>TruncateTailIfNecessary</c> runs before the rest of the stream is drained.
        /// </description></item>
        /// </list>
        /// The method also returns early when the stream yields no first operation, or when
        /// <c>DrainStateStreamAsync</c> returns null.
        /// </remarks>
        /// <param name="copyStream">The copy stream populated by the primary.</param>
        /// <returns>Task that represents the asynchronous operation.</returns>
        private async Task CopyOrBuildReplicaAsync(IOperationStream copyStream)
        {
            var operation = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);

            // An empty stream: there is no header to read and nothing to copy.
            if (operation == null)
            {
                return;
            }

            Utility.Assert(operation.Data.Count == 1, "operation.Data.Count == 1");

            CopyHeader copyHeader = CopyHeader.ReadFromOperationData(operation.Data);

            operation.Acknowledge();

            if (copyHeader.Stage == CopyStage.CopyNone)
            {
                // GopalK: The order of the following statements is significant
                Utility.Assert(
                    this.roleContextDrainState.DrainingStream == DrainingStream.Invalid,
                    "this.roleContextDrainState.DrainingStream == DrainingStream.Invalid");

                // Since there is no false progress stage, dispose the recovery stream
                this.recoveryManager.DisposeRecoveryReadStreamIfNeeded();

                operation = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);

                // With CopyNone nothing may follow the header on the stream.
                Utility.Assert(operation == null, "operation == null");

                var trace = string.Format(
                    CultureInfo.InvariantCulture,
                    "Idle replica is already current with primary replica: {0}",
                    copyHeader.PrimaryReplicaId);

                FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, trace);

                return;
            }

            // Both values are handed to DrainCopyStreamAsync at the end of the method.
            BeginCheckpointLogRecord copiedCheckpointRecord;
            bool renamedCopyLogSuccessfully = false;

            if (copyHeader.Stage == CopyStage.CopyState)
            {
                var trace = string.Format(CultureInfo.InvariantCulture, "Idle replica is copying from primary replica: {0}", copyHeader.PrimaryReplicaId);

                // Since there is no false progress stage, dispose the recovery stream
                this.recoveryManager.DisposeRecoveryReadStreamIfNeeded();

                FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, trace);

                operation = await this.DrainStateStreamAsync(copyStream).ConfigureAwait(false);

                if (operation == null)
                {
                    FabricEvents.Events.CopyOrBuildReplica(
                        this.tracer.Type,
                        "Returning null as copy pump has been aborted");

                    return;
                }

                // The operation returned by DrainStateStreamAsync carries the copy metadata.
                CopyMetadata copyMetadata = CopyMetadata.ReadFromOperationData(operation.Data);

                this.ReadConsistentAfterLsn = copyMetadata.HighestStateProviderCopiedLsn;

                trace =
                    string.Format(
                        CultureInfo.InvariantCulture,
                        "Copy started. StartingLSN: {0} StartingEpoch: {1},{2} CheckpointLSN: {3} UptoLSN: {4} Highest Checkpointed Lsn {5}"
                        + Environment.NewLine + "Copied ProgressVector: {6}" + Environment.NewLine,
                        copyMetadata.StartingLogicalSequenceNumber,
                        copyMetadata.StartingEpoch.DataLossNumber,
                        copyMetadata.StartingEpoch.ConfigurationNumber,
                        copyMetadata.CheckpointLsn,
                        copyMetadata.UptoLsn,
                        copyMetadata.HighestStateProviderCopiedLsn,
                        copyMetadata.ProgressVector.ToString(Constants.ProgressVectorMaxStringSizeInKb));

                FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, trace);

                // Reset local state to the values received in the copy metadata.
                this.transactionManager.TransactionsMap.Reuse();
                this.checkpointManager.ResetStableLsn(copyMetadata.CheckpointLsn);
                this.copiedUptoLsn = copyMetadata.UptoLsn;

                var newLogHead = await this.replicatedLogManager.LogManager.CreateCopyLogAsync(copyMetadata.StartingEpoch, copyMetadata.StartingLogicalSequenceNumber).ConfigureAwait(false);

                this.recoveredOrCopiedCheckpointLsn.Update(copyMetadata.CheckpointLsn);
                this.replicatedLogManager.Reuse(
                    copyMetadata.ProgressVector,
                    null,
                    null,
                    null,
                    InformationLogRecord.InvalidInformationLogRecord,
                    newLogHead,
                    copyMetadata.StartingEpoch,
                    copyMetadata.StartingLogicalSequenceNumber);

                // RD: RDBug 7475439: Utility.Assert(sourceEntry == targetEntry, "currentSourceVector == currentTargetVector");
                // UpdateEpoch lsn is same as starting lsn, so insert UE log record
                if (copyMetadata.StartingLogicalSequenceNumber == copyMetadata.ProgressVector.LastProgressVectorEntry.Lsn)
                {
                    var record = new UpdateEpochLogRecord(
                        copyMetadata.ProgressVector.LastProgressVectorEntry.Epoch,
                        copyMetadata.ProgressVector.LastProgressVectorEntry.PrimaryReplicaId)
                    {
                        Lsn = copyMetadata.StartingLogicalSequenceNumber
                    };

                    FabricEvents.Events.UpdateEpoch(
                        this.tracer.Type,
                        "UpdateEpochRecordDueToFullCopy",
                        record.Epoch.DataLossNumber,
                        record.Epoch.ConfigurationNumber,
                        record.Lsn.LSN,
                        this.roleContextDrainState.ReplicaRole.ToString());

                    // NOTE: Do not use the UpdateEpoch method on logmanager as it adds the entry to the list of progress vectors.
                    // We do not want to do that here as the entry already exists
                    this.replicatedLogManager.AppendWithoutReplication(record, null);
                }

                // Stays null unless the copied checkpoint LSN coincides with the starting LSN.
                copiedCheckpointRecord = null;
                if (this.recoveredOrCopiedCheckpointLsn.Value == copyMetadata.StartingLogicalSequenceNumber)
                {
                    this.checkpointManager.FirstBeginCheckpointOnIdleSecondary();
                    copiedCheckpointRecord = this.replicatedLogManager.LastInProgressCheckpointRecord;

                    Utility.Assert(
                        copiedCheckpointRecord.Lsn == this.recoveredOrCopiedCheckpointLsn.Value,
                        "copiedCheckpointRecordLsn {0} != recoveredOrCopiedCheckpointLsn {1}",
                        copiedCheckpointRecord.Lsn, this.recoveredOrCopiedCheckpointLsn.Value);

                    // If this is the UptoLsn, ensure rename is done before continuing
                    if (recoveredOrCopiedCheckpointLsn.Value == this.copiedUptoLsn)
                    {
                        // This ensures we dont get stuck waiting for stable LSN
                        Utility.Assert(
                            checkpointManager.LastStableLsn == this.recoveredOrCopiedCheckpointLsn.Value,
                            "checkpointManager.LastStableLsn {0} == this.recoveredOrCopiedCheckpointLsn.Value {1}",
                            checkpointManager.LastStableLsn, this.recoveredOrCopiedCheckpointLsn.Value);

                        await this.checkpointManager.CompleteFirstCheckpointOnIdleAndRenameLog(copiedCheckpointRecord, this.copiedUptoLsn.LSN).ConfigureAwait(false);

                        renamedCopyLogSuccessfully = true;
                    }
                }

                // The metadata operation is acknowledged only after the log has been re-initialized above.
                operation.Acknowledge();
                FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, "Acked progress ProgressVectorEntry operation");

                operation = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
            }
            else
            {
                // Formatted with the invariant culture, like the traces of the other two stages.
                FabricEvents.Events.CopyOrBuildReplica(
                    this.tracer.Type,
                    string.Format(
                        CultureInfo.InvariantCulture,
                        "Idle replica is building from primary replica: {0}",
                        copyHeader.PrimaryReplicaId));

                operation = await this.TruncateTailIfNecessary(copyStream).ConfigureAwait(false);

                copiedCheckpointRecord = BeginCheckpointLogRecord.InvalidBeginCheckpointLogRecord;

                // Since the false progress stage is complete, dispose the recovery stream
                this.recoveryManager.DisposeRecoveryReadStreamIfNeeded();
            }

            // 'operation' is the first not-yet-processed operation of the stream (as returned above).
            await this.DrainCopyStreamAsync(copyStream, operation, copiedCheckpointRecord, renamedCopyLogSuccessfully).ConfigureAwait(false);
        }
        /// <summary>
        /// Drains <paramref name="queue"/>: each operation is fetched, passed to <paramref name="apply"/>,
        /// and then acknowledged, until the stream returns a null operation.
        /// </summary>
        /// <param name="queue">The operation stream to drain.</param>
        /// <param name="apply">The delegate invoked for each retrieved operation before it is acknowledged.</param>
        /// <param name="cancellationToken">
        /// The cancellation token; checked at the top of every iteration and passed to both the fetch and
        /// <paramref name="apply"/>.
        /// </param>
        /// <param name="initiated">
        /// Optional completion source that is set (with result 0) once the first fetch has been started.
        /// </param>
        /// <returns>A <see cref="Task"/> that completes when the stream has been fully drained.</returns>
        private static async Task PumpOperations(
            IOperationStream queue,
            OperationApplier apply,
            CancellationToken cancellationToken,
            TaskCompletionSource<int> initiated = null)
        {
            var startSignalPending = true;

            while (true)
            {
                cancellationToken.ThrowIfCancellationRequested();

                // Kick off the fetch before signalling, so the caller is notified only once a read is in flight.
                var pendingFetch = queue.GetOperationAsync(cancellationToken);

                if (startSignalPending)
                {
                    if (initiated != null)
                    {
                        initiated.TrySetResult(0);
                    }

                    startSignalPending = false;
                }

                var current = await pendingFetch.ConfigureAwait(false);

                // The stream signals that it is exhausted by returning null.
                if (current == null)
                {
                    break;
                }

                // Acknowledge only after the operation has been applied.
                await apply(current, cancellationToken).ConfigureAwait(false);
                current.Acknowledge();
            }
        }