Пример #1
0
        private async Task DrainReplicationStreamAsync(IOperationStream replicationStream)
        {
            FabricEvents.Events.DrainStart(this.tracer.Type, "Replication stream");

            TaskCompletionSource <object> allOperationsAckedTcs = new TaskCompletionSource <object>();
            var  lastReplicatedRecord = LogicalLogRecord.InvalidLogicalLogRecord;
            long replicatedRecordNumber = 0, acksOutstanding = 1, bytesOutstanding = 0;

            this.roleContextDrainState.OnDrainReplication();

            do
            {
                var drainTask = replicationStream.GetOperationAsync(CancellationToken.None);
                if (drainTask.IsCompleted == false)
                {
                    this.replicatedLogManager.LogManager.FlushAsync("DrainReplicationStream.IsEmpty").IgnoreExceptionVoid();
                    await drainTask.ConfigureAwait(false);
                }

                var operation = drainTask.GetAwaiter().GetResult();
                if (operation != null)
                {
                    var data = operation.Data;
#if DEBUG
                    ReplicatedLogManager.ValidateOperationData(data, "DrainReplicationStream LSN: " + operation.SequenceNumber);
#endif
                    lastReplicatedRecord     = (LogicalLogRecord)LogRecord.FromOperationData(data);
                    lastReplicatedRecord.Lsn = new LogicalSequenceNumber(operation.SequenceNumber);

                    await this.LogLogicalRecordOnSecondaryAsync(lastReplicatedRecord).ConfigureAwait(false);

                    var acksRemaining = Interlocked.Increment(ref acksOutstanding);

                    FabricEvents.Events.DrainReplicationReceive(
                        this.tracer.Type,
                        replicatedRecordNumber,
                        (uint)lastReplicatedRecord.RecordType,
                        lastReplicatedRecord.Lsn.LSN,
                        acksRemaining);

                    ++replicatedRecordNumber;

                    long operationSize = Utility.GetOperationSize(data);

                    var bytesRemaining = Interlocked.Add(ref bytesOutstanding, operationSize);
                    if (((this.replicatorSettings.PublicSettings.MaxSecondaryReplicationQueueSize / 2 <= acksRemaining) ||
                         ((this.replicatorSettings.PublicSettings.MaxSecondaryReplicationQueueMemorySize > 0) &&
                          (this.replicatorSettings.PublicSettings.MaxSecondaryReplicationQueueMemorySize / 2 <= bytesRemaining))) ||
                        ((this.replicatorSettings.PublicSettings.MaxPrimaryReplicationQueueSize / 2 <= acksRemaining) ||
                         ((this.replicatorSettings.PublicSettings.MaxPrimaryReplicationQueueMemorySize > 0) &&
                          (this.replicatorSettings.PublicSettings.MaxPrimaryReplicationQueueMemorySize / 2 <= bytesRemaining))))
                    {
                        FabricEvents.Events.DrainReplicationFlush(
                            this.tracer.Type,
                            replicatedRecordNumber,
                            lastReplicatedRecord.Lsn.LSN,
                            acksRemaining,
                            bytesRemaining);

                        this.replicatedLogManager.LogManager.FlushAsync("DrainReplicationStream.IsFull").IgnoreExceptionVoid();
                    }

                    var capturedOperation = operation;
                    var capturedRecord    = lastReplicatedRecord;
                    lastReplicatedRecord.AwaitFlush().IgnoreException().ContinueWith(
                        async task =>
                    {
                        var acksPending = Interlocked.Decrement(ref acksOutstanding);

                        if (task.Exception != null)
                        {
                            // Signal the drain completion task if needed
                            if (acksPending == 0)
                            {
                                allOperationsAckedTcs.TrySetResult(null);
                            }

                            return;
                        }

                        var bytesPending = Interlocked.Add(ref bytesOutstanding, -operationSize);
                        Utility.Assert(
                            (acksPending >= 0) && (bytesPending >= 0),
                            "(acksPending >= 0) && (bytesPending >= 0)");

                        if (acksPending == 0)
                        {
                            allOperationsAckedTcs.TrySetResult(null);
                        }

                        capturedOperation.Acknowledge();

                        FabricEvents.Events.DrainReplicationNoise(
                            this.tracer.Type,
                            capturedRecord.Lsn.LSN,
                            acksPending,
                            bytesPending);

                        await capturedRecord.AwaitApply().ConfigureAwait(false);
                    }).IgnoreExceptionVoid();
                }
                else
                {
                    await this.replicatedLogManager.FlushInformationRecordAsync(
                        InformationEvent.ReplicationFinished,
                        closeLog : false,
                        flushInitiator : "DrainReplicationstream.IsFinished").ConfigureAwait(false);

                    await this.replicatedLogManager.LastInformationRecord.AwaitProcessing().ConfigureAwait(false);

                    await this.recordsProcessor.WaitForLogicalRecordsProcessingAsync().ConfigureAwait(false);

                    var acksPending = Interlocked.Decrement(ref acksOutstanding);
                    Utility.Assert(acksPending >= 0, "acksPending >= 0");
                    if (acksPending != 0)
                    {
                        await allOperationsAckedTcs.Task.ConfigureAwait(false);
                    }

                    Utility.Assert(acksOutstanding == 0, "acksOutstanding == 0");
                    break;
                }
            } while (true);

#if !DotNetCoreClr
            // These are new events defined in System.Fabric, existing CoreCLR apps would break
            // if these events are refernced as it wont be found. As CoreCLR apps carry System.Fabric
            // along with application
            // This is just a mitigation for now. Actual fix being tracked via bug# 11614507

            FabricEvents.Events.DrainCompleted(
                this.tracer.Type,
                "Replication",
                "Completed",
                replicatedRecordNumber,
                (uint)lastReplicatedRecord.RecordType,
                lastReplicatedRecord.Lsn.LSN,
                lastReplicatedRecord.Psn.PSN,
                lastReplicatedRecord.RecordPosition);
#endif
        }
Пример #2
0
        private async Task DrainCopyStreamAsync(
            IOperationStream copyStream,
            IOperation operation,
            BeginCheckpointLogRecord copiedCheckpointRecord,
            bool renamedCopyLogSuccessfully)
        {
            FabricEvents.Events.DrainStart(this.tracer.Type, "Copy stream: RenamedCopyLogSuccessfully: " + renamedCopyLogSuccessfully);

            var  lastCopiedRecord = LogicalLogRecord.InvalidLogicalLogRecord;
            long copiedRecordNumber = 0, acksOutstanding = 1;

            TaskCompletionSource <object> allOperationsAckedTcs = new TaskCompletionSource <object>();

            try
            {
                if (operation != null)
                {
                    this.roleContextDrainState.OnDrainCopy();

                    do
                    {
                        var data = operation.Data;
#if DEBUG
                        ReplicatedLogManager.ValidateOperationData(data, "DrainCopyStreamAsync LSN: " + operation.SequenceNumber);
#endif
                        lastCopiedRecord = (LogicalLogRecord)LogRecord.FromOperationData(data);

                        await this.LogLogicalRecordOnSecondaryAsync(lastCopiedRecord).ConfigureAwait(false);

                        // After successfully appending the record into the buffer, increment the outstanding ack count
                        var acksRemaining = Interlocked.Increment(ref acksOutstanding);

                        FabricEvents.Events.DrainCopyReceive(
                            this.tracer.Type,
                            copiedRecordNumber,
                            lastCopiedRecord.RecordType.ToString(),
                            lastCopiedRecord.Lsn.LSN,
                            acksRemaining);

                        ++copiedRecordNumber;
                        if (this.replicatorSettings.PublicSettings.MaxCopyQueueSize / 2 <= acksRemaining)
                        {
                            FabricEvents.Events.DrainCopyFlush(
                                this.tracer.Type,
                                copiedRecordNumber,
                                lastCopiedRecord.Lsn.LSN,
                                acksRemaining);

                            this.replicatedLogManager.LogManager.FlushAsync("DrainCopyStream.IsFull").IgnoreExceptionVoid();
                        }

                        var capturedOperation = operation;
                        var capturedRecord    = lastCopiedRecord;
                        if (copiedCheckpointRecord == null)
                        {
                            copiedCheckpointRecord = this.replicatedLogManager.LastInProgressCheckpointRecord;
                            if (copiedCheckpointRecord != null)
                            {
                                Utility.Assert(
                                    copiedCheckpointRecord.Lsn == this.recoveredOrCopiedCheckpointLsn.Value,
                                    "copiedCheckpointRecordLsn {0} == recoveredOrCopiedCheckpointLsn {1}",
                                    copiedCheckpointRecord.Lsn,
                                    this.recoveredOrCopiedCheckpointLsn.Value);
                            }
                        }

                        // If pumped the last operation in the copy stream (indicated by copiedUptoLsn), rename the copy log if this was a full copy
                        // as we are guranteed that the replica has all the data needed to be promoted to an active secondary and we could not have lost any state
                        if (copiedCheckpointRecord != null &&
                            copiedCheckpointRecord != BeginCheckpointLogRecord.InvalidBeginCheckpointLogRecord &&
                            lastCopiedRecord.Lsn == this.copiedUptoLsn &&
                            renamedCopyLogSuccessfully == false) // Copied UE record could have same LSN, so this condition is needed
                        {
                            await this.checkpointManager.CompleteFirstCheckpointOnIdleAndRenameLog(copiedCheckpointRecord, this.copiedUptoLsn.LSN).ConfigureAwait(false);

                            renamedCopyLogSuccessfully = true;
                        }

                        lastCopiedRecord.AwaitFlush().ContinueWith(
                            async task =>
                        {
                            var acksPending = Interlocked.Decrement(ref acksOutstanding);

                            if (task.Exception != null)
                            {
                                // Signal the drain completion task if needed
                                if (acksPending == 0)
                                {
                                    allOperationsAckedTcs.TrySetResult(null);
                                }

                                return;
                            }

                            capturedOperation.Acknowledge();

                            Utility.Assert(acksPending >= 0, "acksPending {0} >= 0", acksPending);

                            if (acksPending == 0)
                            {
                                allOperationsAckedTcs.TrySetResult(null);
                            }

                            FabricEvents.Events.DrainCopyNoise(
                                this.tracer.Type,
                                capturedRecord.Lsn.LSN,
                                acksPending);

                            await capturedRecord.AwaitApply().ConfigureAwait(false);
                        }).IgnoreExceptionVoid();

                        var drainTask = copyStream.GetOperationAsync(CancellationToken.None);
                        if (drainTask.IsCompleted == false)
                        {
                            // GopalK: Currently, we cannot wait for copy to finish because copy might get
                            // abandoned if the primary fails and the product waits for pending
                            // copy operations to get acknowledged before electing a new primary
                            this.replicatedLogManager.LogManager.FlushAsync("DrainCopyStream.IsEmpty").IgnoreExceptionVoid();
                            await drainTask.ConfigureAwait(false);
                        }

                        operation = drainTask.GetAwaiter().GetResult();
                    } while (operation != null);
                }
            }

            // This finally block ensures that before we continue, we cancel the first full copy checkpoint during full build
            // Without having this, it is possible that the above code throws out of this method and any lifecycle API like close might get stuck because
            // there is a pending checkpoint that is not yet fully processed
            finally
            {
                // If the pump prematurely finishes for any reason, it means the copy log cannot be renamed
                if (copiedCheckpointRecord != null &&
                    copiedCheckpointRecord != BeginCheckpointLogRecord.InvalidBeginCheckpointLogRecord &&
                    renamedCopyLogSuccessfully == false)
                {
                    await this.checkpointManager.CancelFirstCheckpointOnIdleDueToIncompleteCopy(copiedCheckpointRecord, this.copiedUptoLsn.LSN);
                }
            }

            await this.replicatedLogManager.FlushInformationRecordAsync(
                InformationEvent.CopyFinished,
                closeLog : false,
                flushInitiator : "DrainCopyStream.IsFinished").ConfigureAwait(false);

            // Awaiting processing of this record,
            // ensures that all operations in the copystream must have been applied Before we complete the drainComplationTcs.
            await this.replicatedLogManager.LastInformationRecord.AwaitProcessing().ConfigureAwait(false);

            await this.recordsProcessor.WaitForLogicalRecordsProcessingAsync().ConfigureAwait(false);

            var acksOpen = Interlocked.Decrement(ref acksOutstanding);
            Utility.Assert(acksOpen >= 0, "acksOpen {0} >= 0", acksOpen);
            if (acksOpen != 0)
            {
                // wait for all the callbacks above to finish running and acknowleding
                await allOperationsAckedTcs.Task.ConfigureAwait(false);
            }

            Utility.Assert(acksOutstanding == 0, "acksOutstanding == 0");

#if !DotNetCoreClr
            // These are new events defined in System.Fabric, existing CoreCLR apps would break
            // if these events are refernced as it wont be found. As CoreCLR apps carry System.Fabric
            // along with application
            // This is just a mitigation for now. Actual fix being tracked via bug# 11614507

            FabricEvents.Events.DrainCompleted(
                this.tracer.Type,
                "Copy",
                "Completed",
                copiedRecordNumber,
                (uint)lastCopiedRecord.RecordType,
                lastCopiedRecord.Lsn.LSN,
                lastCopiedRecord.Psn.PSN,
                lastCopiedRecord.RecordPosition);
#endif
        }