Пример #1
0
        /// <summary>
        /// Obtains the logical log through CreateLogFileAsync and, when the log holds no usable
        /// content, resets it and seeds it with the initial set of log records.
        /// </summary>
        /// <param name="openMode">Open mode of the replica.</param>
        /// <returns>Task whose result is a physical log reader constructed over this instance.</returns>
        internal async Task<PhysicalLogReader> OpenAsync(ReplicaOpenMode openMode)
        {
            // TODO: Anurag: do we plumb c.token up?
            var isNewReplica = openMode == ReplicaOpenMode.New;
            this.LogicalLog = await this.CreateLogFileAsync(isNewReplica, CancellationToken.None).ConfigureAwait(false);

            if (this.LogicalLog.Length > sizeof(int))
            {
                // The log is longer than a single int, so there is nothing to seed.
                return new PhysicalLogReader(this);
            }

            // No usable content in the log
            if (this.LogicalLog.WritePosition > 0)
            {
                // Remove all contents and reset write cursor back to 0
                await this.LogicalLog.TruncateTail(0, CancellationToken.None).ConfigureAwait(false);

                Utility.Assert(this.LogicalLog.Length == 0, "this.logicalLog.Length == 0");
                Utility.Assert(this.LogicalLog.WritePosition == 0, "this.logicalLog.WritePosition == 0");
            }

            using (var seedWriter = new PhysicalLogWriter(
                this.LogicalLog,
                this.emptyCallbackManager,
                this.Tracer,
                this.MaxWriteCacheSizeInMB,
                this.IncomingBytesRateCounterWriter,
                this.LogFlushBytesRateCounterWriter,
                this.BytesPerFlushCounterWriter,
                this.AvgFlushLatencyCounterWriter,
                this.AvgSerializationLatencyCounterWriter,
                false))
            {
                // Each record is created and buffered in turn; later records reference earlier ones.
                var indexingRecord = IndexingLogRecord.CreateZeroIndexingLogRecord();
                seedWriter.InsertBufferedRecord(indexingRecord);

                var updateEpochRecord = UpdateEpochLogRecord.CreateZeroUpdateEpochLogRecord();
                seedWriter.InsertBufferedRecord(updateEpochRecord);

                var beginCheckpointRecord = BeginCheckpointLogRecord.CreateZeroBeginCheckpointLogRecord();
                seedWriter.InsertBufferedRecord(beginCheckpointRecord);

                var barrierRecord = BarrierLogRecord.CreateOneBarrierLogRecord();
                seedWriter.InsertBufferedRecord(barrierRecord);

                var endCheckpointRecord = EndCheckpointLogRecord.CreateOneEndCheckpointLogRecord(
                    beginCheckpointRecord,
                    indexingRecord);
                seedWriter.InsertBufferedRecord(endCheckpointRecord);

                var completeCheckpointRecord = new CompleteCheckpointLogRecord(
                    LogicalSequenceNumber.OneLsn,
                    indexingRecord,
                    endCheckpointRecord);
                seedWriter.InsertBufferedRecord(completeCheckpointRecord);

                await seedWriter.FlushAsync("OpenAsync").ConfigureAwait(false);

                // This additional await is required to ensure the log record was indeed flushed.
                // Without it, FlushAsync could succeed while the flush of the record itself failed due to a write error.
                await completeCheckpointRecord.AwaitFlush().ConfigureAwait(false);
            }

            return new PhysicalLogReader(this);
        }
Пример #2
0
        /// <summary>
        /// Builds a log record from operation data received during replication/copy processing.
        /// The logical metadata section (Int32 section size, UInt32 record type, Int64 LSN) is read
        /// first to pick the concrete record type; the record then reads its own logical content.
        /// </summary>
        /// <param name="operationData">Operation data holding the serialized record.</param>
        /// <returns>The deserialized record, or null after reporting a coding error for an unexpected record type.</returns>
        private static LogRecord ReadFromOperationData(OperationData operationData)
        {
            const ulong RecordPosition = InvalidRecordPosition;

            var segmentIndex = -1;
            LogRecordType type;
            long sequenceNumber;

            using (var metadataReader = new BinaryReader(IncrementIndexAndGetMemoryStreamAt(operationData, ref segmentIndex)))
            {
                var stream = metadataReader.BaseStream;

                // Logical metadata section: the leading Int32 is the section size.
                var sectionStart = stream.Position;
                var sectionEnd = sectionStart + metadataReader.ReadInt32();

                // Logical metadata read.
                type = (LogRecordType)metadataReader.ReadUInt32();
                sequenceNumber = metadataReader.ReadInt64();

                // Jump to the end of the section ignoring fields that are not understood.
                Utility.Assert(sectionEnd >= stream.Position, "Could not have read more than section size.");
                stream.Position = sectionEnd;
            }

            LogRecord result;

            switch (type)
            {
                case LogRecordType.BeginTransaction:
                    result = new BeginTransactionOperationLogRecord(type, RecordPosition, sequenceNumber);
                    break;

                case LogRecordType.Operation:
                    result = new OperationLogRecord(type, RecordPosition, sequenceNumber);
                    break;

                case LogRecordType.EndTransaction:
                    result = new EndTransactionLogRecord(type, RecordPosition, sequenceNumber);
                    break;

                case LogRecordType.Barrier:
                    result = new BarrierLogRecord(type, RecordPosition, sequenceNumber);
                    break;

                case LogRecordType.UpdateEpoch:
                    result = new UpdateEpochLogRecord(type, RecordPosition, sequenceNumber);
                    break;

                case LogRecordType.Backup:
                    result = new BackupLogRecord(type, RecordPosition, sequenceNumber);
                    break;

                default:
                    Utility.CodingError(
                        "Unexpected record type received during replication/copy processing {0}",
                        type);
                    return null;
            }

            // The record consumes the remaining operation data starting from the current index.
            result.ReadLogical(operationData, ref segmentIndex);
            return result;
        }
Пример #3
0
        /// <summary>
        /// Handles an update epoch record received while the copy stream is being drained.
        /// </summary>
        /// <param name="record">Update epoch record whose LSN must equal the current log tail LSN.</param>
        /// <returns>
        /// True when the record was applied through UpdateEpochRecord on the replicated log manager;
        /// false when its epoch is less than or equal to the last progress vector entry's epoch and it
        /// was appended without replication or processed as a duplicate instead.
        /// </returns>
        private bool CopiedUpdateEpoch(UpdateEpochLogRecord record)
        {
            var logManager = this.replicatedLogManager;

            Utility.Assert(
                this.roleContextDrainState.DrainingStream == DrainingStream.CopyStream,
                "this.recordsProcessor.DrainingStream == DrainingStream.CopyStream");
            Utility.Assert(
                record.Lsn.LSN == logManager.CurrentLogTailLsn.LSN,
                "record.LastLogicalSequenceNumber.LSN == this.currentLogTailLsn.LSN");

            FabricEvents.Events.UpdateEpoch(
                this.tracer.Type,
                "CopiedUpdateEpoch",
                record.Epoch.DataLossNumber,
                record.Epoch.ConfigurationNumber,
                record.Lsn.LSN,
                this.roleContextDrainState.ReplicaRole.ToString());

            var lastEntry = logManager.ProgressVector.LastProgressVectorEntry;

            // Epoch beyond the last known progress vector entry: regular update epoch path.
            if (!(record.Epoch <= lastEntry.Epoch))
            {
                logManager.UpdateEpochRecord(record);
                return true;
            }

            if (logManager.CurrentLogTailEpoch < lastEntry.Epoch)
            {
                // This case happens during full copy before the first checkpoint
                Utility.Assert(
                    record.Lsn <= this.recoveredOrCopiedCheckpointLsn.Value,
                    "record.LastLogicalSequenceNumber <= this.recoveredOrCopiedCheckpointLsn");

                logManager.AppendWithoutReplication(record, null);

                // Only move the tail epoch forward, never backwards.
                if (logManager.CurrentLogTailEpoch < record.Epoch)
                {
                    logManager.SetTailEpoch(record.Epoch);
                }

                return false;
            }

            var wasInserted = logManager.ProgressVector.Insert(new ProgressVectorEntry(record));
            if (wasInserted == true)
            {
                // This case happens when a series of primaries fail before
                // a stable one completes reconfiguration
                logManager.AppendWithoutReplication(record, null);
            }
            else
            {
                ProcessDuplicateRecord(record);
            }

            return false;
        }
Пример #4
0
        /// <summary>
        /// Process the update epoch log record.
        /// Advances the current log tail epoch when the record carries a newer epoch and, in
        /// recovery mode, reconciles the record with the progress vector.
        /// </summary>
        /// <param name="updateEpochLogRecord">The update epoch log record to be processed.</param>
        /// <param name="isRecoverableRecord">Is this a recoverable record. Always set to true by this overload.</param>
        private void ProcessLogRecord(UpdateEpochLogRecord updateEpochLogRecord, out bool isRecoverableRecord)
        {
            isRecoverableRecord = true;

            Utility.Assert(
                this.LastLogicalSequenceNumber == updateEpochLogRecord.Lsn,
                "{0} this.LastLogicalSequenceNumber ({1}) == updateEpochLogRecord.Lsn ({2})",
                this.tracer.Type,
                this.LastLogicalSequenceNumber,
                updateEpochLogRecord.Lsn);

            var updateEpochRecordEpoch = updateEpochLogRecord.Epoch;

            // The tail epoch only ever moves forward.
            if (this.CurrentLogTailEpoch < updateEpochRecordEpoch)
            {
                this.CurrentLogTailEpoch = updateEpochRecordEpoch;
            }

            if (Mode.Recovery == this.mode)
            {
                ProgressVectorEntry progressVectorEntry;

                if (updateEpochLogRecord.RecordPosition
                    > this.recoveredLastCompletedBeginCheckpointRecord.RecordPosition)
                {
                    // Record lies after the recovered begin-checkpoint record: it must be
                    // added to (or inserted into) the progress vector.
                    progressVectorEntry = new ProgressVectorEntry(updateEpochLogRecord);
                    if (this.ProgressVector.LastProgressVectorEntry.Epoch < updateEpochRecordEpoch)
                    {
                        this.ProgressVector.Add(progressVectorEntry);
                    }
                    else
                    {
                        var isInserted = this.ProgressVector.Insert(progressVectorEntry);
                        Utility.Assert(
                            isInserted == true,
                            "{0} isInserted ({1}) == true. Incoming: {2} CurrentTail: {3}",
                            this.tracer.Type,
                            isInserted,
                            progressVectorEntry.ToString(),
                            this.ProgressVector.LastProgressVectorEntry.ToString());
                    }
                }
                else
                {
                    // Record is at or before the recovered begin-checkpoint record: the progress
                    // vector is expected to already hold a matching entry.
                    progressVectorEntry = this.ProgressVector.Find(updateEpochRecordEpoch);

                    // Check for null on its own first. Assert arguments are evaluated before the
                    // call, so reading progressVectorEntry.Lsn as a format argument would throw a
                    // NullReferenceException instead of failing the assert when no entry is found.
                    Utility.Assert(
                        progressVectorEntry != null,
                        "{0} ProgressVectorEntry != null. Epoch: {1},{2}",
                        this.tracer.Type,
                        updateEpochRecordEpoch.DataLossNumber,
                        updateEpochRecordEpoch.ConfigurationNumber);

                    // Argument order matches the labels in the message: {1} is the entry's LSN,
                    // {2} is the last logical sequence number.
                    Utility.Assert(
                        progressVectorEntry.Lsn == this.LastLogicalSequenceNumber,
                        "{0} (ProgressVectorEntry != null) && (ProgressVectorEntry.LastLogicalSequenceNumber {1} == LSN {2})",
                        this.tracer.Type,
                        progressVectorEntry.Lsn,
                        this.LastLogicalSequenceNumber);
                }
            }
        }
Пример #5
0
        /// <summary>
        /// Applies an update epoch record at the current log tail: traces it, appends it without
        /// replication, adds a matching entry to the progress vector and sets the tail epoch.
        /// </summary>
        /// <param name="record">Update epoch record; its LSN must equal the current log tail LSN.</param>
        public void UpdateEpochRecord(UpdateEpochLogRecord record)
        {
            Utility.Assert(
                this.CurrentLogTailLsn == record.Lsn,
                "this.currentLogTailLsn == record.LastLogicalSequenceNumber");

            var tracedEpoch = record.Epoch;
            var replicaRole = RoleContextDrainState.ReplicaRole.ToString();

            FabricEvents.Events.UpdateEpoch(
                this.tracer.Type,
                "UpdateEpochRecord",
                tracedEpoch.DataLossNumber,
                tracedEpoch.ConfigurationNumber,
                record.Lsn.LSN,
                replicaRole);

            // Append first; the progress vector entry is built from the record afterwards.
            this.AppendWithoutReplication(record, null);

            var entry = new ProgressVectorEntry(record);
            this.ProgressVector.Add(entry);

            this.SetTailEpoch(record.Epoch);
        }
Пример #6
0
        /// <summary>
        /// Buffers a logical record in the physical log writer and updates tail bookkeeping.
        /// As the name indicates, the caller is expected to already hold the lock.
        /// </summary>
        /// <param name="record">Logical record to buffer.</param>
        /// <param name="isPrimary">When true, operation-log initiation is recorded and its result returned through <paramref name="pendingCount"/>.</param>
        /// <param name="pendingCount">Result of OnOperationLogInitiationCallerHoldsLock when <paramref name="isPrimary"/> is true; otherwise -1.</param>
        /// <returns>The value returned by the physical log writer's InsertBufferedRecord.</returns>
        private long InsertBufferedRecordCallerHoldsLock(
            LogicalLogRecord record,
            bool isPrimary,
            out long pendingCount)
        {
            pendingCount = -1;
            var bufferedResult = LogManager.PhysicalLogWriter.InsertBufferedRecord(record);

            var epochRecord = record as UpdateEpochLogRecord;
            var isUpdateEpoch = epochRecord != null;

            // A record either carries the next LSN, or is an update epoch sitting at the current tail LSN.
            Utility.Assert(
                (this.CurrentLogTailLsn + 1) == record.Lsn || (isUpdateEpoch && epochRecord.Lsn == this.CurrentLogTailLsn),
                "(this.currentLogTailLsn + 1) {0} == record.LastLogicalSequenceNumber {1}. RecordType {2}",
                this.CurrentLogTailLsn + 1,
                record.Lsn,
                record.RecordType);

            if (!isUpdateEpoch)
            {
                // Increment tail lsn only on appending a logical record that is NOT an update epoch
                this.CurrentLogTailLsn++;
            }

            if (isPrimary)
            {
                pendingCount = OnOperationLogInitiationCallerHoldsLock();
            }

            var barrier = record as BarrierLogRecord;
            if (barrier != null)
            {
                this.OnBarrierBufferedCallerHoldsLock(barrier, isPrimary);
            }

            return bufferedResult;
        }
Пример #7
0
 /// <summary>
 /// Initializes a progress vector entry from an update epoch log record by forwarding the
 /// record's Epoch, Lsn, PrimaryReplicaId and Timestamp to the four-argument constructor.
 /// </summary>
 /// <param name="record">Update epoch log record supplying the values; it is dereferenced, so it must not be null.</param>
 internal ProgressVectorEntry(UpdateEpochLogRecord record)
     : this(record.Epoch, record.Lsn, record.PrimaryReplicaId, record.Timestamp)
 {
 }
Пример #8
0
        /// <summary>
        /// Reads one log record from the reader's current position.
        /// The metadata section (Int32 section size, UInt32 record type, Int64 LSN) selects the
        /// concrete record type; the record then reads the rest of its content itself.
        /// </summary>
        /// <param name="br">Reader positioned at the start of the record's metadata section.</param>
        /// <param name="recordPosition">Record position passed to the record's constructor.</param>
        /// <param name="isPhysicalRead">Forwarded unchanged to the record's Read method.</param>
        /// <returns>The deserialized record, or null after reporting a coding error for an unexpected record type.</returns>
        private static LogRecord ReadRecord(BinaryReader br, ulong recordPosition, bool isPhysicalRead)
        {
            var stream = br.BaseStream;

            // Records are constructed with an invalid LSN; the real one is read from the stream below.
            var placeholderLsn = LogicalSequenceNumber.InvalidLsn.LSN;

            // Metadata section: the leading Int32 is the section size.
            var sectionStart = stream.Position;
            var sectionSize = br.ReadInt32();
            var sectionEnd = sectionStart + sectionSize;

            // Read Logical Metadata
            var type = (LogRecordType)br.ReadUInt32();

            LogRecord result;

            switch (type)
            {
                case LogRecordType.BeginTransaction:
                    result = new BeginTransactionOperationLogRecord(type, recordPosition, placeholderLsn);
                    break;

                case LogRecordType.Operation:
                    result = new OperationLogRecord(type, recordPosition, placeholderLsn);
                    break;

                case LogRecordType.EndTransaction:
                    result = new EndTransactionLogRecord(type, recordPosition, placeholderLsn);
                    break;

                case LogRecordType.Barrier:
                    result = new BarrierLogRecord(type, recordPosition, placeholderLsn);
                    break;

                case LogRecordType.UpdateEpoch:
                    result = new UpdateEpochLogRecord(type, recordPosition, placeholderLsn);
                    break;

                case LogRecordType.Backup:
                    result = new BackupLogRecord(type, recordPosition, placeholderLsn);
                    break;

                case LogRecordType.BeginCheckpoint:
                    result = new BeginCheckpointLogRecord(type, recordPosition, placeholderLsn);
                    break;

                case LogRecordType.EndCheckpoint:
                    result = new EndCheckpointLogRecord(type, recordPosition, placeholderLsn);
                    break;

                case LogRecordType.Indexing:
                    result = new IndexingLogRecord(type, recordPosition, placeholderLsn);
                    break;

                case LogRecordType.TruncateHead:
                    result = new TruncateHeadLogRecord(type, recordPosition, placeholderLsn);
                    break;

                case LogRecordType.TruncateTail:
                    result = new TruncateTailLogRecord(type, recordPosition, placeholderLsn);
                    break;

                case LogRecordType.Information:
                    result = new InformationLogRecord(type, recordPosition, placeholderLsn);
                    break;

                case LogRecordType.CompleteCheckpoint:
                    result = new CompleteCheckpointLogRecord(type, recordPosition, placeholderLsn);
                    break;

                default:
                    Utility.CodingError("Unexpected record type {0}", type);
                    return null;
            }

            result.lsn = new LogicalSequenceNumber(br.ReadInt64());

            // Jump to the end of the section ignoring fields that are not understood.
            Utility.Assert(sectionEnd >= stream.Position, "Could not have read more than section size.");
            stream.Position = sectionEnd;

            result.Read(br, isPhysicalRead);

            return result;
        }
Пример #9
0
        /// <summary>
        /// Copies or Builds Idle Secondary replica from copyStream populated by the Primary.
        /// The first operation on the stream carries a copy header whose stage selects one of three paths:
        /// CopyNone (nothing to copy), CopyState (state is drained and a copy log is created),
        /// or any other stage (tail is truncated if necessary and the replica is built).
        /// Except for CopyNone, the method finishes by draining the remainder of the copy stream.
        /// </summary>
        /// <param name="copyStream">The copy stream populated by the primary.</param>
        /// <returns>Task that represents the asynchronous operation.</returns>
        private async Task CopyOrBuildReplicaAsync(IOperationStream copyStream)
        {
            var operation = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);

            // No first operation on the stream: nothing to do.
            if (operation == null)
            {
                return;
            }

            Utility.Assert(operation.Data.Count == 1, "operation.Data.Count == 1");

            CopyHeader copyHeader = CopyHeader.ReadFromOperationData(operation.Data);

            // The header has been read, so the operation carrying it is acknowledged.
            operation.Acknowledge();

            if (copyHeader.Stage == CopyStage.CopyNone)
            {
                // GopalK: The order of the following statements is significant
                Utility.Assert(
                    this.roleContextDrainState.DrainingStream == DrainingStream.Invalid,
                    "this.recordsProcessor.DrainingStream == DrainingStream.Invalid");

                // Since there is no false progress stage, dispose the recovery stream
                this.recoveryManager.DisposeRecoveryReadStreamIfNeeded();

                // With nothing to copy, the stream is expected to yield no further operations.
                operation = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);

                Utility.Assert(operation == null, "operation == null");

                var trace = string.Format(
                    CultureInfo.InvariantCulture,
                    "Idle replica is already current with primary replica: {0}",
                    copyHeader.PrimaryReplicaId);

                FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, trace);

                return;
            }

            // Both values below are handed to DrainCopyStreamAsync at the end of the method.
            // renamedCopyLogSuccessfully becomes true only after
            // CompleteFirstCheckpointOnIdleAndRenameLog has been awaited in the CopyState branch.
            BeginCheckpointLogRecord copiedCheckpointRecord;
            bool renamedCopyLogSuccessfully = false;

            if (copyHeader.Stage == CopyStage.CopyState)
            {
                var trace = string.Format(CultureInfo.InvariantCulture, "Idle replica is copying from primary replica: {0}", copyHeader.PrimaryReplicaId);

                // Since there is no false progress stage, dispose the recovery stream
                this.recoveryManager.DisposeRecoveryReadStreamIfNeeded();

                FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, trace);

                // Drain the state portion of the stream; the returned operation carries the copy metadata.
                operation = await this.DrainStateStreamAsync(copyStream).ConfigureAwait(false);

                if (operation == null)
                {
                    FabricEvents.Events.CopyOrBuildReplica(
                        this.tracer.Type,
                        "Returning null as copy pump has been aborted");

                    return;
                }

                CopyMetadata copyMetadata = CopyMetadata.ReadFromOperationData(operation.Data);

                this.ReadConsistentAfterLsn = copyMetadata.HighestStateProviderCopiedLsn;

                trace =
                    string.Format(
                        CultureInfo.InvariantCulture,
                        "Copy started. StartingLSN: {0} StartingEpoch: {1},{2} CheckpointLSN: {3} UptoLSN: {4} Highest Checkpointed Lsn {5}"
                        + Environment.NewLine + "Copied ProgressVector: {6}" + Environment.NewLine,
                        copyMetadata.StartingLogicalSequenceNumber,
                        copyMetadata.StartingEpoch.DataLossNumber,
                        copyMetadata.StartingEpoch.ConfigurationNumber,
                        copyMetadata.CheckpointLsn,
                        copyMetadata.UptoLsn,
                        copyMetadata.HighestStateProviderCopiedLsn,
                        copyMetadata.ProgressVector.ToString(Constants.ProgressVectorMaxStringSizeInKb));

                FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, trace);

                // Reset local state from the copy metadata before the copy log is created.
                this.transactionManager.TransactionsMap.Reuse();
                this.checkpointManager.ResetStableLsn(copyMetadata.CheckpointLsn);
                this.copiedUptoLsn = copyMetadata.UptoLsn;

                var newLogHead = await this.replicatedLogManager.LogManager.CreateCopyLogAsync(copyMetadata.StartingEpoch, copyMetadata.StartingLogicalSequenceNumber).ConfigureAwait(false);

                // Re-seed the replicated log manager with the copied progress vector, the new log head
                // and the starting epoch/LSN taken from the copy metadata.
                this.recoveredOrCopiedCheckpointLsn.Update(copyMetadata.CheckpointLsn);
                this.replicatedLogManager.Reuse(
                    copyMetadata.ProgressVector,
                    null,
                    null,
                    null,
                    InformationLogRecord.InvalidInformationLogRecord,
                    newLogHead,
                    copyMetadata.StartingEpoch,
                    copyMetadata.StartingLogicalSequenceNumber);

                // RD: RDBug 7475439: Utility.Assert(sourceEntry == targetEntry, "currentSourceVector == currentTargetVector");
                // UpdateEpoch lsn is same as starting lsn, so insert UE log record
                if (copyMetadata.StartingLogicalSequenceNumber == copyMetadata.ProgressVector.LastProgressVectorEntry.Lsn)
                {
                    var record = new UpdateEpochLogRecord(
                        copyMetadata.ProgressVector.LastProgressVectorEntry.Epoch,
                        copyMetadata.ProgressVector.LastProgressVectorEntry.PrimaryReplicaId)
                    {
                        Lsn = copyMetadata.StartingLogicalSequenceNumber
                    };

                    FabricEvents.Events.UpdateEpoch(
                        this.tracer.Type,
                        "UpdateEpochRecordDueToFullCopy",
                        record.Epoch.DataLossNumber,
                        record.Epoch.ConfigurationNumber,
                        record.Lsn.LSN,
                        this.roleContextDrainState.ReplicaRole.ToString());

                    // NOTE: Do not use the UpdateEpoch method on logmanager as it adds the entry to the list of progress vectors.
                    // We do not want to do that here as the entry already exists
                    this.replicatedLogManager.AppendWithoutReplication(record, null);
                }

                // The copied checkpoint record stays null unless the checkpoint LSN equals the starting LSN.
                copiedCheckpointRecord = null;
                if (this.recoveredOrCopiedCheckpointLsn.Value == copyMetadata.StartingLogicalSequenceNumber)
                {
                    this.checkpointManager.FirstBeginCheckpointOnIdleSecondary();
                    copiedCheckpointRecord = this.replicatedLogManager.LastInProgressCheckpointRecord;

                    Utility.Assert(
                        copiedCheckpointRecord.Lsn == this.recoveredOrCopiedCheckpointLsn.Value,
                        "copiedCheckpointRecordLsn {0} != recoveredOrCopiedCheckpointLsn {1}",
                        copiedCheckpointRecord.Lsn, this.recoveredOrCopiedCheckpointLsn.Value);

                    // If this is the UptoLsn, ensure rename is done before continuing
                    if (recoveredOrCopiedCheckpointLsn.Value == this.copiedUptoLsn)
                    {
                        // This ensures we dont get stuck waiting for stable LSN
                        Utility.Assert(
                            checkpointManager.LastStableLsn == this.recoveredOrCopiedCheckpointLsn.Value,
                            "checkpointManager.LastStableLsn {0} == this.recoveredOrCopiedCheckpointLsn.Value {1}",
                            checkpointManager.LastStableLsn, this.recoveredOrCopiedCheckpointLsn.Value);

                        await this.checkpointManager.CompleteFirstCheckpointOnIdleAndRenameLog(copiedCheckpointRecord, this.copiedUptoLsn.LSN).ConfigureAwait(false);

                        renamedCopyLogSuccessfully = true;
                    }
                }

                // Acknowledge the copy metadata operation only after the log has been set up from it.
                operation.Acknowledge();
                FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, "Acked progress ProgressVectorEntry operation");

                // Fetch the next operation; it is passed to DrainCopyStreamAsync below.
                operation = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
            }
            else
            {
                // Any stage other than CopyNone and CopyState lands here.
                FabricEvents.Events.CopyOrBuildReplica(
                    this.tracer.Type,
                    "Idle replica is building from primary replica: " + copyHeader.PrimaryReplicaId);

                operation = await this.TruncateTailIfNecessary(copyStream).ConfigureAwait(false);

                copiedCheckpointRecord = BeginCheckpointLogRecord.InvalidBeginCheckpointLogRecord;

                // Since the false progress stage is complete, dispose the recovery stream
                this.recoveryManager.DisposeRecoveryReadStreamIfNeeded();
            }

            // Drain whatever remains of the copy stream, starting from the operation obtained above.
            await this.DrainCopyStreamAsync(copyStream, operation, copiedCheckpointRecord, renamedCopyLogSuccessfully).ConfigureAwait(false);
        }