/// <summary>
/// Creates or finds the log stream.
/// When the opened log has no usable content, the log is emptied and seeded with the default
/// bootstrap records, which are flushed before the reader is returned.
/// </summary>
/// <param name="openMode">Open mode of the replica.</param>
/// <returns>Task that represents the asynchronous open operation; its result is a reader over this log.</returns>
internal async Task<PhysicalLogReader> OpenAsync(ReplicaOpenMode openMode)
{
    // TODO: Anurag: do we plumb c.token up?
    bool isNewReplica = openMode == ReplicaOpenMode.New;
    this.LogicalLog = await this.CreateLogFileAsync(isNewReplica, CancellationToken.None).ConfigureAwait(false);

    if (this.LogicalLog.Length > sizeof(int))
    {
        // The log already carries content, so there is nothing to seed.
        return new PhysicalLogReader(this);
    }

    // No usable content in the log.
    if (this.LogicalLog.WritePosition > 0)
    {
        // Remove all contents and reset the write cursor back to 0.
        await this.LogicalLog.TruncateTail(0, CancellationToken.None).ConfigureAwait(false);

        Utility.Assert(this.LogicalLog.Length == 0, "this.logicalLog.Length == 0");
        Utility.Assert(this.LogicalLog.WritePosition == 0, "this.logicalLog.WritePosition == 0");
    }

    using (var seedWriter = new PhysicalLogWriter(
        this.LogicalLog,
        this.emptyCallbackManager,
        this.Tracer,
        this.MaxWriteCacheSizeInMB,
        this.IncomingBytesRateCounterWriter,
        this.LogFlushBytesRateCounterWriter,
        this.BytesPerFlushCounterWriter,
        this.AvgFlushLatencyCounterWriter,
        this.AvgSerializationLatencyCounterWriter,
        false))
    {
        // Each record is created and buffered in turn; later records reference earlier ones.
        var indexingRecord = IndexingLogRecord.CreateZeroIndexingLogRecord();
        seedWriter.InsertBufferedRecord(indexingRecord);

        seedWriter.InsertBufferedRecord(UpdateEpochLogRecord.CreateZeroUpdateEpochLogRecord());

        var beginCheckpointRecord = BeginCheckpointLogRecord.CreateZeroBeginCheckpointLogRecord();
        seedWriter.InsertBufferedRecord(beginCheckpointRecord);

        seedWriter.InsertBufferedRecord(BarrierLogRecord.CreateOneBarrierLogRecord());

        var endCheckpointRecord = EndCheckpointLogRecord.CreateOneEndCheckpointLogRecord(
            beginCheckpointRecord,
            indexingRecord);
        seedWriter.InsertBufferedRecord(endCheckpointRecord);

        var completeCheckpointRecord = new CompleteCheckpointLogRecord(
            LogicalSequenceNumber.OneLsn,
            indexingRecord,
            endCheckpointRecord);
        seedWriter.InsertBufferedRecord(completeCheckpointRecord);

        await seedWriter.FlushAsync("OpenAsync").ConfigureAwait(false);

        // This additional await is required to ensure the log record was indeed flushed.
        // Without this, the flushasync could succeed, but the log record flush could have failed due to a write error
        await completeCheckpointRecord.AwaitFlush().ConfigureAwait(false);
    }

    return new PhysicalLogReader(this);
}
/// <summary>
/// Builds a log record from operation data received during replication/copy processing.
/// The logical metadata section (section size, record type, LSN) is read first; the record type
/// then selects the concrete record class, which reads the remainder through ReadLogical.
/// </summary>
/// <param name="operationData">Operation data holding the serialized logical record.</param>
/// <returns>The deserialized record; null is returned after a coding error is reported for an unexpected record type.</returns>
private static LogRecord ReadFromOperationData(OperationData operationData)
{
    // Records built from operation data have no position in the physical log yet.
    const ulong RecordPosition = InvalidRecordPosition;

    LogRecordType type;
    long sequenceNumber;
    var bufferIndex = -1;

    using (var metadataReader = new BinaryReader(IncrementIndexAndGetMemoryStreamAt(operationData, ref bufferIndex)))
    {
        // Logical metadata section: size prefix, record type, LSN.
        var sectionStart = metadataReader.BaseStream.Position;
        var sectionEnd = sectionStart + metadataReader.ReadInt32();

        type = (LogRecordType)metadataReader.ReadUInt32();
        sequenceNumber = metadataReader.ReadInt64();

        // Jump to the end of the section ignoring fields that are not understood.
        Utility.Assert(
            sectionEnd >= metadataReader.BaseStream.Position,
            "Could not have read more than section size.");
        metadataReader.BaseStream.Position = sectionEnd;
    }

    LogRecord result;
    switch (type)
    {
        case LogRecordType.BeginTransaction:
            result = new BeginTransactionOperationLogRecord(type, RecordPosition, sequenceNumber);
            break;

        case LogRecordType.Operation:
            result = new OperationLogRecord(type, RecordPosition, sequenceNumber);
            break;

        case LogRecordType.EndTransaction:
            result = new EndTransactionLogRecord(type, RecordPosition, sequenceNumber);
            break;

        case LogRecordType.Barrier:
            result = new BarrierLogRecord(type, RecordPosition, sequenceNumber);
            break;

        case LogRecordType.UpdateEpoch:
            result = new UpdateEpochLogRecord(type, RecordPosition, sequenceNumber);
            break;

        case LogRecordType.Backup:
            result = new BackupLogRecord(type, RecordPosition, sequenceNumber);
            break;

        default:
            Utility.CodingError(
                "Unexpected record type received during replication/copy processing {0}",
                type);
            return null;
    }

    result.ReadLogical(operationData, ref bufferIndex);
    return result;
}
/// <summary>
/// Handles an update epoch record received on the copy stream.
/// </summary>
/// <param name="record">The copied update epoch record; its LSN must equal the current log tail LSN.</param>
/// <returns>
/// True when the record's epoch is newer than the last progress vector entry and it was applied through
/// UpdateEpochRecord; false when it was appended without advancing the progress vector tail, inserted
/// into the middle of the progress vector, or handed to ProcessDuplicateRecord.
/// </returns>
private bool CopiedUpdateEpoch(UpdateEpochLogRecord record)
{
    Utility.Assert(
        this.roleContextDrainState.DrainingStream == DrainingStream.CopyStream,
        "this.recordsProcessor.DrainingStream == DrainingStream.CopyStream");
    Utility.Assert(
        record.Lsn.LSN == this.replicatedLogManager.CurrentLogTailLsn.LSN,
        "record.LastLogicalSequenceNumber.LSN == this.currentLogTailLsn.LSN");

    FabricEvents.Events.UpdateEpoch(
        this.tracer.Type,
        "CopiedUpdateEpoch",
        record.Epoch.DataLossNumber,
        record.Epoch.ConfigurationNumber,
        record.Lsn.LSN,
        this.roleContextDrainState.ReplicaRole.ToString());

    var lastEntry = this.replicatedLogManager.ProgressVector.LastProgressVectorEntry;

    if (record.Epoch <= lastEntry.Epoch)
    {
        if (this.replicatedLogManager.CurrentLogTailEpoch < lastEntry.Epoch)
        {
            // This case happens during full copy before the first checkpoint
            Utility.Assert(
                record.Lsn <= this.recoveredOrCopiedCheckpointLsn.Value,
                "record.LastLogicalSequenceNumber <= this.recoveredOrCopiedCheckpointLsn");

            this.replicatedLogManager.AppendWithoutReplication(record, null);

            // Only ever move the tail epoch forward.
            if (this.replicatedLogManager.CurrentLogTailEpoch < record.Epoch)
            {
                this.replicatedLogManager.SetTailEpoch(record.Epoch);
            }

            return false;
        }

        var entryToInsert = new ProgressVectorEntry(record);
        var inserted = this.replicatedLogManager.ProgressVector.Insert(entryToInsert);

        if (inserted)
        {
            // This case happens when a series of primaries fail before
            // a stable one completes reconfiguration
            this.replicatedLogManager.AppendWithoutReplication(record, null);
        }
        else
        {
            ProcessDuplicateRecord(record);
        }

        return false;
    }

    // The record carries an epoch newer than anything in the progress vector.
    this.replicatedLogManager.UpdateEpochRecord(record);
    return true;
}
/// <summary>
/// Process the update epoch log record.
/// Advances the current log tail epoch when the record carries a newer epoch and, in recovery mode,
/// either adds/inserts the record into the progress vector (record lies after the last completed
/// begin checkpoint) or verifies that the progress vector already contains a matching entry.
/// </summary>
/// <param name="updateEpochLogRecord">The update epoch log record to be processed.</param>
/// <param name="isRecoverableRecord">Is this a recoverable record. Always set to true by this overload.</param>
private void ProcessLogRecord(UpdateEpochLogRecord updateEpochLogRecord, out bool isRecoverableRecord)
{
    isRecoverableRecord = true;

    Utility.Assert(
        this.LastLogicalSequenceNumber == updateEpochLogRecord.Lsn,
        "{0} this.LastLogicalSequenceNumber ({1}) == updateEpochLogRecord.Lsn ({2})",
        this.tracer.Type,
        this.LastLogicalSequenceNumber,
        updateEpochLogRecord.Lsn);

    var updateEpochRecordEpoch = updateEpochLogRecord.Epoch;

    // The tail epoch only ever moves forward.
    if (this.CurrentLogTailEpoch < updateEpochRecordEpoch)
    {
        this.CurrentLogTailEpoch = updateEpochRecordEpoch;
    }

    if (Mode.Recovery == this.mode)
    {
        ProgressVectorEntry progressVectorEntry;

        if (updateEpochLogRecord.RecordPosition > this.recoveredLastCompletedBeginCheckpointRecord.RecordPosition)
        {
            // The record lies after the last completed begin checkpoint, so it is added to the progress vector here.
            progressVectorEntry = new ProgressVectorEntry(updateEpochLogRecord);

            if (this.ProgressVector.LastProgressVectorEntry.Epoch < updateEpochRecordEpoch)
            {
                this.ProgressVector.Add(progressVectorEntry);
            }
            else
            {
                var isInserted = this.ProgressVector.Insert(progressVectorEntry);

                Utility.Assert(
                    isInserted == true,
                    "{0} isInserted ({1}) == true. Incoming: {2} CurrentTail: {3}",
                    this.tracer.Type,
                    isInserted,
                    progressVectorEntry.ToString(),
                    this.ProgressVector.LastProgressVectorEntry.ToString());
            }
        }
        else
        {
            // The record is not after the last completed begin checkpoint: the progress vector
            // is expected to already hold an entry for this epoch at the current LSN.
            progressVectorEntry = this.ProgressVector.Find(updateEpochRecordEpoch);

            // The null check is a separate assert on purpose: assert arguments are evaluated before
            // the call, so dereferencing the entry in the same call would throw NullReferenceException
            // before the assert could report the missing entry.
            Utility.Assert(
                progressVectorEntry != null,
                "{0} ProgressVectorEntry != null. Epoch <{1}:{2}> was not found in the progress vector",
                this.tracer.Type,
                updateEpochRecordEpoch.DataLossNumber,
                updateEpochRecordEpoch.ConfigurationNumber);

            // Arguments follow the message labels: {1} is the entry's LSN, {2} is the current LSN.
            Utility.Assert(
                progressVectorEntry.Lsn == this.LastLogicalSequenceNumber,
                "{0} (ProgressVectorEntry != null) && (ProgressVectorEntry.LastLogicalSequenceNumber {1} == LSN {2})",
                this.tracer.Type,
                progressVectorEntry.Lsn,
                this.LastLogicalSequenceNumber);
        }
    }
}
/// <summary>
/// Applies an update epoch record at the current log tail: traces it, appends it without replication,
/// adds a matching entry to the progress vector and sets the tail epoch to the record's epoch.
/// </summary>
/// <param name="record">Update epoch record whose LSN must equal the current log tail LSN.</param>
public void UpdateEpochRecord(UpdateEpochLogRecord record)
{
    Utility.Assert(
        this.CurrentLogTailLsn == record.Lsn,
        "this.currentLogTailLsn == record.LastLogicalSequenceNumber");

    var tracedEpoch = record.Epoch;
    var replicaRole = RoleContextDrainState.ReplicaRole.ToString();

    FabricEvents.Events.UpdateEpoch(
        this.tracer.Type,
        "UpdateEpochRecord",
        tracedEpoch.DataLossNumber,
        tracedEpoch.ConfigurationNumber,
        record.Lsn.LSN,
        replicaRole);

    // Order matters: append first, then publish the entry, then move the tail epoch.
    this.AppendWithoutReplication(record, null);

    var newEntry = new ProgressVectorEntry(record);
    this.ProgressVector.Add(newEntry);

    this.SetTailEpoch(record.Epoch);
}
/// <summary>
/// Buffers a logical record in the physical log writer and updates the tail bookkeeping.
/// The tail LSN is incremented for every record except update epoch records, which share the
/// LSN of the current tail.
/// </summary>
/// <param name="record">Logical record to buffer.</param>
/// <param name="isPrimary">When true, the pending operation count is obtained and returned through <paramref name="pendingCount"/>.</param>
/// <param name="pendingCount">Result of OnOperationLogInitiationCallerHoldsLock when <paramref name="isPrimary"/> is true; otherwise -1.</param>
/// <returns>The value returned by the physical log writer's InsertBufferedRecord.</returns>
private long InsertBufferedRecordCallerHoldsLock(
    LogicalLogRecord record,
    bool isPrimary,
    out long pendingCount)
{
    pendingCount = -1;

    var insertResult = LogManager.PhysicalLogWriter.InsertBufferedRecord(record);

    var epochRecord = record as UpdateEpochLogRecord;

    // Either the record is the next LSN, or it is an update epoch sitting exactly on the tail.
    Utility.Assert(
        (this.CurrentLogTailLsn + 1) == record.Lsn
        || (epochRecord != null && epochRecord.Lsn == this.CurrentLogTailLsn),
        "(this.currentLogTailLsn + 1) {0} == record.LastLogicalSequenceNumber {1}. RecordType {2}",
        this.CurrentLogTailLsn + 1,
        record.Lsn,
        record.RecordType);

    if (epochRecord == null)
    {
        // Increment tail lsn only on appending a logical record that is NOT an update epoch
        ++this.CurrentLogTailLsn;
    }

    if (isPrimary)
    {
        pendingCount = OnOperationLogInitiationCallerHoldsLock();
    }

    var barrier = record as BarrierLogRecord;
    if (barrier != null)
    {
        this.OnBarrierBufferedCallerHoldsLock(barrier, isPrimary);
    }

    return insertResult;
}
/// <summary>
/// Initializes a progress vector entry from the epoch, LSN, primary replica id and timestamp
/// carried by an update epoch log record.
/// </summary>
/// <param name="record">Update epoch log record the values are taken from.</param>
internal ProgressVectorEntry(UpdateEpochLogRecord record)
    : this(
        record.Epoch,
        record.Lsn,
        record.PrimaryReplicaId,
        record.Timestamp)
{
}
/// <summary>
/// Reads one log record from the reader's current position.
/// The metadata section starts with its size, followed by the record type and the LSN; any
/// remaining bytes of the section are skipped before the concrete record reads its own content.
/// </summary>
/// <param name="br">Reader positioned at the start of the record's metadata section.</param>
/// <param name="recordPosition">Record position passed to the created record.</param>
/// <param name="isPhysicalRead">Forwarded unchanged to the record's Read method.</param>
/// <returns>The deserialized record; null is returned after a coding error is reported for an unexpected record type.</returns>
private static LogRecord ReadRecord(BinaryReader br, ulong recordPosition, bool isPhysicalRead)
{
    // The real LSN is read after the record is instantiated; start from the invalid value.
    var placeholderLsn = LogicalSequenceNumber.InvalidLsn.LSN;

    // Metadata section.
    var sectionStart = br.BaseStream.Position;
    var sectionEnd = sectionStart + br.ReadInt32();

    // Read Logical Metadata
    var type = (LogRecordType)br.ReadUInt32();

    LogRecord result;
    switch (type)
    {
        case LogRecordType.BeginTransaction:
            result = new BeginTransactionOperationLogRecord(type, recordPosition, placeholderLsn);
            break;

        case LogRecordType.Operation:
            result = new OperationLogRecord(type, recordPosition, placeholderLsn);
            break;

        case LogRecordType.EndTransaction:
            result = new EndTransactionLogRecord(type, recordPosition, placeholderLsn);
            break;

        case LogRecordType.Barrier:
            result = new BarrierLogRecord(type, recordPosition, placeholderLsn);
            break;

        case LogRecordType.UpdateEpoch:
            result = new UpdateEpochLogRecord(type, recordPosition, placeholderLsn);
            break;

        case LogRecordType.Backup:
            result = new BackupLogRecord(type, recordPosition, placeholderLsn);
            break;

        case LogRecordType.BeginCheckpoint:
            result = new BeginCheckpointLogRecord(type, recordPosition, placeholderLsn);
            break;

        case LogRecordType.EndCheckpoint:
            result = new EndCheckpointLogRecord(type, recordPosition, placeholderLsn);
            break;

        case LogRecordType.Indexing:
            result = new IndexingLogRecord(type, recordPosition, placeholderLsn);
            break;

        case LogRecordType.TruncateHead:
            result = new TruncateHeadLogRecord(type, recordPosition, placeholderLsn);
            break;

        case LogRecordType.TruncateTail:
            result = new TruncateTailLogRecord(type, recordPosition, placeholderLsn);
            break;

        case LogRecordType.Information:
            result = new InformationLogRecord(type, recordPosition, placeholderLsn);
            break;

        case LogRecordType.CompleteCheckpoint:
            result = new CompleteCheckpointLogRecord(type, recordPosition, placeholderLsn);
            break;

        default:
            Utility.CodingError("Unexpected record type {0}", type);
            return null;
    }

    result.lsn = new LogicalSequenceNumber(br.ReadInt64());

    // Jump to the end of the section ignoring fields that are not understood.
    Utility.Assert(
        sectionEnd >= br.BaseStream.Position,
        "Could not have read more than section size.");
    br.BaseStream.Position = sectionEnd;

    result.Read(br, isPhysicalRead);
    return result;
}
/// <summary>
/// Copies or Builds Idle Secondary replica from copyStream populated by the Primary.
/// The first operation on the stream is the copy header, whose stage selects one of three paths:
/// nothing to copy (CopyNone), a copy preceded by draining the state stream (CopyState), or a
/// build that first truncates the tail if necessary (any other stage). The last two paths end by
/// draining the copy stream.
/// </summary>
/// <param name="copyStream">The copy stream populated by the primary.</param>
/// <returns>Task that represents the asynchronous operation.</returns>
private async Task CopyOrBuildReplicaAsync(IOperationStream copyStream)
{
    var currentOperation = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
    if (currentOperation == null)
    {
        return;
    }

    Utility.Assert(currentOperation.Data.Count == 1, "operation.Data.Count == 1");

    CopyHeader header = CopyHeader.ReadFromOperationData(currentOperation.Data);
    currentOperation.Acknowledge();

    if (header.Stage == CopyStage.CopyNone)
    {
        // GopalK: The order of the following statements is significant
        Utility.Assert(
            this.roleContextDrainState.DrainingStream == DrainingStream.Invalid,
            "this.recordsProcessor.DrainingStream == DrainingStream.Invalid");

        // Since there is no false progress stage, dispose the recovery stream
        this.recoveryManager.DisposeRecoveryReadStreamIfNeeded();

        currentOperation = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
        Utility.Assert(currentOperation == null, "operation == null");

        var alreadyCurrentMessage = string.Format(
            CultureInfo.InvariantCulture,
            "Idle replica is already current with primary replica: {0}",
            header.PrimaryReplicaId);
        FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, alreadyCurrentMessage);

        return;
    }

    BeginCheckpointLogRecord copiedCheckpointRecord;
    bool renamedCopyLogSuccessfully = false;

    if (header.Stage == CopyStage.CopyState)
    {
        var message = string.Format(
            CultureInfo.InvariantCulture,
            "Idle replica is copying from primary replica: {0}",
            header.PrimaryReplicaId);

        // Since there is no false progress stage, dispose the recovery stream
        this.recoveryManager.DisposeRecoveryReadStreamIfNeeded();

        FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, message);

        currentOperation = await this.DrainStateStreamAsync(copyStream).ConfigureAwait(false);
        if (currentOperation == null)
        {
            FabricEvents.Events.CopyOrBuildReplica(
                this.tracer.Type,
                "Returning null as copy pump has been aborted");
            return;
        }

        CopyMetadata metadata = CopyMetadata.ReadFromOperationData(currentOperation.Data);
        this.ReadConsistentAfterLsn = metadata.HighestStateProviderCopiedLsn;

        message = string.Format(
            CultureInfo.InvariantCulture,
            "Copy started. StartingLSN: {0} StartingEpoch: {1},{2} CheckpointLSN: {3} UptoLSN: {4} Highest Checkpointed Lsn {5}"
            + Environment.NewLine + "Copied ProgressVector: {6}" + Environment.NewLine,
            metadata.StartingLogicalSequenceNumber,
            metadata.StartingEpoch.DataLossNumber,
            metadata.StartingEpoch.ConfigurationNumber,
            metadata.CheckpointLsn,
            metadata.UptoLsn,
            metadata.HighestStateProviderCopiedLsn,
            metadata.ProgressVector.ToString(Constants.ProgressVectorMaxStringSizeInKb));
        FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, message);

        // Reset local state before the copy log is created.
        this.transactionManager.TransactionsMap.Reuse();
        this.checkpointManager.ResetStableLsn(metadata.CheckpointLsn);
        this.copiedUptoLsn = metadata.UptoLsn;

        var copyLogHead = await this.replicatedLogManager.LogManager.CreateCopyLogAsync(
            metadata.StartingEpoch,
            metadata.StartingLogicalSequenceNumber).ConfigureAwait(false);

        this.recoveredOrCopiedCheckpointLsn.Update(metadata.CheckpointLsn);

        this.replicatedLogManager.Reuse(
            metadata.ProgressVector,
            null,
            null,
            null,
            InformationLogRecord.InvalidInformationLogRecord,
            copyLogHead,
            metadata.StartingEpoch,
            metadata.StartingLogicalSequenceNumber);

        // RD: RDBug 7475439: Utility.Assert(sourceEntry == targetEntry, "currentSourceVector == currentTargetVector");
        // UpdateEpoch lsn is same as starting lsn, so insert UE log record
        var lastCopiedEntry = metadata.ProgressVector.LastProgressVectorEntry;
        if (metadata.StartingLogicalSequenceNumber == lastCopiedEntry.Lsn)
        {
            var updateEpochRecord = new UpdateEpochLogRecord(
                lastCopiedEntry.Epoch,
                lastCopiedEntry.PrimaryReplicaId);
            updateEpochRecord.Lsn = metadata.StartingLogicalSequenceNumber;

            FabricEvents.Events.UpdateEpoch(
                this.tracer.Type,
                "UpdateEpochRecordDueToFullCopy",
                updateEpochRecord.Epoch.DataLossNumber,
                updateEpochRecord.Epoch.ConfigurationNumber,
                updateEpochRecord.Lsn.LSN,
                this.roleContextDrainState.ReplicaRole.ToString());

            // NOTE: Do not use the UpdateEpoch method on logmanager as it adds the entry to the list of progress vectors.
            // We do not want to do that here as the entry already exists
            this.replicatedLogManager.AppendWithoutReplication(updateEpochRecord, null);
        }

        copiedCheckpointRecord = null;
        if (this.recoveredOrCopiedCheckpointLsn.Value == metadata.StartingLogicalSequenceNumber)
        {
            this.checkpointManager.FirstBeginCheckpointOnIdleSecondary();
            copiedCheckpointRecord = this.replicatedLogManager.LastInProgressCheckpointRecord;

            Utility.Assert(
                copiedCheckpointRecord.Lsn == this.recoveredOrCopiedCheckpointLsn.Value,
                "copiedCheckpointRecordLsn {0} != recoveredOrCopiedCheckpointLsn {1}",
                copiedCheckpointRecord.Lsn,
                this.recoveredOrCopiedCheckpointLsn.Value);

            // If this is the UptoLsn, ensure rename is done before continuing
            if (this.recoveredOrCopiedCheckpointLsn.Value == this.copiedUptoLsn)
            {
                // This ensures we dont get stuck waiting for stable LSN
                Utility.Assert(
                    this.checkpointManager.LastStableLsn == this.recoveredOrCopiedCheckpointLsn.Value,
                    "checkpointManager.LastStableLsn {0} == this.recoveredOrCopiedCheckpointLsn.Value {1}",
                    this.checkpointManager.LastStableLsn,
                    this.recoveredOrCopiedCheckpointLsn.Value);

                await this.checkpointManager.CompleteFirstCheckpointOnIdleAndRenameLog(
                    copiedCheckpointRecord,
                    this.copiedUptoLsn.LSN).ConfigureAwait(false);
                renamedCopyLogSuccessfully = true;
            }
        }

        currentOperation.Acknowledge();
        FabricEvents.Events.CopyOrBuildReplica(this.tracer.Type, "Acked progress ProgressVectorEntry operation");

        currentOperation = await copyStream.GetOperationAsync(CancellationToken.None).ConfigureAwait(false);
    }
    else
    {
        FabricEvents.Events.CopyOrBuildReplica(
            this.tracer.Type,
            "Idle replica is building from primary replica: " + header.PrimaryReplicaId);

        currentOperation = await this.TruncateTailIfNecessary(copyStream).ConfigureAwait(false);
        copiedCheckpointRecord = BeginCheckpointLogRecord.InvalidBeginCheckpointLogRecord;

        // Since the false progress stage is complete, dispose the recovery stream
        this.recoveryManager.DisposeRecoveryReadStreamIfNeeded();
    }

    await this.DrainCopyStreamAsync(
        copyStream,
        currentOperation,
        copiedCheckpointRecord,
        renamedCopyLogSuccessfully).ConfigureAwait(false);
}