/// <summary>
/// Builds an epoch record at the writer's current non-flushed checkpoint and writes it as a
/// JSON-serialized system record. If the writer rejects the first attempt, the record is rebuilt
/// at the position the writer reported and written once more. After a successful write the
/// writer is flushed and <c>SystemMessage.EpochWritten</c> is published on the bus.
/// </summary>
/// <param name="epochNumber">Number of the epoch being written.</param>
/// <param name="epochId">Identifier of the epoch being written.</param>
/// <param name="lastEpochPosition">Log position of the previous epoch.</param>
/// <param name="instanceId">Instance id stored in the epoch record as the leader instance id.</param>
/// <returns>The epoch record that was written (the rebuilt one if a retry was needed).</returns>
/// <exception cref="Exception">Thrown when the second write attempt is rejected as well.</exception>
private EpochRecord WriteEpochRecordWithRetry(int epochNumber, Guid epochId, long lastEpochPosition,
    Guid instanceId) {
    long writePosition = _writer.Checkpoint.ReadNonFlushed();
    var epoch = new EpochRecord(writePosition, epochNumber, epochId, lastEpochPosition, DateTime.UtcNow,
        instanceId);
    var record = new SystemLogRecord(epoch.EpochPosition, epoch.TimeStamp, SystemRecordType.Epoch,
        SystemRecordSerialization.Json, epoch.AsSerialized());

    // Logged before the write, so the position shown is the one of the first attempt.
    Log.Debug(
        "=== Writing E{epochNumber}@{epochPosition}:{epochId:B} (previous epoch at {lastEpochPosition}). L={leaderId:B}.",
        epochNumber, epoch.EpochPosition, epochId, lastEpochPosition, epoch.LeaderInstanceId);

    bool written = _writer.Write(record, out writePosition);
    if (!written) {
        // The writer handed back a new position: rebuild the epoch there and try exactly once more.
        epoch = new EpochRecord(writePosition, epochNumber, epochId, lastEpochPosition, DateTime.UtcNow,
            instanceId);
        record = new SystemLogRecord(epoch.EpochPosition, epoch.TimeStamp, SystemRecordType.Epoch,
            SystemRecordSerialization.Json, epoch.AsSerialized());

        written = _writer.Write(record, out writePosition);
        if (!written) {
            throw new Exception($"Second write try failed at {epoch.EpochPosition}.");
        }
    }

    _writer.Flush();
    _bus.Publish(new SystemMessage.EpochWritten(epoch));
    return epoch;
}
/// <summary>
/// Builds an epoch record at the writer's current non-flushed checkpoint and writes it using the
/// record factory, retrying once at the position reported by the writer if the first write is
/// rejected. Afterwards the partition manager is initialized, the epoch information is written,
/// the writer is flushed, and both <c>WriterCheckpointFlushed</c> and <c>EpochWritten</c> are
/// published on the bus (in that order).
/// </summary>
/// <param name="epochNumber">Number of the epoch being written.</param>
/// <param name="epochId">Identifier of the epoch being written.</param>
/// <param name="lastEpochPosition">Log position of the previous epoch.</param>
/// <param name="instanceId">Instance id stored in the epoch record as the leader instance id.</param>
/// <returns>The epoch record that was written (the rebuilt one if a retry was needed).</returns>
/// <exception cref="Exception">Thrown when the second write attempt is rejected as well.</exception>
private EpochRecord WriteEpochRecordWithRetry(int epochNumber, Guid epochId, long lastEpochPosition,
    Guid instanceId) {
    long writePosition = _writer.Checkpoint.ReadNonFlushed();
    var epoch = new EpochRecord(writePosition, epochNumber, epochId, lastEpochPosition, DateTime.UtcNow,
        instanceId);
    var record = _recordFactory.CreateEpoch(epoch);

    // Logged before the write, so the position shown is the one of the first attempt.
    Log.Debug(
        "=== Writing E{epochNumber}@{epochPosition}:{epochId:B} (previous epoch at {lastEpochPosition}). L={leaderId:B}.",
        epochNumber, epoch.EpochPosition, epochId, lastEpochPosition, epoch.LeaderInstanceId);

    bool written = _writer.Write(record, out writePosition);
    if (!written) {
        // The writer handed back a new position: rebuild the epoch there and try exactly once more.
        epoch = new EpochRecord(writePosition, epochNumber, epochId, lastEpochPosition, DateTime.UtcNow,
            instanceId);
        record = _recordFactory.CreateEpoch(epoch);

        written = _writer.Write(record, out writePosition);
        if (!written) {
            throw new Exception($"Second write try failed at {epoch.EpochPosition}.");
        }
    }

    // Everything below runs only after the epoch record itself has been accepted by the writer.
    _partitionManager.Initialize();
    WriteEpochInformationWithRetry(epoch);
    _writer.Flush();

    _bus.Publish(new ReplicationTrackingMessage.WriterCheckpointFlushed());
    _bus.Publish(new SystemMessage.EpochWritten(epoch));
    return epoch;
}
/// <summary>
/// Creates a copy of this member info stamped with <c>DateTime.UtcNow</c>. Each optional
/// argument that is supplied (non-null) replaces the corresponding current value; omitted
/// arguments keep the current value. Instance id, endpoints and node priority are always copied.
/// </summary>
/// <param name="state">New node state, or null to keep <c>State</c>.</param>
/// <param name="isAlive">New liveness flag, or null to keep <c>IsAlive</c>.</param>
/// <param name="lastCommitPosition">New last commit position, or null to keep the current one.</param>
/// <param name="writerCheckpoint">New writer checkpoint, or null to keep the current one.</param>
/// <param name="chaserCheckpoint">New chaser checkpoint, or null to keep the current one.</param>
/// <param name="epoch">Epoch supplying position, number and id; null keeps the current epoch values.</param>
/// <returns>A new <c>MemberInfo</c> instance; this instance is not modified.</returns>
public MemberInfo Updated(VNodeState? state = null, bool? isAlive = null, long? lastCommitPosition = null,
    long? writerCheckpoint = null, long? chaserCheckpoint = null, EpochRecord epoch = null) {
    // The three epoch-derived values either all come from the supplied epoch or all stay as they are.
    var newEpochPosition = epoch == null ? EpochPosition : epoch.EpochPosition;
    var newEpochNumber = epoch == null ? EpochNumber : epoch.EpochNumber;
    var newEpochId = epoch == null ? EpochId : epoch.EpochId;

    return new MemberInfo(
        InstanceId,
        DateTime.UtcNow,
        state ?? State,
        isAlive ?? IsAlive,
        InternalTcpEndPoint,
        InternalSecureTcpEndPoint,
        ExternalTcpEndPoint,
        ExternalSecureTcpEndPoint,
        InternalHttpEndPoint,
        ExternalHttpEndPoint,
        lastCommitPosition ?? LastCommitPosition,
        writerCheckpoint ?? WriterCheckpoint,
        chaserCheckpoint ?? ChaserCheckpoint,
        newEpochPosition,
        newEpochNumber,
        newEpochId,
        NodePriority);
}
/// <summary>
/// Creates a copy of this member info stamped with the supplied <paramref name="utcNow"/>.
/// Each optional argument that is supplied (non-null) replaces the corresponding current value;
/// omitted arguments keep the current value. Instance id, endpoints, advertise settings and the
/// read-only-replica flag are always copied.
/// </summary>
/// <param name="utcNow">Timestamp to store in the new instance.</param>
/// <param name="state">New node state, or null to keep <c>State</c>.</param>
/// <param name="isAlive">New liveness flag, or null to keep <c>IsAlive</c>.</param>
/// <param name="lastCommitPosition">New last commit position, or null to keep the current one.</param>
/// <param name="writerCheckpoint">New writer checkpoint, or null to keep the current one.</param>
/// <param name="chaserCheckpoint">New chaser checkpoint, or null to keep the current one.</param>
/// <param name="epoch">Epoch supplying position, number and id; null keeps the current epoch values.</param>
/// <param name="nodePriority">New node priority, or null to keep <c>NodePriority</c>.</param>
/// <returns>A new <c>MemberInfo</c> instance; this instance is not modified.</returns>
public MemberInfo Updated(DateTime utcNow, VNodeState? state = null, bool? isAlive = null,
    long? lastCommitPosition = null, long? writerCheckpoint = null, long? chaserCheckpoint = null,
    EpochRecord epoch = null, int? nodePriority = null) {
    // The three epoch-derived values either all come from the supplied epoch or all stay as they are.
    var newEpochPosition = epoch == null ? EpochPosition : epoch.EpochPosition;
    var newEpochNumber = epoch == null ? EpochNumber : epoch.EpochNumber;
    var newEpochId = epoch == null ? EpochId : epoch.EpochId;

    return new MemberInfo(
        InstanceId,
        utcNow,
        state ?? State,
        isAlive ?? IsAlive,
        InternalTcpEndPoint,
        InternalSecureTcpEndPoint,
        ExternalTcpEndPoint,
        ExternalSecureTcpEndPoint,
        HttpEndPoint,
        AdvertiseHostToClientAs,
        AdvertiseHttpPortToClientAs,
        AdvertiseTcpPortToClientAs,
        lastCommitPosition ?? LastCommitPosition,
        writerCheckpoint ?? WriterCheckpoint,
        chaserCheckpoint ?? ChaserCheckpoint,
        newEpochPosition,
        newEpochNumber,
        newEpochId,
        nodePriority ?? NodePriority,
        IsReadOnlyReplica);
}
/// <summary>
/// Handles the notification that the master has subscribed this replica. Resets per-subscription
/// state (active chunk, framer, subscription id/positions) and compares the subscription position
/// with the local writer checkpoint:
/// greater than the checkpoint - reported through <c>ReplicationFail</c>;
/// equal to the checkpoint - nothing more to do;
/// less than the checkpoint - the truncate checkpoint is written and flushed, the writer is
/// blocked and a node shutdown is requested.
/// </summary>
/// <param name="message">The subscription confirmation received from the master.</param>
public void Handle(ReplicationMessage.ReplicaSubscribed message) {
    // Discard any chunk that was still being received.
    if (_activeChunk != null) {
        _activeChunk.MarkForDeletion();
        _activeChunk = null;
    }

    _framer.Reset();

    var subscribedAt = message.SubscriptionPosition;
    _subscriptionId = message.SubscriptionId;
    _subscriptionPos = subscribedAt;
    _ackedSubscriptionPos = subscribedAt;

    Log.Info("=== SUBSCRIBED to [{masterEndPoint},{masterId:B}] at {subscriptionPosition} (0x{subscriptionPosition:X}). SubscriptionId: {subscriptionId:B}.",
        message.MasterEndPoint, message.MasterId, subscribedAt, subscribedAt, message.SubscriptionId);

    var localWriterCheckpoint = Db.Config.WriterCheckpoint.ReadNonFlushed();

    if (subscribedAt > localWriterCheckpoint) {
        // NOTE(review): ReplicationFail presumably throws; if it returns, nothing else happens here.
        ReplicationFail(
            "Master [{0},{1:B}] subscribed us at {2} (0x{3:X}), which is greater than our writer checkpoint {4} (0x{5:X}). REPLICATION BUG.",
            "Master [{masterEndpoint},{masterId:B}] subscribed us at {subscriptionPosition} (0x{subscriptionPosition:X}), which is greater than our writer checkpoint {writerCheckpoint} (0x{writerCheckpoint:X}). REPLICATION BUG.",
            message.MasterEndPoint, message.MasterId, subscribedAt, subscribedAt,
            localWriterCheckpoint, localWriterCheckpoint);
    }

    if (subscribedAt >= localWriterCheckpoint) {
        // subscription position == writer checkpoint
        // everything is ok
        return;
    }

    // From here on: subscription position < writer checkpoint, so local data must be truncated.
    Log.Info("Master [{masterEndPoint},{masterId:B}] subscribed us at {subscriptionPosition} (0x{subscriptionPosition:X}), which is less than our writer checkpoint {writerCheckpoint} (0x{writerCheckpoint:X}). TRUNCATION IS NEEDED.",
        message.MasterEndPoint, message.MasterId, subscribedAt, subscribedAt,
        localWriterCheckpoint, localWriterCheckpoint);

    var lastCommitPosition = _getLastCommitPosition();
    if (subscribedAt <= lastCommitPosition) {
        Log.Info("OFFLINE TRUNCATION IS NEEDED (SubscribedAt {subscriptionPosition} (0x{subscriptionPosition:X}) <= LastCommitPosition {lastCommitPosition} (0x{lastCommitPosition:X})). SHUTTING DOWN NODE.",
            subscribedAt, subscribedAt, lastCommitPosition, lastCommitPosition);
    } else {
        Log.Info("ONLINE TRUNCATION IS NEEDED. NOT IMPLEMENTED. OFFLINE TRUNCATION WILL BE PERFORMED. SHUTTING DOWN NODE.");
    }

    // Warn loudly when the truncation would cut into an epoch that already has committed records.
    EpochRecord lastEpoch = EpochManager.GetLastEpoch();
    if (AreAnyCommittedRecordsTruncatedWithLastEpoch(subscribedAt, lastEpoch, lastCommitPosition)) {
        Log.Error("Master [{masterEndPoint},{masterId:B}] subscribed us at {subscriptionPosition} (0x{subscriptionPosition:X}), which is less than our last epoch and LastCommitPosition {lastCommitPosition} (0x{lastCommitPosition:X}) >= lastEpoch.EpochPosition {lastEpochPosition} (0x{lastEpochPosition:X}). That might be bad, especially if the LastCommitPosition is way beyond EpochPosition.",
            message.MasterEndPoint, message.MasterId, subscribedAt, subscribedAt,
            lastCommitPosition, lastCommitPosition, lastEpoch.EpochPosition, lastEpoch.EpochPosition);
        Log.Error("ATTEMPT TO TRUNCATE EPOCH WITH COMMITTED RECORDS. THIS MAY BE BAD, BUT IT IS OK IF JUST-ELECTED MASTER FAILS IMMEDIATELY AFTER ITS ELECTION.");
    }

    // Persist the truncation point, stop further writes and ask the node to shut down.
    Db.Config.TruncateCheckpoint.Write(subscribedAt);
    Db.Config.TruncateCheckpoint.Flush();

    BlockWriter = true;
    Bus.Publish(new ClientMessage.RequestShutdown(exitProcess: true, shutdownHttp: true));
}
/// <summary>
/// Returns the string form of <c>EpochRecord</c> followed by the string form of <c>Record</c>,
/// each terminated by a line break.
/// </summary>
public override string ToString() {
    return new StringBuilder()
        .AppendLine(EpochRecord.ToString())
        .AppendLine(Record.ToString())
        .ToString();
}
/// <summary>
/// Copying one default-constructed epoch record into another must not leave the target
/// reporting that it has update data.
/// </summary>
public void EpochRecordNoChange() {
    // Arrange: two default-constructed records.
    var source = new EpochRecord();
    var target = new EpochRecord();

    // Act
    target.EpochCopy(ref source);

    // Assert
    var hasUpdateData = target.HasUpdateData();
    hasUpdateData.Should().BeFalse("Epoc record should not have data");
}
// Called right after an epoch has been written, just before its $epoch-information is written.
// Decides which expected version that $epoch-information event should carry:
// it goes to the previous epoch, looks for that epoch's epoch-information record
// (which immediately follows it) and derives the version from it.
// The exception is the first epoch in logv3, which is followed by the root partition
// initialization before the epochinfo, so those records are skipped over.
//
// Returns false (leaving expectedVersion at its default) when there is no previous epoch, when
// the log cannot be read, or when the previous epoch has no epoch-information record.
bool TryGetExpectedVersionForEpochInformation(EpochRecord epoch, out long expectedVersion) {
    expectedVersion = default;

    if (epoch.PrevEpochPosition < 0) {
        return false;
    }

    var reader = _readers.Get();
    try {
        reader.Reposition(epoch.PrevEpochPosition);

        // read the epoch itself
        if (!reader.TryReadNext().Success) {
            return false;
        }

        // read forward looking for the epoch-information (if there is one)
        for (var next = reader.TryReadNext(); next.Success; next = reader.TryReadNext()) {
            var record = next.LogRecord;

            if (record is IPrepareLogRecord<TStreamId> prepare &&
                EqualityComparer<TStreamId>.Default.Equals(prepare.EventStreamId, GetEpochInformationStream())) {
                // found the epoch information
                expectedVersion = prepare.ExpectedVersion + 1;
                return true;
            }

            switch (record.RecordType) {
                case LogRecordType.Prepare:
                case LogRecordType.Commit:
                case LogRecordType.System:
                case LogRecordType.StreamWrite:
                    // definitely not reading the root partition initialization;
                    // there is no epochinfo for this epoch (probably the epoch is older
                    // than the epochinfo mechanism).
                    return false;
            }

            // could be reading the root partition initialization; skip over it.
        }

        // ran out of readable records without finding the epoch information
        return false;
    } catch (Exception) {
        // any failure while reading is treated the same as "not found"
        return false;
    } finally {
        _readers.Return(reader);
    }
}
/// <summary>
/// Writes an epoch with a freshly generated id at the writer's current non-flushed checkpoint,
/// using the log format's record factory, and flushes the writer. The result of the write call
/// is not checked.
/// </summary>
/// <param name="epochNumber">Number of the epoch to write.</param>
/// <param name="lastPos">Position of the previous epoch.</param>
/// <param name="instanceId">Instance id stored in the epoch record.</param>
/// <returns>The epoch record that was constructed and handed to the writer.</returns>
private EpochRecord WriteEpoch(int epochNumber, long lastPos, Guid instanceId) {
    var epoch = new EpochRecord(
        _writer.Checkpoint.ReadNonFlushed(),
        epochNumber,
        Guid.NewGuid(),
        lastPos,
        DateTime.UtcNow,
        instanceId);

    _writer.Write(_logFormat.RecordFactory.CreateEpoch(epoch), out _);
    _writer.Flush();

    return epoch;
}
/// <summary>
/// Writes an epoch with a freshly generated id at the writer's current non-flushed checkpoint as
/// a JSON-serialized system record and flushes the writer. The result of the write call is not
/// checked.
/// </summary>
/// <param name="epochNumber">Number of the epoch to write.</param>
/// <param name="lastPos">Position of the previous epoch.</param>
/// <param name="instanceId">Instance id stored in the epoch record.</param>
/// <returns>The epoch record that was constructed and handed to the writer.</returns>
private EpochRecord WriteEpoch(int epochNumber, long lastPos, Guid instanceId) {
    var epoch = new EpochRecord(
        _writer.Checkpoint.ReadNonFlushed(),
        epochNumber,
        Guid.NewGuid(),
        lastPos,
        DateTime.UtcNow,
        instanceId);

    var systemRecord = new SystemLogRecord(
        epoch.EpochPosition,
        epoch.TimeStamp,
        SystemRecordType.Epoch,
        SystemRecordSerialization.Json,
        epoch.AsSerialized());

    _writer.Write(systemRecord, out _);
    _writer.Flush();

    return epoch;
}
/// <summary>
/// Wraps the given epoch in a <c>SystemLogRecord</c> of type <c>Epoch</c>, positioned and
/// timestamped from the epoch, carrying the epoch's serialized form as JSON data.
/// </summary>
/// <param name="epoch">The epoch to convert into a log record.</param>
/// <returns>The newly created system log record.</returns>
public ISystemLogRecord CreateEpoch(EpochRecord epoch) =>
    new SystemLogRecord(
        logPosition: epoch.EpochPosition,
        timeStamp: epoch.TimeStamp,
        systemRecordType: SystemRecordType.Epoch,
        systemRecordSerialization: SystemRecordSerialization.Json,
        data: epoch.AsSerialized());
/// <summary>
/// Scenario steps: the node becomes master, an epoch with a fresh id is reported as written,
/// and then the system core is reported ready. The generated epoch id is kept in
/// <c>_uniqueId</c>.
/// </summary>
/// <returns>The messages to feed into the system under test, in order.</returns>
protected override IEnumerable<WhenStep> When() {
    yield return new SystemMessage.BecomeMaster(Guid.NewGuid());

    _uniqueId = Guid.NewGuid();
    // Use UTC for the epoch timestamp, like every other epoch construction in this code base;
    // local time would make the record depend on the machine's time zone.
    EpochRecord epochRecord = new EpochRecord(0L, 0, _uniqueId, 0L, DateTime.UtcNow);
    yield return new SystemMessage.EpochWritten(epochRecord);

    yield return new SystemMessage.SystemCoreReady();
}
/// <summary>
/// Writes a single epoch (number 7, fresh id, position 0, no previous epoch, empty leader id)
/// as a JSON system record, asserts the write succeeded, and flushes the writer.
/// </summary>
public override void When() {
    _epochId = Guid.NewGuid();
    _epochNumber = 7;

    var epoch = new EpochRecord(0, _epochNumber, _epochId, -1, DateTime.UtcNow, Guid.Empty);
    var systemRecord = new SystemLogRecord(
        epoch.EpochPosition,
        epoch.TimeStamp,
        SystemRecordType.Epoch,
        SystemRecordSerialization.Json,
        epoch.AsSerialized());

    var written = Writer.Write(systemRecord, out _);
    Assert.True(written);

    Writer.Flush();
}
/// <summary>
/// Builds a <c>LogV3EpochLogRecord</c> from the given epoch, copying its position, timestamp,
/// number, id, previous epoch position and leader instance id.
/// </summary>
/// <param name="epoch">The epoch to convert into a log record.</param>
/// <returns>The newly created epoch log record.</returns>
public ISystemLogRecord CreateEpoch(EpochRecord epoch) =>
    new LogV3EpochLogRecord(
        logPosition: epoch.EpochPosition,
        timeStamp: epoch.TimeStamp,
        epochNumber: epoch.EpochNumber,
        epochId: epoch.EpochId,
        prevEpochPosition: epoch.PrevEpochPosition,
        leaderInstanceId: epoch.LeaderInstanceId);
/// <summary>
/// Adds the epoch to the cache and, when it was actually added (not an idempotent repeat),
/// verifies that the expected epoch is present at its position.
/// </summary>
/// <param name="epoch">The epoch to cache.</param>
/// <exception cref="Exception">
/// Thrown when the newly cached epoch does not pass <c>IsCorrectEpochAt</c>.
/// </exception>
public void CacheEpoch(EpochRecord epoch) {
    var added = AddEpochToCache(epoch);

    // Check each epoch as it is added to the cache for the first time from the chaser.
    // n.b.: added will be false for idempotent CacheRequests
    if (!added) {
        return;
    }

    // If this check fails, then there is something very wrong with epochs, data corruption is possible.
    if (!IsCorrectEpochAt(epoch.EpochPosition, epoch.EpochNumber, epoch.EpochId)) {
        // The message names this method (it previously said "SetLastEpoch", a copy-paste slip)
        // so the failure can be traced to the right call site.
        throw new Exception(
            $"Not found epoch at {epoch.EpochPosition} with epoch number: {epoch.EpochNumber} and epoch ID: {epoch.EpochId}. " +
            "CacheEpoch FAILED! Data corruption risk!");
    }
}
/// <summary>
/// Writes epoch 0, five log records (remembering the position reported by the fifth write in
/// <c>_subscribedPosition</c>), then epoch 1, and finally subscribes the replica using up to the
/// last 10 epochs and the remembered position.
/// </summary>
public override void When() {
    EpochManager.WriteNewEpoch(0);

    // Records 0..3: the position reported by the writer is not needed.
    for (var recordIndex = 0; recordIndex < 4; recordIndex++) {
        Writer.Write(CreateLogRecord(recordIndex), out _);
    }

    // Record 4: the position reported by this write is used as the subscription position.
    Writer.Write(CreateLogRecord(4), out _subscribedPosition);

    EpochManager.WriteNewEpoch(1);
    _lastEpoch = EpochManager.GetLastEpoch();

    var lastEpochs = EpochManager.GetLastEpochs(10);
    var epochs = lastEpochs
        .Select(e => new Epoch(e.EpochPosition, e.EpochNumber, e.EpochId))
        .ToArray();

    AddSubscription(_replicaId, true, epochs, _subscribedPosition, out _replicaManager);
}
/// <summary>
/// Builds an epoch record at the writer's current non-flushed checkpoint and writes it as a
/// JSON-serialized system record, retrying once at the position reported by the writer if the
/// first write is rejected. The writer is not flushed here.
/// </summary>
/// <param name="epochNumber">Number of the epoch being written.</param>
/// <param name="epochId">Identifier of the epoch being written.</param>
/// <param name="lastEpochPosition">Log position of the previous epoch.</param>
/// <returns>The epoch record that was written (the rebuilt one if a retry was needed).</returns>
/// <exception cref="Exception">Thrown when the second write attempt is rejected as well.</exception>
private EpochRecord WriteEpochRecordWithRetry(int epochNumber, Guid epochId, long lastEpochPosition) {
    long writePosition = _writer.Checkpoint.ReadNonFlushed();
    var epoch = new EpochRecord(writePosition, epochNumber, epochId, lastEpochPosition, DateTime.UtcNow);
    var record = new SystemLogRecord(epoch.EpochPosition, epoch.TimeStamp, SystemRecordType.Epoch,
        SystemRecordSerialization.Json, epoch.AsSerialized());

    bool written = _writer.Write(record, out writePosition);
    if (!written) {
        // The writer handed back a new position: rebuild the epoch there and try exactly once more.
        epoch = new EpochRecord(writePosition, epochNumber, epochId, lastEpochPosition, DateTime.UtcNow);
        record = new SystemLogRecord(epoch.EpochPosition, epoch.TimeStamp, SystemRecordType.Epoch,
            SystemRecordSerialization.Json, epoch.AsSerialized());

        written = _writer.Write(record, out writePosition);
        if (!written) {
            throw new Exception(string.Format("Second write try failed at {0}.", epoch.EpochPosition));
        }
    }

    // Logged after the write, so the position shown is the one the record ended up at.
    Log.Debug("=== Writing E{0}@{1}:{2:B} (previous epoch at {3}).",
        epochNumber, epoch.EpochPosition, epochId, lastEpochPosition);

    return epoch;
}
// Writes the epoch-information event for the given epoch as a single committed JSON prepare at
// the writer's current non-flushed checkpoint. The expected version comes from
// TryGetExpectedVersionForEpochInformation, falling back to ExpectedVersion.NoStream.
// If the writer rejects the first attempt, the prepare is copied to the position the writer
// reported and written once more; a second rejection throws.
void WriteEpochInformationWithRetry(EpochRecord epoch) {
    var foundVersion = TryGetExpectedVersionForEpochInformation(epoch, out var expectedVersion);
    if (!foundVersion) {
        expectedVersion = ExpectedVersion.NoStream;
    }

    var originalLogPosition = _writer.Checkpoint.ReadNonFlushed();
    var epochInformation = LogRecord.Prepare(
        factory: _recordFactory,
        logPosition: originalLogPosition,
        correlationId: Guid.NewGuid(),
        eventId: Guid.NewGuid(),
        transactionPos: originalLogPosition,
        transactionOffset: 0,
        eventStreamId: GetEpochInformationStream(),
        expectedVersion: expectedVersion,
        flags: PrepareFlags.SingleWrite | PrepareFlags.IsCommitted | PrepareFlags.IsJson,
        eventType: GetEpochInformationEventType(),
        data: epoch.AsSerialized(),
        metadata: Empty.ByteArray);

    if (!_writer.Write(epochInformation, out var retryLogPosition)) {
        // First attempt rejected: move the record to the reported position and retry once.
        epochInformation = epochInformation.CopyForRetry(retryLogPosition, retryLogPosition);

        if (!_writer.Write(epochInformation, out _)) {
            throw new Exception(
                string.Format("Second write try failed when first writing $epoch-information at {0}, then at {1}.",
                    originalLogPosition,
                    retryLogPosition));
        }
    }
}
/// <summary>
/// Records the given epoch as the last epoch under <c>_locker</c>: stores it in the epoch cache,
/// updates the last epoch number/position, advances the minimum cached epoch number and removes
/// the entry just below it, optionally flushes the writer, and then writes and flushes the epoch
/// checkpoint.
/// </summary>
/// <param name="epoch">The epoch to record as the last one.</param>
/// <param name="flushWriter">Whether to flush the writer before updating the epoch checkpoint.</param>
private void UpdateLastEpoch(EpochRecord epoch, bool flushWriter) {
    lock (_locker) {
        var number = epoch.EpochNumber;
        var position = epoch.EpochPosition;

        _epochs[number] = epoch;
        _lastEpochNumber = number;
        _lastEpochPosition = position;

        // Keep at most CachedEpochCount epochs: slide the lower bound and drop the one below it.
        var lowestToKeep = number - CachedEpochCount + 1;
        _minCachedEpochNumber = Math.Max(_minCachedEpochNumber, lowestToKeep);
        _epochs.Remove(_minCachedEpochNumber - 1);

        if (flushWriter) {
            _writer.Flush();
        }

        // Now update epoch checkpoint, so on restart we don't scan sequentially TF.
        _checkpoint.Write(position);
        _checkpoint.Flush();

        Log.Debug("=== Update Last Epoch E{epochNumber}@{epochPosition}:{epochId:B} (previous epoch at {lastEpochPosition}).",
            number, position, epoch.EpochId, epoch.PrevEpochPosition);
    }
}
/// <summary>
/// Sets the given epoch as the last epoch when its position is beyond the currently known last
/// epoch position. Otherwise verifies that the expected epoch is present at the given position.
/// </summary>
/// <param name="epoch">The epoch to set; must not be null.</param>
/// <exception cref="Exception">
/// Thrown when the epoch is not newer than the last one and does not pass <c>IsCorrectEpochAt</c>.
/// </exception>
public void SetLastEpoch(EpochRecord epoch) {
    Ensure.NotNull(epoch, "epoch");

    lock (_locker) {
        var isNewerThanLast = epoch.EpochPosition > _lastEpochPosition;
        if (isNewerThanLast) {
            UpdateLastEpoch(epoch, flushWriter: false);
            return;
        }
    }

    // Epoch record must have been already written, so we need to make sure it is where we expect it to be.
    if (IsCorrectEpochAt(epoch.EpochPosition, epoch.EpochNumber, epoch.EpochId)) {
        return;
    }

    // If this check fails, then there is something very wrong with epochs, data corruption is possible.
    throw new Exception(string.Format("Not found epoch at {0} with epoch number: {1} and epoch ID: {2}. "
                                      + "SetLastEpoch FAILED! Data corruption risk!",
        epoch.EpochPosition,
        epoch.EpochNumber,
        epoch.EpochId));
}
/// <summary>
/// Not supported by this implementation.
/// </summary>
/// <param name="epoch">Unused.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void SetLastEpoch(EpochRecord epoch)
{
    throw new NotImplementedException();
}
/// <summary>
/// Tells whether truncating to <paramref name="subscriptionPosition"/> would remove the last
/// epoch while commits at or beyond that epoch's position already exist.
/// </summary>
/// <param name="subscriptionPosition">Position the node was subscribed at.</param>
/// <param name="lastEpoch">The last known epoch; may be null.</param>
/// <param name="lastCommitPosition">The last commit position known locally.</param>
/// <returns>
/// True when <paramref name="lastEpoch"/> is not null, the subscription position is at or before
/// the epoch position, and the last commit position is at or after it; otherwise false.
/// </returns>
private bool AreAnyCommittedRecordsTruncatedWithLastEpoch(long subscriptionPosition, EpochRecord lastEpoch,
    long lastCommitPosition) {
    if (lastEpoch == null) {
        return false;
    }

    var epochPosition = lastEpoch.EpochPosition;
    var epochWouldBeTruncated = subscriptionPosition <= epochPosition;
    var commitsReachEpoch = lastCommitPosition >= epochPosition;

    return epochWouldBeTruncated && commitsReachEpoch;
}
/// <summary>
/// Appends the epoch to the <c>_epochs</c> collection while holding a lock on that collection.
/// No validation is performed.
/// </summary>
/// <param name="epoch">The epoch to record.</param>
public void SetLastEpoch(EpochRecord epoch)
{
    lock (_epochs)
    {
        _epochs.Add(epoch);
    }
}
/// <summary>
/// Appends the epoch to the <c>_epochs</c> collection while holding a lock on that collection.
/// No validation or de-duplication is performed.
/// </summary>
/// <param name="epoch">The epoch to cache.</param>
public void CacheEpoch(EpochRecord epoch)
{
    lock (_epochs)
    {
        _epochs.Add(epoch);
    }
}
/// <summary>
/// Idempotently adds epochs to the cache.
/// When the epoch is newer than the last cached one it is appended; if its previous-epoch
/// position points beyond the epoch cached before it, the missing epochs are read back from the
/// log and inserted, the cache is trimmed to <c>_cacheSize</c>, and the epoch checkpoint is
/// written and flushed.
/// </summary>
/// <param name="epoch">the epoch to add</param>
/// <returns>true if the submitted epoch was added to the cache, false if already present
/// (or older than the first cached epoch)</returns>
/// <exception cref="Exception">
/// Thrown when the epoch is neither cached, newer than the last cached epoch, nor older than the
/// first cached epoch.
/// </exception>
public bool AddEpochToCache(EpochRecord epoch) {
    Ensure.NotNull(epoch, "epoch");

    lock (_locker) {
        // if it's already cached, just return false to indicate idempotent add
        if (_epochs.Contains(ep => ep.EpochNumber == epoch.EpochNumber)) {
            return false;
        }

        // new last epoch written or received, this is the normal case
        // if the list is empty Last will be null
        if (_epochs.Last == null || _epochs.Last.Value.EpochNumber < epoch.EpochNumber) {
            _epochs.AddLast(epoch);
            _lastCachedEpoch = _epochs.Last;

            // in some race conditions we might have a gap in the epoch list
            // read the epochs from the TFLog to fill in the gaps
            if (epoch.EpochPosition > 0 &&
                epoch.PrevEpochPosition >= 0 &&
                epoch.PrevEpochPosition > (_epochs.Last?.Previous?.Value?.EpochPosition ?? -1)) {
                var reader = _readers.Get();
                var previous = _epochs.Last;
                var count = 1; // include last

                // Walk backwards with a separate cursor so that `epoch` keeps referring to the
                // epoch that was submitted; the log statement below must describe that epoch,
                // not the oldest one read back while filling the gap.
                var cursor = epoch;
                try {
                    do {
                        cursor = ReadEpochAt(reader, cursor.PrevEpochPosition);
                        previous = _epochs.AddBefore(previous, cursor);
                        count++;
                    } while (
                        cursor.EpochPosition > 0 &&
                        cursor.PrevEpochPosition >= 0 &&
                        count <= _cacheSize &&
                        cursor.PrevEpochPosition > (previous?.Previous?.Value?.EpochPosition ?? -1));
                } finally {
                    _readers.Return(reader);
                }
            }

            // trim the oldest entries so the cache never exceeds its configured size
            while (_epochs.Count > _cacheSize) {
                _epochs.RemoveFirst();
            }

            _firstCachedEpoch = _epochs.First;

            // Now update epoch checkpoint, so on restart we don't scan sequentially TF.
            _checkpoint.Write(_epochs.Last.Value.EpochPosition);
            _checkpoint.Flush();

            Log.Debug(
                "=== Cached new Last Epoch E{epochNumber}@{epochPosition}:{epochId:B} (previous epoch at {lastEpochPosition}) L={leaderId:B}.",
                epoch.EpochNumber, epoch.EpochPosition, epoch.EpochId, epoch.PrevEpochPosition,
                epoch.LeaderInstanceId);
            return true;
        }

        // older than everything we keep: nothing to do
        if (epoch.EpochNumber < _epochs.First.Value.EpochNumber) {
            return false;
        }

        // this should never happen: dump the cache state to the log before failing
        Log.Error("=== Unable to cache Epoch E{epochNumber}@{epochPosition}:{epochId:B} (previous epoch at {lastEpochPosition}) L={leaderId:B}.",
            epoch.EpochNumber, epoch.EpochPosition, epoch.EpochId, epoch.PrevEpochPosition,
            epoch.LeaderInstanceId);

        foreach (var epochRecord in _epochs) {
            Log.Error(
                "====== Epoch E{epochNumber}@{epochPosition}:{epochId:B} (previous epoch at {lastEpochPosition}) L={leaderId:B}.",
                epochRecord.EpochNumber, epochRecord.EpochPosition, epochRecord.EpochId,
                epochRecord.PrevEpochPosition, epochRecord.LeaderInstanceId);
        }

        Log.Error(
            "====== Last Epoch E{epochNumber}@{epochPosition}:{epochId:B} (previous epoch at {lastEpochPosition}) L={leaderId:B}.",
            _lastCachedEpoch.Value.EpochNumber, _lastCachedEpoch.Value.EpochPosition,
            _lastCachedEpoch.Value.EpochId, _lastCachedEpoch.Value.PrevEpochPosition,
            _lastCachedEpoch.Value.LeaderInstanceId);
        Log.Error(
            "====== First Epoch E{epochNumber}@{epochPosition}:{epochId:B} (previous epoch at {lastEpochPosition}) L={leaderId:B}.",
            _firstCachedEpoch.Value.EpochNumber, _firstCachedEpoch.Value.EpochPosition,
            _firstCachedEpoch.Value.EpochId, _firstCachedEpoch.Value.PrevEpochPosition,
            _firstCachedEpoch.Value.LeaderInstanceId);

        throw new Exception($"This should never happen: Unable to find correct position to cache Epoch E{epoch.EpochNumber}@{epoch.EpochPosition}:{epoch.EpochId:B} (previous epoch at {epoch.PrevEpochPosition}) L={epoch.LeaderInstanceId:B}");
    }
}
/// <summary>
/// Creates the message announcing that the given epoch has been written.
/// </summary>
/// <param name="epoch">The written epoch; checked with <c>Ensure.NotNull</c>.</param>
public EpochWritten(EpochRecord epoch)
{
    Ensure.NotNull(epoch, "epoch");

    Epoch = epoch;
}