public void Apply(TransactionOperationContext context, long uptoInclusive, Leader leader, ServerStore serverStore) { Debug.Assert(context.Transaction != null); var lastAppliedIndex = _parent.GetLastCommitIndex(context); for (var index = lastAppliedIndex + 1; index <= uptoInclusive; index++) { var cmd = _parent.GetEntry(context, index, out RachisEntryFlags flags); if (cmd == null || flags == RachisEntryFlags.Invalid) { throw new InvalidOperationException("Expected to apply entry " + index + " but it isn't stored"); } if (flags != RachisEntryFlags.StateMachineCommand) { continue; } Apply(context, cmd, index, leader, serverStore); } var term = _parent.GetTermForKnownExisting(context, uptoInclusive); _parent.SetLastCommitIndex(context, uptoInclusive, term); }
public long Apply(ClusterOperationContext context, long uptoInclusive, Leader leader, ServerStore serverStore, Stopwatch duration) { Debug.Assert(context.Transaction != null); var lastAppliedIndex = _parent.GetLastCommitIndex(context); var maxTimeAllowedToWaitForApply = _parent.Timeout.TimeoutPeriod / 4; for (var index = lastAppliedIndex + 1; index <= uptoInclusive; index++) { var cmd = _parent.GetEntry(context, index, out RachisEntryFlags flags); if (cmd == null || flags == RachisEntryFlags.Invalid) { throw new InvalidOperationException("Expected to apply entry " + index + " but it isn't stored"); } lastAppliedIndex = index; if (flags != RachisEntryFlags.StateMachineCommand) { _parent.LogHistory.UpdateHistoryLog(context, index, _parent.CurrentTerm, cmd, null, null); var currentIndex = index; context.Transaction.InnerTransaction.LowLevelTransaction.OnDispose += t => { if (t is LowLevelTransaction llt && llt.Committed) { serverStore.Cluster.NotifyAndSetCompleted(currentIndex); } }; continue; } Apply(context, cmd, index, leader, serverStore); if (duration.ElapsedMilliseconds >= maxTimeAllowedToWaitForApply) { // we don't want to spend so much time applying commands that we will time out the leader // so we time this from the follower perspective and abort after applying a single command // or 25% of the time has already passed break; } } var term = _parent.GetTermForKnownExisting(context, lastAppliedIndex); _parent.SetLastCommitIndex(context, lastAppliedIndex, term); return(lastAppliedIndex); }
private void ReadAndCommitSnapshot(ClusterOperationContext context, InstallSnapshot snapshot, CancellationToken token) { using (context.OpenWriteTransaction()) { var lastTerm = _engine.GetTermFor(context, snapshot.LastIncludedIndex); var lastCommitIndex = _engine.GetLastEntryIndex(context); if (_engine.GetSnapshotRequest(context) == false && snapshot.LastIncludedTerm == lastTerm && snapshot.LastIncludedIndex < lastCommitIndex) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"{ToString()}: Got installed snapshot with last index={snapshot.LastIncludedIndex} while our lastCommitIndex={lastCommitIndex}, will just ignore it"); } //This is okay to ignore because we will just get the committed entries again and skip them ReadInstallSnapshotAndIgnoreContent(token); } else if (InstallSnapshot(context, token)) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"{ToString()}: Installed snapshot with last index={snapshot.LastIncludedIndex} with LastIncludedTerm={snapshot.LastIncludedTerm} "); } _engine.SetLastCommitIndex(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm); _engine.ClearLogEntriesAndSetLastTruncate(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm); } else { var lastEntryIndex = _engine.GetLastEntryIndex(context); if (lastEntryIndex < snapshot.LastIncludedIndex) { var message = $"The snapshot installation had failed because the last included index {snapshot.LastIncludedIndex} in term {snapshot.LastIncludedTerm} doesn't match the last entry {lastEntryIndex}"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: {message}"); } throw new InvalidOperationException(message); } } // snapshot always has the latest topology if (snapshot.Topology == null) { const string message = "Expected to get topology on snapshot"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: {message}"); } throw new InvalidOperationException(message); } using (var topologyJson = context.ReadObject(snapshot.Topology, "topology")) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: topology on install snapshot: {topologyJson}"); } var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(topologyJson); RachisConsensus.SetTopology(_engine, context, topology); } _engine.SetSnapshotRequest(context, false); context.Transaction.InnerTransaction.LowLevelTransaction.OnDispose += t => { if (t is LowLevelTransaction llt && llt.Committed) { // we might have moved from passive node, so we need to start the timeout clock _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout); } }; context.Transaction.Commit(); } }
private void NegotiateWithLeader(TransactionOperationContext context, LogLengthNegotiation negotiation) { _debugRecorder.Start(); // only the leader can send append entries, so if we accepted it, it's the leader if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request for term {negotiation.Term} where our term is {_term}"); } if (negotiation.Term != _term) { // Our leader is no longer a valid one var msg = $"The term was changed after leader validation from {_term} to {negotiation.Term}, so you are no longer a valid leader"; _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Rejected, Message = msg, CurrentTerm = _term, LastLogIndex = negotiation.PrevLogIndex }); throw new InvalidOperationException($"We close this follower because: {msg}"); } long prevTerm; using (context.OpenReadTransaction()) { prevTerm = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0; } if (prevTerm != negotiation.PrevLogTerm) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" + " will negotiate to find next matched index"); } // we now have a mismatch with the log position, and need to negotiate it with // the leader NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation); } else { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request with identical PrevLogTerm will continue to steady state"); } // this (or the negotiation above) completes the negotiation process _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}", CurrentTerm = _term, LastLogIndex = negotiation.PrevLogIndex }); } _debugRecorder.Record("Matching Negotiation is over, waiting for snapshot"); _engine.Timeout.Defer(_connection.Source); // at this point, the leader will send us a snapshot message // in most cases, it is an empty snapshot, then start regular append entries // the reason we send this is to simplify the # of states in the protocol var snapshot = _connection.ReadInstallSnapshot(context); _debugRecorder.Record("Snapshot received"); using (context.OpenWriteTransaction()) { var lastTerm = _engine.GetTermFor(context, snapshot.LastIncludedIndex); var lastCommitIndex = _engine.GetLastEntryIndex(context); if (snapshot.LastIncludedTerm == lastTerm && snapshot.LastIncludedIndex < lastCommitIndex) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"{ToString()}: Got installed snapshot with last index={snapshot.LastIncludedIndex} while our lastCommitIndex={lastCommitIndex}, will just ignore it"); } //This is okay to ignore because we will just get the committed entries again and skip them ReadInstallSnapshotAndIgnoreContent(context); } else if (InstallSnapshot(context)) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"{ToString()}: Installed snapshot with last index={snapshot.LastIncludedIndex} with LastIncludedTerm={snapshot.LastIncludedTerm} "); } _engine.SetLastCommitIndex(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm); _engine.ClearLogEntriesAndSetLastTruncate(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm); } else { var lastEntryIndex = _engine.GetLastEntryIndex(context); if (lastEntryIndex < snapshot.LastIncludedIndex) { var message = $"The snapshot installation had failed because the last included index {snapshot.LastIncludedIndex} in term {snapshot.LastIncludedTerm} doesn't match the last entry {lastEntryIndex}"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: {message}"); } throw new InvalidOperationException(message); } } // snapshot always has the latest topology if (snapshot.Topology == null) { const string message = "Expected to get topology on snapshot"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: {message}"); } throw new InvalidOperationException(message); } using (var topologyJson = context.ReadObject(snapshot.Topology, "topology")) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: topology on install snapshot: {topologyJson}"); } var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(topologyJson); RachisConsensus.SetTopology(_engine, context, topology); } context.Transaction.Commit(); } _engine.Timeout.Defer(_connection.Source); _debugRecorder.Record("Invoking StateMachine.SnapshotInstalled"); // notify the state machine, we do this in an async manner, and start // the operator in a separate thread to avoid timeouts while this is // going on var task = Task.Run(() => _engine.SnapshotInstalledAsync(snapshot.LastIncludedIndex)); var sp = Stopwatch.StartNew(); var timeToWait = (int)(_engine.ElectionTimeout.TotalMilliseconds / 4); while (task.Wait(timeToWait) == false) { // this may take a while, so we let the other side know that // we are still processing, and we reset our own timer while // this is happening MaybeNotifyLeaderThatWeAreStillAlive(context, sp); } _debugRecorder.Record("Done with StateMachine.SnapshotInstalled"); // we might have moved from passive node, so we need to start the timeout clock _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout); _debugRecorder.Record("Snapshot installed"); //Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater //So we could validate that the entries sent by the leader are indeed the same as the ones we have. _connection.Send(context, new InstallSnapshotResponse { Done = true, CurrentTerm = _term, LastLogIndex = snapshot.LastIncludedIndex }); _engine.Timeout.Defer(_connection.Source); }
private void NegotiateWithLeader(TransactionOperationContext context, LogLengthNegotiation negotiation) { // only the leader can send append entries, so if we accepted it, it's the leader if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request for term {negotiation.Term} where our term is {_engine.CurrentTerm}"); } if (negotiation.Term > _engine.CurrentTerm) { _engine.FoundAboutHigherTerm(negotiation.Term); } long prevTerm; using (context.OpenReadTransaction()) { prevTerm = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0; } if (prevTerm != negotiation.PrevLogTerm) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" + " will negotiate to find next matched index"); } // we now have a mismatch with the log position, and need to negotiate it with // the leader NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation); } else { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request with identical PrevLogTerm will continue to steady state"); } // this (or the negotiation above) completes the negotiation process _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}", CurrentTerm = _engine.CurrentTerm, LastLogIndex = negotiation.PrevLogIndex }); } _engine.Timeout.Defer(_connection.Source); // at this point, the leader will send us a snapshot message // in most cases, it is an empty snapshot, then start regular append entries // the reason we send this is to simplify the # of states in the protocol var snapshot = _connection.ReadInstallSnapshot(context); using (context.OpenWriteTransaction()) { var lastCommitIndex = _engine.GetLastCommitIndex(context); if (snapshot.LastIncludedIndex < lastCommitIndex) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"Follower {_engine.Tag}: Got installed snapshot with last index={snapshot.LastIncludedIndex} while our lastCommitIndex={lastCommitIndex}, will just ignore it"); } //This is okay to ignore because we will just get the commited entries again and skip them ReadInstallSnapshotAndIgnoreContent(context); } else if (InstallSnapshot(context)) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"Follower {_engine.Tag}: Installed snapshot with last index={snapshot.LastIncludedIndex} with LastIncludedTerm={snapshot.LastIncludedTerm} "); } _engine.SetLastCommitIndex(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm); _engine.ClearLogEntriesAndSetLastTruncate(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm); } else { var lastEntryIndex = _engine.GetLastEntryIndex(context); if (lastEntryIndex < snapshot.LastIncludedIndex) { var message = $"The snapshot installation had failed because the last included index {snapshot.LastIncludedIndex} in term {snapshot.LastIncludedTerm} doesn't match the last entry {lastEntryIndex}"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: {message}"); } throw new InvalidOperationException(message); } } // snapshot always has the latest topology if (snapshot.Topology == null) { const string message = "Expected to get topology on snapshot"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: {message}"); } throw new InvalidOperationException(message); } using (var topologyJson = context.ReadObject(snapshot.Topology, "topology")) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: topology on install snapshot: {topologyJson}"); } var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(topologyJson); RachisConsensus.SetTopology(_engine, context, topology); } context.Transaction.Commit(); } //Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater //So we could validate that the entries sent by the leader are indeed the same as the ones we have. _connection.Send(context, new InstallSnapshotResponse { Done = true, CurrentTerm = _engine.CurrentTerm, LastLogIndex = snapshot.LastIncludedIndex }); _engine.Timeout.Defer(_connection.Source); // notify the state machine _engine.SnapshotInstalled(context, snapshot.LastIncludedIndex); _engine.Timeout.Defer(_connection.Source); }