public static bool CheckIfValidLeader(RachisConsensus engine, RemoteConnection connection, out LogLengthNegotiation negotiation) { negotiation = null; using (engine.ContextPool.AllocateOperationContext(out ClusterOperationContext context)) { var logLength = connection.Read <LogLengthNegotiation>(context); if (logLength.Term < engine.CurrentTerm) { var msg = $"The incoming term {logLength.Term} is smaller than current term {engine.CurrentTerm} and is therefor rejected (From thread: {logLength.SendingThread})"; if (engine.Log.IsInfoEnabled) { engine.Log.Info(msg); } connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Rejected, Message = msg, CurrentTerm = engine.CurrentTerm }); connection.Dispose(); return(false); } if (engine.Log.IsInfoEnabled) { engine.Log.Info($"The incoming term { logLength.Term} is from a valid leader (From thread: {logLength.SendingThread})"); } engine.FoundAboutHigherTerm(logLength.Term, "Setting the term of the new leader"); engine.Timeout.Defer(connection.Source); negotiation = logLength; } return(true); }
private void RequestAllEntries(ClusterOperationContext context, RemoteConnection connection, string message) { connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = message, CurrentTerm = _term, LastLogIndex = 0 }); }
private void SendElectionResult() { using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Send election result message to {_tag} : {_candidate.ElectionResult}"); } Connection.Send(context, new RequestVote { Source = _engine.Tag, Term = _engine.CurrentTerm, ElectionResult = _candidate.ElectionResult }); } }
/// <summary> /// This method may run for a long while, as we are trying to get agreement /// from a majority of the cluster /// </summary> private void Run() { try { while (_candidate.Running && _disposed == false) { try { Stream stream; try { using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) { stream = _engine.ConnectToPeer(_url, _certificate, context).Result; } if (_candidate.Running == false) { break; } } catch (Exception e) { Status = AmbassadorStatus.FailedToConnect; StatusMessage = $"Failed to connect with {_tag}.{Environment.NewLine} " + e.Message; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Failed to connect to remote peer: " + _url, e); } // wait a bit _candidate.WaitForChangeInState(); continue; // we'll retry connecting } Status = AmbassadorStatus.Connected; StatusMessage = $"Connected to {_tag}"; Connection = new RemoteConnection(_tag, _engine.Tag, stream); using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) { ClusterTopology topology; long lastLogIndex; long lastLogTerm; using (context.OpenReadTransaction()) { topology = _engine.GetTopology(context); lastLogIndex = _engine.GetLastEntryIndex(context); lastLogTerm = _engine.GetTermForKnownExisting(context, lastLogIndex); } Debug.Assert(topology.TopologyId != null); Connection.Send(context, new RachisHello { TopologyId = topology.TopologyId, DebugSourceIdentifier = _engine.Tag, DebugDestinationIdentifier = _tag, InitialMessageType = InitialMessageType.RequestVote }); while (_candidate.Running) { RequestVoteResponse rvr; var currentElectionTerm = _candidate.ElectionTerm; var engineCurrentTerm = _engine.CurrentTerm; if (_candidate.IsForcedElection == false || _candidate.RunRealElectionAtTerm != currentElectionTerm) { Connection.Send(context, new RequestVote { Source = _engine.Tag, Term = currentElectionTerm, IsForcedElection = false, IsTrialElection = true, LastLogIndex = lastLogIndex, LastLogTerm = lastLogTerm }); rvr = Connection.Read <RequestVoteResponse>(context); if (rvr.Term > currentElectionTerm) { var message = $"Candidate ambassador {_engine.Tag}: found election term {rvr.Term} that is higher than ours {currentElectionTerm}"; // we need to abort the current elections _engine.SetNewState(RachisState.Follower, null, engineCurrentTerm, message); if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: {message}"); } _engine.FoundAboutHigherTerm(rvr.Term); throw new InvalidOperationException(message); } NotInTopology = rvr.NotInTopology; if (rvr.VoteGranted == false) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Got a negative response from {_tag} reason: {rvr.Message}"); } // we go a negative response here, so we can't proceed // we'll need to wait until the candidate has done something, like // change term or given up _candidate.WaitForChangeInState(); continue; } TrialElectionWonAtTerm = rvr.Term; _candidate.WaitForChangeInState(); } Connection.Send(context, new RequestVote { Source = _engine.Tag, Term = currentElectionTerm, IsForcedElection = _candidate.IsForcedElection, IsTrialElection = false, LastLogIndex = lastLogIndex, LastLogTerm = lastLogTerm }); rvr = Connection.Read <RequestVoteResponse>(context); if (rvr.Term > currentElectionTerm) { var message = $"Candidate ambassador {_engine.Tag}: found election term {rvr.Term} that is higher than ours {currentElectionTerm}"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: {message}"); } // we need to abort the current elections _engine.SetNewState(RachisState.Follower, null, engineCurrentTerm, message); _engine.FoundAboutHigherTerm(rvr.Term); throw new InvalidOperationException(message); } NotInTopology = rvr.NotInTopology; if (rvr.VoteGranted == false) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Got a negative response from {_tag} reason: {rvr.Message}"); } // we go a negative response here, so we can't proceed // we'll need to wait until the candidate has done something, like // change term or given up _candidate.WaitForChangeInState(); continue; } RealElectionWonAtTerm = rvr.Term; _candidate.WaitForChangeInState(); } SendElectionResult(); } } catch (OperationCanceledException) { Status = AmbassadorStatus.Closed; StatusMessage = "Closed"; SendElectionResult(); break; } catch (ObjectDisposedException) { Status = AmbassadorStatus.Closed; StatusMessage = "Closed"; SendElectionResult(); break; } catch (AggregateException ae) when(ae.InnerException is OperationCanceledException || ae.InnerException is ObjectDisposedException) { Status = AmbassadorStatus.Closed; StatusMessage = "Closed"; SendElectionResult(); break; } catch (Exception e) { Status = AmbassadorStatus.FailedToConnect; StatusMessage = $"Failed to get vote from {_tag}.{Environment.NewLine}" + e.Message; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Failed to get vote from remote peer url={_url} tag={_tag}", e); } Connection?.Dispose(); _candidate.WaitForChangeInState(); } } } catch (Exception e) { Status = AmbassadorStatus.FailedToConnect; StatusMessage = $"Failed to talk to {_url}.{Environment.NewLine}" + e; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info("Failed to talk to remote peer: " + _url, e); } } finally { if (_candidate.ElectionResult != ElectionResult.Won) { Connection?.Dispose(); } } }
public void HandleVoteRequest() { try { while (true) { using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) { var rv = _connection.Read <RequestVote>(context); if (_engine.Log.IsInfoEnabled) { var election = rv.IsTrialElection ? "Trial" : "Real"; _engine.Log.Info($"Received ({election}) 'RequestVote' from {rv.Source}: Election is {rv.ElectionResult} in term {rv.Term} while our current term is {_engine.CurrentTerm}, " + $"Forced election is {rv.IsForcedElection}. (Sent from:{rv.SendingThread})"); } //We are getting a request to vote for our known leader if (_engine.LeaderTag == rv.Source) { _engine.LeaderTag = null; //If we are followers we want to drop the connection with the leader right away. //We shouldn't be in any other state since if we are candidate our leaderTag should be null but its safer to verify. if (_engine.CurrentState == RachisState.Follower) { _engine.SetNewState(RachisState.Follower, null, _engine.CurrentTerm, $"We got a vote request from our leader {rv.Source} so we switch to leaderless state."); } } ClusterTopology clusterTopology; long lastIndex; long lastTerm; string whoGotMyVoteIn; long lastVotedTerm; using (context.OpenReadTransaction()) { lastIndex = _engine.GetLastEntryIndex(context); lastTerm = _engine.GetTermForKnownExisting(context, lastIndex); (whoGotMyVoteIn, lastVotedTerm) = _engine.GetWhoGotMyVoteIn(context, rv.Term); clusterTopology = _engine.GetTopology(context); } // this should be only the case when we where once in a cluster, then we were brought down and our data was wiped. if (clusterTopology.TopologyId == null) { _connection.Send(context, new RequestVoteResponse { Term = rv.Term, VoteGranted = true, Message = "I might vote for you, because I'm not part of any cluster." }); continue; } if (clusterTopology.Members.ContainsKey(rv.Source) == false && clusterTopology.Promotables.ContainsKey(rv.Source) == false && clusterTopology.Watchers.ContainsKey(rv.Source) == false) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, // we only report to the node asking for our vote if we are the leader, this gives // the oust node a authorotative confirmation that they were removed from the cluster NotInTopology = _engine.CurrentState == RachisState.Leader, Message = $"Node {rv.Source} is not in my topology, cannot vote for it" }); _connection.Dispose(); return; } var currentTerm = _engine.CurrentTerm; if (rv.Term == currentTerm && rv.ElectionResult == ElectionResult.Won) { _electionWon = true; if (Follower.CheckIfValidLeader(_engine, _connection, out var negotiation)) { var follower = new Follower(_engine, negotiation.Term, _connection); follower.AcceptConnection(negotiation); } return; } if (rv.ElectionResult != ElectionResult.InProgress) { _connection.Dispose(); return; } if (rv.Term <= _engine.CurrentTerm) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = "My term is higher or equals to yours" }); _connection.Dispose(); return; } if (rv.IsForcedElection == false && ( _engine.CurrentState == RachisState.Leader || _engine.CurrentState == RachisState.LeaderElect ) ) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentLeader.Term, VoteGranted = false, Message = "I'm a leader in good standing, coup will be resisted" }); _connection.Dispose(); return; } if (whoGotMyVoteIn != null && whoGotMyVoteIn != rv.Source) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"Already voted in {rv.LastLogTerm}, for {whoGotMyVoteIn}" }); continue; } if (lastVotedTerm > rv.Term) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"Already voted for another node in {lastVotedTerm}" }); continue; } if (lastTerm > rv.LastLogTerm) { // we aren't going to vote for this guy, but we need to check if it is more up to date // in the state of the cluster than we are if (rv.Term > _engine.CurrentTerm + 1) { // trail election is often done on the current term + 1, but if there is any // election on a term that is greater than the current term plus one, we should // consider this an indication that the cluster was able to move past our term // and update the term accordingly using (context.OpenWriteTransaction()) { // double checking things under the transaction lock if (rv.Term > _engine.CurrentTerm + 1) { _engine.CastVoteInTerm(context, rv.Term - 1, null, "Noticed that the term in the cluster grew beyond what I was familiar with, increasing it"); } context.Transaction.Commit(); } } _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"My last log entry is of term {lastTerm} / {lastIndex} while yours is {rv.LastLogTerm}, so I'm more up to date" }); continue; } if (rv.IsTrialElection) { if (_engine.Timeout.ExpiredLastDeferral(_engine.ElectionTimeout.TotalMilliseconds / 2, out string currentLeader) == false && string.IsNullOrEmpty(currentLeader) == false) // if we are leaderless we can't refuse to cast our vote. { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"My leader {currentLeader} is keeping me up to date, so I don't want to vote for you" }); continue; } if (lastTerm == rv.LastLogTerm && lastIndex > rv.LastLogIndex) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"My log {lastIndex} is more up to date than yours {rv.LastLogIndex}" }); continue; } _connection.Send(context, new RequestVoteResponse { Term = rv.Term, VoteGranted = true, Message = "I might vote for you" }); continue; } HandleVoteResult result; using (context.OpenWriteTransaction()) { result = ShouldGrantVote(context, lastIndex, rv, lastTerm); if (result.DeclineVote == false) { _engine.CastVoteInTerm(context, rv.Term, rv.Source, "Casting vote as elector"); context.Transaction.Commit(); } } if (result.DeclineVote) { _connection.Send(context, new RequestVoteResponse { Term = result.VotedTerm, VoteGranted = false, Message = result.DeclineReason }); } else { _connection.Send(context, new RequestVoteResponse { Term = rv.Term, VoteGranted = true, Message = "I've voted for you" }); } } } } catch (OperationCanceledException) { } catch (ObjectDisposedException) { } catch (AggregateException ae) when(ae.InnerException is OperationCanceledException || ae.InnerException is ObjectDisposedException) { } catch (Exception e) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Failed to talk to candidate: {_engine.Tag}", e); } } finally { if (_electionWon == false) { _connection.Dispose(); } } }
private bool NegotiateMatchEntryWithLeaderAndApplyEntries(ClusterOperationContext context, RemoteConnection connection, LogLengthNegotiation negotiation) { long minIndex; long maxIndex; long midpointTerm; long midpointIndex; using (context.OpenReadTransaction()) { minIndex = _engine.GetFirstEntryIndex(context); if (minIndex == 0) // no entries at all { RequestAllEntries(context, connection, "No entries at all here, give me everything from the start"); return(true); // leader will know where to start from here } maxIndex = Math.Min( _engine.GetLastEntryIndex(context), // max negotiation.PrevLogIndex ); midpointIndex = (maxIndex + minIndex) / 2; midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex); } while ((midpointTerm == negotiation.PrevLogTerm && midpointIndex == negotiation.PrevLogIndex) == false) { _engine.Timeout.Defer(_connection.Source); _engine.ValidateTerm(_term); if (midpointIndex == negotiation.PrevLogIndex && midpointTerm != negotiation.PrevLogTerm) { // our appended entries has been diverged, same index with different terms. if (CanHandleLogDivergence(context, negotiation, ref midpointIndex, ref midpointTerm, ref minIndex, ref maxIndex) == false) { RequestAllEntries(context, connection, "all my entries are invalid, will require snapshot."); return(true); } } connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Negotiation, Message = $"Term/Index mismatch from leader, need to figure out at what point the logs match, range: {maxIndex} - {minIndex} | {midpointIndex} in term {midpointTerm}", CurrentTerm = _term, MaxIndex = maxIndex, MinIndex = minIndex, MidpointIndex = midpointIndex, MidpointTerm = midpointTerm }); negotiation = connection.Read <LogLengthNegotiation>(context); _engine.Timeout.Defer(_connection.Source); if (negotiation.Truncated) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a truncated response from the leader will request all entries"); } RequestAllEntries(context, connection, "We have entries that are already truncated at the leader, will ask for full snapshot"); return(true); } using (context.OpenReadTransaction()) { if (_engine.GetTermFor(context, negotiation.PrevLogIndex) == negotiation.PrevLogTerm) { minIndex = Math.Min(midpointIndex + 1, maxIndex); } else { maxIndex = Math.Max(midpointIndex - 1, minIndex); } } midpointIndex = (maxIndex + minIndex) / 2; using (context.OpenReadTransaction()) midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex); } if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: agreed upon last matched index = {midpointIndex} on term = {midpointTerm}"); } connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = $"Found a log index / term match at {midpointIndex} with term {midpointTerm}", CurrentTerm = _term, LastLogIndex = midpointIndex, }); return(false); }
private void FollowerSteadyState() { var entries = new List <RachisEntry>(); long lastCommit = 0, lastTruncate = 0, lastAcknowledgedIndex = 0; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Entering steady state"); } while (true) { entries.Clear(); using (_engine.ContextPool.AllocateOperationContext(out ClusterOperationContext context)) { using (context.OpenReadTransaction()) { if (_engine.RequestSnapshot) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Snapshot was requested, so we close this follower."); } return; } } _debugRecorder.Record("Wait for entries"); var appendEntries = _connection.Read <AppendEntries>(context); var sp = Stopwatch.StartNew(); if (appendEntries.Term != _engine.CurrentTerm) { _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _engine.CurrentTerm, Message = "The current term that I have " + _engine.CurrentTerm + " doesn't match " + appendEntries.Term, Success = false }); if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got invalid term {appendEntries.Term} while the current term is {_engine.CurrentTerm}, aborting connection..."); } return; } ClusterCommandsVersionManager.SetClusterVersion(appendEntries.MinCommandVersion); _debugRecorder.Record("Got entries"); _engine.Timeout.Defer(_connection.Source); if (appendEntries.EntriesCount != 0) { using (var cts = new CancellationTokenSource()) { var task = Concurrent_SendAppendEntriesPendingToLeaderAsync(cts, _term, appendEntries.PrevLogIndex); try { for (int i = 0; i < appendEntries.EntriesCount; i++) { entries.Add(_connection.ReadRachisEntry(context)); } } finally { cts.Cancel(); task.Wait(CancellationToken.None); } _engine.Timeout.Defer(_connection.Source); } if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got non empty append entries request with {entries.Count} entries. Last: ({entries[entries.Count - 1].Index} - {entries[entries.Count - 1].Flags})" #if DEBUG + $"[{string.Join(" ,", entries.Select(x => x.ToString()))}]" #endif ); } } // don't start write transaction for noop if (lastCommit != appendEntries.LeaderCommit || lastTruncate != appendEntries.TruncateLogBefore || entries.Count != 0) { using (var cts = new CancellationTokenSource()) { // applying the leader state may take a while, we need to ping // the server and let us know that we are still there var task = Concurrent_SendAppendEntriesPendingToLeaderAsync(cts, _term, appendEntries.PrevLogIndex); try { bool hasRemovedFromTopology; (hasRemovedFromTopology, lastAcknowledgedIndex, lastTruncate, lastCommit) = ApplyLeaderStateToLocalState(sp, context, entries, appendEntries); if (hasRemovedFromTopology) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info("Was notified that I was removed from the node topology, will be moving to passive mode now."); } _engine.SetNewState(RachisState.Passive, null, appendEntries.Term, "I was kicked out of the cluster and moved to passive mode"); return; } } catch (RachisInvalidOperationException) { // on raft protocol violation propagate the error and close this follower. throw; } catch (ConcurrencyException) { // the term was changed throw; } catch (Exception e) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Failed to apply leader state to local state with {entries.Count:#,#;;0} entries with leader commit: {appendEntries.LeaderCommit}, term: {appendEntries.Term}. Prev log index: {appendEntries.PrevLogIndex}", e); } } finally { // here we need to wait until the concurrent send pending to leader // is completed to avoid concurrent writes to the leader cts.Cancel(); task.Wait(CancellationToken.None); } } } if (appendEntries.ForceElections) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a request to become candidate from the leader."); } _engine.SwitchToCandidateState("Was asked to do so by my leader", forced: true); return; } _debugRecorder.Record("Processing entries is completed"); _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _term, LastLogIndex = lastAcknowledgedIndex, Success = true }); if (sp.Elapsed > _engine.ElectionTimeout / 2) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Took a long time to complete the cycle with {entries.Count} entries: {sp.Elapsed}"); } } _engine.Timeout.Defer(_connection.Source); _engine.ReportLeaderTime(appendEntries.TimeAsLeader); _debugRecorder.Record("Cycle done"); _debugRecorder.Start(); } } }
public void HandleVoteRequest() { try { while (true) { using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) { var rv = _connection.Read <RequestVote>(context); ClusterTopology clusterTopology; long lastIndex; long lastTerm; string whoGotMyVoteIn; using (context.OpenReadTransaction()) { lastIndex = _engine.GetLastEntryIndex(context); lastTerm = _engine.GetTermForKnownExisting(context, lastIndex); whoGotMyVoteIn = _engine.GetWhoGotMyVoteIn(context, rv.Term); clusterTopology = _engine.GetTopology(context); } if (clusterTopology.Members.ContainsKey(rv.Source) == false && clusterTopology.Promotables.ContainsKey(rv.Source) == false && clusterTopology.Watchers.ContainsKey(rv.Source) == false) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, // we only report to the node asking for our vote if we are the leader, this gives // the oust node a authorotative confirmation that they were removed from the cluster NotInTopology = _engine.CurrentState == RachisConsensus.State.Leader, Message = $"Node {rv.Source} is not in my topology, cannot vote for it" }); _connection.Dispose(); return; } if (rv.Term <= _engine.CurrentTerm) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = "My term is higher or equals to yours" }); _connection.Dispose(); return; } if (rv.IsForcedElection == false && ( _engine.CurrentState == RachisConsensus.State.Leader || _engine.CurrentState == RachisConsensus.State.LeaderElect ) ) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = "I'm a leader in good standing, coup will be resisted" }); _connection.Dispose(); return; } if (whoGotMyVoteIn != null && whoGotMyVoteIn != rv.Source) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"Already voted in {rv.LastLogTerm}, for {whoGotMyVoteIn}" }); _connection.Dispose(); return; } if (lastTerm > rv.LastLogTerm) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"My last log entry is of term {lastTerm} / {lastIndex} while yours is {rv.LastLogTerm}, so I'm more up to date" }); _connection.Dispose(); return; } if (lastIndex > rv.LastLogIndex) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"My last log entry is of term {lastTerm} / {lastIndex} while yours is {rv.LastLogTerm} / {rv.LastLogIndex}, so I'm more up to date" }); _connection.Dispose(); return; } if (rv.IsTrialElection) { if (_engine.Timeout.ExpiredLastDeferral(_engine.ElectionTimeout.TotalMilliseconds / 2, out string currentLeader) == false) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"My leader {currentLeader} is keeping me up to date, so I don't want to vote for you" }); _connection.Dispose(); return; } _connection.Send(context, new RequestVoteResponse { Term = rv.Term, VoteGranted = true, Message = "I might vote for you" }); if (_thread == null) // let's wait for this in another thread { _thread = new Thread(HandleVoteRequest) { Name = $"Elector thread for {rv.Source} > {_engine.Tag}", IsBackground = true }; _thread.Start(); return; } continue; } bool alreadyVoted = false; using (context.OpenWriteTransaction()) { whoGotMyVoteIn = _engine.GetWhoGotMyVoteIn(context, rv.Term); if (whoGotMyVoteIn != null && whoGotMyVoteIn != rv.Source) { alreadyVoted = true; } else { _engine.CastVoteInTerm(context, rv.Term, rv.Source); } context.Transaction.Commit(); } if (alreadyVoted) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"Already voted in {rv.LastLogTerm}, for {whoGotMyVoteIn}" }); } else { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = true, Message = "I've voted for you" }); } _connection.Dispose(); } } } catch (OperationCanceledException) { } catch (ObjectDisposedException) { } catch (AggregateException ae) when(ae.InnerException is OperationCanceledException || ae.InnerException is ObjectDisposedException) { } catch (Exception e) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info("Failed to talk to candidate: " + _engine.Tag, e); } } }
private void NegotiateMatchEntryWithLeaderAndApplyEntries(TransactionOperationContext context, RemoteConnection connection, LogLengthNegotiation negotiation) { long minIndex; long maxIndex; long midpointTerm; long midpointIndex; using (context.OpenReadTransaction()) { minIndex = _engine.GetFirstEntryIndex(context); if (minIndex == 0) // no entries at all { connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = "No entries at all here, give me everything from the start", CurrentTerm = _term, LastLogIndex = 0 }); return; // leader will know where to start from here } maxIndex = Math.Min( _engine.GetLastEntryIndex(context), // max negotiation.PrevLogIndex ); midpointIndex = (maxIndex + minIndex) / 2; midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex); } while (minIndex < maxIndex) { _engine.Timeout.Defer(_connection.Source); _engine.ValidateTerm(_term); connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Negotiation, Message = $"Term/Index mismatch from leader, need to figure out at what point the logs match, range: {maxIndex} - {minIndex} | {midpointIndex} in term {midpointTerm}", CurrentTerm = _term, MaxIndex = maxIndex, MinIndex = minIndex, MidpointIndex = midpointIndex, MidpointTerm = midpointTerm }); var response = connection.Read <LogLengthNegotiation>(context); _engine.Timeout.Defer(_connection.Source); if (response.Truncated) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a truncated response from the leader will request all entries"); } connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = "We have entries that are already truncated at the leader, will ask for full snapshot", CurrentTerm = _term, LastLogIndex = 0 }); return; } using (context.OpenReadTransaction()) { if (_engine.GetTermFor(context, response.PrevLogIndex) == response.PrevLogTerm) { minIndex = Math.Min(midpointIndex + 1, maxIndex); } else { maxIndex = Math.Max(midpointIndex - 1, minIndex); } } midpointIndex = (maxIndex + minIndex) / 2; using (context.OpenReadTransaction()) midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex); } if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: agreed upon last matched index = {midpointIndex} on term = {_term}"); } connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = $"Found a log index / term match at {midpointIndex} with term {midpointTerm}", CurrentTerm = _term, LastLogIndex = midpointIndex }); }
private void FollowerSteadyState() { var entries = new List <RachisEntry>(); long lastCommit = 0, lastTruncate = 0; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Entering steady state"); } while (true) { entries.Clear(); using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) { var appendEntries = _connection.Read <AppendEntries>(context); if (appendEntries.Term != _engine.CurrentTerm) { _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _engine.CurrentTerm, Message = "The current term that I have " + _engine.CurrentTerm + " doesn't match " + appendEntries.Term, Success = false }); if (_engine.Log.IsInfoEnabled && entries.Count > 0) { _engine.Log.Info($"Follower {_engine.Tag}: Got invalid term {appendEntries.Term} while the current term is {_engine.CurrentTerm}, aborting connection..."); } return; } _engine.Timeout.Defer(_connection.Source); var sp = Stopwatch.StartNew(); if (appendEntries.EntriesCount != 0) { for (int i = 0; i < appendEntries.EntriesCount; i++) { entries.Add(_connection.ReadRachisEntry(context)); _engine.Timeout.Defer(_connection.Source); } if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got non empty append entries request with {entries.Count} entries. Last: ({entries[entries.Count - 1].Index} - {entries[entries.Count - 1].Flags})" #if DEBUG + $"[{string.Join(" ,", entries.Select(x => x.ToString()))}]" #endif ); } } var lastLogIndex = appendEntries.PrevLogIndex; // don't start write transaction fro noop if (lastCommit != appendEntries.LeaderCommit || lastTruncate != appendEntries.TruncateLogBefore || entries.Count != 0) { bool removedFromTopology = false; // we start the tx after we finished reading from the network if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Ready to start tx in {sp.Elapsed}"); } using (var tx = context.OpenWriteTransaction()) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Tx running in {sp.Elapsed}"); } if (entries.Count > 0) { using (var lastTopology = _engine.AppendToLog(context, entries)) { if (lastTopology != null) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Topology changed to {lastTopology}"); } var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(lastTopology); if (topology.Members.ContainsKey(_engine.Tag) || topology.Promotables.ContainsKey(_engine.Tag) || topology.Watchers.ContainsKey(_engine.Tag)) { RachisConsensus.SetTopology(_engine, context, topology); } else { removedFromTopology = true; } } } } lastLogIndex = _engine.GetLastEntryIndex(context); var lastEntryIndexToCommit = Math.Min( lastLogIndex, appendEntries.LeaderCommit); var lastAppliedIndex = _engine.GetLastCommitIndex(context); if (lastEntryIndexToCommit > lastAppliedIndex) { _engine.Apply(context, lastEntryIndexToCommit, null); } lastTruncate = Math.Min(appendEntries.TruncateLogBefore, lastEntryIndexToCommit); _engine.TruncateLogBefore(context, lastTruncate); lastCommit = lastEntryIndexToCommit; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Ready to commit in {sp.Elapsed}"); } tx.Commit(); } if (_engine.Log.IsInfoEnabled && entries.Count > 0) { _engine.Log.Info($"Follower {_engine.Tag}: Processing entries request with {entries.Count} entries took {sp.Elapsed}"); } if (removedFromTopology) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info("Was notified that I was removed from the node topoloyg, will be moving to passive mode now."); } _engine.SetNewState(RachisState.Passive, null, appendEntries.Term, "I was kicked out of the cluster and moved to passive mode"); return; } } if (appendEntries.ForceElections) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got a request to become candidate from the leader."); } _engine.SwitchToCandidateState("Was asked to do so by my leader", forced: true); return; } _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _engine.CurrentTerm, LastLogIndex = lastLogIndex, Success = true }); _engine.Timeout.Defer(_connection.Source); _engine.ReportLeaderTime(appendEntries.TimeAsLeader); } } }