private void MoveCandidateToPassive(string message) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: {message}"); } _engine.SetNewState(RachisState.Passive, this, _engine.CurrentTerm, message); }
private void OnVoterConfirmation() { TransactionOperationContext context; if (_hasNewTopology.Lower()) { ClusterTopology clusterTopology; using (_engine.ContextPool.AllocateOperationContext(out context)) using (context.OpenReadTransaction()) { clusterTopology = _engine.GetTopology(context); } if (clusterTopology.Contains(_engine.LeaderTag) == false) { TaskExecutor.CompleteAndReplace(ref _newEntriesArrived); _engine.SetNewState(RachisState.Passive, this, Term, "I was kicked out of the cluster and moved to passive mode"); return; } RefreshAmbassadors(clusterTopology); } var maxIndexOnQuorum = GetMaxIndexOnQuorum(VotersMajority); if (_lastCommit >= maxIndexOnQuorum || maxIndexOnQuorum == 0) { return; // nothing to do here } bool changedFromLeaderElectToLeader; using (_engine.ContextPool.AllocateOperationContext(out context)) using (context.OpenWriteTransaction()) { _lastCommit = _engine.GetLastCommitIndex(context); if (_lastCommit >= maxIndexOnQuorum || maxIndexOnQuorum == 0) { return; // nothing to do here } if (_engine.GetTermForKnownExisting(context, maxIndexOnQuorum) < Term) { return;// can't commit until at least one entry from our term has been published } changedFromLeaderElectToLeader = _engine.TakeOffice(); maxIndexOnQuorum = _engine.Apply(context, maxIndexOnQuorum, this, Stopwatch.StartNew()); context.Transaction.Commit(); _lastCommit = maxIndexOnQuorum; } foreach (var kvp in _entries) { if (kvp.Key > _lastCommit) { continue; } if (_entries.TryRemove(kvp.Key, out CommandState value)) { TaskExecutor.Execute(o => { var tuple = (CommandState)o; if (tuple.OnNotify != null) { tuple.OnNotify(tuple.TaskCompletionSource); return; } tuple.TaskCompletionSource.TrySetResult((tuple.CommandIndex, tuple.Result)); }, value); } } if (_entries.Count != 0) { // we have still items to process, run them in 1 node cluster // and speed up the followers ambassadors if they can _newEntry.Set(); } if (changedFromLeaderElectToLeader) { _engine.LeaderElectToLeaderChanged(); } }
/// <summary> /// This method may run for a long while, as we are trying to get agreement /// from a majority of the cluster /// </summary> private void Run() { try { while (_candidate.Running && _disposed == false) { try { Stream stream; try { using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) { stream = _engine.ConnectToPeer(_url, _certificate, context).Result; } if (_candidate.Running == false) { break; } } catch (Exception e) { Status = AmbassadorStatus.FailedToConnect; StatusMessage = $"Failed to connect with {_tag}.{Environment.NewLine} " + e.Message; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Failed to connect to remote peer: " + _url, e); } // wait a bit _candidate.WaitForChangeInState(); continue; // we'll retry connecting } Status = AmbassadorStatus.Connected; StatusMessage = $"Connected to {_tag}"; Connection = new RemoteConnection(_tag, _engine.Tag, stream); using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) { ClusterTopology topology; long lastLogIndex; long lastLogTerm; using (context.OpenReadTransaction()) { topology = _engine.GetTopology(context); lastLogIndex = _engine.GetLastEntryIndex(context); lastLogTerm = _engine.GetTermForKnownExisting(context, lastLogIndex); } Debug.Assert(topology.TopologyId != null); Connection.Send(context, new RachisHello { TopologyId = topology.TopologyId, DebugSourceIdentifier = _engine.Tag, DebugDestinationIdentifier = _tag, InitialMessageType = InitialMessageType.RequestVote }); while (_candidate.Running) { RequestVoteResponse rvr; var currentElectionTerm = _candidate.ElectionTerm; var engineCurrentTerm = _engine.CurrentTerm; if (_candidate.IsForcedElection == false || _candidate.RunRealElectionAtTerm != currentElectionTerm) { Connection.Send(context, new RequestVote { Source = _engine.Tag, Term = currentElectionTerm, IsForcedElection = false, IsTrialElection = true, LastLogIndex = lastLogIndex, LastLogTerm = lastLogTerm }); rvr = Connection.Read <RequestVoteResponse>(context); if (rvr.Term > currentElectionTerm) { var message = $"Candidate ambassador {_engine.Tag}: found election term {rvr.Term} that is higher than ours {currentElectionTerm}"; // we need to abort the current elections _engine.SetNewState(RachisState.Follower, null, engineCurrentTerm, message); if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: {message}"); } _engine.FoundAboutHigherTerm(rvr.Term); throw new InvalidOperationException(message); } NotInTopology = rvr.NotInTopology; if (rvr.VoteGranted == false) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Got a negative response from {_tag} reason: {rvr.Message}"); } // we go a negative response here, so we can't proceed // we'll need to wait until the candidate has done something, like // change term or given up _candidate.WaitForChangeInState(); continue; } TrialElectionWonAtTerm = rvr.Term; _candidate.WaitForChangeInState(); } Connection.Send(context, new RequestVote { Source = _engine.Tag, Term = currentElectionTerm, IsForcedElection = _candidate.IsForcedElection, IsTrialElection = false, LastLogIndex = lastLogIndex, LastLogTerm = lastLogTerm }); rvr = Connection.Read <RequestVoteResponse>(context); if (rvr.Term > currentElectionTerm) { var message = $"Candidate ambassador {_engine.Tag}: found election term {rvr.Term} that is higher than ours {currentElectionTerm}"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: {message}"); } // we need to abort the current elections _engine.SetNewState(RachisState.Follower, null, engineCurrentTerm, message); _engine.FoundAboutHigherTerm(rvr.Term); throw new InvalidOperationException(message); } NotInTopology = rvr.NotInTopology; if (rvr.VoteGranted == false) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Got a negative response from {_tag} reason: {rvr.Message}"); } // we go a negative response here, so we can't proceed // we'll need to wait until the candidate has done something, like // change term or given up _candidate.WaitForChangeInState(); continue; } RealElectionWonAtTerm = rvr.Term; _candidate.WaitForChangeInState(); } SendElectionResult(); } } catch (OperationCanceledException) { Status = AmbassadorStatus.Closed; StatusMessage = "Closed"; SendElectionResult(); break; } catch (ObjectDisposedException) { Status = AmbassadorStatus.Closed; StatusMessage = "Closed"; SendElectionResult(); break; } catch (AggregateException ae) when(ae.InnerException is OperationCanceledException || ae.InnerException is ObjectDisposedException) { Status = AmbassadorStatus.Closed; StatusMessage = "Closed"; SendElectionResult(); break; } catch (Exception e) { Status = AmbassadorStatus.FailedToConnect; StatusMessage = $"Failed to get vote from {_tag}.{Environment.NewLine}" + e.Message; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Failed to get vote from remote peer url={_url} tag={_tag}", e); } Connection?.Dispose(); _candidate.WaitForChangeInState(); } } } catch (Exception e) { Status = AmbassadorStatus.FailedToConnect; StatusMessage = $"Failed to talk to {_url}.{Environment.NewLine}" + e; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info("Failed to talk to remote peer: " + _url, e); } } finally { if (_candidate.ElectionResult != ElectionResult.Won) { Connection?.Dispose(); } } }
public void HandleVoteRequest() { try { while (true) { using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) { var rv = _connection.Read <RequestVote>(context); if (_engine.Log.IsInfoEnabled) { var election = rv.IsTrialElection ? "Trial" : "Real"; _engine.Log.Info($"Received ({election}) 'RequestVote' from {rv.Source}: Election is {rv.ElectionResult} in term {rv.Term} while our current term is {_engine.CurrentTerm}, " + $"Forced election is {rv.IsForcedElection}. (Sent from:{rv.SendingThread})"); } //We are getting a request to vote for our known leader if (_engine.LeaderTag == rv.Source) { _engine.LeaderTag = null; //If we are followers we want to drop the connection with the leader right away. //We shouldn't be in any other state since if we are candidate our leaderTag should be null but its safer to verify. if (_engine.CurrentState == RachisState.Follower) { _engine.SetNewState(RachisState.Follower, null, _engine.CurrentTerm, $"We got a vote request from our leader {rv.Source} so we switch to leaderless state."); } } ClusterTopology clusterTopology; long lastIndex; long lastTerm; string whoGotMyVoteIn; long lastVotedTerm; using (context.OpenReadTransaction()) { lastIndex = _engine.GetLastEntryIndex(context); lastTerm = _engine.GetTermForKnownExisting(context, lastIndex); (whoGotMyVoteIn, lastVotedTerm) = _engine.GetWhoGotMyVoteIn(context, rv.Term); clusterTopology = _engine.GetTopology(context); } // this should be only the case when we where once in a cluster, then we were brought down and our data was wiped. if (clusterTopology.TopologyId == null) { _connection.Send(context, new RequestVoteResponse { Term = rv.Term, VoteGranted = true, Message = "I might vote for you, because I'm not part of any cluster." }); continue; } if (clusterTopology.Members.ContainsKey(rv.Source) == false && clusterTopology.Promotables.ContainsKey(rv.Source) == false && clusterTopology.Watchers.ContainsKey(rv.Source) == false) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, // we only report to the node asking for our vote if we are the leader, this gives // the oust node a authorotative confirmation that they were removed from the cluster NotInTopology = _engine.CurrentState == RachisState.Leader, Message = $"Node {rv.Source} is not in my topology, cannot vote for it" }); _connection.Dispose(); return; } var currentTerm = _engine.CurrentTerm; if (rv.Term == currentTerm && rv.ElectionResult == ElectionResult.Won) { _electionWon = true; if (Follower.CheckIfValidLeader(_engine, _connection, out var negotiation)) { var follower = new Follower(_engine, negotiation.Term, _connection); follower.AcceptConnection(negotiation); } return; } if (rv.ElectionResult != ElectionResult.InProgress) { _connection.Dispose(); return; } if (rv.Term <= _engine.CurrentTerm) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = "My term is higher or equals to yours" }); _connection.Dispose(); return; } if (rv.IsForcedElection == false && ( _engine.CurrentState == RachisState.Leader || _engine.CurrentState == RachisState.LeaderElect ) ) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentLeader.Term, VoteGranted = false, Message = "I'm a leader in good standing, coup will be resisted" }); _connection.Dispose(); return; } if (whoGotMyVoteIn != null && whoGotMyVoteIn != rv.Source) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"Already voted in {rv.LastLogTerm}, for {whoGotMyVoteIn}" }); continue; } if (lastVotedTerm > rv.Term) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"Already voted for another node in {lastVotedTerm}" }); continue; } if (lastTerm > rv.LastLogTerm) { // we aren't going to vote for this guy, but we need to check if it is more up to date // in the state of the cluster than we are if (rv.Term > _engine.CurrentTerm + 1) { // trail election is often done on the current term + 1, but if there is any // election on a term that is greater than the current term plus one, we should // consider this an indication that the cluster was able to move past our term // and update the term accordingly using (context.OpenWriteTransaction()) { // double checking things under the transaction lock if (rv.Term > _engine.CurrentTerm + 1) { _engine.CastVoteInTerm(context, rv.Term - 1, null, "Noticed that the term in the cluster grew beyond what I was familiar with, increasing it"); } context.Transaction.Commit(); } } _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"My last log entry is of term {lastTerm} / {lastIndex} while yours is {rv.LastLogTerm}, so I'm more up to date" }); continue; } if (rv.IsTrialElection) { if (_engine.Timeout.ExpiredLastDeferral(_engine.ElectionTimeout.TotalMilliseconds / 2, out string currentLeader) == false && string.IsNullOrEmpty(currentLeader) == false) // if we are leaderless we can't refuse to cast our vote. { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"My leader {currentLeader} is keeping me up to date, so I don't want to vote for you" }); continue; } if (lastTerm == rv.LastLogTerm && lastIndex > rv.LastLogIndex) { _connection.Send(context, new RequestVoteResponse { Term = _engine.CurrentTerm, VoteGranted = false, Message = $"My log {lastIndex} is more up to date than yours {rv.LastLogIndex}" }); continue; } _connection.Send(context, new RequestVoteResponse { Term = rv.Term, VoteGranted = true, Message = "I might vote for you" }); continue; } HandleVoteResult result; using (context.OpenWriteTransaction()) { result = ShouldGrantVote(context, lastIndex, rv, lastTerm); if (result.DeclineVote == false) { _engine.CastVoteInTerm(context, rv.Term, rv.Source, "Casting vote as elector"); context.Transaction.Commit(); } } if (result.DeclineVote) { _connection.Send(context, new RequestVoteResponse { Term = result.VotedTerm, VoteGranted = false, Message = result.DeclineReason }); } else { _connection.Send(context, new RequestVoteResponse { Term = rv.Term, VoteGranted = true, Message = "I've voted for you" }); } } } } catch (OperationCanceledException) { } catch (ObjectDisposedException) { } catch (AggregateException ae) when(ae.InnerException is OperationCanceledException || ae.InnerException is ObjectDisposedException) { } catch (Exception e) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Failed to talk to candidate: {_engine.Tag}", e); } } finally { if (_electionWon == false) { _connection.Dispose(); } } }
private void FollowerSteadyState() { var entries = new List <RachisEntry>(); long lastCommit = 0, lastTruncate = 0, lastAcknowledgedIndex = 0; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Entering steady state"); } while (true) { entries.Clear(); using (_engine.ContextPool.AllocateOperationContext(out ClusterOperationContext context)) { using (context.OpenReadTransaction()) { if (_engine.RequestSnapshot) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Snapshot was requested, so we close this follower."); } return; } } _debugRecorder.Record("Wait for entries"); var appendEntries = _connection.Read <AppendEntries>(context); var sp = Stopwatch.StartNew(); if (appendEntries.Term != _engine.CurrentTerm) { _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _engine.CurrentTerm, Message = "The current term that I have " + _engine.CurrentTerm + " doesn't match " + appendEntries.Term, Success = false }); if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got invalid term {appendEntries.Term} while the current term is {_engine.CurrentTerm}, aborting connection..."); } return; } ClusterCommandsVersionManager.SetClusterVersion(appendEntries.MinCommandVersion); _debugRecorder.Record("Got entries"); _engine.Timeout.Defer(_connection.Source); if (appendEntries.EntriesCount != 0) { using (var cts = new CancellationTokenSource()) { var task = Concurrent_SendAppendEntriesPendingToLeaderAsync(cts, _term, appendEntries.PrevLogIndex); try { for (int i = 0; i < appendEntries.EntriesCount; i++) { entries.Add(_connection.ReadRachisEntry(context)); } } finally { cts.Cancel(); task.Wait(CancellationToken.None); } _engine.Timeout.Defer(_connection.Source); } if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got non empty append entries request with {entries.Count} entries. Last: ({entries[entries.Count - 1].Index} - {entries[entries.Count - 1].Flags})" #if DEBUG + $"[{string.Join(" ,", entries.Select(x => x.ToString()))}]" #endif ); } } // don't start write transaction for noop if (lastCommit != appendEntries.LeaderCommit || lastTruncate != appendEntries.TruncateLogBefore || entries.Count != 0) { using (var cts = new CancellationTokenSource()) { // applying the leader state may take a while, we need to ping // the server and let us know that we are still there var task = Concurrent_SendAppendEntriesPendingToLeaderAsync(cts, _term, appendEntries.PrevLogIndex); try { bool hasRemovedFromTopology; (hasRemovedFromTopology, lastAcknowledgedIndex, lastTruncate, lastCommit) = ApplyLeaderStateToLocalState(sp, context, entries, appendEntries); if (hasRemovedFromTopology) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info("Was notified that I was removed from the node topology, will be moving to passive mode now."); } _engine.SetNewState(RachisState.Passive, null, appendEntries.Term, "I was kicked out of the cluster and moved to passive mode"); return; } } catch (RachisInvalidOperationException) { // on raft protocol violation propagate the error and close this follower. throw; } catch (ConcurrencyException) { // the term was changed throw; } catch (Exception e) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Failed to apply leader state to local state with {entries.Count:#,#;;0} entries with leader commit: {appendEntries.LeaderCommit}, term: {appendEntries.Term}. Prev log index: {appendEntries.PrevLogIndex}", e); } } finally { // here we need to wait until the concurrent send pending to leader // is completed to avoid concurrent writes to the leader cts.Cancel(); task.Wait(CancellationToken.None); } } } if (appendEntries.ForceElections) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a request to become candidate from the leader."); } _engine.SwitchToCandidateState("Was asked to do so by my leader", forced: true); return; } _debugRecorder.Record("Processing entries is completed"); _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _term, LastLogIndex = lastAcknowledgedIndex, Success = true }); if (sp.Elapsed > _engine.ElectionTimeout / 2) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Took a long time to complete the cycle with {entries.Count} entries: {sp.Elapsed}"); } } _engine.Timeout.Defer(_connection.Source); _engine.ReportLeaderTime(appendEntries.TimeAsLeader); _debugRecorder.Record("Cycle done"); _debugRecorder.Start(); } } }
private void FollowerSteadyState() { var entries = new List <RachisEntry>(); long lastCommit = 0, lastTruncate = 0; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Entering steady state"); } while (true) { entries.Clear(); using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) { var appendEntries = _connection.Read <AppendEntries>(context); if (appendEntries.Term != _engine.CurrentTerm) { _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _engine.CurrentTerm, Message = "The current term that I have " + _engine.CurrentTerm + " doesn't match " + appendEntries.Term, Success = false }); if (_engine.Log.IsInfoEnabled && entries.Count > 0) { _engine.Log.Info($"Follower {_engine.Tag}: Got invalid term {appendEntries.Term} while the current term is {_engine.CurrentTerm}, aborting connection..."); } return; } _engine.Timeout.Defer(_connection.Source); var sp = Stopwatch.StartNew(); if (appendEntries.EntriesCount != 0) { for (int i = 0; i < appendEntries.EntriesCount; i++) { entries.Add(_connection.ReadRachisEntry(context)); _engine.Timeout.Defer(_connection.Source); } if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got non empty append entries request with {entries.Count} entries. Last: ({entries[entries.Count - 1].Index} - {entries[entries.Count - 1].Flags})" #if DEBUG + $"[{string.Join(" ,", entries.Select(x => x.ToString()))}]" #endif ); } } var lastLogIndex = appendEntries.PrevLogIndex; // don't start write transaction fro noop if (lastCommit != appendEntries.LeaderCommit || lastTruncate != appendEntries.TruncateLogBefore || entries.Count != 0) { bool removedFromTopology = false; // we start the tx after we finished reading from the network if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Ready to start tx in {sp.Elapsed}"); } using (var tx = context.OpenWriteTransaction()) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Tx running in {sp.Elapsed}"); } if (entries.Count > 0) { using (var lastTopology = _engine.AppendToLog(context, entries)) { if (lastTopology != null) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Topology changed to {lastTopology}"); } var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(lastTopology); if (topology.Members.ContainsKey(_engine.Tag) || topology.Promotables.ContainsKey(_engine.Tag) || topology.Watchers.ContainsKey(_engine.Tag)) { RachisConsensus.SetTopology(_engine, context, topology); } else { removedFromTopology = true; } } } } lastLogIndex = _engine.GetLastEntryIndex(context); var lastEntryIndexToCommit = Math.Min( lastLogIndex, appendEntries.LeaderCommit); var lastAppliedIndex = _engine.GetLastCommitIndex(context); if (lastEntryIndexToCommit > lastAppliedIndex) { _engine.Apply(context, lastEntryIndexToCommit, null); } lastTruncate = Math.Min(appendEntries.TruncateLogBefore, lastEntryIndexToCommit); _engine.TruncateLogBefore(context, lastTruncate); lastCommit = lastEntryIndexToCommit; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Ready to commit in {sp.Elapsed}"); } tx.Commit(); } if (_engine.Log.IsInfoEnabled && entries.Count > 0) { _engine.Log.Info($"Follower {_engine.Tag}: Processing entries request with {entries.Count} entries took {sp.Elapsed}"); } if (removedFromTopology) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info("Was notified that I was removed from the node topoloyg, will be moving to passive mode now."); } _engine.SetNewState(RachisState.Passive, null, appendEntries.Term, "I was kicked out of the cluster and moved to passive mode"); return; } } if (appendEntries.ForceElections) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got a request to become candidate from the leader."); } _engine.SwitchToCandidateState("Was asked to do so by my leader", forced: true); return; } _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _engine.CurrentTerm, LastLogIndex = lastLogIndex, Success = true }); _engine.Timeout.Defer(_connection.Source); _engine.ReportLeaderTime(appendEntries.TimeAsLeader); } } }
private void Run() { try { try { // Operation may fail, that's why we don't RaiseOrDie _running.Raise(); ElectionTerm = _engine.CurrentTerm; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Candidate {_engine.Tag}: Starting elections"); } ClusterTopology clusterTopology; using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) using (context.OpenReadTransaction()) { clusterTopology = _engine.GetTopology(context); } if (clusterTopology.Members.Count == 1) { CastVoteForSelf(ElectionTerm + 1, "Single member cluster, natural leader"); _engine.SwitchToLeaderState(ElectionTerm, "I'm the only one in the cluster, so no need for elections, I rule."); return; } if (IsForcedElection) { CastVoteForSelf(ElectionTerm + 1, "Voting for self in forced elections"); } else { ElectionTerm = ElectionTerm + 1; } foreach (var voter in clusterTopology.Members) { if (voter.Key == _engine.Tag) { continue; // we already voted for ourselves } var candidateAmbassador = new CandidateAmbassador(_engine, this, voter.Key, voter.Value, _engine.ClusterCertificate); _voters.Add(candidateAmbassador); try { _engine.AppendStateDisposable(this, candidateAmbassador); } catch (ConcurrencyException) { foreach (var ambassador in _voters) { ambassador.Dispose(); } return; // we lost the election, because someone else changed our state to follower } candidateAmbassador.Start(); } while (_running && _engine.CurrentState == RachisState.Candidate) { if (_peersWaiting.WaitOne(_engine.Timeout.TimeoutPeriod) == false) { ElectionTerm = _engine.CurrentTerm; // timeout? if (IsForcedElection) { CastVoteForSelf(ElectionTerm + 1, "Timeout during forced elections"); } else { ElectionTerm = ElectionTerm + 1; } _engine.RandomizeTimeout(extend: true); StateChange(); // will wake ambassadors and make them ping peers again continue; } if (_running == false) { return; } _peersWaiting.Reset(); bool removedFromTopology = false; var trialElectionsCount = 1; var realElectionsCount = 1; foreach (var ambassador in _voters) { if (ambassador.NotInTopology) { removedFromTopology = true; break; } if (ambassador.RealElectionWonAtTerm == ElectionTerm) { realElectionsCount++; } if (ambassador.TrialElectionWonAtTerm == ElectionTerm) { trialElectionsCount++; } } var majority = ((_voters.Count + 1) / 2) + 1; if (removedFromTopology) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"Candidate {_engine.Tag}: A leader node has indicated that I'm not in their topology, I was probably kicked out. Moving to passive mode"); } _engine.SetNewState(RachisState.Passive, this, _engine.CurrentTerm, "I just learned from the leader that I'm not in their topology, moving to passive state"); break; } if (realElectionsCount >= majority) { ElectionResult = ElectionResult.Won; _running.Lower(); StateChange(); var connections = new Dictionary <string, RemoteConnection>(); foreach (var candidateAmbassador in _voters) { connections[candidateAmbassador.Tag] = candidateAmbassador.Connection; } _engine.SwitchToLeaderState(ElectionTerm, $"Was elected by {realElectionsCount} nodes to leadership in {ElectionTerm}", connections); break; } if (RunRealElectionAtTerm != ElectionTerm && trialElectionsCount >= majority) { CastVoteForSelf(ElectionTerm, "Won in the trial elections"); } } } catch (Exception e) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Candidate {_engine.Tag}: Failure during candidacy run with current state of {_engine.CurrentState}", e); } if (_engine.CurrentState == RachisState.Candidate) { // if we are still a candidate, start the candidacy again. _engine.SwitchToCandidateState("An error occured during the last candidacy: " + e); } else if (_engine.CurrentState != RachisState.Passive) { _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout); } } } finally { try { Dispose(); } catch (Exception) { // nothing to be done here } } }