/// <summary>
/// Runs one candidacy: starts an ambassador for every other topology member and keeps
/// tallying their trial/real election results until this node wins, is moved to passive,
/// stops running, or the engine is no longer in the candidate state.
/// </summary>
/// <remarks>
/// Exceptions are logged (when info logging is enabled). If the engine is still a candidate the
/// candidacy is restarted; otherwise, unless the engine is passive, the election timeout
/// is started again. <c>Dispose()</c> is always called on exit and its failures are ignored.
/// </remarks>
private void Run()
{
    // Ambassadors that reported a topology mismatch; handed to StillHavePeers() each round.
    var mismatchedAmbassadors = new List<CandidateAmbassador>();

    try
    {
        try
        {
            // Raise() rather than RaiseOrDie(): this operation is allowed to fail.
            _running.Raise();
            ElectionTerm = _engine.CurrentTerm;

            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"Candidate {_engine.Tag}: Starting elections");
            }

            ClusterTopology topology;
            using (_engine.ContextPool.AllocateOperationContext(out ClusterOperationContext context))
            {
                using (context.OpenReadTransaction())
                {
                    topology = _engine.GetTopology(context);
                }
            }

            // A single-member cluster needs no election at all.
            if (topology.Members.Count == 1)
            {
                CastVoteForSelf(ElectionTerm + 1, "Single member cluster, natural leader");
                _engine.SwitchToLeaderState(
                    ElectionTerm,
                    ClusterCommandsVersionManager.CurrentClusterMinimalVersion,
                    "I'm the only one in the cluster, so no need for elections, I rule.");
                return;
            }

            if (_engine.RequestSnapshot)
            {
                // A node that requested a snapshot is not allowed to be elected leader.
                if (_engine.Log.IsOperationsEnabled)
                {
                    _engine.Log.Operations("we aren't allowed to be elected for leadership if we requested a snapshot");
                }

                return;
            }

            if (IsForcedElection)
            {
                CastVoteForSelf(ElectionTerm + 1, "Voting for self in forced elections");
            }
            else
            {
                ElectionTerm += 1;
            }

            foreach (var member in topology.Members)
            {
                if (member.Key == _engine.Tag)
                {
                    // This entry is us - we already voted for ourselves.
                    continue;
                }

                var newAmbassador = new CandidateAmbassador(_engine, this, member.Key, member.Value);

                // The voters list is replaced by a fresh copy instead of being mutated in place.
                _voters = new List<CandidateAmbassador>(_voters) { newAmbassador };

                // Per the original note: a concurrency exception here disposes the current
                // candidate together with its ambassadors.
                _engine.AppendStateDisposable(this, newAmbassador);
                newAmbassador.Start();
            }

            while (_running && _engine.CurrentState == RachisState.Candidate)
            {
                var signaled = _peersWaiting.WaitOne(_engine.Timeout.TimeoutPeriod);
                if (signaled == false)
                {
                    // Nobody answered within the timeout period: move to the next term and retry.
                    ElectionTerm = _engine.CurrentTerm + 1;
                    _engine.RandomizeTimeout(extend: true);
                    StateChange(); // will wake ambassadors and make them ping peers again
                    continue;
                }

                if (_running == false)
                {
                    return;
                }

                _peersWaiting.Reset();

                // Both tallies start at one: our own vote.
                var trialVotes = 1;
                var realVotes = 1;

                foreach (var peer in _voters)
                {
                    if (peer.NotInTopology)
                    {
                        MoveCandidateToPassive("A leader node has indicated that I'm not in their topology, I was probably kicked out.");
                        return;
                    }

                    if (peer.TopologyMismatch)
                    {
                        mismatchedAmbassadors.Add(peer);
                        continue;
                    }

                    if (peer.RealElectionWonAtTerm == ElectionTerm)
                    {
                        realVotes++;
                    }

                    if (peer.TrialElectionWonAtTerm == ElectionTerm)
                    {
                        trialVotes++;
                    }
                }

                if (StillHavePeers(mismatchedAmbassadors) == false)
                {
                    MoveCandidateToPassive("I'm left alone in the cluster.");
                    return;
                }

                // _voters excludes this node, hence the +1 before halving.
                var quorum = ((_voters.Count + 1) / 2) + 1;

                if (realVotes >= quorum)
                {
                    ElectionResult = ElectionResult.Won;
                    _running.Lower();

                    var publishedConnections = new Dictionary<string, RemoteConnection>();
                    var commandVersions = new List<int>
                    {
                        ClusterCommandsVersionManager.MyCommandsVersion
                    };

                    foreach (var elector in _voters)
                    {
                        if (elector.ClusterCommandsVersion > 0)
                        {
                            commandVersions.Add(elector.ClusterCommandsVersion);
                        }

                        if (elector.TryGetPublishedConnection(out var connection))
                        {
                            publishedConnections[elector.Tag] = connection;
                        }
                    }

                    // Connections are collected before the state change is signalled.
                    StateChange();

                    var clusterVersion = ClusterCommandsVersionManager.GetClusterMinimalVersion(commandVersions, _engine.MaximalVersion);
                    string reason = $"Was elected by {realVotes} nodes for leadership in term {ElectionTerm} with cluster version of {clusterVersion}";
                    _engine.SwitchToLeaderState(ElectionTerm, clusterVersion, reason, publishedConnections);
                    break;
                }

                // Winning the trial round for a term we have not yet run for real triggers the real vote.
                if (RunRealElectionAtTerm != ElectionTerm && trialVotes >= quorum)
                {
                    CastVoteForSelf(ElectionTerm, "Won in the trial elections");
                }
            }
        }
        catch (Exception e)
        {
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"Candidate {_engine.Tag}: Failure during candidacy run with current state of {_engine.CurrentState}", e);
            }

            if (_engine.CurrentState == RachisState.Candidate)
            {
                // Still a candidate: start the candidacy again.
                _engine.SwitchToCandidateState("An error occurred during the last candidacy: " + e);
            }
            else if (_engine.CurrentState != RachisState.Passive)
            {
                _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout);
            }
        }
    }
    finally
    {
        try
        {
            Dispose();
        }
        catch (Exception)
        {
            // nothing to be done here
        }
    }
}
/// <summary>
/// Follower loop: repeatedly reads an <c>AppendEntries</c> message from the connection,
/// reads its entries, applies the leader state locally when there is something to apply,
/// and answers with an <c>AppendEntriesResponse</c>.
/// </summary>
/// <remarks>
/// The loop returns when a snapshot was requested, when the incoming term differs from the
/// engine's current term, when this node learns it was removed from the topology, or when
/// the message asks for forced elections. <c>RachisInvalidOperationException</c> and
/// <c>ConcurrencyException</c> raised while applying are rethrown; any other exception from
/// that step is only logged.
/// </remarks>
private void FollowerSteadyState()
{
    var entries = new List<RachisEntry>();
    long lastCommit = 0;
    long lastTruncate = 0;
    long lastAcknowledgedIndex = 0;

    if (_engine.Log.IsInfoEnabled)
    {
        _engine.Log.Info($"{ToString()}: Entering steady state");
    }

    while (true)
    {
        entries.Clear();

        using (_engine.ContextPool.AllocateOperationContext(out ClusterOperationContext context))
        {
            using (context.OpenReadTransaction())
            {
                if (_engine.RequestSnapshot)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: Snapshot was requested, so we close this follower.");
                    }

                    return;
                }
            }

            _debugRecorder.Record("Wait for entries");
            var request = _connection.Read<AppendEntries>(context);

            // Times the whole cycle, starting right after the message arrived.
            var cycleWatch = Stopwatch.StartNew();

            if (request.Term != _engine.CurrentTerm)
            {
                // Reject and drop the connection on a term mismatch.
                var rejection = new AppendEntriesResponse
                {
                    CurrentTerm = _engine.CurrentTerm,
                    Message = "The current term that I have " + _engine.CurrentTerm + " doesn't match " + request.Term,
                    Success = false
                };
                _connection.Send(context, rejection);

                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Got invalid term {request.Term} while the current term is {_engine.CurrentTerm}, aborting connection...");
                }

                return;
            }

            ClusterCommandsVersionManager.SetClusterVersion(request.MinCommandVersion);
            _debugRecorder.Record("Got entries");
            _engine.Timeout.Defer(_connection.Source);

            if (request.EntriesCount != 0)
            {
                using (var pingCancellation = new CancellationTokenSource())
                {
                    // Runs alongside the read; cancelled and awaited as soon as the read finishes.
                    var pendingPings = Concurrent_SendAppendEntriesPendingToLeaderAsync(pingCancellation, _term, request.PrevLogIndex);
                    try
                    {
                        for (int index = 0; index < request.EntriesCount; index++)
                        {
                            var entry = _connection.ReadRachisEntry(context);
                            entries.Add(entry);
                        }
                    }
                    finally
                    {
                        pingCancellation.Cancel();
                        pendingPings.Wait(CancellationToken.None);
                    }

                    _engine.Timeout.Defer(_connection.Source);
                }

                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Got non empty append entries request with {entries.Count} entries. Last: ({entries[entries.Count - 1].Index} - {entries[entries.Count - 1].Flags})"
#if DEBUG
                                     + $"[{string.Join(" ,", entries.Select(item => item.ToString()))}]"
#endif
                    );
                }
            }

            // don't start write transaction for noop
            var hasSomethingToApply = lastCommit != request.LeaderCommit ||
                                      lastTruncate != request.TruncateLogBefore ||
                                      entries.Count != 0;

            if (hasSomethingToApply)
            {
                using (var pingCancellation = new CancellationTokenSource())
                {
                    // Applying the leader state may take a while, so the leader keeps being
                    // pinged in the meantime to let it know that we are still here.
                    var pendingPings = Concurrent_SendAppendEntriesPendingToLeaderAsync(pingCancellation, _term, request.PrevLogIndex);
                    try
                    {
                        bool removedFromTopology;
                        (removedFromTopology, lastAcknowledgedIndex, lastTruncate, lastCommit) =
                            ApplyLeaderStateToLocalState(cycleWatch, context, entries, request);

                        if (removedFromTopology)
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info("Was notified that I was removed from the node topology, will be moving to passive mode now.");
                            }

                            _engine.SetNewState(RachisState.Passive, null, request.Term, "I was kicked out of the cluster and moved to passive mode");
                            return;
                        }
                    }
                    catch (RachisInvalidOperationException)
                    {
                        // Raft protocol violation: propagate the error and close this follower.
                        throw;
                    }
                    catch (ConcurrencyException)
                    {
                        // The term was changed.
                        throw;
                    }
                    catch (Exception e)
                    {
                        // Anything else is logged only; the loop carries on.
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"Failed to apply leader state to local state with {entries.Count:#,#;;0} entries with leader commit: {request.LeaderCommit}, term: {request.Term}. Prev log index: {request.PrevLogIndex}", e);
                        }
                    }
                    finally
                    {
                        // Wait for the concurrent "pending" sender to finish so that we never
                        // write to the leader concurrently.
                        pingCancellation.Cancel();
                        pendingPings.Wait(CancellationToken.None);
                    }
                }
            }

            if (request.ForceElections)
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Got a request to become candidate from the leader.");
                }

                _engine.SwitchToCandidateState("Was asked to do so by my leader", forced: true);
                return;
            }

            _debugRecorder.Record("Processing entries is completed");

            var acknowledgement = new AppendEntriesResponse
            {
                CurrentTerm = _term,
                LastLogIndex = lastAcknowledgedIndex,
                Success = true
            };
            _connection.Send(context, acknowledgement);

            // Report cycles that took more than half of the election timeout.
            if (cycleWatch.Elapsed > _engine.ElectionTimeout / 2 && _engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: Took a long time to complete the cycle with {entries.Count} entries: {cycleWatch.Elapsed}");
            }

            _engine.Timeout.Defer(_connection.Source);
            _engine.ReportLeaderTime(request.TimeAsLeader);

            _debugRecorder.Record("Cycle done");
            _debugRecorder.Start();
        }
    }
}
/// <summary>
/// Runs one candidacy: starts an ambassador for every other topology member and keeps
/// tallying their trial/real election results until this node wins, learns it is not in
/// the topology, stops running, or the engine is no longer in the candidate state.
/// </summary>
/// <remarks>
/// Exceptions are logged (when info logging is enabled). If the engine is still a candidate the
/// candidacy is restarted; otherwise, unless the engine is passive, the election timeout
/// is started again. <c>Dispose()</c> is always called on exit and its failures are ignored.
/// </remarks>
private void Run()
{
    try
    {
        try
        {
            // Raise() rather than RaiseOrDie(): this operation is allowed to fail.
            _running.Raise();
            ElectionTerm = _engine.CurrentTerm;

            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"Candidate {_engine.Tag}: Starting elections");
            }

            ClusterTopology topology;
            using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
            {
                using (context.OpenReadTransaction())
                {
                    topology = _engine.GetTopology(context);
                }
            }

            // A single-member cluster needs no election at all.
            if (topology.Members.Count == 1)
            {
                CastVoteForSelf(ElectionTerm + 1, "Single member cluster, natural leader");
                _engine.SwitchToLeaderState(
                    ElectionTerm,
                    ClusterCommandsVersionManager.CurrentClusterMinimalVersion,
                    "I'm the only one in the cluster, so no need for elections, I rule.");
                return;
            }

            if (IsForcedElection)
            {
                CastVoteForSelf(ElectionTerm + 1, "Voting for self in forced elections");
            }
            else
            {
                ElectionTerm += 1;
            }

            foreach (var member in topology.Members)
            {
                if (member.Key == _engine.Tag)
                {
                    // This entry is us - we already voted for ourselves.
                    continue;
                }

                var newAmbassador = new CandidateAmbassador(_engine, this, member.Key, member.Value, _engine.ClusterCertificate);
                _voters.Add(newAmbassador);

                try
                {
                    _engine.AppendStateDisposable(this, newAmbassador);
                }
                catch (ConcurrencyException)
                {
                    // Someone else changed our state to follower, so this election is lost:
                    // dispose every ambassador created so far and give up.
                    foreach (var created in _voters)
                    {
                        created.Dispose();
                    }

                    return;
                }

                newAmbassador.Start();
            }

            while (_running && _engine.CurrentState == RachisState.Candidate)
            {
                var signaled = _peersWaiting.WaitOne(_engine.Timeout.TimeoutPeriod);
                if (signaled == false)
                {
                    // Timed out waiting for peers: re-sync with the engine's term, then advance.
                    ElectionTerm = _engine.CurrentTerm;

                    if (IsForcedElection)
                    {
                        CastVoteForSelf(ElectionTerm + 1, "Timeout during forced elections");
                    }
                    else
                    {
                        ElectionTerm += 1;
                    }

                    _engine.RandomizeTimeout(extend: true);
                    StateChange(); // will wake ambassadors and make them ping peers again
                    continue;
                }

                if (_running == false)
                {
                    return;
                }

                _peersWaiting.Reset();

                var kickedOut = false;

                // Both tallies start at one: our own vote.
                var trialVotes = 1;
                var realVotes = 1;

                foreach (var peer in _voters)
                {
                    if (peer.NotInTopology)
                    {
                        kickedOut = true;
                        break;
                    }

                    if (peer.RealElectionWonAtTerm == ElectionTerm)
                    {
                        realVotes++;
                    }

                    if (peer.TrialElectionWonAtTerm == ElectionTerm)
                    {
                        trialVotes++;
                    }
                }

                if (kickedOut)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info(
                            $"Candidate {_engine.Tag}: A leader node has indicated that I'm not in their topology, I was probably kicked out. Moving to passive mode");
                    }

                    _engine.SetNewState(
                        RachisState.Passive,
                        this,
                        _engine.CurrentTerm,
                        "I just learned from the leader that I'm not in their topology, moving to passive state");
                    break;
                }

                // _voters excludes this node, hence the +1 before halving.
                var quorum = ((_voters.Count + 1) / 2) + 1;

                if (realVotes >= quorum)
                {
                    ElectionResult = ElectionResult.Won;
                    _running.Lower();
                    StateChange();

                    var peerConnections = new Dictionary<string, RemoteConnection>();
                    var commandVersions = new List<int>
                    {
                        ClusterCommandsVersionManager.MyCommandsVersion
                    };

                    foreach (var elector in _voters)
                    {
                        peerConnections[elector.Tag] = elector.Connection;

                        if (elector.ClusterCommandsVersion > 0)
                        {
                            commandVersions.Add(elector.ClusterCommandsVersion);
                        }
                    }

                    var clusterVersion = ClusterCommandsVersionManager.GetClusterMinimalVersion(commandVersions, _engine.MaximalVersion);
                    _engine.SwitchToLeaderState(
                        ElectionTerm,
                        clusterVersion,
                        $"Was elected by {realVotes} nodes to leadership in {ElectionTerm} with cluster version of {clusterVersion}",
                        peerConnections);
                    break;
                }

                // Winning the trial round for a term we have not yet run for real triggers the real vote.
                if (RunRealElectionAtTerm != ElectionTerm && trialVotes >= quorum)
                {
                    CastVoteForSelf(ElectionTerm, "Won in the trial elections");
                }
            }
        }
        catch (Exception e)
        {
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"Candidate {_engine.Tag}: Failure during candidacy run with current state of {_engine.CurrentState}", e);
            }

            if (_engine.CurrentState == RachisState.Candidate)
            {
                // Still a candidate: start the candidacy again.
                _engine.SwitchToCandidateState("An error occured during the last candidacy: " + e);
            }
            else if (_engine.CurrentState != RachisState.Passive)
            {
                _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout);
            }
        }
    }
    finally
    {
        try
        {
            Dispose();
        }
        catch (Exception)
        {
            // nothing to be done here
        }
    }
}