private void FollowerSteadyState() { var entries = new List <RachisEntry>(); long lastCommit = 0, lastTruncate = 0, lastAcknowledgedIndex = 0; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Entering steady state"); } while (true) { entries.Clear(); using (_engine.ContextPool.AllocateOperationContext(out ClusterOperationContext context)) { using (context.OpenReadTransaction()) { if (_engine.RequestSnapshot) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Snapshot was requested, so we close this follower."); } return; } } _debugRecorder.Record("Wait for entries"); var appendEntries = _connection.Read <AppendEntries>(context); var sp = Stopwatch.StartNew(); if (appendEntries.Term != _engine.CurrentTerm) { _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _engine.CurrentTerm, Message = "The current term that I have " + _engine.CurrentTerm + " doesn't match " + appendEntries.Term, Success = false }); if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got invalid term {appendEntries.Term} while the current term is {_engine.CurrentTerm}, aborting connection..."); } return; } ClusterCommandsVersionManager.SetClusterVersion(appendEntries.MinCommandVersion); _debugRecorder.Record("Got entries"); _engine.Timeout.Defer(_connection.Source); if (appendEntries.EntriesCount != 0) { using (var cts = new CancellationTokenSource()) { var task = Concurrent_SendAppendEntriesPendingToLeaderAsync(cts, _term, appendEntries.PrevLogIndex); try { for (int i = 0; i < appendEntries.EntriesCount; i++) { entries.Add(_connection.ReadRachisEntry(context)); } } finally { cts.Cancel(); task.Wait(CancellationToken.None); } _engine.Timeout.Defer(_connection.Source); } if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got non empty append entries request with {entries.Count} entries. Last: ({entries[entries.Count - 1].Index} - {entries[entries.Count - 1].Flags})" #if DEBUG + $"[{string.Join(" ,", entries.Select(x => x.ToString()))}]" #endif ); } } // don't start write transaction for noop if (lastCommit != appendEntries.LeaderCommit || lastTruncate != appendEntries.TruncateLogBefore || entries.Count != 0) { using (var cts = new CancellationTokenSource()) { // applying the leader state may take a while, we need to ping // the server and let us know that we are still there var task = Concurrent_SendAppendEntriesPendingToLeaderAsync(cts, _term, appendEntries.PrevLogIndex); try { bool hasRemovedFromTopology; (hasRemovedFromTopology, lastAcknowledgedIndex, lastTruncate, lastCommit) = ApplyLeaderStateToLocalState(sp, context, entries, appendEntries); if (hasRemovedFromTopology) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info("Was notified that I was removed from the node topology, will be moving to passive mode now."); } _engine.SetNewState(RachisState.Passive, null, appendEntries.Term, "I was kicked out of the cluster and moved to passive mode"); return; } } catch (RachisInvalidOperationException) { // on raft protocol violation propagate the error and close this follower. throw; } catch (ConcurrencyException) { // the term was changed throw; } catch (Exception e) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Failed to apply leader state to local state with {entries.Count:#,#;;0} entries with leader commit: {appendEntries.LeaderCommit}, term: {appendEntries.Term}. Prev log index: {appendEntries.PrevLogIndex}", e); } } finally { // here we need to wait until the concurrent send pending to leader // is completed to avoid concurrent writes to the leader cts.Cancel(); task.Wait(CancellationToken.None); } } } if (appendEntries.ForceElections) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a request to become candidate from the leader."); } _engine.SwitchToCandidateState("Was asked to do so by my leader", forced: true); return; } _debugRecorder.Record("Processing entries is completed"); _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _term, LastLogIndex = lastAcknowledgedIndex, Success = true }); if (sp.Elapsed > _engine.ElectionTimeout / 2) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Took a long time to complete the cycle with {entries.Count} entries: {sp.Elapsed}"); } } _engine.Timeout.Defer(_connection.Source); _engine.ReportLeaderTime(appendEntries.TimeAsLeader); _debugRecorder.Record("Cycle done"); _debugRecorder.Start(); } } }
/// <summary> /// This is expected to run for a long time, and it cannot leak exceptions /// </summary> private void Run() { try { var handles = new WaitHandle[] { _newEntry, _voterResponded, _promotableUpdated, _shutdownRequested }; var noopCmd = new DynamicJsonValue { ["Command"] = "noop" }; using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) using (var tx = context.OpenWriteTransaction()) using (var cmd = context.ReadObject(noopCmd, "noop-cmd")) { _engine.InsertToLeaderLog(context, Term, cmd, RachisEntryFlags.Noop); tx.Commit(); } _newEntry.Set(); //This is so the noop would register right away while (_running) { switch (WaitHandle.WaitAny(handles, _engine.ElectionTimeout)) { case 0: // new entry _newEntry.Reset(); // release any waiting ambassadors to send immediately TaskExecutor.CompleteAndReplace(ref _newEntriesArrived); if (_voters.Count == 0) { goto case 1; } break; case 1: // voter responded _voterResponded.Reset(); OnVoterConfirmation(); break; case 2: // promotable updated _promotableUpdated.Reset(); CheckPromotables(); break; case WaitHandle.WaitTimeout: break; case 3: // shutdown requested if (_engine.Log.IsInfoEnabled && _voters.Count != 0) { _engine.Log.Info($"{ToString()}: shutting down"); } _running.Lower(); return; } EnsureThatWeHaveLeadership(VotersMajority); _engine.ReportLeaderTime(LeaderShipDuration); // don't trancate if we are disposing an old peer // otherwise he would not recieve notification that he was // kick out of the cluster if (_previousPeersWereDisposed > 0) // Not Interlocked, because the race here is not interesting. { continue; } var lowestIndexInEntireCluster = GetLowestIndexInEntireCluster(); if (lowestIndexInEntireCluster != LowestIndexInEntireCluster) { using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) using (context.OpenWriteTransaction()) { _engine.TruncateLogBefore(context, lowestIndexInEntireCluster); LowestIndexInEntireCluster = lowestIndexInEntireCluster; context.Transaction.Commit(); } } } } catch (Exception e) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info("Error when running leader behavior", e); } if (e is VoronErrorException) { _engine.Notify(AlertRaised.Create( null, "Error when running leader behavior", e.Message, AlertType.ClusterTopologyWarning, NotificationSeverity.Error, details: new ExceptionDetails(e))); } try { _engine.SwitchToCandidateState("An error occurred during our leadership." + Environment.NewLine + e); } catch (Exception e2) { if (_engine.Log.IsOperationsEnabled) { _engine.Log.Operations("After leadership failure, could not setup switch to candidate state", e2); } } } }
private void FollowerSteadyState() { var entries = new List <RachisEntry>(); long lastCommit = 0, lastTruncate = 0; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Entering steady state"); } while (true) { entries.Clear(); using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) { var appendEntries = _connection.Read <AppendEntries>(context); if (appendEntries.Term != _engine.CurrentTerm) { _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _engine.CurrentTerm, Message = "The current term that I have " + _engine.CurrentTerm + " doesn't match " + appendEntries.Term, Success = false }); if (_engine.Log.IsInfoEnabled && entries.Count > 0) { _engine.Log.Info($"Follower {_engine.Tag}: Got invalid term {appendEntries.Term} while the current term is {_engine.CurrentTerm}, aborting connection..."); } return; } _engine.Timeout.Defer(_connection.Source); var sp = Stopwatch.StartNew(); if (appendEntries.EntriesCount != 0) { for (int i = 0; i < appendEntries.EntriesCount; i++) { entries.Add(_connection.ReadRachisEntry(context)); _engine.Timeout.Defer(_connection.Source); } if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got non empty append entries request with {entries.Count} entries. Last: ({entries[entries.Count - 1].Index} - {entries[entries.Count - 1].Flags})" #if DEBUG + $"[{string.Join(" ,", entries.Select(x => x.ToString()))}]" #endif ); } } var lastLogIndex = appendEntries.PrevLogIndex; // don't start write transaction fro noop if (lastCommit != appendEntries.LeaderCommit || lastTruncate != appendEntries.TruncateLogBefore || entries.Count != 0) { bool removedFromTopology = false; // we start the tx after we finished reading from the network if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Ready to start tx in {sp.Elapsed}"); } using (var tx = context.OpenWriteTransaction()) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Tx running in {sp.Elapsed}"); } if (entries.Count > 0) { using (var lastTopology = _engine.AppendToLog(context, entries)) { if (lastTopology != null) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Topology changed to {lastTopology}"); } var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(lastTopology); if (topology.Members.ContainsKey(_engine.Tag) || topology.Promotables.ContainsKey(_engine.Tag) || topology.Watchers.ContainsKey(_engine.Tag)) { RachisConsensus.SetTopology(_engine, context, topology); } else { removedFromTopology = true; } } } } lastLogIndex = _engine.GetLastEntryIndex(context); var lastEntryIndexToCommit = Math.Min( lastLogIndex, appendEntries.LeaderCommit); var lastAppliedIndex = _engine.GetLastCommitIndex(context); if (lastEntryIndexToCommit > lastAppliedIndex) { _engine.Apply(context, lastEntryIndexToCommit, null); } lastTruncate = Math.Min(appendEntries.TruncateLogBefore, lastEntryIndexToCommit); _engine.TruncateLogBefore(context, lastTruncate); lastCommit = lastEntryIndexToCommit; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Ready to commit in {sp.Elapsed}"); } tx.Commit(); } if (_engine.Log.IsInfoEnabled && entries.Count > 0) { _engine.Log.Info($"Follower {_engine.Tag}: Processing entries request with {entries.Count} entries took {sp.Elapsed}"); } if (removedFromTopology) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info("Was notified that I was removed from the node topoloyg, will be moving to passive mode now."); } _engine.SetNewState(RachisState.Passive, null, appendEntries.Term, "I was kicked out of the cluster and moved to passive mode"); return; } } if (appendEntries.ForceElections) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got a request to become candidate from the leader."); } _engine.SwitchToCandidateState("Was asked to do so by my leader", forced: true); return; } _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _engine.CurrentTerm, LastLogIndex = lastLogIndex, Success = true }); _engine.Timeout.Defer(_connection.Source); _engine.ReportLeaderTime(appendEntries.TimeAsLeader); } } }