private (bool HasRemovedFromTopology, long LastAcknowledgedIndex, long LastTruncate, long LastCommit) ApplyLeaderStateToLocalState(Stopwatch sp, ClusterOperationContext context, List <RachisEntry> entries, AppendEntries appendEntries) { long lastTruncate; long lastCommit; bool removedFromTopology = false; // we start the tx after we finished reading from the network if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Ready to start tx in {sp.Elapsed}"); } using (var tx = context.OpenWriteTransaction()) { _engine.ValidateTerm(_term); if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Tx running in {sp.Elapsed}"); } if (entries.Count > 0) { var(lastTopology, lastTopologyIndex) = _engine.AppendToLog(context, entries); using (lastTopology) { if (lastTopology != null) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Topology changed to {lastTopology}"); } var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(lastTopology); if (topology.Members.ContainsKey(_engine.Tag) || topology.Promotables.ContainsKey(_engine.Tag) || topology.Watchers.ContainsKey(_engine.Tag)) { RachisConsensus.SetTopology(_engine, context, topology); } else { removedFromTopology = true; _engine.ClearAppendedEntriesAfter(context, lastTopologyIndex); } } } } var lastEntryIndexToCommit = Math.Min( _engine.GetLastEntryIndex(context), appendEntries.LeaderCommit); var lastAppliedIndex = _engine.GetLastCommitIndex(context); var lastAppliedTerm = _engine.GetTermFor(context, lastEntryIndexToCommit); // we start to commit only after we have any log with a term of the current leader if (lastEntryIndexToCommit > lastAppliedIndex && lastAppliedTerm == appendEntries.Term) { lastAppliedIndex = _engine.Apply(context, lastEntryIndexToCommit, null, sp); } lastTruncate = Math.Min(appendEntries.TruncateLogBefore, lastAppliedIndex); _engine.TruncateLogBefore(context, lastTruncate); lastCommit = lastAppliedIndex; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Ready to commit in {sp.Elapsed}"); } tx.Commit(); } if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Processing entries request with {entries.Count} entries took {sp.Elapsed}"); } var lastAcknowledgedIndex = entries.Count == 0 ? appendEntries.PrevLogIndex : entries[entries.Count - 1].Index; return(HasRemovedFromTopology : removedFromTopology, LastAcknowledgedIndex : lastAcknowledgedIndex, LastTruncate : lastTruncate, LastCommit : lastCommit); }
/// <summary> /// This is expected to run for a long time, and it cannot leak exceptions /// </summary> private void Run() { try { var handles = new WaitHandle[] { _newEntry, _voterResponded, _promotableUpdated, _shutdownRequested }; var noopCmd = new DynamicJsonValue { ["Command"] = "noop" }; using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) using (var tx = context.OpenWriteTransaction()) using (var cmd = context.ReadObject(noopCmd, "noop-cmd")) { _engine.InsertToLeaderLog(context, Term, cmd, RachisEntryFlags.Noop); tx.Commit(); } _newEntry.Set(); //This is so the noop would register right away while (_running) { switch (WaitHandle.WaitAny(handles, _engine.ElectionTimeout)) { case 0: // new entry _newEntry.Reset(); // release any waiting ambassadors to send immediately TaskExecutor.CompleteAndReplace(ref _newEntriesArrived); if (_voters.Count == 0) { goto case 1; } break; case 1: // voter responded _voterResponded.Reset(); OnVoterConfirmation(); break; case 2: // promotable updated _promotableUpdated.Reset(); CheckPromotables(); break; case WaitHandle.WaitTimeout: break; case 3: // shutdown requested if (_engine.Log.IsInfoEnabled && _voters.Count != 0) { _engine.Log.Info($"{ToString()}: shutting down"); } _running.Lower(); return; } EnsureThatWeHaveLeadership(VotersMajority); _engine.ReportLeaderTime(LeaderShipDuration); // don't trancate if we are disposing an old peer // otherwise he would not recieve notification that he was // kick out of the cluster if (_previousPeersWereDisposed > 0) // Not Interlocked, because the race here is not interesting. { continue; } var lowestIndexInEntireCluster = GetLowestIndexInEntireCluster(); if (lowestIndexInEntireCluster != LowestIndexInEntireCluster) { using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) using (context.OpenWriteTransaction()) { _engine.TruncateLogBefore(context, lowestIndexInEntireCluster); LowestIndexInEntireCluster = lowestIndexInEntireCluster; context.Transaction.Commit(); } } } } catch (Exception e) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info("Error when running leader behavior", e); } if (e is VoronErrorException) { _engine.Notify(AlertRaised.Create( null, "Error when running leader behavior", e.Message, AlertType.ClusterTopologyWarning, NotificationSeverity.Error, details: new ExceptionDetails(e))); } try { _engine.SwitchToCandidateState("An error occurred during our leadership." + Environment.NewLine + e); } catch (Exception e2) { if (_engine.Log.IsOperationsEnabled) { _engine.Log.Operations("After leadership failure, could not setup switch to candidate state", e2); } } } }
private void FollowerSteadyState() { var entries = new List <RachisEntry>(); long lastCommit = 0, lastTruncate = 0; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Entering steady state"); } while (true) { entries.Clear(); using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context)) { var appendEntries = _connection.Read <AppendEntries>(context); if (appendEntries.Term != _engine.CurrentTerm) { _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _engine.CurrentTerm, Message = "The current term that I have " + _engine.CurrentTerm + " doesn't match " + appendEntries.Term, Success = false }); if (_engine.Log.IsInfoEnabled && entries.Count > 0) { _engine.Log.Info($"Follower {_engine.Tag}: Got invalid term {appendEntries.Term} while the current term is {_engine.CurrentTerm}, aborting connection..."); } return; } _engine.Timeout.Defer(_connection.Source); var sp = Stopwatch.StartNew(); if (appendEntries.EntriesCount != 0) { for (int i = 0; i < appendEntries.EntriesCount; i++) { entries.Add(_connection.ReadRachisEntry(context)); _engine.Timeout.Defer(_connection.Source); } if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got non empty append entries request with {entries.Count} entries. Last: ({entries[entries.Count - 1].Index} - {entries[entries.Count - 1].Flags})" #if DEBUG + $"[{string.Join(" ,", entries.Select(x => x.ToString()))}]" #endif ); } } var lastLogIndex = appendEntries.PrevLogIndex; // don't start write transaction fro noop if (lastCommit != appendEntries.LeaderCommit || lastTruncate != appendEntries.TruncateLogBefore || entries.Count != 0) { bool removedFromTopology = false; // we start the tx after we finished reading from the network if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Ready to start tx in {sp.Elapsed}"); } using (var tx = context.OpenWriteTransaction()) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Tx running in {sp.Elapsed}"); } if (entries.Count > 0) { using (var lastTopology = _engine.AppendToLog(context, entries)) { if (lastTopology != null) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Topology changed to {lastTopology}"); } var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(lastTopology); if (topology.Members.ContainsKey(_engine.Tag) || topology.Promotables.ContainsKey(_engine.Tag) || topology.Watchers.ContainsKey(_engine.Tag)) { RachisConsensus.SetTopology(_engine, context, topology); } else { removedFromTopology = true; } } } } lastLogIndex = _engine.GetLastEntryIndex(context); var lastEntryIndexToCommit = Math.Min( lastLogIndex, appendEntries.LeaderCommit); var lastAppliedIndex = _engine.GetLastCommitIndex(context); if (lastEntryIndexToCommit > lastAppliedIndex) { _engine.Apply(context, lastEntryIndexToCommit, null); } lastTruncate = Math.Min(appendEntries.TruncateLogBefore, lastEntryIndexToCommit); _engine.TruncateLogBefore(context, lastTruncate); lastCommit = lastEntryIndexToCommit; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Ready to commit in {sp.Elapsed}"); } tx.Commit(); } if (_engine.Log.IsInfoEnabled && entries.Count > 0) { _engine.Log.Info($"Follower {_engine.Tag}: Processing entries request with {entries.Count} entries took {sp.Elapsed}"); } if (removedFromTopology) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info("Was notified that I was removed from the node topoloyg, will be moving to passive mode now."); } _engine.SetNewState(RachisState.Passive, null, appendEntries.Term, "I was kicked out of the cluster and moved to passive mode"); return; } } if (appendEntries.ForceElections) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got a request to become candidate from the leader."); } _engine.SwitchToCandidateState("Was asked to do so by my leader", forced: true); return; } _connection.Send(context, new AppendEntriesResponse { CurrentTerm = _engine.CurrentTerm, LastLogIndex = lastLogIndex, Success = true }); _engine.Timeout.Defer(_connection.Source); _engine.ReportLeaderTime(appendEntries.TimeAsLeader); } } }