/// <summary>
/// Builds a topology in which every node whose absolute URI equals that of
/// <paramref name="node"/> is replaced by <paramref name="node"/>, and appends the
/// resulting topology change command to the raft engine.
/// </summary>
/// <param name="node">Updated connection info that is swapped in on a URI match.</param>
/// <returns>
/// The completion task of the appended command. If a <c>NotLeadingException</c> is thrown,
/// the task returned by <c>SendNodeUpdateInternalAsync</c> for the node obtained from
/// <c>raftEngine.GetLeaderNode</c> is returned instead.
/// </returns>
public Task SendNodeUpdateAsync(NodeConnectionInfo node)
{
    try
    {
        var before = raftEngine.CurrentTopology;

        // One substitution rule shared by all three node groups.
        Func<NodeConnectionInfo, NodeConnectionInfo> withUpdate =
            existing => existing.Uri.AbsoluteUri == node.Uri.AbsoluteUri ? node : existing;

        var topologyId = before.TopologyId;
        var voting = before.AllVotingNodes.Select(withUpdate).ToList();
        var nonVoting = before.NonVotingNodes.Select(withUpdate).ToList();
        var promotable = before.PromotableNodes.Select(withUpdate).ToList();
        var after = new Topology(topologyId, voting, nonVoting, promotable);

        var change = new TopologyChangeCommand
        {
            Completion = new TaskCompletionSource<object>(),
            Requested = after,
            Previous = before
        };

        raftEngine.AppendCommand(change);
        return change.Completion.Task;
    }
    catch (NotLeadingException)
    {
        // Appending failed because of leadership; hand the update to the node reported as leader.
        var leader = raftEngine.GetLeaderNode(WaitForLeaderTimeoutInSeconds);
        return SendNodeUpdateInternalAsync(leader, node);
    }
}
/// <summary>
/// After a topology change, contacts every node that was present in the previous topology
/// but is absent from the requested one: first with the latest log entries, then with an
/// explicit <c>DisconnectedFromCluster</c> message.
/// </summary>
/// <param name="tcc">The topology change; nothing happens when its <c>Previous</c> is null.</param>
private void OnTopologyChanged(TopologyChangeCommand tcc)
{
    // Notifying removed servers is a courtesy, not a requirement: the cluster rejects
    // messages from nodes that are not considered part of it anyway.
    if (tcc.Previous == null)
    {
        return;
    }

    var droppedNames = tcc.Previous.AllNodeNames
        .Except(tcc.Requested.AllNodeNames)
        .ToList();

    foreach (var droppedName in droppedNames)
    {
        var peer = tcc.Previous.GetNodeByName(droppedName);
        if (peer != null)
        {
            // Push the latest updates, which include the topology removal entry.
            SendEntriesToPeer(peer);

            // Regardless, also send the explicit disconnection notice so the node can shut down gracefully.
            var notice = new DisconnectedFromCluster
            {
                From = Engine.Name,
                ClusterTopologyId = Engine.CurrentTopology.TopologyId,
                Term = Engine.PersistentState.CurrentTerm
            };
            Engine.Transport.Send(peer, notice);
        }
    }
}
/// <summary>
/// Starts a topology change on the leader and appends the corresponding command.
/// </summary>
/// <param name="requested">The topology to switch to.</param>
/// <returns>The completion task of the created topology change command.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>State</c> is not <c>Leader</c>, or when <c>_changingTopology</c> is already set.
/// </exception>
internal Task ModifyTopology(Topology requested)
{
    var isLeader = State == RaftEngineState.Leader;
    if (!isLeader)
    {
        throw new InvalidOperationException("Cannot modify topology from a non leader node, current leader is: " + (CurrentLeader ?? "no leader"));
    }

    var tcc = new TopologyChangeCommand
    {
        Completion = new TaskCompletionSource<object>(),
        Requested = requested,
        Previous = _currentTopology,
        BufferCommand = false,
    };

    // Claim the "change in progress" slot; a non-null previous value means it was already taken.
    var changeAlreadyRunning = Interlocked.CompareExchange(ref _changingTopology, tcc.Completion.Task, null);
    if (changeAlreadyRunning != null)
    {
        throw new InvalidOperationException("Cannot change the cluster topology while another topology change is in progress");
    }

    try
    {
        _log.Debug("Topology change started on leader");
        StartTopologyChange(tcc);
        AppendCommand(tcc);
    }
    catch (Exception)
    {
        // Release the slot so a later attempt is not blocked by this failed one.
        Interlocked.Exchange(ref _changingTopology, null);
        throw;
    }

    return tcc.Completion.Task;
}
/// <summary>
/// Creates a topology with a new id and a single voting node, stores it as the current
/// topology at index 0, and runs it through the start/commit topology change sequence.
/// </summary>
/// <param name="nodeConnection">The single voting node; <c>Engine.Options.SelfConnection</c> is used when null.</param>
/// <param name="isPartOfExistingCluster">
/// When true, the engine's current topology is recorded as the previous topology and the
/// engine is forced into candidate state.
/// </param>
/// <param name="forceCandidateState">When true, the engine is forced into candidate state.</param>
public void InitializeTopology(NodeConnectionInfo nodeConnection = null, bool isPartOfExistingCluster = false, bool forceCandidateState = false)
{
    var topologyId = Guid.NewGuid();

    var soleVoter = nodeConnection ?? Engine.Options.SelfConnection;
    var voters = new List<NodeConnectionInfo> { soleVoter };
    var topology = new Topology(
        topologyId,
        voters,
        Enumerable.Empty<NodeConnectionInfo>(),
        Enumerable.Empty<NodeConnectionInfo>());

    var tcc = new TopologyChangeCommand { Requested = topology };
    if (isPartOfExistingCluster)
    {
        tcc.Previous = Engine.CurrentTopology;
    }
    else
    {
        tcc.Previous = null;
    }

    Engine.PersistentState.SetCurrentTopology(tcc.Requested, 0);
    Engine.StartTopologyChange(tcc);
    Engine.CommitTopologyChange(tcc);

    var becomeCandidate = isPartOfExistingCluster || forceCandidateState;
    if (becomeCandidate)
    {
        Engine.ForceCandidateState();
    }
    else
    {
        Engine.CurrentLeader = null;
    }

    Log.Info("Initialized Topology: " + topologyId);
}
/// <summary>
/// After a topology change, sends the latest log entries to every node that was present in
/// the previous topology but is absent from the requested one.
/// </summary>
/// <param name="tcc">The topology change; nothing happens when its <c>Previous</c> is null.</param>
private void OnTopologyChanged(TopologyChangeCommand tcc)
{
    // Telling removed servers about their removal is optional politeness: the cluster
    // rejects messages from nodes that are not considered part of it in any case.
    var hasPrevious = tcc.Previous != null;
    if (!hasPrevious)
    {
        return;
    }

    var goneNames = tcc.Previous.AllNodeNames.Except(tcc.Requested.AllNodeNames).ToList();
    for (var i = 0; i < goneNames.Count; i++)
    {
        var target = tcc.Previous.GetNodeByName(goneNames[i]);
        if (target == null)
        {
            continue;
        }

        // The latest updates include the topology removal entry itself.
        SendEntriesToPeer(target);
    }
}
/// <summary>
/// Handles a disconnection notice. The notice is ignored unless it passes
/// <c>FromOurTopology</c>, its term is not older than the current term, and its sender is the
/// current leader. An accepted notice replaces the topology with an empty one carrying the
/// message's topology id and sets the engine state to follower.
/// </summary>
/// <param name="req">The received disconnection notice.</param>
public void Handle(DisconnectedFromCluster req)
{
    if (!FromOurTopology(req))
    {
        _log.Info("Got a disconnection notification message outside my cluster topology (id: {0}), ignoring", req.ClusterTopologyId);
        return;
    }

    var isFromOlderTerm = req.Term < Engine.PersistentState.CurrentTerm;
    if (isFromOlderTerm)
    {
        _log.Info("Got disconnection notification from an older term, ignoring");
        return;
    }

    var isFromLeader = req.From == Engine.CurrentLeader;
    if (!isFromLeader)
    {
        _log.Info("Got disconnection notification from {0}, who isn't the current leader, ignoring.", req.From);
        return;
    }

    _log.Warn("Got disconnection notification from the leader, clearing topology and moving to idle follower state");

    // An empty topology that only carries the cluster's topology id.
    var clearTopology = new TopologyChangeCommand
    {
        Requested = new Topology(req.ClusterTopologyId)
    };

    Engine.PersistentState.SetCurrentTopology(clearTopology.Requested, 0L);
    Engine.StartTopologyChange(clearTopology);
    Engine.CommitTopologyChange(clearTopology);
    Engine.SetState(RaftEngineState.Follower);
}
/// <summary>
/// Completes a topology change: clears the in-progress marker, moves this node to follower
/// state if the change removed it from the topology, and then raises <c>OnTopologyChanged</c>.
/// </summary>
/// <param name="tcc">The topology change being committed.</param>
public void CommitTopologyChange(TopologyChangeCommand tcc)
{
    // The in-progress marker is cleared before OnTopologyChanged fires, because that marker
    // is what the interface uses to track the progress of topology changes.
    Interlocked.Exchange(ref _changingTopology, null);

    // A command with no previous topology (e.g. one that merely accepts a new topology id)
    // can never count as a removal, since there was nothing to be removed from.
    var wasRemoved =
        !tcc.Requested.Contains(Name) &&
        tcc.Previous != null &&
        tcc.Previous.Contains(Name);

    if (!wasRemoved)
    {
        if (_log.IsInfoEnabled)
        {
            var firstTimeNote = tcc.Previous == null
                ? ", Previous topology was null - perhaps it is setting topology for the first time?"
                : String.Empty;
            _log.Info("Finished applying new topology: {0}{1}", _currentTopology, firstTimeNote);
        }
    }
    else
    {
        if (_log.IsDebugEnabled)
        {
            _log.Debug("This node is being removed from topology, setting its state to follower, it will be idle until a leader will join it to the cluster again");
        }

        CurrentLeader = null;
        SetState(RaftEngineState.Follower);
    }

    OnTopologyChanged(tcc);
}
/// <summary>
/// Starts a topology change on the leader and appends the corresponding command, after
/// verifying that the entry at the commit index belongs to the current term.
/// </summary>
/// <param name="requested">The topology to switch to.</param>
/// <returns>The completion task of the created topology change command.</returns>
/// <exception cref="NotLeadingException">Thrown when <c>State</c> is not <c>Leader</c>.</exception>
/// <exception cref="InvalidOperationException">
/// Thrown when there is no log entry at <c>CommitIndex</c>, when that entry's term differs
/// from the current term, or when <c>_changingTopology</c> is already set.
/// </exception>
internal Task ModifyTopology(Topology requested)
{
    if (State != RaftEngineState.Leader)
    {
        throw new NotLeadingException("Cannot modify topology from a non leader node, current leader is: " + (CurrentLeader ?? "no leader"));
    }

    var committedEntry = PersistentState.GetLogEntry(CommitIndex);
    if (committedEntry == null)
    {
        throw new InvalidOperationException("No log entry for committed for index " + CommitIndex + ", this is probably a brand new cluster with no committed entries or a serious problem");
    }

    var committedInCurrentTerm = committedEntry.Term == PersistentState.CurrentTerm;
    if (!committedInCurrentTerm)
    {
        throw new InvalidOperationException("Cannot modify the cluster topology when the committed index " + CommitIndex + " is in term " + committedEntry.Term + " but the current term is " + PersistentState.CurrentTerm +
                                            ". Wait until the leader finishes committing entries from the current term and try again");
    }

    var tcc = new TopologyChangeCommand
    {
        Completion = new TaskCompletionSource<object>(),
        Requested = requested,
        Previous = _currentTopology,
        BufferCommand = false,
    };

    // Claim the "change in progress" slot; a non-null previous value means it was already taken.
    var slotOwner = Interlocked.CompareExchange(ref _changingTopology, tcc.Completion.Task, null);
    if (slotOwner != null)
    {
        throw new InvalidOperationException("Cannot change the cluster topology while another topology change is in progress");
    }

    try
    {
        if (_log.IsDebugEnabled)
        {
            _log.Debug("Topology change started on leader");
        }

        StartTopologyChange(tcc);
        AppendCommand(tcc);
    }
    catch (Exception)
    {
        // Release the slot so a later attempt is not blocked by this failed one.
        Interlocked.Exchange(ref _changingTopology, null);
        throw;
    }

    return tcc.Completion.Task;
}
/// <summary>
/// Replaces the current topology with one constructed from <paramref name="id"/> alone,
/// persisting it at index 0 and running the start/commit topology change sequence.
/// </summary>
/// <param name="id">The topology id to adopt.</param>
public void InitializeEmptyTopologyWithId(Guid id)
{
    var emptyTopology = new Topology(id);
    var tcc = new TopologyChangeCommand
    {
        Requested = emptyTopology,
        Previous = Engine.CurrentTopology
    };

    Engine.PersistentState.SetCurrentTopology(tcc.Requested, 0);
    Engine.StartTopologyChange(tcc);
    Engine.CommitTopologyChange(tcc);

    Log.Info("Changed topology id: " + id + " and set the empty cluster topology");
}
/// <summary>
/// Raises the <c>TopologyChanging</c> event, logging (rather than propagating) any exception
/// thrown by a subscriber.
/// </summary>
/// <param name="tcc">The pending topology change; it is not passed on to subscribers.</param>
private void OnTopologyChanging(TopologyChangeCommand tcc)
{
    // Snapshot the delegate so the null check and the invocation see the same value.
    var subscribers = TopologyChanging;
    if (subscribers == null)
    {
        return;
    }

    try
    {
        subscribers();
    }
    catch (Exception e)
    {
        _log.Error("Error on raising TopologyChanging event", e);
    }
}
/// <summary>
/// Logs the notification and raises the <c>TopologyChanged</c> event with
/// <paramref name="cmd"/>, logging (rather than propagating) any exception thrown by a subscriber.
/// </summary>
/// <param name="cmd">The topology change handed to subscribers.</param>
protected virtual void OnTopologyChanged(TopologyChangeCommand cmd)
{
    _log.Info("OnTopologyChanged() - " + this.Name);

    // Snapshot the delegate so the null check and the invocation see the same value.
    var subscribers = TopologyChanged;
    if (subscribers == null)
    {
        return;
    }

    try
    {
        subscribers(cmd);
    }
    catch (Exception e)
    {
        _log.Error("Error on raising TopologyChanged event", e);
    }
}
/// <summary>
/// Gives <paramref name="node"/> a topology with a new id whose only voting member is the
/// node's own connection, applies it through the start/commit sequence, and clears the
/// engine's current leader.
/// </summary>
/// <param name="node">The node whose raft engine is initialized.</param>
private static void InitializeTopology(RaftNode node)
{
    var topologyId = Guid.NewGuid();

    var voters = new List<NodeConnectionInfo> { node.RaftEngine.Options.SelfConnection };
    var singleNodeTopology = new Topology(
        topologyId,
        voters,
        Enumerable.Empty<NodeConnectionInfo>(),
        Enumerable.Empty<NodeConnectionInfo>());

    var tcc = new TopologyChangeCommand();
    tcc.Requested = singleNodeTopology;

    node.RaftEngine.PersistentState.SetCurrentTopology(tcc.Requested, 0);
    node.RaftEngine.StartTopologyChange(tcc);
    node.RaftEngine.CommitTopologyChange(tcc);
    node.RaftEngine.CurrentLeader = null;
}
/// <summary>
/// Determines whether the previous and requested topologies of a command contain different
/// sets of nodes. Only the combined <c>AllNodes</c> collections are compared, so the group a
/// node belongs to (voting, promotable, ...) does not influence the result.
/// </summary>
/// <param name="command">The topology change command to inspect.</param>
/// <returns>
/// <c>false</c> when both topologies are null or hold equal node sets; <c>true</c> when
/// exactly one topology is null or the node sets differ.
/// </returns>
public static bool HasDifferentNodes(TopologyChangeCommand command)
{
    var previousMissing = command.Previous == null;
    var requestedMissing = command.Requested == null;

    if (previousMissing || requestedMissing)
    {
        // Both missing: nothing changed. Exactly one missing: everything changed.
        return previousMissing != requestedMissing;
    }

    var before = command.Previous.AllNodes.ToHashSet();
    var after = command.Requested.AllNodes.ToHashSet();

    // Equal sets have the same size and an empty symmetric difference.
    return !before.SetEquals(after);
}
/// <summary>
/// Reacts to a topology change whose node set differs from the previous one by calling
/// <c>HandleClusterConfigurationChanges</c>, passing the URLs of removed nodes when a
/// previous topology exists.
/// </summary>
/// <param name="command">The topology change command to process.</param>
private void HandleTopologyChanges(TopologyChangeCommand command)
{
    if (!RaftHelper.HasDifferentNodes(command))
    {
        return;
    }

    if (command.Previous == null)
    {
        // No earlier topology to diff against.
        HandleClusterConfigurationChanges();
        return;
    }

    var previousUrls = command.Previous.AllNodes.Select(x => x.Uri.AbsoluteUri);
    var requestedUrls = command.Requested.AllNodes.Select(x => x.Uri.AbsoluteUri);

    // URLs present before the change but no longer requested.
    var removedNodeUrls = previousUrls.Except(requestedUrls).ToList();

    HandleClusterConfigurationChanges(removedNodeUrls);
}
/// <summary>
/// Checks whether a message carries this node's topology id. When this node has no topology
/// (empty id and no voters), the message's topology id is adopted on the spot.
/// </summary>
/// <param name="msg">The incoming message.</param>
/// <returns>
/// <c>true</c> when the ids match or the message's id was just adopted; otherwise <c>false</c>.
/// </returns>
protected bool FromOurTopology(BaseMessage msg)
{
    var sameTopology = msg.ClusterTopologyId == Engine.CurrentTopology.TopologyId;
    if (sameTopology)
    {
        return true;
    }

    // A differing id is still acceptable if we have no topology of our own yet.
    var hasNoTopology =
        Engine.CurrentTopology.TopologyId == Guid.Empty &&
        Engine.CurrentTopology.HasVoters == false;
    if (!hasNoTopology)
    {
        return false;
    }

    // Accept the sender's topology id immediately.
    var adoptId = new TopologyChangeCommand
    {
        Requested = new Topology(msg.ClusterTopologyId)
    };
    Engine.StartTopologyChange(adoptId);
    Engine.CommitTopologyChange(adoptId);
    return true;
}
/// <summary>
/// Handles an append-entries request.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Reject requests that fail <c>FromOurTopology</c> or carry a term lower than the current term.
/// 2. Adopt a higher term, and adopt the sender as leader (switching to follower state) if it
///    is not already the current leader.
/// 3. Reject when the locally stored term at <c>req.PrevLogIndex</c> differs from
///    <c>req.PrevLogTerm</c>, returning a midpoint index/term hint.
/// 4. Skip leading entries already present with matching terms, then append the rest.
/// 5. If the new entries contain a topology change that excludes this node, switch to a new
///    single-node topology, become leader of it, and return immediately.
/// 6. Otherwise put the last topology change among the new entries into effect.
/// 7. Commit up to <c>min(req.LeaderCommit, last index)</c>.
/// </remarks>
/// <param name="req">The received request.</param>
/// <returns>A response whose <c>Success</c> flag tells whether the request was accepted.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when an entry flagged as a topology change does not deserialize to a
/// <c>TopologyChangeCommand</c>.
/// </exception>
public virtual AppendEntriesResponse Handle(AppendEntriesRequest req)
{
    // NOTE(review): this value is read before any entries are appended and is reused for the
    // successful response at the end - confirm that a pre-append index is what the leader expects.
    var lastLogIndex = Engine.PersistentState.LastLogEntry().Index;

    if (FromOurTopology(req) == false)
    {
        _log.Info("Got an append entries message outside my cluster topology (id: {0}), ignoring", req.ClusterTopologyId);
        return new AppendEntriesResponse
        {
            Success = false,
            CurrentTerm = Engine.PersistentState.CurrentTerm,
            LastLogIndex = lastLogIndex,
            LeaderId = Engine.CurrentLeader,
            Message = "Cannot accept append entries from a node outside my cluster. My topology id is: " + Engine.CurrentTopology.TopologyId,
            From = Engine.Name,
            ClusterTopologyId = Engine.CurrentTopology.TopologyId,
        };
    }

    if (req.Term < Engine.PersistentState.CurrentTerm)
    {
        var msg = string.Format(
            "Rejecting append entries because msg term {0} is lower then current term: {1}",
            req.Term, Engine.PersistentState.CurrentTerm);
        _log.Info(msg);
        return new AppendEntriesResponse
        {
            Success = false,
            CurrentTerm = Engine.PersistentState.CurrentTerm,
            LastLogIndex = lastLogIndex,
            LeaderId = Engine.CurrentLeader,
            Message = msg,
            From = Engine.Name,
            ClusterTopologyId = Engine.CurrentTopology.TopologyId,
        };
    }

    if (req.Term > Engine.PersistentState.CurrentTerm)
    {
        Engine.UpdateCurrentTerm(req.Term, req.From);
    }

    if (Engine.CurrentLeader == null || req.From.Equals(Engine.CurrentLeader) == false)
    {
        Engine.CurrentLeader = req.From;
        Engine.SetState(RaftEngineState.Follower);
    }

    var prevTerm = Engine.PersistentState.TermFor(req.PrevLogIndex) ?? 0;
    if (prevTerm != req.PrevLogTerm)
    {
        // Report a midpoint so the sender has a hint about where our logs may still agree.
        var midpointIndex = req.PrevLogIndex / 2;
        var midpointTerm = Engine.PersistentState.TermFor(midpointIndex) ?? 0;
        var msg = $"Rejecting append entries because msg previous term {req.PrevLogTerm} is not the same as the persisted current term {prevTerm}" +
                  $" at log index {req.PrevLogIndex}. Midpoint index {midpointIndex}, midpoint term: {midpointTerm}";
        _log.Info(msg);
        return new AppendEntriesResponse
        {
            Success = false,
            CurrentTerm = Engine.PersistentState.CurrentTerm,
            LastLogIndex = req.PrevLogIndex,
            Message = msg,
            LeaderId = Engine.CurrentLeader,
            MidpointIndex = midpointIndex,
            MidpointTerm = midpointTerm,
            From = Engine.Name,
            ClusterTopologyId = Engine.CurrentTopology.TopologyId,
        };
    }

    LastHeartbeatTime = DateTime.UtcNow;
    LastMessageTime = DateTime.UtcNow;

    var appendEntriesResponse = new AppendEntriesResponse
    {
        Success = true,
        CurrentTerm = Engine.PersistentState.CurrentTerm,
        From = Engine.Name,
        ClusterTopologyId = Engine.CurrentTopology.TopologyId,
    };

    if (req.Entries.Length > 0)
    {
        if (_log.IsDebugEnabled)
        {
            _log.Debug("Appending log (persistant state), entries count: {0} (node state = {1})", req.Entries.Length, Engine.State);
            foreach (var logEntry in req.Entries)
            {
                _log.Debug("Entry {0} (term {1})", logEntry.Index, logEntry.Term);
            }
        }

        // It is possible to receive the same entries more than once (for example, if we took
        // longer than a heartbeat to process a message). In that case our log already holds
        // them, and truncating and re-adding them every time would be wasteful. So we count
        // the leading entries whose index/term already match our log and skip over them.
        var skip = 0;
        for (int i = 0; i < req.Entries.Length; i++)
        {
            var termForEntry = Engine.PersistentState.TermFor(req.Entries[i].Index) ?? -1;
            if (termForEntry != req.Entries[i].Term)
            {
                break;
            }
            skip++;
        }

        var topologyChange = req.Entries.Skip(skip).LastOrDefault(x => x.IsTopologyChange == true);

        // The topology entry is deserialized once and the result is shared by the
        // "removed from cluster" check below and by the topology switch further down.
        object deserializedCommand = null;
        TopologyChangeCommand topologyChangeCommand = null;
        if (topologyChange != null)
        {
            deserializedCommand = Engine.PersistentState.CommandSerializer.Deserialize(topologyChange.Data);
            topologyChangeCommand = deserializedCommand as TopologyChangeCommand;

            if (topologyChangeCommand != null &&
                topologyChangeCommand.Requested.AllNodes.Select(x => x.Name).Contains(Engine.Options.SelfConnection.Name) == false)
            {
                _log.Warn("Got topology without self, disconnecting from the leader, clearing topology and moving to leader state");

                // Start over as a cluster of one, under a brand new topology id.
                var tcc = new TopologyChangeCommand
                {
                    Requested = new Topology(Guid.NewGuid(), new[] { Engine.Options.SelfConnection }, new List<NodeConnectionInfo>(), new List<NodeConnectionInfo>())
                };
                Engine.PersistentState.SetCurrentTopology(tcc.Requested, 0L);
                Engine.StartTopologyChange(tcc);
                Engine.CommitTopologyChange(tcc);
                Engine.SetState(RaftEngineState.Leader);

                return new AppendEntriesResponse
                {
                    Success = true,
                    CurrentTerm = Engine.PersistentState.CurrentTerm,
                    LastLogIndex = lastLogIndex,
                    Message = "Leaving cluster, because received topology from the leader that didn't contain us",
                    From = Engine.Name,
                    ClusterTopologyId = req.ClusterTopologyId, // we send this "older" ID, so the leader won't reject us
                };
            }
        }

        if (skip != req.Entries.Length)
        {
            Engine.PersistentState.AppendToLog(Engine, req.Entries.Skip(skip), req.PrevLogIndex + skip);
        }
        else
        {
            // Everything was skipped, which is fine, but hint to the leader that we are more
            // up to date than it thinks.
            var lastReceivedIndex = req.Entries[req.Entries.Length - 1].Index;
            appendEntriesResponse.MidpointIndex = lastReceivedIndex + (lastLogIndex - lastReceivedIndex) / 2;
            appendEntriesResponse.MidpointTerm = Engine.PersistentState.TermFor(appendEntriesResponse.MidpointIndex.Value) ?? 0;

            _log.Info($"Got {req.Entries.Length} entires from index {req.Entries[0].Index} with term {req.Entries[0].Term} skipping all. " +
                      $"Setting midpoint index to {appendEntriesResponse.MidpointIndex} with term {appendEntriesResponse.MidpointTerm}.");
        }

        // The latest topology change takes effect as soon as we see it, even before it is
        // committed; see the Raft spec, section 6: "a server always uses the latest
        // configuration in its log, regardless of whether the entry is committed".
        if (topologyChange != null)
        {
            if (topologyChangeCommand == null)
            {
                // Precaution that should never trigger; if it does, it is a serious issue that
                // must be fixed immediately. A null deserialization result is reported as
                // "null" instead of failing with a NullReferenceException on GetType().
                var actualType = deserializedCommand == null ? "null" : deserializedCommand.GetType().ToString();
                throw new InvalidOperationException(@"Log entry that is marked with IsTopologyChange should be of type TopologyChangeCommand. Instead, it is of type: " + actualType + ". It is probably a bug!");
            }

            _log.Info("Topology change started (TopologyChangeCommand committed to the log): {0}", topologyChangeCommand.Requested);
            Engine.PersistentState.SetCurrentTopology(topologyChangeCommand.Requested, topologyChange.Index);
            Engine.StartTopologyChange(topologyChangeCommand);
        }
    }

    var lastIndex = req.Entries.Length == 0
        ? lastLogIndex
        : req.Entries[req.Entries.Length - 1].Index;

    try
    {
        // Never commit past what the leader has committed, nor past what this request covers.
        var nextCommitIndex = Math.Min(req.LeaderCommit, lastIndex);
        if (nextCommitIndex > Engine.CommitIndex)
        {
            CommitEntries(req.Entries, nextCommitIndex);
        }

        appendEntriesResponse.LastLogIndex = lastLogIndex;
        return appendEntriesResponse;
    }
    catch (Exception e)
    {
        return new AppendEntriesResponse
        {
            Success = false,
            CurrentTerm = Engine.PersistentState.CurrentTerm,
            LastLogIndex = lastLogIndex,
            Message = "Failed to apply new entries. Reason: " + e,
            From = Engine.Name,
            ClusterTopologyId = Engine.CurrentTopology.TopologyId,
        };
    }
}
/// <summary>
/// Handles a request to install a snapshot streamed by another node.
/// </summary>
/// <remarks>
/// The request is rejected synchronously when a snapshot installation is already running,
/// when the message fails <c>FromOurTopology</c>, or when the snapshot is older than the
/// last local log entry. Otherwise the installation runs on a background task and the
/// outcome is sent through <c>context.Reply</c> from the event loop.
/// If the installation throws, the reply carries <c>Success = false</c> together with the
/// failure reason, so the sender is never told that a failed installation succeeded.
/// </remarks>
/// <param name="context">Message context used to marshal work onto the event loop and to reply.</param>
/// <param name="req">The snapshot installation request.</param>
/// <param name="stream">Stream holding the snapshot data.</param>
/// <returns>
/// A failure response when the request is rejected up front; <c>null</c> when the installation
/// was started (the real response is then delivered via <c>context.Reply</c>).
/// </returns>
public override InstallSnapshotResponse Handle(MessageContext context, InstallSnapshotRequest req, Stream stream)
{
    if (_installingSnapshot != null)
    {
        return new InstallSnapshotResponse
        {
            Success = false,
            Message = "Cannot install snapshot because we are already installing a snapshot",
            CurrentTerm = Engine.PersistentState.CurrentTerm,
            From = Engine.Name,
            ClusterTopologyId = Engine.CurrentTopology.TopologyId,
            LastLogIndex = Engine.PersistentState.LastLogEntry().Index
        };
    }

    if (FromOurTopology(req) == false)
    {
        _log.Info("Got an install snapshot message outside my cluster topology (id: {0}), ignoring", req.ClusterTopologyId);

        return new InstallSnapshotResponse
        {
            Success = false,
            Message = "Cannot install snapshot because the cluster topology id doesn't match, mine is: " + Engine.CurrentTopology.TopologyId,
            CurrentTerm = Engine.PersistentState.CurrentTerm,
            From = Engine.Name,
            ClusterTopologyId = Engine.CurrentTopology.TopologyId,
            LastLogIndex = Engine.PersistentState.LastLogEntry().Index
        };
    }

    var lastLogEntry = Engine.PersistentState.LastLogEntry();
    if (req.Term < lastLogEntry.Term || req.LastIncludedIndex < lastLogEntry.Index)
    {
        // NOTE(review): only this rejection path disposes the stream; the two above do not -
        // confirm who owns the stream in those cases.
        stream.Dispose();

        return new InstallSnapshotResponse
        {
            From = Engine.Name,
            ClusterTopologyId = Engine.CurrentTopology.TopologyId,
            CurrentTerm = lastLogEntry.Term,
            LastLogIndex = lastLogEntry.Index,
            Message = string.Format("Snapshot is too old (term {0} index {1}) while we have (term {2} index {3})",
                req.Term, req.LastIncludedIndex, lastLogEntry.Term, lastLogEntry.Index),
            Success = false
        };
    }

    _log.Info("Received InstallSnapshotRequest from {0} until term {1} / {2}", req.From, req.LastIncludedTerm, req.LastIncludedIndex);

    Engine.OnSnapshotInstallationStarted();

    // Applying a snapshot can take a long time, so it runs off the event loop.
    _installingSnapshot = Task.Run(() =>
    {
        Exception failure = null;
        try
        {
            Engine.StateMachine.ApplySnapshot(req.LastIncludedTerm, req.LastIncludedIndex, stream);
            Engine.PersistentState.MarkSnapshotFor(req.LastIncludedIndex, req.LastIncludedTerm, int.MaxValue);
            Engine.PersistentState.SetCurrentTopology(req.Topology, req.LastIncludedIndex);
            var tcc = new TopologyChangeCommand { Requested = req.Topology };
            Engine.StartTopologyChange(tcc);
            Engine.CommitTopologyChange(tcc);
        }
        catch (Exception e)
        {
            failure = e;

            // The first placeholder is the term, so pass the snapshot's term (not its index twice).
            _log.Warn(string.Format("Failed to install snapshot term {0} index {1}", req.LastIncludedTerm, req.LastIncludedIndex), e);
            context.ExecuteInEventLoop(() =>
            {
                _installingSnapshot = null;
            });
        }

        // Finishing on the event loop keeps these state changes single threaded.
        context.ExecuteInEventLoop(() =>
        {
            Engine.UpdateCurrentTerm(req.Term, req.From); // implicitly put us in follower state

            if (failure != null)
            {
                // Same state transitions as on success, but report the failure truthfully
                // instead of claiming the snapshot's last included index.
                Engine.OnSnapshotInstallationEnded(req.Term);

                context.Reply(new InstallSnapshotResponse
                {
                    Success = false,
                    Message = "Failed to install snapshot. Reason: " + failure,
                    CurrentTerm = Engine.PersistentState.CurrentTerm,
                    From = Engine.Name,
                    ClusterTopologyId = Engine.CurrentTopology.TopologyId,
                    LastLogIndex = Engine.PersistentState.LastLogEntry().Index
                });
                return;
            }

            _log.Info("Updating the commit index to the snapshot last included index of {0}", req.LastIncludedIndex);
            Engine.OnSnapshotInstallationEnded(req.Term);

            context.Reply(new InstallSnapshotResponse
            {
                From = Engine.Name,
                ClusterTopologyId = Engine.CurrentTopology.TopologyId,
                CurrentTerm = req.Term,
                LastLogIndex = req.LastIncludedIndex,
                Success = true
            });
        });
    });

    return null;
}
/// <summary>
/// Makes the requested topology the current one, marks a topology change as in progress,
/// and raises <c>OnTopologyChanging</c>.
/// </summary>
/// <param name="tcc">The topology change being started.</param>
public void StartTopologyChange(TopologyChangeCommand tcc)
{
    var requested = tcc.Requested;
    Interlocked.Exchange(ref _currentTopology, requested);

    // The marker is the task of a throwaway completion source; no reference to the source
    // itself is kept here.
    var inProgressMarker = new TaskCompletionSource<object>().Task;
    Interlocked.Exchange(ref _changingTopology, inProgressMarker);

    OnTopologyChanging(tcc);
}