Esempio n. 1
0
		/// <summary>
		/// Tells every node that exists in the previous topology but not in the requested one
		/// that it is no longer part of the cluster.
		/// </summary>
		/// <param name="tcc">The topology change command carrying the previous and requested topologies.</param>
		private void OnTopologyChanged(TopologyChangeCommand tcc)
		{
			// Without a previous topology there is nothing to diff against, so nobody was removed.
			if (tcc.Previous == null)
				return;

			// Informing removed servers is a courtesy rather than a requirement: the cluster
			// rejects messages from nodes that it does not consider members anyway.
			var droppedNames = tcc.Previous.AllNodeNames
				.Except(tcc.Requested.AllNodeNames)
				.ToList();

			foreach (var droppedName in droppedNames)
			{
				var droppedNode = tcc.Previous.GetNodeByName(droppedName);
				if (droppedNode != null)
				{
					// First push the latest entries, which include the topology removal entry itself.
					SendEntriesToPeer(droppedNode);

					// Then, regardless of the above, send an explicit disconnection message so the
					// node can shut down gracefully if it is reachable.
					Engine.Transport.Send(droppedNode, new DisconnectedFromCluster
					{
						From = Engine.Name,
						ClusterTopologyId = Engine.CurrentTopology.TopologyId,
						Term = Engine.PersistentState.CurrentTerm
					});
				}
			}
		}
	    /// <summary>
	    /// Decides whether a message belongs to this node's cluster topology.
	    /// </summary>
	    /// <param name="msg">The incoming message whose topology id is checked.</param>
	    /// <returns>
	    /// <c>true</c> when the message's topology id equals the current one, or when this node has
	    /// no topology yet (empty id and no voters) and therefore adopts the message's topology id;
	    /// <c>false</c> otherwise.
	    /// </returns>
	    protected bool FromOurTopology(BaseMessage msg)
	    {
	        if (msg.ClusterTopologyId == Engine.CurrentTopology.TopologyId)
	            return true;

	        // The ids differ. That is still acceptable if we have _no_ topology at all,
	        // in which case the incoming topology id is accepted immediately.
	        var hasNoTopology = Engine.CurrentTopology.TopologyId == Guid.Empty &&
	                            Engine.CurrentTopology.HasVoters == false;
	        if (hasNoTopology == false)
	            return false;

	        var adoptCommand = new TopologyChangeCommand
	        {
	            Requested = new Topology(msg.ClusterTopologyId)
	        };

	        // Start and commit back to back: the new topology id takes effect right away.
	        Engine.StartTopologyChange(adoptCommand);
	        Engine.CommitTopologyChange(adoptCommand);
	        return true;
	    }
Esempio n. 3
0
		/// <summary>
		/// Begins a topology change: the requested topology becomes the current one, the
		/// "topology is changing" task is replaced, and the TopologyChanging event is raised.
		/// </summary>
		/// <param name="tcc">The topology change command whose requested topology is applied.</param>
		internal void StartTopologyChange(TopologyChangeCommand tcc)
		{
			// Atomically publish the requested topology as the current one.
			Interlocked.Exchange(ref _currentTopology, tcc.Requested);

			// The completion source is not retained, so this task is never completed from here;
			// it only serves as a non-null "change in progress" marker.
			Interlocked.Exchange(
				ref _changingTopology,
				new TaskCompletionSource<object>().Task);

			OnTopologyChanging(tcc);
		}
Esempio n. 4
0
		/// <summary>
		/// Raises the TopologyChanging event. An exception thrown by a subscriber is logged
		/// and not propagated to the caller.
		/// </summary>
		/// <param name="tcc">The topology change command; it is not passed on to subscribers.</param>
		private void OnTopologyChanging(TopologyChangeCommand tcc)
		{
			// Copy the delegate to a local so the null check and the call use the same value.
			var subscribers = TopologyChanging;
			if (subscribers == null)
				return;

			try
			{
				subscribers();
			}
			catch (Exception e)
			{
				_log.Error("Error on raising TopologyChanging event", e);
			}
		}
Esempio n. 5
0
		/// <summary>
		/// Logs the notification and raises the TopologyChanged event with the given command.
		/// An exception thrown by a subscriber is logged and not propagated to the caller.
		/// </summary>
		/// <param name="cmd">The topology change command handed to subscribers.</param>
		protected virtual void OnTopologyChanged(TopologyChangeCommand cmd)
		{
			_log.Info("OnTopologyChanged() - " + Name);

			// Copy the delegate to a local so the null check and the call use the same value.
			var subscribers = TopologyChanged;
			if (subscribers == null)
				return;

			try
			{
				subscribers(cmd);
			}
			catch (Exception e)
			{
				_log.Error("Error on raising TopologyChanged event", e);
			}
		}
Esempio n. 6
0
		/// <summary>
		/// Completes a topology change. Clears the "topology is changing" task, then either
		/// turns this node into a follower with no known leader (when the change removed it
		/// from the topology) or raises OnTopologyChanged.
		/// </summary>
		/// <param name="tcc">The topology change command being committed.</param>
		public void CommitTopologyChange(TopologyChangeCommand tcc)
		{
			// The in-progress task is cleared _before_ OnTopologyChanged fires, because that task
			// is what the interface uses to track the progress of topology changes.
			Interlocked.Exchange(ref _changingTopology, null);

			// When there was no topology before and the command merely accepts a new topology id,
			// tcc.Previous is null. That is not a removal, so this node is treated as remaining.
			var droppedFromCluster = !tcc.Requested.Contains(Name) &&
									 tcc.Previous != null &&
									 tcc.Previous.Contains(Name);

			if (droppedFromCluster == false)
			{
				if (_log.IsInfoEnabled)
				{
					var firstTimeNote = tcc.Previous == null
						? ", Previous topology was null - perhaps it is setting topology for the first time?"
						: String.Empty;
					_log.Info("Finished applying new topology: {0}{1}", _currentTopology, firstTimeNote);
				}

				OnTopologyChanged(tcc);
				return;
			}

			// Removed from the cluster: forget the leader and fall back to follower state.
			// In this branch OnTopologyChanged is not raised.
			_log.Debug("This node is being removed from topology, setting its state to follower, it will be idle until a leader will join it to the cluster again");
			CurrentLeader = null;
			SetState(RaftEngineState.Follower);
		}
Esempio n. 7
0
		/// <summary>
		/// Starts a cluster topology change from the leader.
		/// </summary>
		/// <param name="requested">The topology the cluster should move to.</param>
		/// <returns>The completion task of the topology change command that was appended.</returns>
		/// <exception cref="InvalidOperationException">
		/// Thrown when this node is not the leader, or when another topology change is already in progress.
		/// </exception>
		internal Task ModifyTopology(Topology requested)
		{
			if (State != RaftEngineState.Leader)
			{
				var leaderDescription = CurrentLeader ?? "no leader";
				throw new InvalidOperationException(
					"Cannot modify topology from a non leader node, current leader is: " + leaderDescription);
			}

			var tcc = new TopologyChangeCommand
			{
				Completion = new TaskCompletionSource<object>(),
				Requested = requested,
				Previous = _currentTopology,
				BufferCommand = false,
			};

			// Claim the "changing topology" slot only if it is currently empty.
			var changeAlreadyRunning = Interlocked.CompareExchange(ref _changingTopology, tcc.Completion.Task, null);
			if (changeAlreadyRunning != null)
			{
				throw new InvalidOperationException(
					"Cannot change the cluster topology while another topology change is in progress");
			}

			try
			{
				_log.Debug("Topology change started on leader");
				StartTopologyChange(tcc);
				AppendCommand(tcc);
				return tcc.Completion.Task;
			}
			catch (Exception)
			{
				// Release the slot so a later topology change is not blocked by this failed attempt.
				Interlocked.Exchange(ref _changingTopology, null);
				throw;
			}
		}
		/// <summary>
		/// Handles a notification that this node was disconnected from the cluster. The message
		/// is ignored unless it comes from our topology, is not from an older term, and was sent
		/// by the current leader. Otherwise the topology is reset to one holding only the
		/// message's topology id, and the node becomes a follower.
		/// </summary>
		/// <param name="req">The disconnection notification.</param>
		public void Handle(DisconnectedFromCluster req)
		{
			if (!FromOurTopology(req))
			{
				_log.Info("Got a disconnection notification message outside my cluster topology (id: {0}), ignoring", req.ClusterTopologyId);
				return;
			}

			if (req.Term < Engine.PersistentState.CurrentTerm)
			{
				_log.Info("Got disconnection notification from an older term, ignoring");
				return;
			}

			if (req.From != Engine.CurrentLeader)
			{
				_log.Info("Got disconnection notification from {0}, who isn't the current leader, ignoring.",
					req.From);
				return;
			}

			_log.Warn("Got disconnection notification  from the leader, clearing topology and moving to idle follower state");

			var resetCommand = new TopologyChangeCommand
			{
				Requested = new Topology(req.ClusterTopologyId)
			};

			// Persist the reset topology first, then apply it to the engine.
			Engine.PersistentState.SetCurrentTopology(resetCommand.Requested, 0L);
			Engine.StartTopologyChange(resetCommand);
			Engine.CommitTopologyChange(resetCommand);

			Engine.SetState(RaftEngineState.Follower);
		}
		/// <summary>
		/// Handles a request to install a snapshot. The request is rejected synchronously when a
		/// snapshot is already being installed, when the message is not from our topology, or when
		/// the snapshot is older than what we already have. Otherwise the snapshot is applied on a
		/// background task and the reply is sent later through <paramref name="context"/>.
		/// </summary>
		/// <param name="context">Message context used to marshal work back to the event loop and to send the deferred reply.</param>
		/// <param name="req">The install snapshot request.</param>
		/// <param name="stream">Stream carrying the snapshot data.</param>
		/// <returns>
		/// A failure response when the request is rejected up front; <c>null</c> when the installation
		/// was started, in which case the outcome is reported via <c>context.Reply</c>.
		/// </returns>
		public override InstallSnapshotResponse Handle(MessageContext context, InstallSnapshotRequest req, Stream stream)
		{
			// NOTE(review): the stream is disposed only on the "too old" rejection below, not on the
			// two rejections that precede it - confirm who owns the stream in those cases.
			if (_installingSnapshot != null)
			{
				return new InstallSnapshotResponse
				{
					Success = false,
					Message = "Cannot install snapshot because we are already installing a snapshot",
					CurrentTerm = Engine.PersistentState.CurrentTerm,
					From = Engine.Name,
					ClusterTopologyId = Engine.CurrentTopology.TopologyId,
					LastLogIndex = Engine.PersistentState.LastLogEntry().Index
				};
			}

			if (FromOurTopology(req) == false)
			{
				_log.Info("Got an install snapshot message outside my cluster topology (id: {0}), ignoring", req.ClusterTopologyId);

				return new InstallSnapshotResponse
				{
					Success = false,
					Message = "Cannot install snapshot because the cluster topology id doesn't match, mine is: " + Engine.CurrentTopology.TopologyId,
					CurrentTerm = Engine.PersistentState.CurrentTerm,
					From = Engine.Name,
					ClusterTopologyId = Engine.CurrentTopology.TopologyId,
					LastLogIndex = Engine.PersistentState.LastLogEntry().Index
				};
			}

			var lastLogEntry = Engine.PersistentState.LastLogEntry();
			if (req.Term < lastLogEntry.Term || req.LastIncludedIndex < lastLogEntry.Index)
			{
				stream.Dispose();

				return new InstallSnapshotResponse
				{
					From = Engine.Name,
					ClusterTopologyId = Engine.CurrentTopology.TopologyId,
					CurrentTerm = lastLogEntry.Term,
					LastLogIndex = lastLogEntry.Index,
					Message = string.Format("Snapshot is too old (term {0} index {1}) while we have (term {2} index {3})",
						req.Term, req.LastIncludedIndex, lastLogEntry.Term, lastLogEntry.Index),
					Success = false
				};
			}

			_log.Info("Received InstallSnapshotRequest from {0} until term {1} / {2}", req.From, req.LastIncludedTerm, req.LastIncludedIndex);

			Engine.OnSnapshotInstallationStarted();

			// this can be a long running task
			_installingSnapshot = Task.Run(() =>
			{
				try
				{
					Engine.StateMachine.ApplySnapshot(req.LastIncludedTerm, req.LastIncludedIndex, stream);
					Engine.PersistentState.MarkSnapshotFor(req.LastIncludedIndex, req.LastIncludedTerm, int.MaxValue);
					Engine.PersistentState.SetCurrentTopology(req.Topology, req.LastIncludedIndex);
					var tcc = new TopologyChangeCommand { Requested = req.Topology };
					Engine.StartTopologyChange(tcc);
					Engine.CommitTopologyChange(tcc);
				}
				catch (Exception e)
				{
					// Log the actual term; previously the index was printed for both placeholders.
					_log.Warn(string.Format("Failed to install snapshot term {0} index {1}", req.LastIncludedTerm, req.LastIncludedIndex), e);

					var failureMessage = "Failed to install snapshot: " + e.Message;

					// Engine state is touched only from the event loop, to stay single threaded.
					context.ExecuteInEventLoop(() =>
					{
						// Allow a later install snapshot request to be accepted.
						_installingSnapshot = null;

						context.Reply(new InstallSnapshotResponse
						{
							Success = false,
							Message = failureMessage,
							CurrentTerm = Engine.PersistentState.CurrentTerm,
							From = Engine.Name,
							ClusterTopologyId = Engine.CurrentTopology.TopologyId,
							LastLogIndex = Engine.PersistentState.LastLogEntry().Index
						});
					});

					// Do not fall through: the code below reports a successful installation.
					// NOTE(review): OnSnapshotInstallationEnded is deliberately not raised for a failed
					// installation - confirm no subscriber relies on it being paired with "started".
					return;
				}

				// we are doing it this way to ensure that we are single threaded
				context.ExecuteInEventLoop(() =>
				{
					Engine.UpdateCurrentTerm(req.Term, req.From); // implicitly put us in follower state
					_log.Info("Updating the commit index to the snapshot last included index of {0}", req.LastIncludedIndex);
					Engine.OnSnapshotInstallationEnded(req.Term);

					context.Reply(new InstallSnapshotResponse
					{
						From = Engine.Name,
						ClusterTopologyId = Engine.CurrentTopology.TopologyId,
						CurrentTerm = req.Term,
						LastLogIndex = req.LastIncludedIndex,
						Success = true
					});
				});
			});

			return null;
		}