/// <summary>
/// Applies a membership change on the local shard and then sends the same change,
/// as a multicast membership-operation message, to every other active node of the shard.
/// </summary>
/// <param name="args">The membership change to apply locally and to broadcast.</param>
private void ChangeMembershipShardwide(MembershipChangeArgs args)
{
    Message broadcast = new Message
    {
        NeedsResponse = false,
        MessageType = MessageType.MembershipOperation
    };

    //Since, in order to handle deadlock scenarios, the message cannot be sent to the local node,
    //the local shard is notified through a direct call in its stead.
    LocalShard localShard = (LocalShard)_shard;
    localShard.OnMembershipChanged(args);

    broadcast.Payload = args;

    // Collect every active channel except the one whose address is this node's own.
    IList<Server> remoteNodes = new List<Server>();
    foreach (var channel in _shard.ActiveChannelsList)
    {
        if (channel.Address.Equals(_context.LocalAddress))
        {
            continue;
        }

        remoteNodes.Add(channel);
    }

    ShardMulticastRequest<ResponseCollection<object>, object> multicast =
        _shard.CreateMulticastRequest<ResponseCollection<object>, object>(remoteNodes, broadcast);

    // The responses are not inspected; the request is simply started and ended.
    multicast.EndExecute(multicast.BeginExecute());
}
/// <summary>
/// We need to ask the current primary to quit taking write ops
/// so that the left over ops can first be replicated
/// before the actual election mechanism begins.
/// </summary>
/// <returns>
/// The boolean response of the primary to the RestrictPrimary request; false when the
/// configuration of the local shard is unavailable or when no primary is currently known.
/// </returns>
private bool RequestPrimaryToStopOperations()
{
    ShardConfiguration sConfg = null;
    if (_clusterConfigMgr != null)
    {
        sConfg = _clusterConfigMgr.GetShardConfiguration(_context.LocalShardName);
    }

    if (sConfg == null)
    {
        return false;
    }

    // Read the membership once so the null checks and the later uses see the same object.
    var membership = _latestMembership;
    var primary = membership != null ? membership.Primary : null;
    if (primary == null)
    {
        // There is no primary to address the request to; previously this path ended in a
        // NullReferenceException while building the target address.
        return false;
    }

    MembershipChangeArgs args = new MembershipChangeArgs();
    args.ChangeType = MembershipChangeArgs.MembershipChangeType.RestrictPrimary;
    args.ServerName = _context.LocalAddress;
    args.ElectionId = membership.ElectionId;

    DatabaseMessage msg = new DatabaseMessage();
    msg.Payload = args;
    msg.NeedsResponse = true;
    msg.OpCode = OpCode.RestrictPrimary;
    msg.MessageType = MessageType.DBOperation;

    // The request is addressed to the primary's name on the port taken from the shard configuration.
    ShardRequestBase<bool> request = _shard.CreateUnicastRequest<bool>(
        new Server(new Address(primary.Name, sConfg.Port), Status.Running), msg);
    IAsyncResult result = request.BeginExecute();
    bool endResult = request.EndExecute(result);

    if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsInfoEnabled)
    {
        LoggerManager.Instance.ShardLogger.Info("MembershipMgr.RequestPrimaryToStopOperations()", "Requested primary to stop taking write operations. Primary response: " + endResult.ToString());
    }

    return endResult;
}
/// <summary>
/// Creates a new <see cref="MembershipChangeArgs"/> carrying the same ServerName,
/// ElectionId and ChangeType values as this instance.
/// </summary>
/// <returns>The newly created copy, typed as <see cref="object"/>.</returns>
public object Clone()
{
    // The property values are assigned as they are; the objects they refer to are not cloned here.
    return new MembershipChangeArgs
    {
        ServerName = this.ServerName,
        ElectionId = this.ElectionId,
        ChangeType = this.ChangeType
    };
}
/// <summary>
/// Handles a TimeoutOnRestrictedPrimary notification: starts the takeover retry task
/// (creating it on first use) and stops the takeover election task if it has been started.
/// </summary>
/// <param name="args">The membership change notification; may be null.</param>
/// <returns>
/// true when <paramref name="args"/> is a TimeoutOnRestrictedPrimary change and was handled;
/// false for null or any other change type.
/// </returns>
internal bool AbortTakeoverMechanismTask(MembershipChangeArgs args)
{
    // Anything other than a restricted-primary timeout is not this method's concern.
    if (args == null || args.ChangeType != MembershipChangeArgs.MembershipChangeType.TimeoutOnRestrictedPrimary)
    {
        return false;
    }

    var shardLogger = LoggerManager.Instance.ShardLogger;
    if (shardLogger != null && shardLogger.IsInfoEnabled)
    {
        shardLogger.Info("MembershipMgr.AbortTakeoverMechanismTask()", "Takeover unsuccessful. Beginning takeover retry task.");
    }

    // The retry task is created lazily and then started.
    if (_retryTask == null)
    {
        _retryTask = new TakeoverRetryTask(this);
    }
    _retryTask.Start();

    // The retry task is started before the running election task is stopped.
    bool electionTaskRunning = _takeOverElectionTask != null && _takeOverElectionTask.IsStarted();
    if (electionTaskRunning)
    {
        _takeOverElectionTask.Stop();
    }

    return true;
}
/// <summary>
/// Handles a ForcefullyDemotePrimary request by running the strategy's election mechanism
/// with <c>Activity.ForcefulPrimaryDemotion</c> and then checking that the local node is
/// no longer recorded as the primary in the latest membership.
/// </summary>
/// <param name="args">
/// The demotion request. It is only acted upon when it is non-null, of type
/// ForcefullyDemotePrimary and carries a non-null ServerName.
/// </param>
/// <returns>
/// true when, after the election mechanism ran, the latest membership has no primary or a
/// primary whose name differs from the local IP address; false in every other case,
/// including when an exception was caught.
/// </returns>
internal bool OnForcefulPrimaryDemotion(MembershipChangeArgs args)
{
    try
    {
        bool isDemotionRequest = args != null
            && args.ChangeType == MembershipChangeArgs.MembershipChangeType.ForcefullyDemotePrimary
            && args.ServerName != null;

        if (isDemotionRequest && _strategy != null)
        {
            // The demotion and the verification of its outcome happen under the same lock.
            lock (_mutex)
            {
                _strategy.TriggerElectionMechanism(Activity.ForcefulPrimaryDemotion, new Server(args.ServerName, Status.Running), _heartbeatReporting, _latestMembership);

                if (_latestMembership != null && (_latestMembership.Primary == null || _latestMembership.Primary.Name != _context.LocalAddress.IpAddress.ToString()))
                {
                    if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
                    {
                        LoggerManager.Instance.ShardLogger.Debug("MembershipManager.OnForcefulPrimaryDemotion()", "Call for forceful demotion of the primary node received. Primary demoted successfully.");
                    }
                    return true;
                }
            }
        }

        if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsInfoEnabled)
        {
            LoggerManager.Instance.ShardLogger.Info("MembershipManager.OnForcefulPrimaryDemotion()", "Forceful primary demotion unsuccessful.");
        }
        return false;
    }
    catch (Exception e)
    {
        // Failures are reported to the caller as an unsuccessful demotion rather than rethrown.
        if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsErrorEnabled)
        {
            // e.ToString() carries the exception type and message in addition to the stack
            // trace; logging StackTrace alone dropped the actual reason for the failure.
            LoggerManager.Instance.ShardLogger.Error("MembershipMgr.OnForcefulPrimaryDemotion(): ", e.ToString());
        }
        return false;
    }
}
/// <summary>
/// Reacts to a membership related activity of the local shard (node joining, node leaving,
/// general or takeover elections, config server disconnection, forceful primary demotion)
/// and publishes the resulting membership change, either on the local shard only or shard wide.
/// </summary>
/// <param name="activity">The activity that triggered the call; it selects the branch to run.</param>
/// <param name="server">
/// The node the activity is about. NodeJoining and NodeLeaving return immediately when it is null.
/// </param>
/// <param name="heartbeatReport">
/// The heartbeat report of the local shard; the keys of its report table are used as the list of
/// active nodes. May be null, in which case no election is attempted.
/// </param>
/// <param name="existingMembership">
/// The membership currently known to the caller; a null value is replaced by a new, empty Membership.
/// </param>
public void TriggerElectionMechanism(Activity activity, Server server, LocalShardHeartbeatReporting heartbeatReport, Membership existingMembership)
{
    LoggerManager.Instance.SetThreadContext(new LoggerContext() { ShardName = _context.LocalShardName != null ? _context.LocalShardName : "", DatabaseName = "" });

    if (existingMembership == null)
    {
        existingMembership = new Membership();
    }

    ShardConfiguration sConfig = null;
    //Get the shard configuration
    if (_clusterConfigMgr != null)
    {
        sConfig = _clusterConfigMgr.GetShardConfiguration(_context.LocalShardName);
    }

    IList<Address> activeNodes = null;
    MembershipChangeArgs args = new MembershipChangeArgs();
    ServerNodes staticServerNodes = null;

    if (sConfig == null || sConfig.Servers == null)
    {
        if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsWarnEnabled)
        {
            LoggerManager.Instance.ShardLogger.Warn("ElectionBasedMembershipStrategy.TiggerElectionMechanism() ", "The shard " + _context.LocalShardName + " does not exist in the configuration.");
        }
        return;
    }

    staticServerNodes = sConfig.Servers;
    ElectionResult result = null;

    // activeNodes stays null when no heartbeat report was supplied.
    if (heartbeatReport != null)
    {
        activeNodes = heartbeatReport.GetReportTable.Keys.ToList();
    }

    // Only used for logging: the node the activity refers to, defaulting to the local node.
    Address activityNode = null;
    if (server == null)
    {
        activityNode = _context.LocalAddress;
    }
    else
    {
        activityNode = server.Address;
    }

    switch (activity)
    {
        case Activity.NodeJoining:
            if (server == null)
            {
                return;
            }

            //On node join, we need to get membership from the config server for the first time.
            Membership csMembership = _context.ConfigurationSession.GetMembershipInfo(_context.ClusterName, _context.LocalShardName);
            ServerNode joiningNode = sConfig.Servers.GetServerNode(server.Address.IpAddress.ToString());

            // If the added node is configured while the cluster is up and running, do the following.
            if (joiningNode == null)
            {
                if (_clusterConfigMgr != null)
                {
                    _clusterConfigMgr.UpdateClusterConfiguration();
                    sConfig = _clusterConfigMgr.GetShardConfiguration(_context.LocalShardName);
                }
                if (sConfig == null)
                {
                    if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsWarnEnabled)
                    {
                        LoggerManager.Instance.ShardLogger.Warn("ElectionBasedMembershipStrategy.TriggerElectionMechanism() ", "The shard " + _context.LocalShardName + " does not exist in the configuration.");
                    }
                    return;
                }
                joiningNode = sConfig.Servers.GetServerNode(server.Address.IpAddress.ToString());
            }

            // Still unknown after refreshing the configuration: the node is not configured for this shard.
            if (joiningNode == null)
            {
                if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsWarnEnabled)
                {
                    LoggerManager.Instance.ShardLogger.Warn("ElectionBasedMembershipStrategy.TriggerElectionMechanism() ", "The node " + server.Address + " is not part of the configuration.");
                }
                return;
            }

            if (existingMembership == null || existingMembership.Servers == null || !existingMembership.Servers.Contains(joiningNode))
            {
                if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
                {
                    LoggerManager.Instance.ShardLogger.Debug("ElectBasedMemSt.TriggerElectMech", "Node joining activity triggered for " + activityNode);
                }
            }

            bool thisNodeIsPrimary = false;
            OperationId lastOpId = null;

            if (heartbeatReport != null && heartbeatReport.GetReportTable.ContainsKey(server.Address))
            {
                args.ServerName = _context.LocalAddress;
                args.ElectionId = null;
                args.ChangeType = MembershipChangeArgs.MembershipChangeType.NodeJoined;

                if (server.Address.Equals(_context.LocalAddress))
                {
                    _context.ConfigurationSession.ReportNodeJoining(_context.ClusterName, _context.LocalShardName, sConfig.Servers.GetServerNode(_context.LocalAddress.IpAddress.ToString()));

                    if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsInfoEnabled)
                    {
                        LoggerManager.Instance.ShardLogger.Info("electBasedMemSt.TriggerElectMech", server.Address + " reported its joining to the config server. ");
                    }

                    //if the primary is not null and the channel is not disconnected, it can be set here.
                    if ((existingMembership == null || existingMembership.Primary == null) && csMembership.Primary != null && _shard.ActiveChannelsList.Contains(new Server(new Address(csMembership.Primary.Name, sConfig.Port), Status.Initializing)) && ObeysMajorityRule(_shard.ActiveChannelsList.Count, sConfig.Servers.Nodes.Count))
                    {
                        args.ServerName = new Address(csMembership.Primary.Name, sConfig.Port);
                        args.ElectionId = csMembership.ElectionId;
                        args.ChangeType = MembershipChangeArgs.MembershipChangeType.PrimarySet;

                        //if the node which was lost comes back up before the CS or the nodes can declare it dead,
                        //it should resume its status as a primary. There should be no need for an election in this case.
                        if (args.ServerName.Equals(_context.LocalAddress))
                        {
                            thisNodeIsPrimary = true;
                        }
                    }
                }

                if (thisNodeIsPrimary)
                {
                    if (csMembership.ElectionId != null && LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsInfoEnabled)
                    {
                        LoggerManager.Instance.ShardLogger.Info("electBasedMemSt.TriggerElectMech", "election_id: " + csMembership.ElectionId.Id + " election time :" + csMembership.ElectionId.ElectionTime);
                    }
                    if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsInfoEnabled)
                    {
                        LoggerManager.Instance.ShardLogger.Info("electBasedMemSt.TriggerElectMech", "I am already declared primary");
                    }
                    lastOpId = LastOperationId;
                    ChangeMembershipShardwide(args);
                }
                else
                {
                    ((LocalShard)_shard).OnMembershipChanged(args);
                }

                if (server.Address.Equals(_context.LocalAddress))
                {
                    ServerNode sNode = sConfig.Servers.GetServerNode(_context.LocalAddress.IpAddress.ToString());
                    if (sNode == null)
                    {
                        if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsWarnEnabled)
                        {
                            // sNode is null on this path, so the message is built from the address that
                            // was looked up; reading sNode.Name here threw a NullReferenceException.
                            LoggerManager.Instance.ShardLogger.Warn("ElectionBasedMembershipStrategy.TriggerElectionMechanism() ", "The node " + _context.LocalAddress.IpAddress.ToString() + " does not exist in the configuration.");
                        }
                        return;
                    }
                    _context.ConfigurationSession.ReportHeartbeat(_context.ClusterName, _context.LocalShardName, sNode, existingMembership, lastOpId);
                }
            }
            else
            {
                // The joining node is not (yet) part of the heartbeat report. If it is recorded as the
                // primary and a majority cannot be established, the primary is demoted.
                if (existingMembership.Primary != null && existingMembership.Primary.Name.Equals(server.Address.IpAddress.ToString()))
                {
                    // NOTE(review): activeNodes is null when heartbeatReport is null, which is one of the
                    // ways to reach this branch; activeNodes.Count would then throw. Confirm the intended
                    // behaviour before guarding it.
                    if (sConfig.Servers == null || sConfig.Servers.Nodes == null || !ObeysMajorityRule(activeNodes.Count, sConfig.Servers.Nodes.Count))
                    {
                        _context.ConfigurationSession.SetNodeStatus(_context.ClusterName, _context.LocalShardName, existingMembership.Primary, NodeRole.None);
                        args.ChangeType = MembershipChangeArgs.MembershipChangeType.PrimaryDemoted;
                        args.ServerName = _context.LocalAddress;
                        ChangeMembershipShardwide(args);

                        if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsInfoEnabled)
                        {
                            LoggerManager.Instance.ShardLogger.Info("electBasedMemSt.TriggerElectMech", " Node addition activity occured. Primary node " + _context.LocalAddress.IpAddress.ToString() + " demoted.");
                        }
                        return;
                    }
                }
            }
            break;

        case Activity.NodeLeaving:
            if (server == null)
            {
                return;
            }

            // NOTE(review): activeNodes is null when no heartbeat report was supplied; this line would
            // then throw before the membership change is published. Confirm callers always pass a report.
            bool hasMajority = ObeysMajorityRule(activeNodes.Count, staticServerNodes.Nodes.Count);
            args.ServerName = server.Address;
            args.ChangeType = MembershipChangeArgs.MembershipChangeType.NodeLeft;
            _clusterConfigMgr.UpdateClusterConfiguration();

            if (existingMembership.Primary != null)
            {
                // if the existing primary is actually the node lost, we need to update the configuration.
                if (existingMembership.Primary.Name == server.Address.IpAddress.ToString())
                {
                    //if Primary leaves, it should be updated locally.
                    args.ChangeType = MembershipChangeArgs.MembershipChangeType.PrimaryLost;
                    if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
                    {
                        LoggerManager.Instance.ShardLogger.Debug("electBasedMemSt.TriggerElectMech", "Node leaving activity triggered for " + server.Address + " . Primary lost.");
                    }
                }
                // if the existing primary is the local node, we need to check for possible demotion of the current primary.
                else if (existingMembership.Primary.Name == _context.LocalAddress.IpAddress.ToString())
                {
                    if (!hasMajority)
                    {
                        _context.ConfigurationSession.SetNodeStatus(_context.ClusterName, _context.LocalShardName, existingMembership.Primary, NodeRole.None);
                        args.ChangeType = MembershipChangeArgs.MembershipChangeType.PrimaryDemoted;
                        args.ServerName = _context.LocalAddress;
                        ChangeMembershipShardwide(args);

                        if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
                        {
                            LoggerManager.Instance.ShardLogger.Debug("electBasedMemSt.TriggerElectMech", " Node leaving activity occurred. Primary node " + _context.LocalAddress.IpAddress.ToString() + " demoted.");
                        }
                        return;
                    }
                }
            }

            ((LocalShard)_shard).OnMembershipChanged(args);
            break;

        case Activity.GeneralElectionsTriggered:
        case Activity.TakeoverElectionsTriggered:
            // Without a heartbeat report there are no active nodes to establish a majority with (the
            // majority step below already requires activeNodes != null), so no election is attempted.
            // This guard also keeps the PrimaryExists() calls below from dereferencing a null report.
            if (heartbeatReport == null)
            {
                break;
            }

            // this is where the actual election mechanism takes place.
            //Step 1: if no node in the heartbeat table has a primary and there is no primary in the local node's membership, we proceed forward.
            //Else if there is a primary but this looks like the takeover election mechanism, we proceed along as well.
            if ((activity.Equals(Activity.GeneralElectionsTriggered) && !heartbeatReport.PrimaryExists() && existingMembership.Primary == null) || (activity.Equals(Activity.TakeoverElectionsTriggered) && heartbeatReport.PrimaryExists()))
            {
                //Step 2: we verify that this node has a majority of the shard nodes connected to it.
                if (activeNodes != null && ObeysMajorityRule(activeNodes.Count, staticServerNodes.Nodes.Count))
                {
                    //Step 3: Perform the initial sanity check. (Speculative phase)
                    if (ShouldIInitiateElection(heartbeatReport, activity))
                    {
                        if (existingMembership != null && existingMembership.Primary != null && activity == Activity.GeneralElectionsTriggered)
                        {
                            if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsInfoEnabled)
                            {
                                LoggerManager.Instance.ShardLogger.Info("electBasedMemSt.TriggerElectMech", "A primary has already been selected for " + _context.LocalShardName + " hence exiting the election mechanism.");
                            }
                            return;
                        }

                        //Step 4: The elections take place in real. (Authoritative Phase)
                        result = HoldElection(heartbeatReport, activity);
                        if (result != null)
                        {
                            if (result.PollingResult == ElectionResult.Result.PrimarySelected)
                            {
                                //if the shard is undergoing the takeover election mechanism, the old primary needs to
                                //be demoted first.
                                bool oldPrimaryDemoted = false;
                                if (activity == Activity.TakeoverElectionsTriggered)
                                {
                                    MembershipChangeArgs args2 = new MembershipChangeArgs();
                                    args2.ChangeType = MembershipChangeArgs.MembershipChangeType.ForcefullyDemotePrimary;
                                    args2.ServerName = _context.LocalAddress;
                                    args2.ElectionId = existingMembership.ElectionId;

                                    Message msg = new Message();
                                    msg.Payload = args2;
                                    msg.MessageType = MessageType.MembershipOperation;
                                    msg.NeedsResponse = true;

                                    // NOTE(review): existingMembership.Primary is not checked for null here; the
                                    // takeover condition above only looks at the heartbeat report.
                                    ShardRequestBase<bool> request = _shard.CreateUnicastRequest<bool>(new Server(new Address(existingMembership.Primary.Name, sConfig.Port), Status.Running), msg);
                                    IAsyncResult result2 = request.BeginExecute();
                                    oldPrimaryDemoted = request.EndExecute(result2);
                                }

                                //Submit the result to the CS.
                                if (activity == Activity.GeneralElectionsTriggered || (activity == Activity.TakeoverElectionsTriggered && oldPrimaryDemoted))
                                {
                                    _context.ConfigurationSession.SubmitElectionResult(_context.ClusterName.ToLower(), _context.LocalShardName.ToLower(), result);
                                }

                                // NOTE(review): everything below also runs for a takeover whose old primary did
                                // not confirm its demotion (result not submitted above), so PrimarySelected is
                                // still announced shard wide. Confirm whether that is intended.
                                if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
                                {
                                    LoggerManager.Instance.ShardLogger.Debug("electBasedMemSt.TriggerElectMech", "Election result submitted for shard " + _context.LocalShardName.ToString());
                                }

                                _context.ElectionResult = result;
                                args.ServerName = _context.LocalAddress;
                                args.ElectionId = result.ElectionId;
                                args.ChangeType = MembershipChangeArgs.MembershipChangeType.PrimarySelected;

                                //Once, the result is submitted, inform the shard nodes.
                                ChangeMembershipShardwide(args);
                                _context.ConfigurationSession.ReportHeartbeat(_context.ClusterName, _context.LocalShardName, result.ElectedPrimary, existingMembership, LastOperationId);
                            }

                            //Finally, end this round of elections.
                            _context.ConfigurationSession.EndElection(_context.ClusterName, _context.LocalShardName, result.ElectionId);
                        }
                    }
                }
            }
            break;

        case Activity.CSDisconnected:
            //this is called whenever a node loses connection with the config server.
            if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
            {
                LoggerManager.Instance.ShardLogger.Debug("ElectionBasedMembershipStrategy.TriggerElectionMechanism() ", "Config Server disconnected. ");
            }

            //if the number of configured nodes are even and the primary loses connection with the CS, it needs to demote itself.
            if (existingMembership != null && existingMembership.Primary != null && existingMembership.Primary.Name == _context.LocalAddress.IpAddress.ToString() && staticServerNodes.Nodes.Count % 2 == 0)
            {
                if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
                {
                    LoggerManager.Instance.ShardLogger.Debug("electBasedMemSt.TriggerElectMech", " Connection of the node " + _context.LocalAddress.ToString() + " with the config server is lost.");
                }

                args.ServerName = _context.LocalAddress;
                args.ElectionId = existingMembership.ElectionId;
                args.ChangeType = MembershipChangeArgs.MembershipChangeType.PrimaryDemoted;
                ChangeMembershipShardwide(args);

                if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsInfoEnabled)
                {
                    LoggerManager.Instance.ShardLogger.Info("electBasedMemSt.TriggerElectMech", " Primary node " + _context.LocalAddress.IpAddress.ToString() + " demoted because the primary lost connection with the CS.");
                }
            }
            break;

        case Activity.ForcefulPrimaryDemotion:
            // Only acts when the local node is the recorded primary; the change is applied on the
            // local shard only, not broadcast.
            if (existingMembership != null && existingMembership.Primary != null && existingMembership.Primary.Name == _context.LocalAddress.IpAddress.ToString())
            {
                _context.ConfigurationSession.SetNodeStatus(_context.ClusterName, _context.LocalShardName, existingMembership.Primary, NodeRole.None);
                args.ChangeType = MembershipChangeArgs.MembershipChangeType.PrimaryDemoted;
                args.ServerName = _context.LocalAddress;

                if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsInfoEnabled)
                {
                    LoggerManager.Instance.ShardLogger.Info("electBasedMemSt.TriggerElectMech", "Primary node " + _context.LocalAddress.IpAddress.ToString() + " demoted in order to complete the take over election mechanism. ");
                }

                ((LocalShard)_shard).OnMembershipChanged(args);
            }
            break;
    }
}
/// <summary>
/// Applies a membership change to the locally held latest membership: adds or removes the
/// affected server, sets or clears the primary and the election id, depending on the change type.
/// </summary>
/// <param name="args">
/// The change to apply. A null value is ignored. When its ServerName is null, the steps that
/// need a server (adding/removing a server, setting the primary) are skipped, while clearing
/// the primary and updating the election id still take place.
/// </param>
internal void UpdateLocalMembership(MembershipChangeArgs args)
{
    LoggerManager.Instance.SetThreadContext(new LoggerContext() { ShardName = _context.LocalShardName != null ? _context.LocalShardName : "", DatabaseName = "" });

    ShardConfiguration sConfig = null;
    if (_clusterConfigMgr != null)
    {
        sConfig = _clusterConfigMgr.GetShardConfiguration(_context.LocalShardName);
    }

    if (sConfig == null || sConfig.Servers == null)
    {
        if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsWarnEnabled)
        {
            LoggerManager.Instance.ShardLogger.Warn("MembershipManager.UpdateLocalMembership() ", "The shard (or the nodes of the shard) " + _context.LocalShardName + " does not exist in the configuration.");
        }
        return;
    }

    // Nothing to apply without a change description.
    if (args == null)
    {
        return;
    }

    // The rest of this method already treats ServerName as optional, so the lookup must not
    // dereference it unconditionally.
    bool hasServerName = args.ServerName != null;
    ServerNode affectedServer = null;
    if (hasServerName)
    {
        affectedServer = sConfig.Servers.GetServerNode(args.ServerName.IpAddress.ToString());
    }

    switch (args.ChangeType)
    {
        case MembershipChangeArgs.MembershipChangeType.NodeJoined:
            if (hasServerName)
            {
                lock (_mutexOnUpdateConfig)
                {
                    _latestMembership.AddServer(affectedServer);
                }
            }
            break;

        case MembershipChangeArgs.MembershipChangeType.NodeLeft:
            if (hasServerName)
            {
                lock (_mutexOnUpdateConfig)
                {
                    _latestMembership.RemoveServer(affectedServer);
                }
            }
            break;

        case MembershipChangeArgs.MembershipChangeType.PrimarySelected:
        case MembershipChangeArgs.MembershipChangeType.PrimarySet:
            lock (_mutexOnUpdateConfig)
            {
                if (hasServerName)
                {
                    _latestMembership.AddServer(affectedServer);
                    _latestMembership.Primary = sConfig.Servers.GetServerNode(args.ServerName.IpAddress.ToString());
                }
                if (args.ElectionId != null)
                {
                    _latestMembership.ElectionId = args.ElectionId;
                }
                SanityCheckForTakeoverElect();
            }
            // The strategy is notified after the membership lock has been released.
            _strategy.OnPrimaryChanged();
            break;

        case MembershipChangeArgs.MembershipChangeType.PrimaryLost:
        case MembershipChangeArgs.MembershipChangeType.PrimaryDemoted:
            lock (_mutexOnUpdateConfig)
            {
                // A lost primary also leaves the server list; a demoted one stays a member.
                if (hasServerName && args.ChangeType.Equals(MembershipChangeArgs.MembershipChangeType.PrimaryLost))
                {
                    _latestMembership.RemoveServer(affectedServer);
                }
                _latestMembership.Primary = null;
                _latestMembership.ElectionId = null;
            }
            break;
    }

    if (hasServerName && args.ChangeType != MembershipChangeArgs.MembershipChangeType.None)
    {
        if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
        {
            LoggerManager.Instance.ShardLogger.Debug("MembershipManager.UpdateLocalMembership() ", "Membership updated: " + args.ChangeType.ToString() + " of node " + args.ServerName.IpAddress.ToString());
        }
    }
}