// Handles a heartbeat sent by a node of the local shard and records it in the
// heartbeat report table.
//
// source:        address of the node that sent the heartbeat.
// heartbeatInfo: the heartbeat payload, forwarded as-is to the reporter.
//
// Any exception raised while processing is logged at error level and swallowed.
public void ReceiveHeartbeat(Address source, HeartbeatInfo heartbeatInfo)
{
    try
    {
        // Without a reporter there is nowhere to record the heartbeat.
        if (_localShardHeartbeatReporter == null)
        {
            return;
        }

        // Refreshing the configuration is costly, so the sender is only validated against
        // the configuration instance we already hold. A node freshly added to an active shard
        // is therefore ignored until it shows up in the local configuration.
        if (_clusterConfigMgr != null)
        {
            ShardConfiguration shardConfig = _clusterConfigMgr.GetShardConfiguration(_context.LocalShardName);
            bool serversKnown = shardConfig != null && shardConfig.Servers != null;
            ServerNode sender = serversKnown
                ? shardConfig.Servers.GetServerNode(source.IpAddress.ToString())
                : null;

            if (sender == null)
            {
                var rejectLog = LoggerManager.Instance.ShardLogger;
                if (rejectLog != null && rejectLog.IsDebugEnabled)
                {
                    rejectLog.Debug("LocalShardCheckHeartbeatTask.ReceiveHeartbeat() ",
                        "The node " + source + " is not part of the configuration.");
                }
                return;
            }
        }

        // AddToReport's result is true when the node already had an entry in the table.
        bool alreadyKnown = _localShardHeartbeatReporter.AddToReport(source, heartbeatInfo);
        if (alreadyKnown)
        {
            return;
        }

        var joinLog = LoggerManager.Instance.ShardLogger;
        if (joinLog != null && joinLog.IsDebugEnabled)
        {
            joinLog.Debug("LocalShardCheckHeartbeatTask.ReceiveHeartbeat() ",
                "Node " + source.IpAddress + " added to the table for the first time. ");
        }

        // First heartbeat from this node: hand the current report to the membership manager
        // and start the election mechanism for a joining node.
        lock (_membershipManager)
        {
            _membershipManager.HeartbeatReport = _localShardHeartbeatReporter;
        }

        Server joiningServer = new Server(source, Status.Running);
        _electionExecTask = new ElectionMechanismExecutionTask(_membershipManager, Activity.NodeJoining, joiningServer);
        _electionExecTask.Start();
        OnActivityCompleted();
    }
    catch (Exception e)
    {
        var errorLog = LoggerManager.Instance.ShardLogger;
        if (errorLog != null && errorLog.IsErrorEnabled)
        {
            errorLog.Error("LocalShardCheckHeartbeatTask.ReceiveHeartbeat() ", e.ToString());
        }
    }
}
// Body of the heartbeat-checking thread.
//
// Sets the logger thread context, waits until the node status latch reports Running and the
// start signal is set, then repeats one heartbeat-check cycle per iteration while _running
// is true. Between iterations it waits on _syncMutex for up to _poolingThreshold and then
// on the start signal again.
//
// The method returns when the local shard (or its server list) is missing from the
// configuration, when the thread is aborted, or when _running becomes false. Any other
// exception raised by a cycle is logged at error level and the loop continues.
public void Run()
{
    LoggerManager.Instance.SetThreadContext(new LoggerContext()
    {
        ShardName = _context.LocalShardName ?? "",
        DatabaseName = ""
    });

    _context.StatusLatch.WaitForAny(NodeStatus.Running);
    _startSignal.WaitOne();

    while (_running)
    {
        try
        {
            if (!RunHeartbeatCheckCycle())
            {
                return;
            }
        }
        catch (ThreadAbortException)
        {
            if (LoggerManager.Instance.ShardLogger != null &&
                LoggerManager.Instance.ShardLogger.IsErrorEnabled &&
                _checkHbThread != null)
            {
                LoggerManager.Instance.ShardLogger.Error(_checkHbThread.Name, "Task aborted.");
            }
            break;
        }
        catch (Exception e)
        {
            if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsErrorEnabled)
            {
                LoggerManager.Instance.ShardLogger.Error("LocalShardCheckHeartbeatTask.Run() ", e.ToString());
            }
        }

        // Pause until the polling interval elapses or _syncMutex is pulsed.
        lock (_syncMutex)
        {
            Monitor.Wait(_syncMutex, _poolingThreshold);
        }
        _startSignal.WaitOne();
    }
}

// Runs a single heartbeat-check cycle: evaluates the report table, handles nodes that stopped
// sending heartbeats and triggers general elections when no usable primary is known.
// Returns false when the local shard (or its server list) is not present in the configuration,
// which tells Run() to stop; returns true otherwise.
private bool RunHeartbeatCheckCycle()
{
    ShardConfiguration sConfig = null;
    if (_clusterConfigMgr != null)
    {
        sConfig = _clusterConfigMgr.GetShardConfiguration(_context.LocalShardName);
    }

    if (sConfig == null || sConfig.Servers == null)
    {
        if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsWarnEnabled)
        {
            LoggerManager.Instance.ShardLogger.Warn("LocalShardCheckHeartbeatTask.Run() ",
                "The shard (or the nodes of the shard) " + _context.LocalShardName +
                " does not exist in the configuration.");
        }
        return false;
    }

    IDictionary<Address, HeartbeatInfo> reportTable = _localShardHeartbeatReporter.GetReportTable;
    if (reportTable == null || reportTable.Count == 0)
    {
        // Nothing reported yet; nothing to evaluate in this cycle.
        return true;
    }

    CheckHeartbeats(reportTable);

    IList<Address> tentativeLostNodes = CheckForLostNodes(reportTable);
    if (tentativeLostNodes != null && tentativeLostNodes.Count > 0)
    {
        HandleTentativelyLostNodes(tentativeLostNodes, sConfig);
    }

    // If no primary is set (or it is unreachable), first check whether elections are
    // feasible; only then is the actual election mechanism triggered.
    if (IsGeneralElectionRequired() && AreElectionsFeasible(_localShardHeartbeatReporter, sConfig))
    {
        lock (_membershipManager)
        {
            _membershipManager.HeartbeatReport = _localShardHeartbeatReporter;
        }
        _electionExecTask = new ElectionMechanismExecutionTask(_membershipManager,
            Activity.GeneralElectionsTriggered, null);
        _electionExecTask.Start();
    }

    LogMembership();
    return true;
}

// Processes nodes whose heartbeats are missing. A node that is still in the report table and
// has no channel is treated as lost: a NodeLeaving activity is raised and the election
// mechanism is started for it. Otherwise only a debug message is written.
private void HandleTentativelyLostNodes(IList<Address> tentativeLostNodes, ShardConfiguration sConfig)
{
    foreach (var node in tentativeLostNodes)
    {
        // The report table is re-read from the reporter for every node, as in the original code.
        bool isLost = _localShardHeartbeatReporter.GetReportTable.ContainsKey(node) &&
                      !ChannelExists(sConfig.Servers.GetServerNode(node.IpAddress.ToString()));

        if (!isLost)
        {
            if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
            {
                LoggerManager.Instance.ShardLogger.Debug("LocalShard.CheckHeartbeatTask",
                    "did not receive heart beat from " + node + ", however channel is connected");
            }
            continue;
        }

        if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
        {
            LoggerManager.Instance.ShardLogger.Debug("LocalShard.CheckHeartbeatTask",
                "did not receive heart beat from " + node);
        }

        // The node is not removed from the report table here.
        OnActivityTriggered(Activity.NodeLeaving, node);

        lock (_membershipManager)
        {
            _membershipManager.HeartbeatReport = _localShardHeartbeatReporter;
        }

        _electionExecTask = new ElectionMechanismExecutionTask(_membershipManager, Activity.NodeLeaving,
            new Server(node, Status.Stopped));
        _electionExecTask.Start();
        OnActivityCompleted();
    }
}

// Decides whether general elections should be considered: the node must not be in the
// Intermediate role, and there must be no membership, no primary, or no channel to the primary.
//
// The original expression was "A && B || C", which C# evaluates as "(A && B) || C". For an
// Intermediate node without membership that dereferenced a null LatestMembership and threw on
// every cycle. The role check now guards the whole condition, and the membership is read once
// so the null check and the later reads see the same instance.
// NOTE(review): as a consequence an Intermediate node no longer triggers general elections
// from this path even when a membership without a reachable primary exists - confirm this
// matches the intended role semantics.
private bool IsGeneralElectionRequired()
{
    if (_shard.NodeRole.Equals(NodeRole.Intermediate))
    {
        return false;
    }

    var membership = _membershipManager.LatestMembership;
    return membership == null ||
           membership.Primary == null ||
           !ChannelExists(membership.Primary);
}