Example #1
0
        /// <summary>
        /// Receives a heartbeat sent by a node of the local shard and records it in the
        /// heartbeat report table. When the reporter indicates the sender was not in the
        /// table before, a <c>NodeJoining</c> election task is started for it.
        /// </summary>
        /// <param name="source">Address of the node that sent the heartbeat.</param>
        /// <param name="heartbeatInfo">Heartbeat payload handed to the reporter unchanged.</param>
        /// <remarks>
        /// Nothing is thrown to the caller: any exception is logged to the shard logger
        /// (when error logging is enabled) and otherwise dropped.
        /// </remarks>
        public void ReceiveHeartbeat(Address source, HeartbeatInfo heartbeatInfo)
        {
            try
            {
                // Without a reporter there is no table to record into.
                if (_localShardHeartbeatReporter == null)
                {
                    return;
                }

                // Only accept heartbeats from nodes present in the locally held shard configuration.
                // Refreshing the configuration is costly, so a freshly added node is simply ignored
                // until it shows up in the local configuration instance.
                if (_clusterConfigMgr != null)
                {
                    ShardConfiguration shardConfig = _clusterConfigMgr.GetShardConfiguration(_context.LocalShardName);
                    bool hasServerList = shardConfig != null && shardConfig.Servers != null;
                    ServerNode sender = hasServerList
                        ? shardConfig.Servers.GetServerNode(source.IpAddress.ToString())
                        : null;

                    if (sender == null)
                    {
                        var debugLogger = LoggerManager.Instance.ShardLogger;
                        if (debugLogger != null && debugLogger.IsDebugEnabled)
                        {
                            debugLogger.Debug("LocalShardCheckHeartbeatTask.ReceiveHeartbeat() ", "The node " + source + " is not part of the configuration.");
                        }
                        return;
                    }
                }

                // AddToReport's result is treated as "this node was already in the table".
                bool alreadyKnown = _localShardHeartbeatReporter.AddToReport(source, heartbeatInfo);
                if (alreadyKnown)
                {
                    return;
                }

                var joinLogger = LoggerManager.Instance.ShardLogger;
                if (joinLogger != null && joinLogger.IsDebugEnabled)
                {
                    joinLogger.Debug("LocalShardCheckHeartbeatTask.ReceiveHeartbeat() ", "Node " + source.IpAddress + " added to the table for the first time. ");
                }

                // Hand the current report to the membership manager before the election task reads it.
                lock (_membershipManager)
                {
                    _membershipManager.HeartbeatReport = _localShardHeartbeatReporter;
                }

                Server joiningServer = new Server(source, Status.Running);
                _electionExecTask = new ElectionMechanismExecutionTask(_membershipManager, Activity.NodeJoining, joiningServer);
                _electionExecTask.Start();
                OnActivityCompleted();
            }
            catch (Exception ex)
            {
                var errorLogger = LoggerManager.Instance.ShardLogger;
                if (errorLogger != null && errorLogger.IsErrorEnabled)
                {
                    errorLogger.Error("LocalShardCheckHeartbeatTask.ReceiveHeartbeat() ", ex.ToString());
                }
            }
        }
Example #2
0
        /// <summary>
        /// Thread body of the heartbeat-checking task.
        /// </summary>
        /// <remarks>
        /// After the status latch reports <c>NodeStatus.Running</c> and the start signal is set,
        /// the method loops while <c>_running</c> is true. On each pass it:
        /// <list type="number">
        /// <item>loads the local shard configuration and leaves the method if it, or its server list, is missing;</item>
        /// <item>checks the heartbeat report table and starts a <c>NodeLeaving</c> election for every
        /// tentatively lost node that is still in the table and has no channel;</item>
        /// <item>starts general elections when no reachable primary is known and elections are feasible;</item>
        /// <item>waits on <c>_syncMutex</c> for up to <c>_poolingThreshold</c>, then on the start signal.</item>
        /// </list>
        /// Exceptions other than <see cref="ThreadAbortException"/> are logged and the loop continues.
        /// </remarks>
        public void Run()
        {
            LoggerManager.Instance.SetThreadContext(new LoggerContext()
            {
                ShardName = _context.LocalShardName ?? "", DatabaseName = ""
            });
            _context.StatusLatch.WaitForAny(NodeStatus.Running);
            _startSignal.WaitOne();
            while (_running)
            {
                try
                {
                    ShardConfiguration sConfig = null;
                    if (_clusterConfigMgr != null)
                    {
                        sConfig = _clusterConfigMgr.GetShardConfiguration(_context.LocalShardName);
                    }
                    if (sConfig == null || sConfig.Servers == null)
                    {
                        var warnLogger = LoggerManager.Instance.ShardLogger;
                        if (warnLogger != null && warnLogger.IsWarnEnabled)
                        {
                            warnLogger.Warn("LocalShardCheckHeartbeatTask.Run() ", "The shard (or the nodes of the shard) " +
                                            _context.LocalShardName +
                                            " does not exist in the configuration.");
                        }
                        // NOTE(review): this leaves Run() entirely (not just the current pass),
                        // so the checking thread ends here - confirm that is intended.
                        return;
                    }

                    IDictionary<Address, HeartbeatInfo> reportTable = _localShardHeartbeatReporter.GetReportTable;
                    if (reportTable != null && reportTable.Count > 0)
                    {
                        CheckHeartbeats(reportTable);

                        IList<Address> tentativeLostNodes = CheckForLostNodes(reportTable);
                        if (tentativeLostNodes != null && tentativeLostNodes.Count > 0)
                        {
                            // Each tentatively lost node that is still tracked and has no channel
                            // is reported as leaving, and an election is triggered for it.
                            foreach (Address lostNode in tentativeLostNodes)
                            {
                                // The table is read again from the reporter (not from the local
                                // 'reportTable') exactly as before, so concurrent changes are seen.
                                if (_localShardHeartbeatReporter.GetReportTable.ContainsKey(lostNode) &&
                                    !ChannelExists(sConfig.Servers.GetServerNode(lostNode.IpAddress.ToString())))
                                {
                                    var lostLogger = LoggerManager.Instance.ShardLogger;
                                    if (lostLogger != null && lostLogger.IsDebugEnabled)
                                    {
                                        lostLogger.Debug("LocalShard.CheckHeartbeatTask",
                                                         "did not receive heart beat from " + lostNode);
                                    }

                                    OnActivityTriggered(Activity.NodeLeaving, lostNode);

                                    // Publish the current report before the election task reads it.
                                    lock (_membershipManager)
                                    {
                                        _membershipManager.HeartbeatReport =
                                            (LocalShardHeartbeatReporting)_localShardHeartbeatReporter;
                                    }
                                    _electionExecTask = new ElectionMechanismExecutionTask(_membershipManager,
                                                                                           Activity.NodeLeaving, new Server(lostNode, Status.Stopped));
                                    _electionExecTask.Start();
                                    OnActivityCompleted();
                                }
                                else
                                {
                                    var connectedLogger = LoggerManager.Instance.ShardLogger;
                                    if (connectedLogger != null && connectedLogger.IsDebugEnabled)
                                    {
                                        connectedLogger.Debug("LocalShard.CheckHeartbeatTask",
                                                              "did not receive heart beat from " + lostNode + ", however channel is connected");
                                    }
                                }
                            }
                        }

                        // Decide whether a primary is missing or unreachable.
                        // The previous single expression mixed && and || without grouping, so an
                        // Intermediate node with no membership yet dereferenced a null LatestMembership
                        // and threw on every pass. The membership is now read once and checked
                        // explicitly; the outcome is unchanged for every case that did not throw.
                        bool isIntermediate = _shard.NodeRole.Equals(NodeRole.Intermediate);
                        var latestMembership = _membershipManager.LatestMembership;
                        bool primaryMissingOrUnreachable;
                        if (latestMembership == null)
                        {
                            // No membership at all: only non-intermediate nodes proceed to elections.
                            primaryMissingOrUnreachable = !isIntermediate;
                        }
                        else
                        {
                            // NOTE(review): as before, the node role is not consulted on this path - confirm
                            // whether Intermediate nodes are meant to trigger elections here.
                            primaryMissingOrUnreachable = latestMembership.Primary == null ||
                                                          !ChannelExists(latestMembership.Primary);
                        }

                        if (primaryMissingOrUnreachable)
                        {
                            // First check that holding elections is feasible; only then trigger them.
                            if (AreElectionsFeasible(_localShardHeartbeatReporter, sConfig))
                            {
                                lock (_membershipManager)
                                {
                                    _membershipManager.HeartbeatReport =
                                        (LocalShardHeartbeatReporting)_localShardHeartbeatReporter;
                                }

                                _electionExecTask = new ElectionMechanismExecutionTask(_membershipManager, Activity.GeneralElectionsTriggered, null);
                                _electionExecTask.Start();
                            }
                            // A disabled fallback that adopted the primary reported by other nodes used to
                            // live here (flagged as bad logic); it was removed as dead code.
                        }
                        LogMembership();
                    }
                }
                catch (ThreadAbortException)
                {
                    var abortLogger = LoggerManager.Instance.ShardLogger;
                    if (abortLogger != null && abortLogger.IsErrorEnabled && _checkHbThread != null)
                    {
                        abortLogger.Error(_checkHbThread.Name, "Task aborted.");
                    }
                    // Stop polling once the thread is being aborted.
                    break;
                }
                catch (Exception e)
                {
                    // Best effort: log and keep the polling loop alive.
                    var errorLogger = LoggerManager.Instance.ShardLogger;
                    if (errorLogger != null && errorLogger.IsErrorEnabled)
                    {
                        errorLogger.Error("LocalShardCheckHeartbeatTask.Run() ", e.ToString());
                    }
                }

                // Pause until the polling interval elapses or the mutex is pulsed,
                // then block while the start signal is not set.
                lock (_syncMutex)
                {
                    Monitor.Wait(_syncMutex, _poolingThreshold);
                }
                _startSignal.WaitOne();
            }
        }