Beispiel #1
0
        /// <summary>
        /// Checks whether a node is running or not. We send a status request message and
        /// wait for the response for a particular timeout. If the node is alive it sends
        /// back its status; otherwise the timeout occurs and we consider it DEAD.
        /// </summary>
        private void CheckStatus()
        {
            // Keep draining the pending-suspect list. The loop ends once the list is empty,
            // because the branch below then clears _statusCheckingThread.
            while (_statusCheckingThread != null)
            {
                lock (_checkStatusList.SyncRoot)
                {
                    if (_checkStatusList.Count > 0)
                    {
                        _currentSuspect = _checkStatusList[0] as Address;
                        // Consume the head by position. Removing by value would leave an entry
                        // that is not an Address stuck at the front of the list forever.
                        _checkStatusList.RemoveAt(0);
                    }
                    else
                    {
                        _currentSuspect = null;
                    }

                    if (_currentSuspect == null)
                    {
                        // Nothing (valid) to verify: clearing the field fails the loop condition.
                        _statusCheckingThread = null;
                        continue;
                    }
                }

                lock (_status_mutex)
                {
                    try
                    {
                        NodeStatus nodeStatus = null;
                        if (_enclosingInstance.ct.ConnectionExist(_currentSuspect))
                        {
                            Message msg = new Message(_currentSuspect, null, new byte[0]);
                            msg.putHeader(HeaderType.KEEP_ALIVE, new TCPHearBeat(TCPHearBeat.ARE_YOU_ALIVE));

                            if (_enclosingInstance.Stack.NCacheLog.IsInfoEnabled)
                            {
                                _enclosingInstance.Stack.NCacheLog.Info("ConnectionKeepAlive.CheckStatus", "sending status request to " + _currentSuspect);
                            }

                            _enclosingInstance.sendUnicastMessage(msg, false, msg.Payload, Priority.High);
                            _statusReceived = null;

                            // Wait for the result or until the timeout occurs, whichever is first.
                            // NOTE(review): presumably the receive path stores the reply in
                            // _statusReceived and pulses _status_mutex - confirm against the receiver.
                            Monitor.Wait(_status_mutex, _statusTimeout);

                            if (_statusReceived != null)
                            {
                                TCPHearBeat status = _statusReceived as TCPHearBeat;

                                if (_enclosingInstance.Stack.NCacheLog.IsInfoEnabled)
                                {
                                    _enclosingInstance.Stack.NCacheLog.Info("ConnectionKeepAlive.CheckStatus", "received status " + status + " from " + _currentSuspect);
                                }

                                if (status != null)
                                {
                                    if (status.Type == TCPHearBeat.I_AM_NOT_DEAD)
                                    {
                                        nodeStatus = new NodeStatus(_currentSuspect, NodeStatus.IS_ALIVE);
                                    }
                                    else if (status.Type == TCPHearBeat.I_AM_LEAVING)
                                    {
                                        nodeStatus = new NodeStatus(_currentSuspect, NodeStatus.IS_LEAVING);
                                    }
                                    else if (status.Type == TCPHearBeat.I_AM_STARTING)
                                    {
                                        // A node that reports it is starting is reported as dead.
                                        nodeStatus = new NodeStatus(_currentSuspect, NodeStatus.IS_DEAD);
                                    }
                                }

                                if (nodeStatus == null)
                                {
                                    // The reply was not a heartbeat or carried an unknown type. Never
                                    // publish an event without a NodeStatus: report the node as dead,
                                    // the same verdict used when no usable answer arrives at all.
                                    _enclosingInstance.Stack.NCacheLog.Error("ConnectionKeepAlive.CheckStatus", "received unrecognized status from " + _currentSuspect + "; consider him DEAD");
                                    nodeStatus = new NodeStatus(_currentSuspect, NodeStatus.IS_DEAD);
                                }
                            }
                            else
                            {
                                nodeStatus = new NodeStatus(_currentSuspect, NodeStatus.IS_DEAD);
                                if (_enclosingInstance.Stack.NCacheLog.IsInfoEnabled)
                                {
                                    _enclosingInstance.Stack.NCacheLog.Info("ConnectionKeepAlive.CheckStatus", "did not receive status from " + _currentSuspect + "; consider him DEAD");
                                }
                            }
                        }
                        else
                        {
                            if (_enclosingInstance.Stack.NCacheLog.IsInfoEnabled)
                            {
                                _enclosingInstance.Stack.NCacheLog.Info("ConnectionKeepAlive.CheckStatus", "no connection exists for " + _currentSuspect);
                            }
                            nodeStatus = new NodeStatus(_currentSuspect, NodeStatus.IS_DEAD);
                        }

                        Event statusEvent = new Event(Event.GET_NODE_STATUS_OK, nodeStatus);
                        _enclosingInstance.passUp(statusEvent);
                    }
                    catch (Exception e)
                    {
                        // NOTE(review): on this path no status event is published for the suspect;
                        // confirm that whoever requested the status cannot wait on it indefinitely.
                        _enclosingInstance.Stack.NCacheLog.Error("ConnectionKeepAlive.CheckStatus", e.ToString());
                    }
                    finally
                    {
                        // Reset per-suspect state so the next iteration starts clean.
                        _currentSuspect = null;
                        _statusReceived = null;
                    }
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Handles an event travelling up the stack. A message carrying a GMS header is consumed
        /// here and dispatched on the header type; it is never passed further up. Other events
        /// are handled by event type; unless their case returns early they are offered to
        /// <c>impl.handleUpEvent</c> and passed up when that call returns true.
        /// </summary>
        /// <param name="evt">The event to process.</param>
        public override void up(Event evt)
        {
            Message msg;
            HDR hdr;
            MergeData merge_data;
            Address node;

            switch (evt.Type)
            {
                case Event.MSG:
                    msg = (Message)evt.Arg;

                    // No GMS header (or a foreign object under that key): not ours to consume,
                    // so fall through to the common handleUpEvent/passUp tail.
                    if (!(msg.getHeader(HeaderType.GMS) is HDR))
                        break;
                    hdr = (HDR)msg.removeHeader(HeaderType.GMS);
                    switch (hdr.type)
                    {
                        case HDR.JOIN_REQ:
                            // Handled on a pool thread; the callback receives the four values as object[].
                            ThreadPool.QueueUserWorkItem(
                                new WaitCallback(handleJoinrequestAsync),
                                new object[] { hdr.mbr, hdr.subGroup_name, hdr.isStartedAsMirror, hdr.GMSId });
                            break;

                        case HDR.SPECIAL_JOIN_REQUEST:
                            HandleSpecialJoinRequest(hdr.mbr, hdr.GMSId);
                            break;

                        case HDR.JOIN_RSP:
                            MarkStateTransferInProcess();
                            impl.handleJoinResponse(hdr.join_rsp);
                            break;

                        case HDR.LEAVE_REQ:
                            Stack.NCacheLog.Debug("received LEAVE_REQ " + hdr + " from " + msg.Src);

                            if (hdr.mbr == null)
                            {
                                Stack.NCacheLog.Error( "LEAVE_REQ's mbr field is null");
                                return;
                            }

                            // If the replica node on the coordinator is leaving then send a special
                            // event to TCP to mark itself leaving. This way other nodes asking for
                            // death status through keep-alive will get dead status.
                            // (hdr.mbr is known to be non-null here because of the guard above.)
                            if (isPartReplica && IsCoordinator && hdr.mbr.IpAddress.Equals(local_addr.IpAddress))
                            {
                                down(new Event(Event.I_AM_LEAVING));
                            }
                            ThreadPool.QueueUserWorkItem(new WaitCallback(handleLeaveAsync), new object[] { hdr.mbr, false });
                            break;

                        case HDR.LEAVE_RSP:
                            impl.handleLeaveResponse();
                            break;

                        case HDR.VIEW_RESPONSE:
                            if (_promise != null)
                                _promise.SetResult(hdr.arg);
                            break;

                        case HDR.VIEW:
                            if (hdr.view == null)
                            {
                                Stack.NCacheLog.Error("[VIEW]: view == null");
                                return;
                            }

                            Stack.NCacheLog.CriticalInfo("gms.Up", "received view from :" + msg.Src + " ; view = " + hdr.view);
                            impl.handleViewChange(hdr.view, hdr.digest);
                            break;

                        case HDR.MERGE_REQ:
                            impl.handleMergeRequest(msg.Src, hdr.merge_id);
                            break;

                        case HDR.MERGE_RSP:
                            merge_data = new MergeData(msg.Src, hdr.view, hdr.digest);
                            merge_data.merge_rejected = hdr.merge_rejected;
                            impl.handleMergeResponse(merge_data, hdr.merge_id);
                            break;

                        case HDR.INSTALL_MERGE_VIEW:
                            impl.handleMergeView(new MergeData(msg.Src, hdr.view, hdr.digest), hdr.merge_id);
                            break;

                        case HDR.CANCEL_MERGE:
                            impl.handleMergeCancelled(hdr.merge_id);
                            break;

                        case HDR.CAN_NOT_CONNECT_TO:
                            impl.handleCanNotConnectTo(msg.Src, hdr.nodeList);
                            break;

                        case HDR.LEAVE_CLUSTER:
                            string gmsId = hdr.arg as string; // reported gms id
                            string myGmsId = GetNodeGMSId(local_addr);

                            // Only act when the reported id matches this node's current GMS id.
                            if (gmsId != null && myGmsId != null && gmsId.Equals(myGmsId))
                            {
                                ThreadPool.QueueUserWorkItem(new WaitCallback(handleLeaveClusterRequestAsync), hdr.mbr);
                            }
                            break;

                        case HDR.CONNECTION_BROKEN:
                            impl.handleConnectionBroken(msg.Src, hdr.mbr);
                            break;

                        case HDR.VIEW_REJECTED:
                            impl.handleViewRejected(hdr.mbr);
                            break;

                        case HDR.INFORM_NODE_REJOINING:
                            impl.handleInformNodeRejoining(msg.Src, hdr.mbr);
                            break;

                        case HDR.RESET_ON_NODE_REJOINING:
                            impl.handleResetOnNodeRejoining(msg.Src, hdr.mbr, hdr.view);
                            break;

                        case HDR.RE_CHECK_CLUSTER_HEALTH:
                            // Runs on a dedicated thread rather than the thread pool.
                            Thread t = new Thread(new ParameterizedThreadStart(impl.ReCheckClusterHealth));
                            t.Start(hdr.mbr);
                            break;

                        case HDR.INFORM_ABOUT_NODE_DEATH:
                            // Replica is not supposed to handle this event
                            if (isPartReplica && _startedAsMirror) break;

                            impl.handleInformAboutNodeDeath(msg.Src, (Address)hdr.arg);
                            break;

                        case HDR.IS_NODE_IN_STATE_TRANSFER:
                            impl.handleIsClusterInStateTransfer(msg.Src);
                            break;

                        case HDR.IS_NODE_IN_STATE_TRANSFER_RSP:
                            if (_stateTransferPromise != null)
                            {
                                if(Stack.NCacheLog.IsInfoEnabled) Stack.NCacheLog.Info("gms.UP", "(state transfer rsp) sender: " + msg.Src + " ->" + hdr.arg);
                                _stateTransferPromise.SetResult(hdr.arg);
                            }
                            break;

                        default:
                            Stack.NCacheLog.Error( "HDR with type=" + hdr.type + " not known");
                            break;
                    }

                    return; // don't pass up

                case Event.CONNECT_OK:
                // sent by someone else, but WE are responsible for sending this !
                case Event.DISCONNECT_OK:  // ditto (e.g. sent by UDP layer). Don't send up the stack
                    return;

                case Event.GET_NODE_STATUS_OK:
                    lock (suspect_verify_mutex)
                    {
                        NodeStatus status = evt.Arg as NodeStatus;
                        if (status == null)
                        {
                            // The event may arrive without a usable NodeStatus; dereferencing it
                            // would throw, so record the problem and leave any waiter untouched.
                            Stack.NCacheLog.Error("GET_NODE_STATUS_OK event carries no node status");
                        }
                        else if (status.Node != null && status.Node.Equals(nodeTobeSuspect))
                        {
                            // Publish the verdict and wake threads waiting on suspect_verify_mutex.
                            nodeStatus = status;
                            Monitor.PulseAll(suspect_verify_mutex);
                        }
                    }
                    break;

                case Event.SET_LOCAL_ADDRESS:
                    local_addr = (Address)evt.Arg;
                    break; // pass up

                case Event.SUSPECT:
                    ThreadPool.QueueUserWorkItem(new WaitCallback(handleSuspectAsync), evt.Arg);
                    break; // pass up

                case Event.UNSUSPECT:
                    impl.unsuspect((Address)evt.Arg);
                    return; // discard

                case Event.MERGE:
                    impl.merge((System.Collections.ArrayList)evt.Arg);
                    return; // don't pass up

                case Event.CONNECTION_FAILURE:
                    impl.handleConnectionFailure(evt.Arg as ArrayList);
                    return; // don't pass up

                case Event.NODE_REJOINING:
                    impl.handleNodeRejoining(evt.Arg as Address);
                    return;

                case Event.CONNECTION_BREAKAGE:
                    node = evt.Arg as Address;
                    if (!disconnected_nodes.Contains(node))
                        disconnected_nodes.Add(node);
                    break;

                case Event.CONNECTION_RE_ESTABLISHED:
                    node = evt.Arg as Address;
                    if (disconnected_nodes.Contains(node))
                        disconnected_nodes.Remove(node);
                    break;
            }

            // Common tail for every case that ended with 'break' instead of 'return'.
            if (impl.handleUpEvent(evt))
                passUp(evt);
        }
Beispiel #3
0
            /// <summary>
            /// Checks whether a node is running or not. We send a status request message and
            /// wait for the response for a particular timeout. If the node is alive it sends
            /// back its status; otherwise the timeout occurs and we consider it DEAD.
            /// </summary>
            private void CheckStatus()
            {
                // Keep draining the pending-suspect list. The loop ends once the list is empty,
                // because the branch below then clears _statusCheckingThread.
                while (_statusCheckingThread != null)
                {
                    lock (_checkStatusList.SyncRoot)
                    {
                        if (_checkStatusList.Count > 0)
                        {
                            _currentSuspect = _checkStatusList[0] as Address;
                            // Consume the head by position. Removing by value would leave an entry
                            // that is not an Address stuck at the front of the list forever.
                            _checkStatusList.RemoveAt(0);
                        }
                        else
                        {
                            _currentSuspect = null;
                        }

                        if (_currentSuspect == null)
                        {
                            // Nothing (valid) to verify: clearing the field fails the loop condition.
                            _statusCheckingThread = null;
                            continue;
                        }
                    }

                    lock (_status_mutex)
                    {
                        try
                        {
                            NodeStatus nodeStatus = null;
                            if (_enclosingInstance.ct.ConnectionExist(_currentSuspect))
                            {
                                Message msg = new Message(_currentSuspect, null, new byte[0]);
                                msg.putHeader(HeaderType.KEEP_ALIVE, new HearBeat(HearBeat.ARE_YOU_ALIVE));

                                if (_enclosingInstance.Stack.NCacheLog.IsInfoEnabled)
                                {
                                    _enclosingInstance.Stack.NCacheLog.Info("ConnectionKeepAlive.CheckStatus", "sending status request to " + _currentSuspect);
                                }

                                _enclosingInstance.sendUnicastMessage(msg, false, msg.Payload, Priority.Critical);
                                _statusReceived = null;

                                // Wait for the result or until the timeout occurs, whichever is first.
                                // NOTE(review): presumably the receive path stores the reply in
                                // _statusReceived and pulses _status_mutex - confirm against the receiver.
                                Monitor.Wait(_status_mutex, _statusTimeout);

                                if (_statusReceived != null)
                                {
                                    HearBeat status = _statusReceived as HearBeat;

                                    if (_enclosingInstance.Stack.NCacheLog.IsInfoEnabled)
                                    {
                                        _enclosingInstance.Stack.NCacheLog.Info("ConnectionKeepAlive.CheckStatus", "received status " + status + " from " + _currentSuspect);
                                    }

                                    if (status != null)
                                    {
                                        if (status.Type == HearBeat.I_AM_NOT_DEAD)
                                        {
                                            nodeStatus = new NodeStatus(_currentSuspect, NodeStatus.IS_ALIVE);
                                        }
                                        else if (status.Type == HearBeat.I_AM_LEAVING)
                                        {
                                            nodeStatus = new NodeStatus(_currentSuspect, NodeStatus.IS_LEAVING);
                                        }
                                        else if (status.Type == HearBeat.I_AM_STARTING)
                                        {
                                            // A node that reports it is starting is reported as dead.
                                            nodeStatus = new NodeStatus(_currentSuspect, NodeStatus.IS_DEAD);
                                        }
                                    }

                                    if (nodeStatus == null)
                                    {
                                        // The reply was not a heartbeat or carried an unknown type. Never
                                        // publish an event without a NodeStatus: report the node as dead,
                                        // the same verdict used when no usable answer arrives at all.
                                        _enclosingInstance.Stack.NCacheLog.Error("ConnectionKeepAlive.CheckStatus", "received unrecognized status from " + _currentSuspect + "; consider him DEAD");
                                        nodeStatus = new NodeStatus(_currentSuspect, NodeStatus.IS_DEAD);
                                    }
                                }
                                else
                                {
                                    nodeStatus = new NodeStatus(_currentSuspect, NodeStatus.IS_DEAD);
                                    if (_enclosingInstance.Stack.NCacheLog.IsInfoEnabled)
                                    {
                                        _enclosingInstance.Stack.NCacheLog.Info("ConnectionKeepAlive.CheckStatus", "did not receive status from " + _currentSuspect + "; consider him DEAD");
                                    }
                                }
                            }
                            else
                            {
                                if (_enclosingInstance.Stack.NCacheLog.IsInfoEnabled)
                                {
                                    _enclosingInstance.Stack.NCacheLog.Info("ConnectionKeepAlive.CheckStatus", "no connection exists for " + _currentSuspect);
                                }
                                nodeStatus = new NodeStatus(_currentSuspect, NodeStatus.IS_DEAD);
                            }

                            Event statusEvent = new Event(Event.GET_NODE_STATUS_OK, nodeStatus);
                            _enclosingInstance.passUp(statusEvent);
                        }
                        catch (Exception e)
                        {
                            // NOTE(review): on this path no status event is published for the suspect;
                            // confirm that whoever requested the status cannot wait on it indefinitely.
                            _enclosingInstance.Stack.NCacheLog.Error("ConnectionKeepAlive.CheckStatus", e.ToString());
                        }
                        finally
                        {
                            // Reset per-suspect state so the next iteration starts clean.
                            _currentSuspect = null;
                            _statusReceived = null;
                        }
                    }
                }
            }
Beispiel #4
0
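        /// <summary>
        /// Verifies whether the given node is dead or not.
        /// </summary>
        /// <param name="suspect">suspected node</param>
        /// <param name="matchGmsId">
        /// when true, a node found dead is only reported as dead if its GMS id is still the
        /// same as the one recorded before the verification started
        /// </param>
        /// <returns>true if the node is considered dead (subject to the GMS id check when requested), otherwise false</returns>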
        public bool VerifySuspect(Address suspect, bool matchGmsId)
        {
            // A node is presumed dead unless a status says otherwise.
            bool isDead = true;
            string gmsId = null;
            // Snapshot of the shared nodeStatus field, taken while the lock is held, so the
            // code after the lock never dereferences a field another thread may have reset.
            NodeStatus reportedStatus = null;

            if (suspect != null)
            {
                Stack.NCacheLog.CriticalInfo("GMS.VerifySuspect", " verifying the death of node " + suspect);
                if (Stack.NCacheLog.IsInfoEnabled) Stack.NCacheLog.Info("GMS.VerifySuspect", " verifying the death of node " + suspect);

                // Remember the GMS id the node had when it was reported suspect.
                gmsId = GmsIds[suspect] as string;
                lock (suspect_verify_mutex)
                {
                    nodeStatus = null;
                    nodeTobeSuspect = suspect;
                    passDown(new Event(Event.GET_NODE_STATUS, suspect, Priority.Critical));

                    // We wait for the verification. Wait releases suspect_verify_mutex while blocked.
                    // NOTE(review): there is no timeout here, so this blocks until the mutex is
                    // pulsed - confirm a status is always reported for every request.
                    Monitor.Wait(suspect_verify_mutex);

                    reportedStatus = nodeStatus;
                    if (reportedStatus != null)
                    {
                        if (Stack.NCacheLog.IsInfoEnabled) Stack.NCacheLog.Info("GMS.VerifySuspect", " node status is " + reportedStatus.ToString());
                        switch (reportedStatus.Status)
                        {
                            case NodeStatus.IS_ALIVE:
                                isDead = false;
                                break;

                            case NodeStatus.IS_DEAD:
                            case NodeStatus.IS_LEAVING:
                                isDead = true;
                                break;
                        }
                    }
                }
            }

            if (isDead && matchGmsId)
            {
                // We verify whether the current gms id is the same as when the node was reported
                // suspect. A null suspect has no id to look up, so it can never match.
                string currentGmsId = suspect != null ? GmsIds[suspect] as string : null;

                if (currentGmsId != null && gmsId != null && currentGmsId.Equals(gmsId))
                    return true;

                // Concatenating the object (instead of calling ToString() on it) yields the same
                // text when a status exists and an empty string when none was recorded.
                if (Stack.NCacheLog.IsErrorEnabled) Stack.NCacheLog.CriticalInfo("GMS.VerifySuspect", "node gms ids differ; old : " + gmsId + " new: " + currentGmsId + reportedStatus);
                return false;
            }

            return isDead;
        }