Example #1
0
        /// <summary>
        /// Handles the <c>-transitionToActive</c> command for a single target.
        /// </summary>
        /// <remarks>
        /// Exactly one positional argument (the target to activate) is required.
        /// Unless the <c>Forceactive</c> option is present, the other targets are
        /// first checked through <c>IsOtherTargetNodeActive</c> and the command is
        /// rejected when that check returns true. The target must also pass
        /// <c>CheckManualStateManagementOK</c>.
        /// </remarks>
        /// <param name="cmd">parsed command line carrying the target and options</param>
        /// <returns>
        /// 0 after the transition request has returned, -1 if the command was
        /// rejected by any of the checks above
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
        private int TransitionToActive(CommandLine cmd)
        {
            string[] args = cmd.GetArgs();
            if (args.Length != 1)
            {
                errOut.WriteLine("transitionToActive: incorrect number of arguments");
                PrintUsage(errOut, "-transitionToActive");
                return -1;
            }
            string targetId = args[0];

            // Only when the force option is absent: bail out if the check on the
            // other targets reports a problem (it returns true in that case).
            if (!cmd.HasOption(Forceactive) &&
                IsOtherTargetNodeActive(targetId, cmd.HasOption(Forceactive)))
            {
                return -1;
            }

            HAServiceTarget svcTarget = ResolveTarget(targetId);
            if (!CheckManualStateManagementOK(svcTarget))
            {
                return -1;
            }

            // The proxy is requested with a timeout argument of 0.
            HAServiceProtocolHelper.TransitionToActive(
                svcTarget.GetProxy(GetConf(), 0), CreateReqInfo());
            return 0;
        }
        /// <summary>
        /// Asks the given service to transition to standby without fencing it.
        /// </summary>
        /// <remarks>
        /// A proxy is obtained using <c>gracefulFenceConf</c> and
        /// <c>gracefulFenceTimeout</c>, and a standby transition is requested on it.
        /// A <c>ServiceFailedException</c> or <c>IOException</c> is logged as a
        /// warning rather than propagated. The proxy, if one was obtained, is
        /// always stopped before returning.
        /// </remarks>
        /// <param name="svc">service that should become standby</param>
        /// <returns>
        /// true if the standby transition call completed without throwing,
        /// false if it failed with one of the handled exceptions
        /// </returns>
        internal virtual bool TryGracefulFence(HAServiceTarget svc)
        {
            HAServiceProtocol standbyProxy = null;
            bool madeStandby = false;

            try
            {
                standbyProxy = svc.GetProxy(gracefulFenceConf, gracefulFenceTimeout);
                standbyProxy.TransitionToStandby(CreateReqInfo());
                madeStandby = true;
            }
            catch (ServiceFailedException sfe)
            {
                Log.Warn("Unable to gracefully make " + svc + " standby (" + sfe.Message + ")");
            }
            catch (IOException ioe)
            {
                Log.Warn("Unable to gracefully make " + svc + " standby (unable to connect)", ioe);
            }
            finally
            {
                // Release the proxy whether or not the transition succeeded.
                if (standbyProxy != null)
                {
                    RPC.StopProxy(standbyProxy);
                }
            }

            return madeStandby;
        }
        /// <summary>
        /// Runs the checks that must pass before failing over from
        /// <paramref name="from"/> to <paramref name="target"/>.
        /// </summary>
        /// <remarks>
        /// The failover is refused when both services report the same address,
        /// when the target's status cannot be fetched, when the target is not in
        /// the standby state, when the target reports that it is not ready to
        /// become active, or when the health check on the target fails.
        /// The readiness check can be overridden with
        /// <paramref name="forceActive"/> (a warning is logged instead), for the
        /// case where performing the failover is what allows the target to become
        /// ready, eg because it triggers a log roll so the standby can learn about
        /// new blocks and leave safemode.
        /// </remarks>
        /// <param name="from">currently active service</param>
        /// <param name="target">service to make active</param>
        /// <param name="forceActive">
        /// proceed even if <paramref name="target"/> reports that it is not ready
        /// </param>
        /// <exception cref="Org.Apache.Hadoop.HA.FailoverFailedException">
        /// if we should avoid failover
        /// </exception>
        private void PreFailoverChecks(HAServiceTarget from, HAServiceTarget target,
                                       bool forceActive)
        {
            if (from.GetAddress().Equals(target.GetAddress()))
            {
                throw new FailoverFailedException("Can't failover a service to itself");
            }

            HAServiceProtocol targetProxy;
            HAServiceStatus targetStatus;
            try
            {
                targetProxy = target.GetProxy(conf, rpcTimeoutToNewActive);
                targetStatus = targetProxy.GetServiceStatus();
            }
            catch (IOException e)
            {
                string failure = "Unable to get service state for " + target;
                Log.Error(failure + ": " + e.GetLocalizedMessage());
                throw new FailoverFailedException(failure, e);
            }

            // Any state other than standby is rejected.
            bool isStandby = targetStatus.GetState().Equals(HAServiceProtocol.HAServiceState.Standby);
            if (!isStandby)
            {
                throw new FailoverFailedException("Can't failover to an active service");
            }

            if (!targetStatus.IsReadyToBecomeActive())
            {
                string reason = targetStatus.GetNotReadyReason();
                if (forceActive)
                {
                    Log.Warn("Service is not ready to become active, but forcing: " + reason);
                }
                else
                {
                    throw new FailoverFailedException(
                        target + " is not ready to become active: " + reason);
                }
            }

            try
            {
                HAServiceProtocolHelper.MonitorHealth(targetProxy, CreateReqInfo());
            }
            catch (HealthCheckFailedException hce)
            {
                throw new FailoverFailedException("Can't failover to an unhealthy service", hce);
            }
            catch (IOException e)
            {
                throw new FailoverFailedException("Got an IO exception", e);
            }
        }
Example #4
0
        /// <summary>
        /// Handles the <c>-transitionToStandby</c> command for a single target.
        /// </summary>
        /// <remarks>
        /// Exactly one positional argument (the target to make standby) is
        /// required, and the target must pass <c>CheckManualStateManagementOK</c>.
        /// </remarks>
        /// <param name="cmd">parsed command line carrying the target</param>
        /// <returns>
        /// 0 after the transition request has returned, -1 if the argument count
        /// is wrong or the manual state management check fails
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
        private int TransitionToStandby(CommandLine cmd)
        {
            string[] args = cmd.GetArgs();
            if (args.Length != 1)
            {
                errOut.WriteLine("transitionToStandby: incorrect number of arguments");
                PrintUsage(errOut, "-transitionToStandby");
                return -1;
            }

            HAServiceTarget svcTarget = ResolveTarget(args[0]);
            if (!CheckManualStateManagementOK(svcTarget))
            {
                return -1;
            }

            // The proxy is requested with a timeout argument of 0.
            HAServiceProtocolHelper.TransitionToStandby(
                svcTarget.GetProxy(GetConf(), 0), CreateReqInfo());
            return 0;
        }
Example #5
0
        /// <summary>
        /// Checks whether any target other than the one being activated blocks
        /// the transition.
        /// </summary>
        /// <remarks>
        /// The ids returned by <c>GetTargetIds</c> are examined in turn, skipping
        /// <paramref name="targetNodeToActivate"/> itself. Each remaining target is
        /// queried for its service status through a proxy requested with a
        /// timeout argument of 5000.
        /// </remarks>
        /// <param name="targetNodeToActivate">id of the target that is about to be made active</param>
        /// <param name="forceActive">
        /// when true, an exception raised while querying another target is ignored
        /// and the scan moves on to the next target
        /// </param>
        /// <returns>
        /// true if another target fails the manual state management check, reports
        /// the active state, or could not be queried while
        /// <paramref name="forceActive"/> was false; otherwise false
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private bool IsOtherTargetNodeActive(string targetNodeToActivate, bool forceActive)
        {
            ICollection<string> otherIds = GetTargetIds(targetNodeToActivate);
            otherIds.Remove(targetNodeToActivate);

            foreach (string otherId in otherIds)
            {
                HAServiceTarget otherTarget = ResolveTarget(otherId);
                if (!CheckManualStateManagementOK(otherTarget))
                {
                    return true;
                }

                try
                {
                    HAServiceProtocol otherProxy = otherTarget.GetProxy(GetConf(), 5000);
                    bool alreadyActive =
                        otherProxy.GetServiceStatus().GetState() == HAServiceProtocol.HAServiceState.Active;
                    if (alreadyActive)
                    {
                        errOut.WriteLine("transitionToActive: Node " + otherId + " is already active");
                        PrintUsage(errOut, "-transitionToActive");
                        return true;
                    }
                }
                catch (Exception e)
                {
                    // With forceActive set the failure is deliberately ignored.
                    if (forceActive)
                    {
                        continue;
                    }

                    errOut.WriteLine("Unexpected error occurred  " + e.Message);
                    PrintUsage(errOut, "-transitionToActive");
                    return true;
                }
            }

            return false;
        }
        /// <summary>
        /// Fails over from <paramref name="fromSvc"/> to <paramref name="toSvc"/>.
        /// </summary>
        /// <remarks>
        /// After the pre-failover checks, <paramref name="fromSvc"/> is asked to
        /// become standby gracefully. It is fenced if that attempt fails or if
        /// <paramref name="forceFence"/> is set. <paramref name="toSvc"/> is then
        /// asked to become active. If that request fails and
        /// <paramref name="fromSvc"/> was not fenced, a failback to
        /// <paramref name="fromSvc"/> is attempted before the failure is reported.
        /// </remarks>
        /// <param name="fromSvc">currently active service; its fencer must not be null</param>
        /// <param name="toSvc">service to make active</param>
        /// <param name="forceFence">to fence fromSvc even if not strictly necessary</param>
        /// <param name="forceActive">try to make toSvc active even if it is not ready</param>
        /// <exception cref="Org.Apache.Hadoop.HA.FailoverFailedException">
        /// if the failover fails
        /// </exception>
        public virtual void Failover(HAServiceTarget fromSvc, HAServiceTarget toSvc,
                                     bool forceFence, bool forceActive)
        {
            Preconditions.CheckArgument(fromSvc.GetFencer() != null, "failover requires a fencer");
            PreFailoverChecks(fromSvc, toSvc, forceActive);

            // A successful graceful transition makes fencing optional (only done
            // when forced by the caller); otherwise fencing is required.
            bool tryFence = TryGracefulFence(fromSvc) ? forceFence : true;
            if (tryFence && !fromSvc.GetFencer().Fence(fromSvc))
            {
                throw new FailoverFailedException("Unable to fence " + fromSvc + ". Fencing failed.");
            }

            // Try to make toSvc active, remembering why it failed if it does.
            Exception cause = null;
            try
            {
                HAServiceProtocolHelper.TransitionToActive(
                    toSvc.GetProxy(conf, rpcTimeoutToNewActive), CreateReqInfo());
            }
            catch (ServiceFailedException sfe)
            {
                Log.Error("Unable to make " + toSvc + " active (" + sfe.Message + "). Failing back.");
                cause = sfe;
            }
            catch (IOException ioe)
            {
                Log.Error("Unable to make " + toSvc + " active (unable to connect). Failing back.", ioe);
                cause = ioe;
            }

            if (cause == null)
            {
                // toSvc accepted the transition; nothing more to do.
                return;
            }

            string msg = "Unable to failover to " + toSvc;
            // Only try to failback if we didn't fence fromSvc
            if (!tryFence)
            {
                try
                {
                    // Unconditionally fence toSvc in case it is still trying to
                    // become active, eg we timed out waiting for its response.
                    // Unconditionally force fromSvc to become active since it
                    // was previously active when we initiated failover.
                    Failover(toSvc, fromSvc, true, true);
                }
                catch (FailoverFailedException ffe)
                {
                    msg += ". Failback to " + fromSvc + " failed (" + ffe.Message + ")";
                    Log.Fatal(msg);
                }
            }
            throw new FailoverFailedException(msg, cause);
        }