Exemplo n.º 1
0
        /// <summary>
        /// Handles the <c>-transitionToActive</c> command by asking the service
        /// named on the command line to transition to the active state.
        /// </summary>
        /// <param name="cmd">parsed command line; exactly one positional argument is expected</param>
        /// <returns>
        /// 0 once the transition request has been issued; -1 when the argument
        /// count is wrong, when the other-node-active check vetoes the request,
        /// or when the manual state management check fails.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
        private int TransitionToActive(CommandLine cmd)
        {
            string[] args = cmd.GetArgs();
            if (args.Length != 1)
            {
                errOut.WriteLine("transitionToActive: incorrect number of arguments");
                PrintUsage(errOut, "-transitionToActive");
                return -1;
            }

            string targetId = args[0];

            // Without the force option, bail out when the helper reports that the
            // other target node is active (per the original note it also reports
            // true when some exception occurred while checking).
            if (!cmd.HasOption(Forceactive) &&
                IsOtherTargetNodeActive(targetId, cmd.HasOption(Forceactive)))
            {
                return -1;
            }

            HAServiceTarget target = ResolveTarget(targetId);
            if (!CheckManualStateManagementOK(target))
            {
                return -1;
            }

            HAServiceProtocolHelper.TransitionToActive(target.GetProxy(GetConf(), 0), CreateReqInfo());
            return 0;
        }
Exemplo n.º 2
0
 /// <summary>
 /// Tries to transition the local target to the active state and records
 /// the outcome of the attempt via <c>RecordActiveAttempt</c>.
 /// </summary>
 /// <remarks>
 /// On success the cached service state is set to Active. On any failure the
 /// error is logged at fatal level, a failed attempt (message plus stringified
 /// exception) is recorded, and the failure is surfaced to the caller as a
 /// <c>ServiceFailedException</c>.
 /// </remarks>
 /// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException">
 /// if the transition fails; exceptions of any other type are wrapped in one
 /// </exception>
 private void BecomeActive()
 {
     lock (this)
     {
         Log.Info("Trying to make " + localTarget + " active...");
         try
         {
             HAServiceProtocolHelper.TransitionToActive(localTarget.GetProxy(conf, FailoverController
                                                                             .GetRpcTimeoutToNewActive(conf)), CreateReqInfo());
             string msg = "Successfully transitioned " + localTarget + " to active state";
             Log.Info(msg);
             serviceState = HAServiceProtocol.HAServiceState.Active;
             RecordActiveAttempt(new ZKFailoverController.ActiveAttemptRecord(true, msg));
         }
         catch (Exception t)
         {
             string msg = "Couldn't make " + localTarget + " active";
             Log.Fatal(msg, t);
             RecordActiveAttempt(new ZKFailoverController.ActiveAttemptRecord(false, msg + "\n"
                                                                              + StringUtils.StringifyException(t)));
             if (t is ServiceFailedException)
             {
                 // Bare rethrow keeps the original stack trace; rethrowing via
                 // an expression ("throw (ServiceFailedException)t;") resets it.
                 throw;
             }

             // Anything else is wrapped so callers only see ServiceFailedException.
             throw new ServiceFailedException("Couldn't transition to active", t);
         }
     }
 }
Exemplo n.º 3
0
        /// <summary>
        /// Runs the pre-failover checks against the service we intend to fail
        /// over to, refusing the failover when that service is unsuitable (for
        /// example: same as the source, unreachable, not in standby, not ready,
        /// or unhealthy).
        /// </summary>
        /// <remarks>
        /// A target that reports it is not ready to become active can still be
        /// accepted when <paramref name="forceActive"/> is set; in that case the
        /// not-ready reason is only logged as a warning. This is provided in case
        /// performing the failover will itself allow the target to become active,
        /// eg because it triggers a log roll so the standby can learn about new
        /// blocks and leave safemode.
        /// </remarks>
        /// <param name="from">currently active service</param>
        /// <param name="target">service to make active</param>
        /// <param name="forceActive">proceed even if the target reports that it is not ready</param>
        /// <exception cref="Org.Apache.Hadoop.HA.FailoverFailedException">if we should avoid failover</exception>
        private void PreFailoverChecks(HAServiceTarget from, HAServiceTarget target, bool forceActive)
        {
            if (from.GetAddress().Equals(target.GetAddress()))
            {
                throw new FailoverFailedException("Can't failover a service to itself");
            }

            // Both locals are assigned inside the try; the catch always throws,
            // so they are definitely assigned afterwards.
            HAServiceProtocol targetProxy;
            HAServiceStatus targetStatus;
            try
            {
                targetProxy = target.GetProxy(conf, rpcTimeoutToNewActive);
                targetStatus = targetProxy.GetServiceStatus();
            }
            catch (IOException e)
            {
                string msg = "Unable to get service state for " + target;
                Log.Error(msg + ": " + e.GetLocalizedMessage());
                throw new FailoverFailedException(msg, e);
            }

            // Any state other than Standby is rejected.
            bool isStandby = targetStatus.GetState().Equals(HAServiceProtocol.HAServiceState.Standby);
            if (!isStandby)
            {
                throw new FailoverFailedException("Can't failover to an active service");
            }

            if (!targetStatus.IsReadyToBecomeActive())
            {
                string reason = targetStatus.GetNotReadyReason();
                if (forceActive)
                {
                    Log.Warn("Service is not ready to become active, but forcing: " + reason);
                }
                else
                {
                    throw new FailoverFailedException(target + " is not ready to become active: " + reason);
                }
            }

            try
            {
                HAServiceProtocolHelper.MonitorHealth(targetProxy, CreateReqInfo());
            }
            catch (HealthCheckFailedException hce)
            {
                throw new FailoverFailedException("Can't failover to an unhealthy service", hce);
            }
            catch (IOException e)
            {
                throw new FailoverFailedException("Got an IO exception", e);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Handles the <c>-transitionToStandby</c> command by asking the service
        /// named on the command line to transition to the standby state.
        /// </summary>
        /// <param name="cmd">parsed command line; exactly one positional argument is expected</param>
        /// <returns>
        /// 0 once the transition request has been issued; -1 when the argument
        /// count is wrong or the manual state management check fails.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
        private int TransitionToStandby(CommandLine cmd)
        {
            string[] args = cmd.GetArgs();
            if (args.Length != 1)
            {
                errOut.WriteLine("transitionToStandby: incorrect number of arguments");
                PrintUsage(errOut, "-transitionToStandby");
                return -1;
            }

            HAServiceTarget target = ResolveTarget(args[0]);
            if (!CheckManualStateManagementOK(target))
            {
                return -1;
            }

            HAServiceProtocolHelper.TransitionToStandby(target.GetProxy(GetConf(), 0), CreateReqInfo());
            return 0;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Handles the <c>-checkHealth</c> command by running a health check
        /// against the service named on the command line.
        /// </summary>
        /// <param name="cmd">parsed command line; exactly one positional argument is expected</param>
        /// <returns>
        /// 0 when the health check completes without a
        /// <c>HealthCheckFailedException</c>; -1 when the argument count is wrong
        /// or the health check fails (the failure message is written to the
        /// error stream).
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
        private int CheckHealth(CommandLine cmd)
        {
            string[] args = cmd.GetArgs();
            if (args.Length != 1)
            {
                errOut.WriteLine("checkHealth: incorrect number of arguments");
                PrintUsage(errOut, "-checkHealth");
                return -1;
            }

            HAServiceTarget target = ResolveTarget(args[0]);
            HAServiceProtocol proto = target.GetProxy(GetConf(), rpcTimeoutForChecks);
            try
            {
                HAServiceProtocolHelper.MonitorHealth(proto, CreateReqInfo());
                return 0;
            }
            catch (HealthCheckFailedException e)
            {
                errOut.WriteLine("Health check failed: " + e.GetLocalizedMessage());
                return -1;
            }
        }
Exemplo n.º 6
0
        /// <summary>Failover from service 1 to service 2.</summary>
        /// <remarks>
        /// Sequence: verify a fencer is configured, run the pre-failover checks,
        /// attempt a graceful fence of <paramref name="fromSvc"/>, fence it for
        /// real when the graceful attempt did not succeed or fencing is forced,
        /// then ask <paramref name="toSvc"/> to become active. If that last step
        /// fails and <paramref name="fromSvc"/> was not fenced, a failback is
        /// attempted before the failure is reported.
        /// </remarks>
        /// <param name="fromSvc">currently active service</param>
        /// <param name="toSvc">service to make active</param>
        /// <param name="forceFence">to fence fromSvc even if not strictly necessary</param>
        /// <param name="forceActive">try to make toSvc active even if it is not ready</param>
        /// <exception cref="Org.Apache.Hadoop.HA.FailoverFailedException">if the failover fails</exception>
        public virtual void Failover(HAServiceTarget fromSvc, HAServiceTarget toSvc, bool forceFence, bool forceActive)
        {
            Preconditions.CheckArgument(fromSvc.GetFencer() != null, "failover requires a fencer");
            PreFailoverChecks(fromSvc, toSvc, forceActive);

            // Try to make fromSvc standby. Fencing is needed when that graceful
            // attempt did not succeed, or when the user forces it.
            bool mustFence = !TryGracefulFence(fromSvc) || forceFence;
            if (mustFence && !fromSvc.GetFencer().Fence(fromSvc))
            {
                throw new FailoverFailedException("Unable to fence " + fromSvc + ". Fencing failed.");
            }

            // Try to make toSvc active; remember the failure (if any) so it can
            // be attached as the cause of the exception reported below.
            Exception failure = null;
            try
            {
                HAServiceProtocol activeProxy = toSvc.GetProxy(conf, rpcTimeoutToNewActive);
                HAServiceProtocolHelper.TransitionToActive(activeProxy, CreateReqInfo());
            }
            catch (ServiceFailedException sfe)
            {
                Log.Error("Unable to make " + toSvc + " active (" + sfe.Message + "). Failing back.");
                failure = sfe;
            }
            catch (IOException ioe)
            {
                Log.Error("Unable to make " + toSvc + " active (unable to connect). Failing back.", ioe);
                failure = ioe;
            }

            if (failure == null)
            {
                return;
            }

            // We failed to make toSvc active.
            string msg = "Unable to failover to " + toSvc;

            // Only try to failback if we didn't fence fromSvc.
            if (!mustFence)
            {
                try
                {
                    // Unconditionally fence toSvc in case it is still trying to
                    // become active, eg we timed out waiting for its response.
                    // Unconditionally force fromSvc to become active since it
                    // was previously active when we initiated failover.
                    Failover(toSvc, fromSvc, true, true);
                }
                catch (FailoverFailedException ffe)
                {
                    msg += ". Failback to " + fromSvc + " failed (" + ffe.Message + ")";
                    Log.Fatal(msg);
                }
            }

            throw new FailoverFailedException(msg, failure);
        }