/// <summary>
/// Handles the <c>-transitionToActive</c> command by asking the single
/// target named on the command line to transition to the active state.
/// </summary>
/// <param name="cmd">parsed command line; exactly one positional argument is expected</param>
/// <returns>
/// 0 once the transition request has been issued; -1 if the argument count
/// is wrong, the other-node-active check rejects the request, or manual
/// state management is not permitted for the target.
/// </returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
private int TransitionToActive(CommandLine cmd)
{
    string[] args = cmd.GetArgs();
    if (args.Length != 1)
    {
        errOut.WriteLine("transitionToActive: incorrect number of arguments");
        PrintUsage(errOut, "-transitionToActive");
        return -1;
    }

    // Only consulted when the force option is absent. Per the original note,
    // IsOtherTargetNodeActive returns true if the other target node is active
    // or some exception occurred and forceActive was not set.
    if (!cmd.HasOption(Forceactive)
        && IsOtherTargetNodeActive(args[0], cmd.HasOption(Forceactive)))
    {
        return -1;
    }

    HAServiceTarget serviceTarget = ResolveTarget(args[0]);
    if (!CheckManualStateManagementOK(serviceTarget))
    {
        return -1;
    }

    HAServiceProtocol proxy = serviceTarget.GetProxy(GetConf(), 0);
    HAServiceProtocolHelper.TransitionToActive(proxy, CreateReqInfo());
    return 0;
}
/// <summary>
/// Asks the local target to transition to the active state and records the
/// outcome of the attempt.
/// </summary>
/// <remarks>
/// On success the cached service state is set to
/// <c>HAServiceProtocol.HAServiceState.Active</c> and a successful
/// <c>ActiveAttemptRecord</c> is recorded. On any failure the error is logged
/// at fatal level, a failed <c>ActiveAttemptRecord</c> carrying the
/// stringified exception is recorded, and a
/// <c>ServiceFailedException</c> is propagated to the caller.
/// </remarks>
/// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException">
/// the transition failed; a <c>ServiceFailedException</c> raised by the
/// transition is rethrown as-is, any other exception is wrapped.
/// </exception>
private void BecomeActive()
{
    // NOTE(review): the lock target is intentionally left as `this`; other
    // members of the class (not visible here) may synchronize on the same
    // instance, so switching to a private gate object must be done class-wide.
    lock (this)
    {
        Log.Info("Trying to make " + localTarget + " active...");
        try
        {
            HAServiceProtocolHelper.TransitionToActive(
                localTarget.GetProxy(conf, FailoverController.GetRpcTimeoutToNewActive(conf)),
                CreateReqInfo());
            string msg = "Successfully transitioned " + localTarget + " to active state";
            Log.Info(msg);
            serviceState = HAServiceProtocol.HAServiceState.Active;
            RecordActiveAttempt(new ZKFailoverController.ActiveAttemptRecord(true, msg));
        }
        catch (Exception t)
        {
            string msg = "Couldn't make " + localTarget + " active";
            Log.Fatal(msg, t);
            RecordActiveAttempt(new ZKFailoverController.ActiveAttemptRecord(
                false, msg + "\n" + StringUtils.StringifyException(t)));
            if (t is ServiceFailedException)
            {
                // Bare rethrow: `throw (ServiceFailedException)t;` would reset
                // the exception's stack trace to this frame.
                throw;
            }
            throw new ServiceFailedException("Couldn't transition to active", t);
        }
    }
}
/// <summary>
/// Validates that <paramref name="target"/> is a suitable service to fail
/// over to before any state is changed.
/// </summary>
/// <remarks>
/// The checks run in this order: the two services must have different
/// addresses; the target's status must be retrievable; the target must
/// report the Standby state; the target must report that it is ready to
/// become active (unless <paramref name="forceActive"/> is set, in which
/// case a warning is logged instead); and the target must pass a health
/// check. The force option exists for the case where performing the
/// failover is itself what allows the target to become ready, eg because
/// it triggers a log roll so the standby can learn about new blocks and
/// leave safemode.
/// </remarks>
/// <param name="from">currently active service</param>
/// <param name="target">service to make active</param>
/// <param name="forceActive">proceed even if the target reports that it is not ready</param>
/// <exception cref="Org.Apache.Hadoop.HA.FailoverFailedException">if we should avoid failover</exception>
private void PreFailoverChecks(HAServiceTarget from, HAServiceTarget target, bool forceActive)
{
    if (from.GetAddress().Equals(target.GetAddress()))
    {
        throw new FailoverFailedException("Can't failover a service to itself");
    }

    HAServiceProtocol targetProxy;
    HAServiceStatus targetStatus;
    try
    {
        targetProxy = target.GetProxy(conf, rpcTimeoutToNewActive);
        targetStatus = targetProxy.GetServiceStatus();
    }
    catch (IOException ioe)
    {
        string failure = "Unable to get service state for " + target;
        Log.Error(failure + ": " + ioe.GetLocalizedMessage());
        throw new FailoverFailedException(failure, ioe);
    }

    // Any state other than Standby is rejected with the same message.
    bool isStandby = targetStatus.GetState().Equals(HAServiceProtocol.HAServiceState.Standby);
    if (!isStandby)
    {
        throw new FailoverFailedException("Can't failover to an active service");
    }

    if (!targetStatus.IsReadyToBecomeActive())
    {
        string reason = targetStatus.GetNotReadyReason();
        if (forceActive)
        {
            Log.Warn("Service is not ready to become active, but forcing: " + reason);
        }
        else
        {
            throw new FailoverFailedException(target + " is not ready to become active: " + reason);
        }
    }

    try
    {
        HAServiceProtocolHelper.MonitorHealth(targetProxy, CreateReqInfo());
    }
    catch (HealthCheckFailedException unhealthy)
    {
        throw new FailoverFailedException("Can't failover to an unhealthy service", unhealthy);
    }
    catch (IOException ioe)
    {
        throw new FailoverFailedException("Got an IO exception", ioe);
    }
}
/// <summary>
/// Handles the <c>-transitionToStandby</c> command by asking the single
/// target named on the command line to transition to the standby state.
/// </summary>
/// <param name="cmd">parsed command line; exactly one positional argument is expected</param>
/// <returns>
/// 0 once the transition request has been issued; -1 if the argument count
/// is wrong or manual state management is not permitted for the target.
/// </returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
private int TransitionToStandby(CommandLine cmd)
{
    string[] args = cmd.GetArgs();
    if (args.Length != 1)
    {
        errOut.WriteLine("transitionToStandby: incorrect number of arguments");
        PrintUsage(errOut, "-transitionToStandby");
        return -1;
    }

    HAServiceTarget serviceTarget = ResolveTarget(args[0]);
    if (!CheckManualStateManagementOK(serviceTarget))
    {
        return -1;
    }

    HAServiceProtocol proxy = serviceTarget.GetProxy(GetConf(), 0);
    HAServiceProtocolHelper.TransitionToStandby(proxy, CreateReqInfo());
    return 0;
}
/// <summary>
/// Handles the <c>-checkHealth</c> command by running a health check
/// against the single target named on the command line.
/// </summary>
/// <param name="cmd">parsed command line; exactly one positional argument is expected</param>
/// <returns>
/// 0 if the health check completes without a
/// <c>HealthCheckFailedException</c>; -1 if the argument count is wrong or
/// the health check fails (the failure message is written to the error
/// stream).
/// </returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
private int CheckHealth(CommandLine cmd)
{
    string[] args = cmd.GetArgs();
    if (args.Length != 1)
    {
        errOut.WriteLine("checkHealth: incorrect number of arguments");
        PrintUsage(errOut, "-checkHealth");
        return -1;
    }

    HAServiceTarget serviceTarget = ResolveTarget(args[0]);
    HAServiceProtocol proxy = serviceTarget.GetProxy(GetConf(), rpcTimeoutForChecks);
    try
    {
        HAServiceProtocolHelper.MonitorHealth(proxy, CreateReqInfo());
        return 0;
    }
    catch (HealthCheckFailedException unhealthy)
    {
        errOut.WriteLine("Health check failed: " + unhealthy.GetLocalizedMessage());
        return -1;
    }
}
/// <summary>Fails over from one service to another.</summary>
/// <remarks>
/// After the pre-failover checks pass, <paramref name="fromSvc"/> is asked
/// to give up the active role gracefully; it is fenced if that does not
/// succeed or if <paramref name="forceFence"/> is set. Then
/// <paramref name="toSvc"/> is asked to become active. If that last step
/// fails and <paramref name="fromSvc"/> was not fenced, a failback to
/// <paramref name="fromSvc"/> is attempted before the failure is reported.
/// </remarks>
/// <param name="fromSvc">currently active service</param>
/// <param name="toSvc">service to make active</param>
/// <param name="forceFence">to fence fromSvc even if not strictly necessary</param>
/// <param name="forceActive">try to make toSvc active even if it is not ready</param>
/// <exception cref="Org.Apache.Hadoop.HA.FailoverFailedException">if the failover fails</exception>
public virtual void Failover(HAServiceTarget fromSvc, HAServiceTarget toSvc, bool forceFence, bool forceActive)
{
    Preconditions.CheckArgument(fromSvc.GetFencer() != null, "failover requires a fencer");
    PreFailoverChecks(fromSvc, toSvc, forceActive);

    // Attempt the graceful route first. Fencing is needed when that route
    // fails, and is otherwise done only if the caller forces it.
    bool tryFence = !TryGracefulFence(fromSvc) || forceFence;
    if (tryFence && !fromSvc.GetFencer().Fence(fromSvc))
    {
        throw new FailoverFailedException("Unable to fence " + fromSvc + ". Fencing failed.");
    }

    // Ask toSvc to take over; remember why it failed, if it did.
    Exception cause = null;
    try
    {
        HAServiceProtocolHelper.TransitionToActive(
            toSvc.GetProxy(conf, rpcTimeoutToNewActive), CreateReqInfo());
    }
    catch (ServiceFailedException sfe)
    {
        Log.Error("Unable to make " + toSvc + " active (" + sfe.Message + "). Failing back.");
        cause = sfe;
    }
    catch (IOException ioe)
    {
        Log.Error("Unable to make " + toSvc + " active (unable to connect). Failing back.", ioe);
        cause = ioe;
    }

    if (cause == null)
    {
        return;
    }

    // toSvc could not be made active.
    string msg = "Unable to failover to " + toSvc;

    // A failback is only attempted when fromSvc was not fenced.
    if (!tryFence)
    {
        try
        {
            // Always fence toSvc, since it may still be trying to become
            // active (eg we timed out waiting for its response), and always
            // force fromSvc active, since it was the active service when
            // this failover was initiated.
            Failover(toSvc, fromSvc, true, true);
        }
        catch (FailoverFailedException ffe)
        {
            msg += ". Failback to " + fromSvc + " failed (" + ffe.Message + ")";
            Log.Fatal(msg);
        }
    }

    throw new FailoverFailedException(msg, cause);
}