示例#1
0
 /// <summary>
 /// Records the invocation and reports that fencing did not succeed.
 /// </summary>
 /// <param name="target">service being fenced; stored in fencedSvc</param>
 /// <param name="args">fencer argument; appended to callArgs</param>
 /// <returns>always false</returns>
 public virtual bool TryFence(HAServiceTarget target, string args)
 {
     // Keep track of who was fenced and with which argument.
     fencedSvc = target;
     callArgs.AddItem(args);
     fenceCalled += 1;

     return false;
 }
示例#2
0
        /// <summary>
        /// Fences the given target, first attempting a graceful transition to
        /// standby and only falling back to the configured fencer if that fails.
        /// </summary>
        /// <param name="target">the service to fence</param>
        /// <exception cref="RuntimeException">
        /// if the fencing configuration check fails or the fencer reports failure
        /// </exception>
        private void DoFence(HAServiceTarget target)
        {
            Log.Info("Should fence: " + target);

            FailoverController controller = new FailoverController(
                conf, HAServiceProtocol.RequestSource.RequestByZkfc);

            if (controller.TryGracefulFence(target))
            {
                // It's possible that it's in standby but just about to go into active,
                // no? Is there some race here?
                Log.Info("Successfully transitioned " + target + " to standby " + "state without fencing");
                return;
            }

            // Graceful fencing did not work: make sure a fencer is configured
            // before invoking it.
            try
            {
                target.CheckFencingConfigured();
            }
            catch (BadFencingConfigurationException e)
            {
                Log.Error("Couldn't fence old active " + target, e);
                RecordActiveAttempt(new ZKFailoverController.ActiveAttemptRecord(
                                        false, "Unable to fence old active"));
                throw new RuntimeException(e);
            }

            bool fenced = target.GetFencer().Fence(target);
            if (!fenced)
            {
                throw new RuntimeException("Unable to fence " + target);
            }
        }
示例#3
0
        // Nothing else we can really check without actually running the command
        /// <summary>
        /// Runs the given fencing command in a shell subprocess and reports
        /// whether it exited with status 0.
        /// </summary>
        /// <remarks>
        /// The command is launched through <c>bash -e -c</c>, or through
        /// <c>cmd.exe /c</c> when <c>Shell.Windows</c> is set. The subprocess
        /// environment is populated by SetConfAsEnvVars and
        /// AddTargetInfoAsEnvVars, and both of its output streams are pumped
        /// into the log.
        /// </remarks>
        /// <param name="target">service being fenced</param>
        /// <param name="cmd">command line to execute</param>
        /// <returns>
        /// true if the command exited with code 0; false if it could not be
        /// started, if waiting for it failed, or if it exited non-zero
        /// </returns>
        public virtual bool TryFence(HAServiceTarget target, string cmd)
        {
            ProcessStartInfo startInfo = Shell.Windows
                ? new ProcessStartInfo("cmd.exe", "/c", cmd)
                : new ProcessStartInfo("bash", "-e", "-c", cmd);

            SetConfAsEnvVars(startInfo.EnvironmentVariables);
            AddTargetInfoAsEnvVars(target, startInfo.EnvironmentVariables);

            SystemProcess process;
            try
            {
                process = startInfo.Start();
                // NOTE(review): presumably this is the child's stdin (Java Process
                // semantics), closed because nothing is written to it -- confirm.
                process.GetOutputStream().Close();
            }
            catch (IOException e)
            {
                Log.Warn("Unable to execute " + cmd, e);
                return false;
            }

            string pid = TryGetPid(process);
            string pidText = (pid != null) ? ("pid " + pid) : "unknown pid";
            Log.Info("Launched fencing command '" + cmd + "' with " + pidText);

            // Prefix every pumped log line with the (abbreviated) command and,
            // when known, the process id.
            string logPrefix = Abbreviate(cmd, AbbrevLength);
            if (pid != null)
            {
                logPrefix = "[PID " + pid + "] " + logPrefix;
            }

            // Pump logs to stderr
            StreamPumper errPumper = new StreamPumper(
                Log, logPrefix, process.GetErrorStream(), StreamPumper.StreamType.Stderr);
            errPumper.Start();

            StreamPumper outPumper = new StreamPumper(
                Log, logPrefix, process.GetInputStream(), StreamPumper.StreamType.Stdout);
            outPumper.Start();

            try
            {
                int exitCode = process.WaitFor();
                errPumper.Join();
                outPumper.Join();
                return exitCode == 0;
            }
            catch (Exception)
            {
                Log.Warn("Interrupted while waiting for fencing command: " + cmd);
                return false;
            }
        }
示例#4
0
        /// <summary>
        /// Handles the -transitionToActive command: asks the single target named
        /// on the command line to become active.
        /// </summary>
        /// <param name="cmd">parsed command line; must carry exactly one argument</param>
        /// <returns>0 if the transition was requested, -1 if it was rejected</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
        private int TransitionToActive(CommandLine cmd)
        {
            string[] targets = cmd.GetArgs();
            if (targets.Length != 1)
            {
                errOut.WriteLine("transitionToActive: incorrect number of arguments");
                PrintUsage(errOut, "-transitionToActive");
                return -1;
            }
            string targetId = targets[0];

            // Unless forced, refuse when IsOtherTargetNodeActive reports that
            // another node is active or that its state could not be checked.
            if (!cmd.HasOption(Forceactive) &&
                IsOtherTargetNodeActive(targetId, cmd.HasOption(Forceactive)))
            {
                return -1;
            }

            HAServiceTarget target = ResolveTarget(targetId);
            if (!CheckManualStateManagementOK(target))
            {
                return -1;
            }

            HAServiceProtocolHelper.TransitionToActive(target.GetProxy(GetConf(), 0), CreateReqInfo());
            return 0;
        }
示例#5
0
        /// <summary>
        /// Try to fence the given service gracefully by asking it to transition
        /// to standby.
        /// </summary>
        /// <remarks>
        /// The request is sent through a proxy obtained with gracefulFenceConf
        /// and gracefulFenceTimeout. A ServiceFailedException or IOException is
        /// logged as a warning and reported as failure instead of being
        /// propagated. The proxy, if one was obtained, is stopped before
        /// returning.
        /// </remarks>
        /// <param name="svc">the service to transition to standby</param>
        /// <returns>true if the transition request completed without error</returns>
        internal virtual bool TryGracefulFence(HAServiceTarget svc)
        {
            bool madeStandby = false;
            HAServiceProtocol proxy = null;

            try
            {
                proxy = svc.GetProxy(gracefulFenceConf, gracefulFenceTimeout);
                proxy.TransitionToStandby(CreateReqInfo());
                madeStandby = true;
            }
            catch (ServiceFailedException sfe)
            {
                Log.Warn("Unable to gracefully make " + svc + " standby (" + sfe.Message + ")");
            }
            catch (IOException ioe)
            {
                Log.Warn("Unable to gracefully make " + svc + " standby (unable to connect)", ioe);
            }
            finally
            {
                // Release the proxy regardless of how the attempt ended.
                if (proxy != null)
                {
                    RPC.StopProxy(proxy);
                }
            }

            return madeStandby;
        }
示例#6
0
        /// <summary>
        /// Tries each configured fencing method in order until one succeeds.
        /// </summary>
        /// <remarks>
        /// A method that throws is logged as an error and skipped; a method that
        /// returns false is logged as unsuccessful. In both cases the next
        /// method is tried.
        /// </remarks>
        /// <param name="fromSvc">the service to fence</param>
        /// <returns>true as soon as one method reports success; false if none does</returns>
        public virtual bool Fence(HAServiceTarget fromSvc)
        {
            Log.Info("====== Beginning Service Fencing Process... ======");

            int attempt = 0;
            foreach (NodeFencer.FenceMethodWithArg candidate in methods)
            {
                attempt += 1;
                Log.Info("Trying method " + attempt + "/" + methods.Count + ": " + candidate);

                try
                {
                    bool fenced = candidate.method.TryFence(fromSvc, candidate.arg);
                    if (fenced)
                    {
                        Log.Info("====== Fencing successful by method " + candidate + " ======");
                        return true;
                    }
                }
                catch (BadFencingConfigurationException e)
                {
                    Log.Error("Fencing method " + candidate + " misconfigured", e);
                    continue;
                }
                catch (Exception t)
                {
                    Log.Error("Fencing method " + candidate + " failed with an unexpected error.", t);
                    continue;
                }

                // Reached only when the method ran to completion and returned false.
                Log.Warn("Fencing method " + candidate + " was unsuccessful.");
            }

            Log.Error("Unable to fence service by any configured method.");
            return false;
        }
示例#7
0
        /// <summary>
        /// Fails over from tgt1 to tgt2 through a freshly created
        /// FailoverController whose request source is RequestByUser.
        /// </summary>
        /// <param name="tgt1">service to fail over from</param>
        /// <param name="tgt2">service to fail over to</param>
        /// <param name="forceFence">passed through to FailoverController.Failover</param>
        /// <param name="forceActive">passed through to FailoverController.Failover</param>
        /// <exception cref="Org.Apache.Hadoop.HA.FailoverFailedException"/>
        private void DoFailover(HAServiceTarget tgt1, HAServiceTarget tgt2, bool forceFence, bool forceActive)
        {
            new FailoverController(conf, HAServiceProtocol.RequestSource.RequestByUser)
                .Failover(tgt1, tgt2, forceFence, forceActive);
        }
示例#8
0
        /// <summary>
        /// Handles the -failover command: fails over from the first named node
        /// to the second.
        /// </summary>
        /// <remarks>
        /// When auto-failover is enabled the request is routed through
        /// GracefulFailoverThroughZKFCs and the force flags are rejected;
        /// otherwise a FailoverController performs the failover directly.
        /// </remarks>
        /// <param name="cmd">parsed command line; must carry exactly two arguments</param>
        /// <returns>0 on success, -1 on bad arguments or a failed failover</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
        private int Failover(CommandLine cmd)
        {
            bool forceFence = cmd.HasOption(Forcefence);
            bool forceActive = cmd.HasOption(Forceactive);

            int numOpts = 0;
            if (cmd.GetOptions() != null)
            {
                numOpts = cmd.GetOptions().Length;
            }

            string[] args = cmd.GetArgs();
            if (numOpts > 3 || args.Length != 2)
            {
                errOut.WriteLine("failover: incorrect arguments");
                PrintUsage(errOut, "-failover");
                return -1;
            }

            HAServiceTarget fromNode = ResolveTarget(args[0]);
            HAServiceTarget toNode = ResolveTarget(args[1]);

            // Check that auto-failover is consistently configured for both nodes.
            Preconditions.CheckState(
                fromNode.IsAutoFailoverEnabled() == toNode.IsAutoFailoverEnabled(),
                "Inconsistent auto-failover configs between %s and %s!", fromNode, toNode);

            if (fromNode.IsAutoFailoverEnabled())
            {
                if (forceFence || forceActive)
                {
                    // -forceActive doesn't make sense with auto-HA, since, if the node
                    // is not healthy, then its ZKFC will immediately quit the election
                    // again the next time a health check runs.
                    //
                    // -forceFence doesn't seem to have any real use cases with auto-HA
                    // so it isn't implemented.
                    errOut.WriteLine(Forcefence + " and " + Forceactive + " flags not " + "supported with auto-failover enabled.");
                    return -1;
                }

                try
                {
                    return GracefulFailoverThroughZKFCs(toNode);
                }
                catch (NotSupportedException e)
                {
                    errOut.WriteLine("Failover command is not supported with " + "auto-failover enabled: "
                                     + e.GetLocalizedMessage());
                    return -1;
                }
            }

            // Manual path: drive the failover ourselves.
            FailoverController controller = new FailoverController(GetConf(), requestSource);
            try
            {
                controller.Failover(fromNode, toNode, forceFence, forceActive);
                @out.WriteLine("Failover from " + args[0] + " to " + args[1] + " successful");
            }
            catch (FailoverFailedException ffe)
            {
                errOut.WriteLine("Failover failed: " + ffe.GetLocalizedMessage());
                return -1;
            }

            return 0;
        }
示例#9
0
 /// <summary>
 /// Add information about the target to the environment of the
 /// subprocess.
 /// </summary>
 /// <remarks>
 /// Each fencing parameter of the target is stored under its key prefixed
 /// with TargetPrefix, with every '.' in the resulting name replaced by '_'.
 /// </remarks>
 /// <param name="target">service whose fencing parameters are exported</param>
 /// <param name="environment">environment map that receives the entries</param>
 private void AddTargetInfoAsEnvVars(HAServiceTarget target, IDictionary<string, string> environment)
 {
     foreach (KeyValuePair<string, string> param in target.GetFencingParameters())
     {
         string envName = (TargetPrefix + param.Key).Replace('.', '_');
         environment[envName] = param.Value;
     }
 }
示例#10
0
 /// <summary>
 /// Resets the shared state of the two fencer stubs and installs a fresh
 /// mocked HAServiceTarget in MockTarget.
 /// </summary>
 public virtual void ClearMockState()
 {
     // Zero the call counters and forget recorded arguments on both fencers.
     TestNodeFencer.AlwaysSucceedFencer.fenceCalled = 0;
     TestNodeFencer.AlwaysSucceedFencer.callArgs.Clear();
     TestNodeFencer.AlwaysFailFencer.fenceCalled = 0;
     TestNodeFencer.AlwaysFailFencer.callArgs.Clear();
     // Replace the mock target and stub its ToString() and GetAddress() results.
     MockTarget = Org.Mockito.Mockito.Mock <HAServiceTarget>();
     Org.Mockito.Mockito.DoReturn("my mock").When(MockTarget).ToString();
     Org.Mockito.Mockito.DoReturn(new IPEndPoint("host", 1234)).When(MockTarget).GetAddress
         ();
 }
示例#11
0
 /// <summary>
 /// Creates a monitor for the given target, reading its intervals and RPC
 /// timeout from the configuration.
 /// </summary>
 /// <param name="conf">configuration the settings are read from</param>
 /// <param name="target">the service to monitor</param>
 internal HealthMonitor(Configuration conf, HAServiceTarget target)
 {
     targetToMonitor = target;
     this.conf = conf;

     // Each setting falls back to its matching *Default value.
     sleepAfterDisconnectMillis = conf.GetLong(HaHmSleepAfterDisconnectKey, HaHmSleepAfterDisconnectDefault);
     checkIntervalMillis = conf.GetLong(HaHmCheckIntervalKey, HaHmCheckIntervalDefault);
     connectRetryInterval = conf.GetLong(HaHmConnectRetryIntervalKey, HaHmConnectRetryIntervalDefault);
     rpcTimeout = conf.GetInt(HaHmRpcTimeoutKey, HaHmRpcTimeoutDefault);

     daemon = new HealthMonitor.MonitorDaemon(this);
 }
示例#12
0
        /// <summary>
        /// Perform pre-failover checks on the service we plan to fail over to,
        /// so that we do not fail over to a service that is unreachable, not in
        /// standby, not ready, or unhealthy.
        /// </summary>
        /// <remarks>
        /// The forceActive option lets the caller ignore the target's claim
        /// that it is not ready to become active, in case performing a failover
        /// will allow it to become active, eg because it triggers a log roll
        /// so the standby can learn about new blocks and leave safemode.
        /// </remarks>
        /// <param name="from">currently active service</param>
        /// <param name="target">service to make active</param>
        /// <param name="forceActive">ignore target if it reports that it is not ready</param>
        /// <exception cref="Org.Apache.Hadoop.HA.FailoverFailedException">if we should avoid failover</exception>
        private void PreFailoverChecks(HAServiceTarget from, HAServiceTarget target, bool forceActive)
        {
            if (from.GetAddress().Equals(target.GetAddress()))
            {
                throw new FailoverFailedException("Can't failover a service to itself");
            }

            HAServiceProtocol toSvc;
            HAServiceStatus toSvcStatus;
            try
            {
                toSvc = target.GetProxy(conf, rpcTimeoutToNewActive);
                toSvcStatus = toSvc.GetServiceStatus();
            }
            catch (IOException e)
            {
                string msg = "Unable to get service state for " + target;
                Log.Error(msg + ": " + e.GetLocalizedMessage());
                throw new FailoverFailedException(msg, e);
            }

            // Only a service currently in standby is an acceptable target.
            bool isStandby = toSvcStatus.GetState().Equals(HAServiceProtocol.HAServiceState.Standby);
            if (!isStandby)
            {
                throw new FailoverFailedException("Can't failover to an active service");
            }

            if (!toSvcStatus.IsReadyToBecomeActive())
            {
                string notReadyReason = toSvcStatus.GetNotReadyReason();
                if (forceActive)
                {
                    Log.Warn("Service is not ready to become active, but forcing: " + notReadyReason);
                }
                else
                {
                    throw new FailoverFailedException(target + " is not ready to become active: " + notReadyReason);
                }
            }

            try
            {
                HAServiceProtocolHelper.MonitorHealth(toSvc, CreateReqInfo());
            }
            catch (HealthCheckFailedException hce)
            {
                throw new FailoverFailedException("Can't failover to an unhealthy service", hce);
            }
            catch (IOException e)
            {
                throw new FailoverFailedException("Got an IO exception", e);
            }
        }
示例#13
0
        /// <summary>Initiate a graceful failover by talking to the target node's ZKFC.</summary>
        /// <remarks>
        /// Sends a GracefulFailover RPC to the ZKFC proxy of the target node,
        /// using the timeout returned by FailoverController.GetRpcTimeoutToNewActive.
        /// A ServiceFailedException is reported on errOut and turned into a
        /// -1 status instead of being propagated.
        /// </remarks>
        /// <param name="toNode">the node to fail to</param>
        /// <returns>status code (0 for success, -1 if the service reported failure)</returns>
        /// <exception cref="System.IO.IOException">if failover does not succeed</exception>
        private int GracefulFailoverThroughZKFCs(HAServiceTarget toNode)
        {
            int rpcTimeout = FailoverController.GetRpcTimeoutToNewActive(GetConf());
            ZKFCProtocol zkfc = toNode.GetZKFCProxy(GetConf(), rpcTimeout);

            try
            {
                zkfc.GracefulFailover();
                @out.WriteLine("Failover to " + toNode + " successful");
                return 0;
            }
            catch (ServiceFailedException sfe)
            {
                errOut.WriteLine("Failover failed: " + sfe.GetLocalizedMessage());
                return -1;
            }
        }
示例#14
0
 /// <summary>
 /// Fences the node described by the given data while holding the lock on
 /// this instance.
 /// </summary>
 /// <remarks>
 /// If fencing throws, a failed ActiveAttemptRecord carrying the
 /// stringified exception is recorded and the exception is handed to
 /// Throwables.Propagate.
 /// </remarks>
 /// <param name="data">bytes decoded into the target via DataToTarget</param>
 private void FenceOldActive(byte[] data)
 {
     lock (this)
     {
         HAServiceTarget oldActive = DataToTarget(data);
         try
         {
             DoFence(oldActive);
         }
         catch (Exception t)
         {
             string status = "Unable to fence old active: " + StringUtils.StringifyException(t);
             RecordActiveAttempt(new ZKFailoverController.ActiveAttemptRecord(false, status));
             Throwables.Propagate(t);
         }
     }
 }
示例#15
0
            /// <summary>
            /// Fences a DummyHAService: counts the attempt and, unless the
            /// service is set to fail fencing, releases its shared resource.
            /// </summary>
            /// <param name="target">service to fence; must be a DummyHAService</param>
            /// <param name="args">not used by this method</param>
            /// <returns>false if failToFence is set on the service, true otherwise</returns>
            /// <exception cref="Org.Apache.Hadoop.HA.BadFencingConfigurationException"/>
            public virtual bool TryFence(HAServiceTarget target, string args)
            {
                Log.Info("tryFence(" + target + ")");
                DummyHAService dummy = (DummyHAService)target;

                // The counter is bumped under the service's own lock.
                lock (dummy)
                {
                    dummy.fenceCount += 1;
                }

                if (!dummy.failToFence)
                {
                    dummy.sharedResource.Release(dummy);
                    return true;
                }

                Log.Info("Injected failure to fence");
                return false;
            }
示例#16
0
 /// <summary>
 /// Ensure that we are allowed to manually manage the HA state of the target
 /// service.
 /// </summary>
 /// <remarks>
 /// If automatic failover is configured, then the automatic failover
 /// controllers should be doing state management, and it is generally
 /// an error to use the HAAdmin command line to do so. In that case the
 /// request is allowed only when the request source is RequestByUserForced;
 /// a warning is logged when it proceeds and an explanation is written to
 /// errOut when it is refused.
 /// </remarks>
 /// <param name="target">the target to check</param>
 /// <returns>true if manual state management is allowed</returns>
 private bool CheckManualStateManagementOK(HAServiceTarget target)
 {
     // Nothing to guard against when auto-failover is off.
     if (!target.IsAutoFailoverEnabled())
     {
         return true;
     }

     if (requestSource == HAServiceProtocol.RequestSource.RequestByUserForced)
     {
         Log.Warn("Proceeding with manual HA state management even though\n" + "automatic failover is enabled for "
                  + target);
         return true;
     }

     errOut.WriteLine("Automatic failover is enabled for " + target + "\n" + "Refusing to manually manage HA state, since it may cause\n"
                      + "a split-brain scenario or other incorrect state.\n" + "If you are very sure you know what you are doing, please \n"
                      + "specify the --" + Forcemanual + " flag.");
     return false;
 }
示例#17
0
        /// <summary>
        /// Handles the -transitionToStandby command: asks the single target
        /// named on the command line to become standby.
        /// </summary>
        /// <param name="cmd">parsed command line; must carry exactly one argument</param>
        /// <returns>0 if the transition was requested, -1 if it was rejected</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/>
        private int TransitionToStandby(CommandLine cmd)
        {
            string[] targets = cmd.GetArgs();
            if (targets.Length != 1)
            {
                errOut.WriteLine("transitionToStandby: incorrect number of arguments");
                PrintUsage(errOut, "-transitionToStandby");
                return -1;
            }

            HAServiceTarget target = ResolveTarget(targets[0]);
            if (!CheckManualStateManagementOK(target))
            {
                return -1;
            }

            HAServiceProtocolHelper.TransitionToStandby(target.GetProxy(GetConf(), 0), CreateReqInfo());
            return 0;
        }
示例#18
0
        /// <summary>
        /// Fences the target by opening an SSH session to its host and running
        /// DoFence over that session.
        /// </summary>
        /// <param name="target">service to fence; its address supplies the host name</param>
        /// <param name="argsStr">fencer argument string, parsed into SshFenceByTcpPort.Args</param>
        /// <returns>
        /// the result of DoFence, or false if the session could not be created,
        /// could not connect, or DoFence failed with a JSchException
        /// </returns>
        /// <exception cref="Org.Apache.Hadoop.HA.BadFencingConfigurationException"/>
        public virtual bool TryFence(HAServiceTarget target, string argsStr)
        {
            SshFenceByTcpPort.Args sshArgs = new SshFenceByTcpPort.Args(argsStr);
            IPEndPoint address = target.GetAddress();
            string host = address.GetHostName();

            Session ssh;
            try
            {
                ssh = CreateSession(address.GetHostName(), sshArgs);
            }
            catch (JSchException e)
            {
                Log.Warn("Unable to create SSH session", e);
                return false;
            }

            Log.Info("Connecting to " + host + "...");
            try
            {
                ssh.Connect(GetSshConnectTimeout());
            }
            catch (JSchException e)
            {
                Log.Warn("Unable to connect to " + host + " as user " + sshArgs.user, e);
                return false;
            }
            Log.Info("Connected to " + host);

            // From here on the session is connected, so always disconnect it.
            try
            {
                return DoFence(ssh, address);
            }
            catch (JSchException e)
            {
                Log.Warn("Unable to achieve fencing on remote host", e);
                return false;
            }
            finally
            {
                ssh.Disconnect();
            }
        }
示例#19
0
        /// <summary>Checks whether any other target node is active.</summary>
        /// <remarks>
        /// Every target id returned by GetTargetIds, except the node to
        /// activate, is resolved and queried for its service state with a
        /// 5000 proxy timeout.
        /// </remarks>
        /// <param name="targetNodeToActivate">id of the node that is about to be made active</param>
        /// <param name="forceActive">
        /// when set, an error while querying a node is ignored and the next
        /// node is checked
        /// </param>
        /// <returns>
        /// true if another target node is active, if manual state management is
        /// refused for another target, or if querying a node failed and
        /// forceActive was not set; otherwise false
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private bool IsOtherTargetNodeActive(string targetNodeToActivate, bool forceActive)
        {
            ICollection<string> otherIds = GetTargetIds(targetNodeToActivate);
            otherIds.Remove(targetNodeToActivate);

            foreach (string targetId in otherIds)
            {
                HAServiceTarget target = ResolveTarget(targetId);
                if (!CheckManualStateManagementOK(target))
                {
                    return true;
                }

                try
                {
                    HAServiceProtocol proto = target.GetProxy(GetConf(), 5000);
                    bool alreadyActive =
                        proto.GetServiceStatus().GetState() == HAServiceProtocol.HAServiceState.Active;
                    if (alreadyActive)
                    {
                        errOut.WriteLine("transitionToActive: Node " + targetId + " is already active");
                        PrintUsage(errOut, "-transitionToActive");
                        return true;
                    }
                }
                catch (Exception e)
                {
                    // With forceActive the failure is ignored; without it the
                    // error is reported and treated like an active node.
                    if (forceActive)
                    {
                        continue;
                    }
                    errOut.WriteLine("Unexpected error occurred  " + e.Message);
                    PrintUsage(errOut, "-transitionToActive");
                    return true;
                }
            }

            return false;
        }
示例#20
0
 /// <summary>
 /// Looks up the currently active node through the elector.
 /// </summary>
 /// <remarks>
 /// The lookup runs while holding the lock on the elector and then the
 /// lock on this instance, in that order.
 /// </remarks>
 /// <returns>
 /// an
 /// <see cref="HAServiceTarget"/>
 /// for the current active node
 /// in the cluster, or null if no node is active.
 /// </returns>
 /// <exception cref="System.IO.IOException">if a ZK-related issue occurs</exception>
 /// <exception cref="System.Exception">if thread is interrupted</exception>
 private HAServiceTarget GetCurrentActive()
 {
     lock (elector)
     {
         lock (this)
         {
             byte[] activeBytes;
             try
             {
                 activeBytes = elector.GetActiveData();
             }
             catch (ActiveStandbyElector.ActiveNotFoundException)
             {
                 // No active node is registered.
                 return null;
             }
             catch (KeeperException ke)
             {
                 throw new IOException("Unexpected ZooKeeper issue fetching active node info", ke);
             }

             return DataToTarget(activeBytes);
         }
     }
 }
示例#21
0
 /// <summary>
 /// Encodes the target as the byte array of its DummyHAService index.
 /// </summary>
 /// <param name="target">target to encode; must be a DummyHAService</param>
 /// <returns>the result of Ints.ToByteArray applied to the service's index</returns>
 protected internal override byte[] TargetToData(HAServiceTarget target)
 {
     DummyHAService dummy = (DummyHAService)target;
     return Ints.ToByteArray(dummy.index);
 }
示例#22
0
 /// <summary>
 /// Forwards baseArg1 and baseArg2 to the base constructor and keeps a
 /// reference to the enclosing TestHealthMonitor.
 /// </summary>
 /// <param name="_enclosing">the enclosing test instance</param>
 /// <param name="baseArg1">configuration passed to the base constructor</param>
 /// <param name="baseArg2">target passed to the base constructor</param>
 public _HealthMonitor_60(TestHealthMonitor _enclosing, Configuration baseArg1, HAServiceTarget baseArg2)
     : base(baseArg1, baseArg2)
 {
     this._enclosing = _enclosing;
 }
示例#23
0
 /// <summary>
 /// Converts the given target into a byte array; the encoding is supplied
 /// by the implementing subclass.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably the counterpart of DataToTarget -- confirm
 /// against the implementations.
 /// </remarks>
 /// <param name="target">the target to convert</param>
 /// <returns>the byte representation of the target</returns>
 protected internal abstract byte[] TargetToData(HAServiceTarget target);
示例#24
0
        /// <summary>Coordinate a graceful failover.</summary>
        /// <remarks>
        /// Coordinate a graceful failover. This proceeds in several phases:
        /// 1) Pre-flight checks: ensure that the local node is healthy, and
        /// thus a candidate for failover.
        /// 2) Determine the current active node. If it is the local node, no
        /// need to failover - return success.
        /// 3) Ask that node to yield from the election for a number of seconds.
        /// 4) Allow the normal election path to run in other threads. Wait until
        /// we either become unhealthy or we see an election attempt recorded by
        /// the normal code path.
        /// 5) Allow the old active to rejoin the election, so a future
        /// failback is possible.
        /// </remarks>
        /// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException">
        /// if no node is currently active, if no attempt to become active was
        /// observed, or if the recorded attempt failed
        /// </exception>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        private void DoGracefulFailover()
        {
            // The cede period is twice the configured graceful-fence timeout.
            int timeout = FailoverController.GetGracefulFenceTimeout(conf) * 2;

            // Phase 1: pre-flight checks
            CheckEligibleForFailover();
            // Phase 2: determine old/current active node. Check that we're not
            // ourselves active, etc.
            HAServiceTarget oldActive = GetCurrentActive();

            if (oldActive == null)
            {
                // No node is currently active. So, if we aren't already
                // active ourselves by means of a normal election, then there's
                // probably something preventing us from becoming active.
                throw new ServiceFailedException("No other node is currently active.");
            }
            if (oldActive.GetAddress().Equals(localTarget.GetAddress()))
            {
                Log.Info("Local node " + localTarget + " is already active. " + "No need to failover. Returning success."
                         );
                return;
            }
            // Phase 3: ask the old active to yield from the election.
            Log.Info("Asking " + oldActive + " to cede its active state for " + timeout + "ms"
                     );
            ZKFCProtocol oldZkfc = oldActive.GetZKFCProxy(conf, timeout);

            oldZkfc.CedeActive(timeout);
            // Phase 4: wait for the normal election to make the local node
            // active. The wait is the cede period plus an extra 60000.
            ZKFailoverController.ActiveAttemptRecord attempt = WaitForActiveAttempt(timeout +
                                                                                    60000);
            if (attempt == null)
            {
                // We didn't even make an attempt to become active.
                // lastHealthState is read under the instance lock to pick the
                // more specific of the two failure messages.
                lock (this)
                {
                    if (lastHealthState != HealthMonitor.State.ServiceHealthy)
                    {
                        throw new ServiceFailedException("Unable to become active. " + "Service became unhealthy while trying to failover."
                                                         );
                    }
                }
                throw new ServiceFailedException("Unable to become active. " + "Local node did not get an opportunity to do so from ZooKeeper, "
                                                 + "or the local node took too long to transition to active.");
            }
            // Phase 5. At this point, we made some attempt to become active. So we
            // can tell the old active to rejoin if it wants. This allows a quick
            // fail-back if we immediately crash.
            oldZkfc.CedeActive(-1);
            if (attempt.succeeded)
            {
                Log.Info("Successfully became active. " + attempt.status);
            }
            else
            {
                // Propagate failure
                string msg = "Failed to become active. " + attempt.status;
                throw new ServiceFailedException(msg);
            }
        }
示例#25
0
 /// <summary>
 /// Stores the configuration and the local target for later use.
 /// </summary>
 /// <param name="conf">configuration kept in the conf field</param>
 /// <param name="localTarget">target kept in the localTarget field</param>
 protected internal ZKFailoverController(Configuration conf, HAServiceTarget localTarget)
 {
     this.conf = conf;
     this.localTarget = localTarget;
 }
示例#26
0
        /// <summary>Failover from service 1 to service 2.</summary>
        /// <remarks>
        /// Failover from service 1 to service 2. If the failover fails
        /// then try to failback. The steps are: run the pre-failover checks,
        /// try to make fromSvc standby gracefully, fence fromSvc if that failed
        /// or fencing was forced, then ask toSvc to become active. A failback is
        /// attempted only when fromSvc was not fenced.
        /// </remarks>
        /// <param name="fromSvc">currently active service</param>
        /// <param name="toSvc">service to make active</param>
        /// <param name="forceFence">to fence fromSvc even if not strictly necessary</param>
        /// <param name="forceActive">try to make toSvc active even if it is not ready</param>
        /// <exception cref="Org.Apache.Hadoop.HA.FailoverFailedException">if the failover fails</exception>
        public virtual void Failover(HAServiceTarget fromSvc, HAServiceTarget toSvc, bool
                                     forceFence, bool forceActive)
        {
            Preconditions.CheckArgument(fromSvc.GetFencer() != null, "failover requires a fencer"
                                        );
            PreFailoverChecks(fromSvc, toSvc, forceActive);
            // Try to make fromSvc standby
            bool tryFence = true;

            if (TryGracefulFence(fromSvc))
            {
                // Graceful transition worked, so fence only if the caller insists.
                tryFence = forceFence;
            }
            // Fence fromSvc if it's required or forced by the user
            if (tryFence)
            {
                if (!fromSvc.GetFencer().Fence(fromSvc))
                {
                    throw new FailoverFailedException("Unable to fence " + fromSvc + ". Fencing failed."
                                                      );
                }
            }
            // Try to make toSvc active
            bool      failed = false;
            Exception cause  = null;

            try
            {
                HAServiceProtocolHelper.TransitionToActive(toSvc.GetProxy(conf, rpcTimeoutToNewActive
                                                                          ), CreateReqInfo());
            }
            catch (ServiceFailedException sfe)
            {
                Log.Error("Unable to make " + toSvc + " active (" + sfe.Message + "). Failing back."
                          );
                failed = true;
                cause  = sfe;
            }
            catch (IOException ioe)
            {
                Log.Error("Unable to make " + toSvc + " active (unable to connect). Failing back."
                          , ioe);
                failed = true;
                cause  = ioe;
            }
            // We failed to make toSvc active
            if (failed)
            {
                string msg = "Unable to failover to " + toSvc;
                // Only try to failback if we didn't fence fromSvc
                if (!tryFence)
                {
                    try
                    {
                        // Unconditionally fence toSvc in case it is still trying to
                        // become active, eg we timed out waiting for its response.
                        // Unconditionally force fromSvc to become active since it
                        // was previously active when we initiated failover.
                        // This is a recursive call with the two services swapped.
                        Failover(toSvc, fromSvc, true, true);
                    }
                    catch (FailoverFailedException ffe)
                    {
                        msg += ". Failback to " + fromSvc + " failed (" + ffe.Message + ")";
                        Log.Fatal(msg);
                    }
                }
                // The original failure is reported whether or not failback worked.
                throw new FailoverFailedException(msg, cause);
            }
        }