Beispiel #1
0
            /// <summary>
            /// Post-start validation: repeatedly queries the node until
            /// <c>FaultAnalysisServiceUtility.IsNodeRunning</c> accepts it or the operation timeout budget is used up.
            /// If the last queried node is still not running, <c>ThrowEngineRetryableException</c> is invoked.
            /// </summary>
            /// <param name="fc">Fabric client passed through to the node query.</param>
            /// <param name="state">Command state providing the node name and the input node instance id.</param>
            /// <param name="cancellationToken">Token observed by the delays and by the node query.</param>
            private async Task ValidateAsync(FabricClient fc, NodeCommandState state, CancellationToken cancellationToken)
            {
                TimeSpan settleDelay  = TimeSpan.FromSeconds(15);
                TimeSpan pollInterval = TimeSpan.FromSeconds(5.0d);

                // Give the node a few seconds to change state, plus a few more for FM to notice, before the first query.
                await Task.Delay(settleDelay, cancellationToken).ConfigureAwait(false);

                // The timeout budget starts counting only after the initial settle delay.
                var  deadline = new TimeoutHelper(this.OperationTimeout);
                Node node     = null;

                while (true)
                {
                    TestabilityTrace.TraceSource.WriteInfo(
                        StepBase.TraceType,
                        "{0} - start node validating node '{1}' is not down",
                        this.State.OperationId,
                        state.Info.InputNodeInstanceId);

                    node = await FaultAnalysisServiceUtility.GetNodeInfoAsync(
                        this.State.OperationId,
                        fc,
                        state.Info.NodeName,
                        this.action.Partition,
                        this.action.StateManager,
                        this.action.StoppedNodeTable,
                        this.RequestTimeout,
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);

                    if (FaultAnalysisServiceUtility.IsNodeRunning(node))
                    {
                        // Desired state reached, stop polling.
                        break;
                    }

                    await Task.Delay(pollInterval, cancellationToken).ConfigureAwait(false);

                    if (deadline.GetRemainingTime() <= TimeSpan.Zero)
                    {
                        // Budget exhausted; fall through to the final check below.
                        break;
                    }
                }

                if (FaultAnalysisServiceUtility.IsNodeRunning(node))
                {
                    return;
                }

                // The node never reported as running within the budget: ask the engine to retry.
                string notRunningMessage = string.Format(
                    CultureInfo.InvariantCulture,
                    "{0} - start node validation - node is not running yet.  Status={1}",
                    state.OperationId,
                    node.NodeStatus);
                FaultAnalysisServiceUtility.ThrowEngineRetryableException(notRunningMessage);
            }
Beispiel #2
0
            /// <summary>
            /// Post-stop validation: repeatedly queries the node until
            /// <c>FaultAnalysisServiceUtility.IsNodeRunning</c> no longer accepts it or the operation timeout budget
            /// is used up. This is best effort: if the node still looks running afterwards, only a warning is traced.
            /// </summary>
            /// <param name="fc">Fabric client passed through to the node query.</param>
            /// <param name="state">Command state providing the node name and the input node instance id.</param>
            /// <param name="cancellationToken">Token observed by the delays and by the node query.</param>
            private async Task ValidateAsync(FabricClient fc, NodeCommandState state, CancellationToken cancellationToken)
            {
                TimeSpan settleDelay  = TimeSpan.FromSeconds(15);
                TimeSpan pollInterval = TimeSpan.FromSeconds(5.0d);

                // The node needs a few seconds to shut down, and FM needs a few more to find out.
                await Task.Delay(settleDelay, cancellationToken).ConfigureAwait(false);

                // The timeout budget starts counting only after the initial settle delay.
                var  deadline = new TimeoutHelper(this.OperationTimeout);
                Node node     = null;

                while (true)
                {
                    TestabilityTrace.TraceSource.WriteInfo(
                        StepBase.TraceType,
                        "{0} - stop node validating node '{1}' is not up",
                        this.State.OperationId,
                        state.Info.InputNodeInstanceId);

                    node = await FaultAnalysisServiceUtility.GetNodeInfoAsync(
                        this.State.OperationId,
                        fc,
                        state.Info.NodeName,
                        this.action.Partition,
                        this.action.StateManager,
                        this.action.StoppedNodeTable,
                        this.RequestTimeout,
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);

                    bool stillRunning = FaultAnalysisServiceUtility.IsNodeRunning(node);
                    if (!stillRunning)
                    {
                        // Desired state reached, stop polling.
                        break;
                    }

                    await Task.Delay(pollInterval, cancellationToken).ConfigureAwait(false);

                    if (deadline.GetRemainingTime() <= TimeSpan.Zero)
                    {
                        // Budget exhausted; fall through to the final check below.
                        break;
                    }
                }

                if (!FaultAnalysisServiceUtility.IsNodeRunning(node))
                {
                    return;
                }

                // The API reported success but the node did not reach the desired state, possibly because
                // something happened out of band. Validation is best effort, so warn instead of failing.
                TestabilityTrace.TraceSource.WriteWarning(
                    StepBase.TraceType,
                    "{0} - node '{1}' did not reach desired state in {2}",
                    this.State.OperationId,
                    state.Info.InputNodeInstanceId,
                    this.OperationTimeout);
            }
Beispiel #3
0
            /// <summary>
            /// StartNode precondition step: queries the node, reads the stopped-node flag, rejects the command
            /// when the node is already running or is down without having been stopped, and otherwise records
            /// the initial observations and advances to <c>StepStateNames.PerformingActions</c>.
            /// </summary>
            /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
            /// <param name="serviceInternalFaultInfo">Not used by this step.</param>
            /// <returns>The command state with the next step pushed onto its progress stack.</returns>
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                NodeCommandState state = Convert(this.State);

                Node node = await FaultAnalysisServiceUtility.GetNodeInfoAsync(
                    this.State.OperationId,
                    this.FabricClient,
                    state.Info.NodeName,
                    this.action.Partition,
                    this.action.StateManager,
                    this.action.StoppedNodeTable,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StartNode LookingUpState reading RD", this.State.OperationId);
                bool markedStopped = await FaultAnalysisServiceUtility.ReadStoppedNodeStateAsync(
                    this.State.OperationId,
                    this.action.Partition,
                    this.action.StateManager,
                    this.action.StoppedNodeTable,
                    state.Info.NodeName,
                    cancellationToken).ConfigureAwait(false);

                bool running = FaultAnalysisServiceUtility.IsNodeRunning(node);

                if (running && !markedStopped)
                {
                    // Precondition check only: calling StartNodeUsingNodeNameAsync() here without checking first
                    // would yield either an instance mismatch or "node has not stopped yet". Unlike the
                    // PerformingActions step (which runs after that call), the instance id is not checked here.
                    throw new FatalException(
                        "fatal",
                        FaultAnalysisServiceUtility.CreateException(
                            TraceType,
                            NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_IS_UP,
                            string.Format(CultureInfo.InvariantCulture, "Node {0} already started", state.Info.NodeName),
                            FabricErrorCode.NodeIsUp));
                }

                if (running)
                {
                    // Running but flagged as stopped: this can only come from an out-of-band start.
                    // Correct the stored flag first, then fail the command with "node is up".
                    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StartNode LookingUpState setting RD entry for node {1} to not stopped", this.State.OperationId, state.Info.NodeName);
                    await FaultAnalysisServiceUtility.SetStoppedNodeStateAsync(
                        this.action.State.OperationId,
                        this.action.Partition,
                        this.action.StateManager,
                        this.action.StoppedNodeTable,
                        node.NodeName,
                        false,
                        cancellationToken).ConfigureAwait(false);

                    throw new FatalException(
                        "fatal",
                        FaultAnalysisServiceUtility.CreateException(
                            TraceType,
                            Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_IS_UP,
                            string.Format(CultureInfo.InvariantCulture, "Node {0} is up", state.Info.NodeName)));
                }

                if (node.NodeStatus == NodeStatus.Down && !markedStopped)
                {
                    // Down without a stopped flag happens when either
                    //   1) the node was stopped out of band through the old api, or
                    //   2) the node actually went down (as opposed to being stopped).
                    // Neither case is handled here; report node down.
                    throw new FatalException(
                        "fatal",
                        FaultAnalysisServiceUtility.CreateException(
                            TraceType,
                            Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_IS_DOWN,
                            string.Format(CultureInfo.InvariantCulture, "Node {0} is down", state.Info.NodeName)));
                }

                // Preconditions hold: remember what was observed and move on to the action step.
                state.Info.InitialQueriedNodeStatus       = node.NodeStatus;
                state.Info.NodeWasInitiallyInStoppedState = markedStopped;
                state.StateProgress.Push(StepStateNames.PerformingActions);

                return state;
            }
Beispiel #4
0
            /// <summary>
            /// StartNode action step: calls <c>StartNodeUsingNodeNameAsync</c> through the retry helper,
            /// classifies the outcome as success / retry / fail, sets the stopped-node entry to false on success,
            /// then runs <c>ValidateAsync</c> and advances to <c>StepStateNames.CompletedSuccessfully</c>.
            /// </summary>
            /// <param name="cancellationToken">Token forwarded to every asynchronous call and checked after the start attempt.</param>
            /// <param name="serviceInternalFaultInfo">Passed to <c>ActionTest.PerformInternalServiceFaultIfRequested</c>.</param>
            /// <returns>The command state with the completed step pushed onto its progress stack.</returns>
            /// <exception cref="RetrySameStepException">The start attempt failed in a way classified as retryable.</exception>
            /// <exception cref="FatalException">The start attempt failed in a way classified as fatal.</exception>
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                NodeCommandState state = Convert(this.State);

                // The result is deliberately discarded; the query is issued only to check whether RemoveNodeState was called.
                await FaultAnalysisServiceUtility.GetNodeInfoAsync(
                    this.State.OperationId,
                    this.FabricClient,
                    state.Info.NodeName,
                    this.action.Partition,
                    this.action.StateManager,
                    this.action.StoppedNodeTable,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - calling StartNodeUsingNodeNameAsync, ApiInputNodeInstanceId={1}", this.State.OperationId, state.Info.InputNodeInstanceId);

                // Capture any failure instead of letting it propagate, so it can be classified below.
                Exception failure = null;
                try
                {
                    await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => this.FabricClient.FaultManager.StartNodeUsingNodeNameAsync(
                            state.Info.NodeName,
                            state.Info.InputNodeInstanceId,
                            null,
                            0,
                            this.RequestTimeout,
                            cancellationToken),
                        FabricClientRetryErrors.StartNodeErrors.Value,
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);
                }
                catch (Exception caught)
                {
                    TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, "{0} - StartNodeUsingNodeNameAsync threw {1}", this.State.OperationId, caught);
                    failure = caught;
                }

                // Cancellation takes precedence over whatever the start attempt produced.
                cancellationToken.ThrowIfCancellationRequested();

                SuccessRetryOrFail outcome;
                if (failure == null)
                {
                    outcome = SuccessRetryOrFail.Success;

                    // The start call succeeded, so set the stopped-node entry for this node to false.
                    await FaultAnalysisServiceUtility.SetStoppedNodeStateAsync(
                        this.action.State.OperationId,
                        this.action.Partition,
                        this.action.StateManager,
                        this.action.StoppedNodeTable,
                        state.Info.NodeName,
                        false,
                        cancellationToken).ConfigureAwait(false);
                }
                else
                {
                    FabricException fabricFailure = failure as FabricException;
                    if (fabricFailure == null)
                    {
                        // Anything that is not a FabricException is retried.
                        TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, "{0} - StartNodeUsingNodeNameAsync threw non-FabricException with ErrorCode={1}", this.State.OperationId, failure);
                        outcome = SuccessRetryOrFail.RetryStep;
                    }
                    else
                    {
                        outcome = this.HandleFabricException(fabricFailure, state);
                    }
                }

                ActionTest.PerformInternalServiceFaultIfRequested(this.State.OperationId, serviceInternalFaultInfo, this.State, cancellationToken, true);

                switch (outcome)
                {
                    case SuccessRetryOrFail.RetryStep:
                        throw new RetrySameStepException("retrystep", failure);

                    case SuccessRetryOrFail.Fail:
                        throw new FatalException("fatal", failure);

                    case SuccessRetryOrFail.Success:
                        // Nothing to do; continue to validation.
                        break;

                    default:
                        ReleaseAssert.Failfast(string.Format(CultureInfo.InvariantCulture, "This condition should not have been hit.  OperationId: {0}", this.State.OperationId));
                        break;
                }

                await this.ValidateAsync(this.FabricClient, state, cancellationToken).ConfigureAwait(false);

                state.StateProgress.Push(StepStateNames.CompletedSuccessfully);
                return state;
            }
Beispiel #5
0
            /// <summary>
            /// StopNode precondition step: queries the node, fails if it does not exist, is already stopped,
            /// or is down without having been stopped; corrects a stale stopped-node flag when the node is
            /// not down; then records the initial observations and advances to <c>StepStateNames.PerformingActions</c>.
            /// </summary>
            /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
            /// <param name="serviceInternalFaultInfo">Not used by this step.</param>
            /// <returns>The command state with the next step pushed onto its progress stack.</returns>
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                NodeCommandState state = Convert(this.State);

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StopNode.LookingUpState performing node query", this.State.OperationId);

                Node node = await FaultAnalysisServiceUtility.GetNodeInfoAsync(
                    this.State.OperationId,
                    this.FabricClient,
                    state.Info.NodeName,
                    this.action.Partition,
                    this.action.StateManager,
                    this.action.StoppedNodeTable,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StopNode.LookingUpState node query completed", this.State.OperationId);

                // A missing node, or one whose status is Invalid / Unknown / Removed, is treated as "not found".
                bool nodeUsable =
                    node != null &&
                    node.NodeStatus != NodeStatus.Invalid &&
                    node.NodeStatus != NodeStatus.Unknown &&
                    node.NodeStatus != NodeStatus.Removed;

                if (!nodeUsable)
                {
                    Exception nodeNotFound = FaultAnalysisServiceUtility.CreateException(
                        TraceType,
                        Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_NOT_FOUND,
                        string.Format(CultureInfo.InvariantCulture, "{0} - Node {1} does not exist", this.State.OperationId, state.Info.NodeName));
                    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - throwing fatal exception {1}", this.State.OperationId, nodeNotFound);
                    throw new FatalException("fatal", nodeNotFound);
                }

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StopNode LookingUpState reading RD", this.State.OperationId);
                bool isStopped = await FaultAnalysisServiceUtility.ReadStoppedNodeStateAsync(
                    this.State.OperationId,
                    this.action.Partition,
                    this.action.StateManager,
                    this.action.StoppedNodeTable,
                    state.Info.NodeName,
                    cancellationToken).ConfigureAwait(false);

                bool isDown = node.NodeStatus == NodeStatus.Down;

                if (isStopped)
                {
                    if (isDown)
                    {
                        // Down and flagged as stopped: the node is already stopped.
                        throw new FatalException(
                            "fatal",
                            FaultAnalysisServiceUtility.CreateException(
                                TraceType,
                                Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_ALREADY_STOPPED,
                                string.Format(CultureInfo.InvariantCulture, "Node {0} is already stopped", state.Info.NodeName)));
                    }

                    // FM reports the node as not down, so the stored flag is stale - perhaps due to an
                    // out-of-band start through the original deprecated api. Correct the flag and carry on;
                    // the stop command itself is valid.
                    await FaultAnalysisServiceUtility.SetStoppedNodeStateAsync(
                        this.action.State.OperationId,
                        this.action.Partition,
                        this.action.StateManager,
                        this.action.StoppedNodeTable,
                        node.NodeName,
                        false,
                        cancellationToken).ConfigureAwait(false);
                }
                else if (isDown)
                {
                    // Down without a stopped flag: the node is down, as opposed to stopped.
                    throw new FatalException(
                        "fatal",
                        FaultAnalysisServiceUtility.CreateException(
                            TraceType,
                            Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_IS_DOWN,
                            string.Format(CultureInfo.InvariantCulture, "Node {0} is down", state.Info.NodeName)));
                }

                // Record what was observed on entry; the flag value stored is the one read before any correction.
                state.Info.InitialQueriedNodeStatus       = node.NodeStatus;
                state.Info.NodeWasInitiallyInStoppedState = isStopped;
                TestabilityTrace.TraceSource.WriteInfo(
                    StepBase.TraceType,
                    "{0} - StopNode LookingUpState InitialQueriedNodeStatus='{1}', NodeWasInitiallyInStoppedState='{2}'",
                    this.State.OperationId,
                    state.Info.InitialQueriedNodeStatus,
                    state.Info.NodeWasInitiallyInStoppedState);

                state.StateProgress.Push(StepStateNames.PerformingActions);
                return state;
            }