Beispiel #1
0
            /// <summary>
            /// Start-node validation: repeatedly queries the node until
            /// <c>FaultAnalysisServiceUtility.IsNodeRunning</c> reports it as running or the
            /// <c>OperationTimeout</c> budget is exhausted.
            /// </summary>
            /// <param name="fc">Fabric client handed to the node query.</param>
            /// <param name="state">Command state supplying the node name, input node instance id and operation id.</param>
            /// <param name="cancellationToken">Token observed by the delays and by the node query.</param>
            /// <remarks>
            /// If the node is still not reported as running once polling ends,
            /// <c>FaultAnalysisServiceUtility.ThrowEngineRetryableException</c> is invoked with the last observed status.
            /// </remarks>
            private async Task ValidateAsync(FabricClient fc, NodeCommandState state, CancellationToken cancellationToken)
            {
                TimeSpan settleDelay  = TimeSpan.FromSeconds(15);
                TimeSpan pollInterval = TimeSpan.FromSeconds(5.0d);

                // Hold off before the first query: the node's state change takes a few seconds,
                // and FM needs a few more seconds to find out about it.
                await Task.Delay(settleDelay, cancellationToken).ConfigureAwait(false);

                // The polling budget starts counting only after the initial settle delay.
                TimeoutHelper pollBudget = new TimeoutHelper(this.OperationTimeout);
                Node          latestNode = null;

                while (true)
                {
                    TestabilityTrace.TraceSource.WriteInfo(
                        StepBase.TraceType,
                        "{0} - start node validating node '{1}' is not down",
                        this.State.OperationId,
                        state.Info.InputNodeInstanceId);

                    latestNode = await FaultAnalysisServiceUtility.GetNodeInfoAsync(
                        this.State.OperationId,
                        fc,
                        state.Info.NodeName,
                        this.action.Partition,
                        this.action.StateManager,
                        this.action.StoppedNodeTable,
                        this.RequestTimeout,
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);

                    // Desired state reached - stop polling.
                    if (FaultAnalysisServiceUtility.IsNodeRunning(latestNode))
                    {
                        break;
                    }

                    await Task.Delay(pollInterval, cancellationToken).ConfigureAwait(false);

                    // Budget used up - fall through to the final check with the last node we saw.
                    if (pollBudget.GetRemainingTime() <= TimeSpan.Zero)
                    {
                        break;
                    }
                }

                if (FaultAnalysisServiceUtility.IsNodeRunning(latestNode))
                {
                    return;
                }

                // The node never showed up as running; ask the engine to retry.
                string notRunningMessage = string.Format(
                    CultureInfo.InvariantCulture,
                    "{0} - start node validation - node is not running yet.  Status={1}",
                    state.OperationId,
                    latestNode.NodeStatus);

                FaultAnalysisServiceUtility.ThrowEngineRetryableException(notRunningMessage);
            }
Beispiel #2
0
            /// <summary>
            /// Stop-node validation: repeatedly queries the node until
            /// <c>FaultAnalysisServiceUtility.IsNodeRunning</c> no longer reports it as running or the
            /// <c>OperationTimeout</c> budget is exhausted.
            /// </summary>
            /// <param name="fc">Fabric client handed to the node query.</param>
            /// <param name="state">Command state supplying the node name and input node instance id.</param>
            /// <param name="cancellationToken">Token observed by the delays and by the node query.</param>
            /// <remarks>
            /// This check is best effort: when the node is still reported as running after polling ends,
            /// only a warning is traced and the method completes normally.
            /// </remarks>
            private async Task ValidateAsync(FabricClient fc, NodeCommandState state, CancellationToken cancellationToken)
            {
                TimeSpan settleDelay  = TimeSpan.FromSeconds(15);
                TimeSpan pollInterval = TimeSpan.FromSeconds(5.0d);

                // Shutting the node down takes a few seconds, and FM needs a few more to find out.
                await Task.Delay(settleDelay, cancellationToken).ConfigureAwait(false);

                // The polling budget starts counting only after the initial settle delay.
                TimeoutHelper pollBudget  = new TimeoutHelper(this.OperationTimeout);
                Node          latestNode  = null;
                bool          keepPolling = true;

                while (keepPolling)
                {
                    TestabilityTrace.TraceSource.WriteInfo(
                        StepBase.TraceType,
                        "{0} - stop node validating node '{1}' is not up",
                        this.State.OperationId,
                        state.Info.InputNodeInstanceId);

                    latestNode = await FaultAnalysisServiceUtility.GetNodeInfoAsync(
                        this.State.OperationId,
                        fc,
                        state.Info.NodeName,
                        this.action.Partition,
                        this.action.StateManager,
                        this.action.StoppedNodeTable,
                        this.RequestTimeout,
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);

                    // Desired state reached - the node is no longer reported as running.
                    if (!FaultAnalysisServiceUtility.IsNodeRunning(latestNode))
                    {
                        break;
                    }

                    await Task.Delay(pollInterval, cancellationToken).ConfigureAwait(false);

                    keepPolling = pollBudget.GetRemainingTime() > TimeSpan.Zero;
                }

                if (!FaultAnalysisServiceUtility.IsNodeRunning(latestNode))
                {
                    return;
                }

                // The api returned success but the node has not reached the desired state; something may
                // have happened out of band.  Validation is best effort, so trace a warning instead of failing.
                TestabilityTrace.TraceSource.WriteWarning(
                    StepBase.TraceType,
                    "{0} - node '{1}' did not reach desired state in {2}",
                    this.State.OperationId,
                    state.Info.InputNodeInstanceId,
                    this.OperationTimeout);
            }
Beispiel #3
0
            /// <summary>
            /// Precondition check for a start-node command.  Queries the node, reads its entry in the
            /// stopped-node table, and either fails the command with a <c>FatalException</c> or records the
            /// observed values on the state and pushes the <c>PerformingActions</c> step.
            /// </summary>
            /// <param name="cancellationToken">Token passed to every asynchronous call made here.</param>
            /// <param name="serviceInternalFaultInfo">Not referenced by this method.</param>
            /// <returns>The converted <c>NodeCommandState</c>, updated with the initial node status and stopped flag.</returns>
            public override async Task<ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                NodeCommandState commandState = Convert(this.State);

                Node nodeInfo = await FaultAnalysisServiceUtility.GetNodeInfoAsync(
                    this.State.OperationId,
                    this.FabricClient,
                    commandState.Info.NodeName,
                    this.action.Partition,
                    this.action.StateManager,
                    this.action.StoppedNodeTable,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StartNode LookingUpState reading RD", this.State.OperationId);

                bool markedStopped = await FaultAnalysisServiceUtility.ReadStoppedNodeStateAsync(
                    this.State.OperationId,
                    this.action.Partition,
                    this.action.StateManager,
                    this.action.StoppedNodeTable,
                    commandState.Info.NodeName,
                    cancellationToken).ConfigureAwait(false);

                bool nodeIsRunning = FaultAnalysisServiceUtility.IsNodeRunning(nodeInfo);

                if (nodeIsRunning && !markedStopped)
                {
                    // Running and not recorded as stopped: the node is simply already up.
                    // For illustration, calling StartNodeUsingNodeNameAsync() here without checking first would
                    // yield either an instance mismatch or "node has not stopped yet".
                    // Note: the PerformingActions step, which runs after StartNodeUsingNodeNameAsync(), does check
                    // the instance id; this step does not, because it is only a precondition check.
                    throw new FatalException(
                        "fatal",
                        FaultAnalysisServiceUtility.CreateException(
                            TraceType,
                            NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_IS_UP,
                            string.Format(CultureInfo.InvariantCulture, "Node {0} already started", commandState.Info.NodeName),
                            FabricErrorCode.NodeIsUp));
                }

                if (nodeIsRunning)
                {
                    // Running but recorded as stopped: the only way this can happen is an out-of-band start.
                    // FAS corrects its own record first, then fails the command with node-is-up.
                    TestabilityTrace.TraceSource.WriteInfo(
                        StepBase.TraceType,
                        "{0} - StartNode LookingUpState setting RD entry for node {1} to not stopped",
                        this.State.OperationId,
                        commandState.Info.NodeName);

                    await FaultAnalysisServiceUtility.SetStoppedNodeStateAsync(
                        this.action.State.OperationId,
                        this.action.Partition,
                        this.action.StateManager,
                        this.action.StoppedNodeTable,
                        nodeInfo.NodeName,
                        false,
                        cancellationToken).ConfigureAwait(false);

                    throw new FatalException(
                        "fatal",
                        FaultAnalysisServiceUtility.CreateException(
                            TraceType,
                            Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_IS_UP,
                            string.Format(CultureInfo.InvariantCulture, "Node {0} is up", commandState.Info.NodeName)));
                }

                if (nodeInfo.NodeStatus == NodeStatus.Down && !markedStopped)
                {
                    // Down but not recorded as stopped.  This special scenario can arise when:
                    // 1)  there was an out-of-band stop using the old api, or
                    // 2)  the node went down (down, as opposed to stopped).
                    // Neither is handled here; the command fails with node-is-down.
                    throw new FatalException(
                        "fatal",
                        FaultAnalysisServiceUtility.CreateException(
                            TraceType,
                            Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_IS_DOWN,
                            string.Format(CultureInfo.InvariantCulture, "Node {0} is down", commandState.Info.NodeName)));
                }

                // Preconditions hold: remember what was observed and advance to the next step.
                commandState.Info.InitialQueriedNodeStatus       = nodeInfo.NodeStatus;
                commandState.Info.NodeWasInitiallyInStoppedState = markedStopped;

                commandState.StateProgress.Push(StepStateNames.PerformingActions);

                return commandState;
            }