/// <summary>
/// Translates a <see cref="FabricException"/> thrown by StartNodeUsingNodeNameAsync into the
/// outcome of this step: succeed, retry the step, or fail.
/// </summary>
/// <param name="fe">The exception whose ErrorCode decides the outcome.</param>
/// <param name="state">Command state; its Info supplies the initially observed node status and the node name.</param>
/// <returns>Success, RetryStep or Fail. Every path assigns one of these, so Invalid is never returned.</returns>
private SuccessRetryOrFail HandleFabricException(FabricException fe, NodeCommandState state)
{
    FabricErrorCode errorCode = fe.ErrorCode;

    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StartNodeUsingNodeNameAsync threw FabricException with ErrorCode={1}", this.State.OperationId, errorCode);

    if (errorCode == FabricErrorCode.InstanceIdMismatch)
    {
        return SuccessRetryOrFail.Fail;
    }

    if (errorCode == FabricErrorCode.NodeIsUp)
    {
        // The node is already up, which is the state this step is trying to reach.
        return SuccessRetryOrFail.Success;
    }

    if (errorCode == FabricErrorCode.NodeHasNotStoppedYet)
    {
        return SuccessRetryOrFail.RetryStep;
    }

    if (errorCode == FabricErrorCode.InvalidAddress)
    {
        // Only read state.Info on this path, exactly as the original did.
        bool initialConditionsWereValid =
            state.Info.InitialQueriedNodeStatus != NodeStatus.Down
            && state.Info.NodeWasInitiallyInStoppedState == false;

        if (initialConditionsWereValid)
        {
            // This is a (probably unlikely) case that may happen if
            //   1. The request reaches the target node, which is up, and is processed.
            //   2. The response is dropped.
            //   3. The request is retried with enough delay such that the node has now transitioned from up to stopped.
            // So, if the initial conditions were valid and we get InvalidAddress, consider it successful.
            // NOTE(review): this wording mirrors the stop-node handler; confirm it is the intended reasoning for the start path.
            return SuccessRetryOrFail.Success;
        }

        // The preconditions passed, but the node is down now, so something out of band happened.
        return SuccessRetryOrFail.RetryStep;
    }

    if (errorCode == FabricErrorCode.NodeNotFound)
    {
        // Always fatal.
        TestabilityTrace.TraceSource.WriteError(
            StepBase.TraceType,
            string.Format(CultureInfo.InvariantCulture, "{0} - node {1} was not found", state.OperationId, state.Info.NodeName));
        return SuccessRetryOrFail.Fail;
    }

    // Any other error code is retried.
    return SuccessRetryOrFail.RetryStep;
}
/// <summary>
/// Translates a <see cref="FabricException"/> thrown by the StopNode api into the outcome of
/// this step: succeed, retry the step, or fail.
/// </summary>
/// <param name="fe">The exception whose ErrorCode decides the outcome.</param>
/// <param name="state">Command state; its Info supplies the initially observed node status.</param>
/// <returns>Success, RetryStep or Fail. Every path assigns one of these, so Invalid is never returned.</returns>
private SuccessRetryOrFail HandleFabricException(FabricException fe, NodeCommandState state)
{
    FabricErrorCode errorCode = fe.ErrorCode;

    if (errorCode == FabricErrorCode.InstanceIdMismatch)
    {
        TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, "{0} - StopNode api threw InstanceIdMismatch", this.State.OperationId);
        return SuccessRetryOrFail.Fail;
    }

    if (errorCode == FabricErrorCode.InvalidAddress)
    {
        TestabilityTrace.TraceSource.WriteInfo(
            StepBase.TraceType,
            "{0} - StopNode HandleFabricException InitialQueriedNodeStatus='{1}', NodeWasInitiallyInStoppedState='{2}'",
            this.State.OperationId,
            state.Info.InitialQueriedNodeStatus,
            state.Info.NodeWasInitiallyInStoppedState);

        // If the request was valid (node was up), but the response was dropped, and then the request
        // was retried, we might now be sending a stop to a node that is already stopped. That request
        // returns InvalidAddress, but it is really success.
        bool nodeWasInitiallyUp =
            state.Info.InitialQueriedNodeStatus != NodeStatus.Down
            && state.Info.NodeWasInitiallyInStoppedState == false;

        if (nodeWasInitiallyUp)
        {
            TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StopNode api threw InvalidAddress, considering stop command successful due to preconditions", this.State.OperationId);
            return SuccessRetryOrFail.Success;
        }

        // InvalidAddress is not expected here. There may have been an out of band stop using the
        // deprecated api. Retry until we get something else.
        TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, "{0} - StopNode api threw InvalidAddress", this.State.OperationId);
        return SuccessRetryOrFail.RetryStep;
    }

    if (errorCode == FabricErrorCode.NodeNotFound)
    {
        // Fails the step; no trace is written on this path.
        return SuccessRetryOrFail.Fail;
    }

    // Any other error code: log the full exception and retry.
    TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, "{0} - StopNode api threw {1}", this.State.OperationId, fe);
    return SuccessRetryOrFail.RetryStep;
}
/// <summary>
/// Runs the start-node step: queries the node, calls StartNodeUsingNodeNameAsync, classifies
/// the result, and on success records the node as not stopped and validates it.
/// </summary>
/// <param name="cancellationToken">Token flowed to every awaited call; also checked right after the start call.</param>
/// <param name="serviceInternalFaultInfo">Passed to ActionTest.PerformInternalServiceFaultIfRequested.</param>
/// <returns>The node command state with StepStateNames.CompletedSuccessfully pushed onto its StateProgress.</returns>
/// <exception cref="RetrySameStepException">The start call failed in a way classified as RetryStep.</exception>
/// <exception cref="FatalException">The start call failed in a way classified as Fail.</exception>
public override async Task<ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
{
    NodeCommandState state = Convert(this.State);

    // The return value is ignored, this is just being used to check if the RemoveNodeState was called.
    await FaultAnalysisServiceUtility.GetNodeInfoAsync(
        this.State.OperationId,
        this.FabricClient,
        state.Info.NodeName,
        this.action.Partition,
        this.action.StateManager,
        this.action.StoppedNodeTable,
        this.RequestTimeout,
        this.OperationTimeout,
        cancellationToken).ConfigureAwait(false);

    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - calling StartNodeUsingNodeNameAsync, ApiInputNodeInstanceId={1}", this.State.OperationId, state.Info.InputNodeInstanceId);

    // Any exception from the start call is captured rather than propagated so that it can be
    // classified below.
    Exception failure = null;
    try
    {
        await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
            () => this.FabricClient.FaultManager.StartNodeUsingNodeNameAsync(
                state.Info.NodeName,
                state.Info.InputNodeInstanceId,
                null,
                0,
                this.RequestTimeout,
                cancellationToken),
            FabricClientRetryErrors.StartNodeErrors.Value,
            this.OperationTimeout,
            cancellationToken).ConfigureAwait(false);
    }
    catch (Exception caught)
    {
        TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, "{0} - StartNodeUsingNodeNameAsync threw {1}", this.State.OperationId, caught);
        failure = caught;
    }

    // The catch above captures every exception type, so cancellation is surfaced explicitly here.
    cancellationToken.ThrowIfCancellationRequested();

    SuccessRetryOrFail status;
    if (failure == null)
    {
        // The start call completed without throwing: record that the node is no longer stopped.
        status = SuccessRetryOrFail.Success;
        await FaultAnalysisServiceUtility.SetStoppedNodeStateAsync(
            this.action.State.OperationId,
            this.action.Partition,
            this.action.StateManager,
            this.action.StoppedNodeTable,
            state.Info.NodeName,
            false,
            cancellationToken).ConfigureAwait(false);
    }
    else
    {
        FabricException fabricFailure = failure as FabricException;
        if (fabricFailure == null)
        {
            // Not a FabricException: always retried.
            TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, "{0} - StartNodeUsingNodeNameAsync threw non-FabricException with ErrorCode={1}", this.State.OperationId, failure);
            status = SuccessRetryOrFail.RetryStep;
        }
        else
        {
            status = this.HandleFabricException(fabricFailure, state);
        }
    }

    // Runs after the outcome is known and before that outcome is acted on.
    ActionTest.PerformInternalServiceFaultIfRequested(this.State.OperationId, serviceInternalFaultInfo, this.State, cancellationToken, true);

    if (status == SuccessRetryOrFail.RetryStep)
    {
        throw new RetrySameStepException("retrystep", failure);
    }

    if (status == SuccessRetryOrFail.Fail)
    {
        throw new FatalException("fatal", failure);
    }

    if (status != SuccessRetryOrFail.Success)
    {
        // Anything other than Success here means the classification above left an unexpected value.
        ReleaseAssert.Failfast(string.Format(CultureInfo.InvariantCulture, "This condition should not have been hit. OperationId: {0}", this.State.OperationId));
    }

    await this.ValidateAsync(this.FabricClient, state, cancellationToken).ConfigureAwait(false);

    state.StateProgress.Push(StepStateNames.CompletedSuccessfully);
    return state;
}