private static void ThrowIfRestartPartitionModeInvalid(RestartPartitionMode restartPartitionMode)
 {
     if (restartPartitionMode == RestartPartitionMode.Invalid)
     {
         throw FaultAnalysisServiceUtility.CreateException(TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_INVALIDARG, Strings.StringResources.Error_UnsupportedRestartPartitionMode);
     }
 }
        public async Task <string> CallSystemService(
            string action,
            string inputBlob,
            TimeSpan timeout,
            CancellationToken cancellationToken)
        {
            this.ThrowIfNotReady();

            string outputBlob = null;

            try
            {
                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Enter CallSystemService for {0}.", action);

                if (this.actionMapping.ContainsKey(action))
                {
                    outputBlob = await this.actionMapping[action](inputBlob, timeout, cancellationToken).ConfigureAwait(false);
                }
                else
                {
                    throw FaultAnalysisServiceUtility.CreateException(TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_NOTIMPL, "{0} is not implemented", action);
                }
            }
            catch (Exception e)
            {
                TestabilityTrace.TraceSource.WriteWarning(TraceType, "CallSystemService error: {0}", e);
                FaultAnalysisServiceUtility.ThrowTransientExceptionIfRetryable(e);

                throw;
            }

            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Exit CallSystemService for {0} with output {1}.", action, outputBlob);
            return(outputBlob);
        }
        private void ThrowIfNotReady(Func <bool> readinessCriteria = null)
        {
            if (this.MessageProcessor == null || (readinessCriteria != null && !readinessCriteria()))
            {
                throw FaultAnalysisServiceUtility.CreateException(TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_ABORT, "Service is not accepting client calls yet");
            }

            if (this.MessageProcessor.Partition.WriteStatus != PartitionAccessStatus.Granted)
            {
                // not primary, reconfiguration pending, and no write quorum all get translated to E_ABORT by the gateway, so there's no point in throwing a specific exception for each here.
                throw FaultAnalysisServiceUtility.CreateException(TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_ABORT, string.Empty);
            }
        }
Example #4
0
        public void Add(string nodeName)
        {
            TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "Enter Add '{0}'", nodeName);
            lock (this.NodeCommandLock)
            {
                if (this.nodeCommandInProgress.Contains(nodeName))
                {
                    throw FaultAnalysisServiceUtility.CreateException(StepBase.TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_TRANSITION_IN_PROGRESS, Strings.StringResources.Error_NodeTransitionInProgress);
                }

                this.nodeCommandInProgress.Add(nodeName);
                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "Done - Add '{0}'", nodeName);
            }
        }
Example #5
0
        public async Task <ActionStateBase> FindByOperationIdAsync(Guid operationId, CancellationToken cancellationToken)
        {
            ActionStateBase state = null;

            try
            {
                using (var tx = this.stateManager.CreateTransaction())
                {
                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Enter transaction inside {0} - operationId={1}", "FindByOperationId", operationId);
                    var data = await this.actionTable.TryGetValueAsync(tx, operationId, DefaultDictionaryTimeout, this.cancellationToken).ConfigureAwait(false);

                    if (data.HasValue)
                    {
                        TestabilityTrace.TraceSource.WriteInfo(TraceType, "FindOperationId: {0}, actiontable has value", operationId);
                        state = this.ReadData(data.Value);
                    }
                    else
                    {
                        data = await this.historyTable.TryGetValueAsync(tx, operationId, DefaultDictionaryTimeout, this.cancellationToken).ConfigureAwait(false);

                        if (data.HasValue)
                        {
                            TestabilityTrace.TraceSource.WriteInfo(TraceType, "FindOperationId: {0}, history table has value", operationId);
                            state = this.ReadData(data.Value);
                        }
                        else
                        {
                            string error = string.Format(CultureInfo.InvariantCulture, "OperationId '{0}' was not found", operationId);
                            TestabilityTrace.TraceSource.WriteWarning(TraceType, error);
                            throw FaultAnalysisServiceUtility.CreateException(TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_KEY_NOT_FOUND, error);
                        }
                    }
                }

                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Exit transaction inside {0} - operationId={1}", "FindByOperationId", operationId);
            }
            catch (FabricNotPrimaryException fnp)
            {
                FaultAnalysisServiceUtility.TraceFabricNotPrimary(operationId, fnp);
                throw;
            }
            catch (Exception e)
            {
                TestabilityTrace.TraceSource.WriteWarning(TraceType, "FindOperationId: {0}", e.ToString());
                throw;
            }

            return(state);
        }
Example #6
0
        // Called to initialize state when a new message is received.
        private async Task InitializeNewActionInnerAsync(ActionStateBase actionState)
        {
            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Enter InitializeNewActionAsync");

            try
            {
                using (var tx = this.stateManager.CreateTransaction())
                {
                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Enter transaction inside InitializeNewActionAsync - operationId={0}", actionState.OperationId);
                    bool keyAlreadyExists = await this.actionTable.ContainsKeyAsync(tx, actionState.OperationId, DefaultDictionaryTimeout, this.cancellationToken).ConfigureAwait(false);

                    if (!keyAlreadyExists)
                    {
                        keyAlreadyExists = await this.historyTable.ContainsKeyAsync(tx, actionState.OperationId, DefaultDictionaryTimeout, this.cancellationToken).ConfigureAwait(false);
                    }

                    if (keyAlreadyExists)
                    {
                        string error = string.Format(CultureInfo.InvariantCulture, "OperationId '{0}' already exists", actionState.OperationId);
                        TestabilityTrace.TraceSource.WriteWarning(TraceType, error);
                        throw FaultAnalysisServiceUtility.CreateException(TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_TEST_COMMAND_OPERATION_ID_ALREADY_EXISTS, error);
                    }
                }

                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Exit transaction inside InitializeNewActionAsync - operationId={0}", actionState.OperationId);
            }
            catch (FabricNotPrimaryException fnp)
            {
                FaultAnalysisServiceUtility.TraceFabricNotPrimary(actionState.OperationId, fnp);
                throw;
            }
            catch (Exception e)
            {
                TestabilityTrace.TraceSource.WriteWarning(TraceType, "Could not add new action: {0}", e.ToString());
                throw;
            }

            await this.PersistAsync(actionState, true).ConfigureAwait(false);
        }
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                InvokeQuorumLossState state = Convert(this.State);

                // get info about the service so we can check type and trss
                ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        this.partitionSelector.ServiceName,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                if (result.Kind != ServiceDescriptionKind.Stateful)
                {
                    // The message in the first arg is only for debugging, it is not returned to the user.
                    throw new FabricInvalidForStatelessServicesException("FabricInvalidForStatelessServicesException", FabricErrorCode.InvalidForStatelessServices);
                }

                StatefulServiceDescription statefulServiceDescription = result as StatefulServiceDescription;

                ReleaseAssert.AssertIf(statefulServiceDescription == null, string.Format(CultureInfo.InvariantCulture, "{0} - Service is not a stateful service", this.State.OperationId));

                if (!statefulServiceDescription.HasPersistedState)
                {
                    // The message in the first arg is only for debugging, it is not returned to the user.
                    throw new FabricOnlyValidForStatefulPersistentServicesException("This is only valid for stateful persistent services", FabricErrorCode.OnlyValidForStatefulPersistentServices);
                }

                SelectedPartition targetPartition = await FaultAnalysisServiceUtility.GetSelectedPartitionStateAsync(
                    this.FabricClient,
                    this.partitionSelector,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                Guid partitionId = targetPartition.PartitionId;

                // get data about replicas in that partition
                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                List <StatefulServiceReplica> tempReplicas = new List <StatefulServiceReplica>();

                foreach (var replica in replicasResult)
                {
                    StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(statefulReplica == null, "Expected stateful replica");
                    tempReplicas.Add(statefulReplica);
                }

                List <StatefulServiceReplica> targetReplicas = null;

                if (this.quorumLossMode == QuorumLossMode.AllReplicas)
                {
                    targetReplicas = tempReplicas.Where(r => r.ReplicaRole == ReplicaRole.Primary || r.ReplicaRole == ReplicaRole.ActiveSecondary).ToList();
                }
                else if (this.quorumLossMode == QuorumLossMode.QuorumReplicas)
                {
                    targetReplicas = FaultAnalysisServiceUtility.GetReplicasForPartialLoss(state.OperationId, tempReplicas);
                }
                else
                {
                    throw FaultAnalysisServiceUtility.CreateException(StepBase.TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_INVALIDARG, Strings.StringResources.Error_UnsupportedQuorumLossMode);
                }

                if (targetReplicas == null)
                {
                    // This will cause the command to rollback and retry
                    throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
                }

                List <string> targetNodes = new List <string>();

                foreach (var replica in targetReplicas)
                {
                    targetNodes.Add(replica.NodeName);
                }

                List <Tuple <string, string> > unreliableTransportInfoList = new List <Tuple <string, string> >();

                foreach (string nodeName in targetNodes)
                {
                    UnreliableTransportBehavior behavior = new UnreliableTransportBehavior("*", "StatefulServiceReopen");
                    behavior.AddFilterForPartitionId(partitionId);

                    // ApplyingUnreliableTransport.BehaviorNamePrefix + nodeName;
                    string behaviorName = this.CreateBehaviorName(nodeName);

                    unreliableTransportInfoList.Add(new Tuple <string, string>(nodeName, behaviorName));
                }

                state.StateProgress.Push(StepStateNames.PerformingActions);

                state.Info.PartitionId             = partitionId;
                state.Info.ReplicaIds              = targetReplicas.Select(r => r.Id).ToList();
                state.Info.UnreliableTransportInfo = unreliableTransportInfoList;

                return(state);
            }
Example #8
0
        public async Task CancelTestCommandAsync(Guid operationId, bool force)
        {
            ActionStateBase state = null;

            try
            {
                using (var tx = this.stateManager.CreateTransaction())
                {
                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Enter transaction inside {0} - operationId={1}", "CancelTestCommandAsync", operationId);
                    var data = await this.actionTable.TryGetValueAsync(tx, operationId, DefaultDictionaryTimeout, this.cancellationToken).ConfigureAwait(false);

                    if (data.HasValue)
                    {
                        state = this.ReadData(data.Value);
                    }
                    else
                    {
                        string error = string.Format(CultureInfo.InvariantCulture, "OperationId '{0}' was not found", operationId);
                        TestabilityTrace.TraceSource.WriteWarning(TraceType, error);
                        throw FaultAnalysisServiceUtility.CreateException(TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_KEY_NOT_FOUND, error);
                    }

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "CancelActionAsync: operationId={0}, force={1}", operationId, force);

                    if (force)
                    {
                        if (state.RollbackState != RollbackState.RollingBackDueToUserCancel)
                        {
                            string error = string.Format(CultureInfo.InvariantCulture, "CancelActionAsync: operationId={0}, force is not allowed for something in {1}", operationId, state.RollbackState.ToString());
                            TestabilityTrace.TraceSource.WriteError(TraceType, error);

                            throw FaultAnalysisServiceUtility.CreateException(TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_INVALID_TEST_COMMAND_STATE, error);
                        }
                        else
                        {
                            state.RollbackState = RollbackState.RollingBackForce;
                        }
                    }
                    else
                    {
                        // if it's in force, don't allow it back down to graceful
                        if (state.RollbackState == RollbackState.RollingBackForce)
                        {
                            string error = string.Format(
                                CultureInfo.InvariantCulture,
                                "CancelActionAsync: operationId={0}, CancelTestCommandAsync has already been called with force==true.  Switching to force==false is not valid.",
                                operationId);
                            TestabilityTrace.TraceSource.WriteWarning(TraceType, error);

                            throw FaultAnalysisServiceUtility.CreateException(TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_INVALID_TEST_COMMAND_STATE, error);
                        }

                        state.RollbackState = RollbackState.RollingBackDueToUserCancel;
                    }

                    byte[] stateToPersist = state.ToBytes();

                    await this.actionTable.AddOrUpdateAsync(tx, state.OperationId, stateToPersist, (k, v) => stateToPersist, DefaultDictionaryTimeout, this.cancellationToken).ConfigureAwait(false);

                    await tx.CommitAsync().ConfigureAwait(false);

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "CancelActionAsync: operationId={0}, force={1} committed", operationId, force);
                }

                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Enter transaction inside {0} - operationId={1}", "CancelTestCommandAsync", operationId);
            }
            catch (FabricNotPrimaryException fnp)
            {
                FaultAnalysisServiceUtility.TraceFabricNotPrimary(operationId, fnp);
                throw;
            }
            catch (Exception e)
            {
                TestabilityTrace.TraceSource.WriteError(TraceType, "CancelActionAsync: {0}", e.ToString());
                throw;
            }
        }
Example #9
0
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                NodeCommandState state = Convert(this.State);

                Node queriedNode = await FaultAnalysisServiceUtility.GetNodeInfoAsync(
                    this.State.OperationId,
                    this.FabricClient,
                    state.Info.NodeName,
                    this.action.Partition,
                    this.action.StateManager,
                    this.action.StoppedNodeTable,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StartNode LookingUpState reading RD", this.State.OperationId);
                bool isStopped = await FaultAnalysisServiceUtility.ReadStoppedNodeStateAsync(
                    this.State.OperationId,
                    this.action.Partition,
                    this.action.StateManager,
                    this.action.StoppedNodeTable,
                    state.Info.NodeName,
                    cancellationToken).ConfigureAwait(false);

                if (FaultAnalysisServiceUtility.IsNodeRunning(queriedNode))
                {
                    if (!isStopped)
                    {
                        // For illustration, if you just called StartNodeUsingNodeNameAsync() in this situation w/o checking first, you'd either get instance mismatch or node has not stopped yet
                        // Note: this is different than the logic in the PerformingActions step (the former does not check instance id, the latter does), which is after the call to StartNodeUsingNodeNameAsync(), because
                        // this is a precondition check.
                        Exception nodeAlreadyUp = FaultAnalysisServiceUtility.CreateException(
                            TraceType,
                            NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_IS_UP,
                            string.Format(CultureInfo.InvariantCulture, "Node {0} already started", state.Info.NodeName),
                            FabricErrorCode.NodeIsUp);

                        throw new FatalException("fatal", nodeAlreadyUp);
                    }
                    else
                    {
                        // The only way this can happen is OOB start.  FAS should fix it's incorrect state then fail the command with
                        // node already up.
                        TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StartNode LookingUpState setting RD entry for node {1} to not stopped", this.State.OperationId, state.Info.NodeName);
                        await FaultAnalysisServiceUtility.SetStoppedNodeStateAsync(
                            this.action.State.OperationId,
                            this.action.Partition,
                            this.action.StateManager,
                            this.action.StoppedNodeTable,
                            queriedNode.NodeName,
                            false,
                            cancellationToken).ConfigureAwait(false);

                        Exception nodeIsUp = FaultAnalysisServiceUtility.CreateException(
                            TraceType,
                            Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_IS_UP,
                            string.Format(CultureInfo.InvariantCulture, "Node {0} is up", state.Info.NodeName));
                        throw new FatalException("fatal", nodeIsUp);
                    }
                }
                else if (queriedNode.NodeStatus == NodeStatus.Down && !isStopped)
                {
                    // This is a special scenario that can happen if:
                    // 1)  There was an OOB stop using the old api
                    // 2)  A node went down (not stopped, down)
                    // Don't handle this, return node down.
                    Exception nodeIsDown = FaultAnalysisServiceUtility.CreateException(
                        TraceType,
                        Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_IS_DOWN,
                        string.Format(CultureInfo.InvariantCulture, "Node {0} is down", state.Info.NodeName));
                    throw new FatalException("fatal", nodeIsDown);
                }

                state.Info.InitialQueriedNodeStatus       = queriedNode.NodeStatus;
                state.Info.NodeWasInitiallyInStoppedState = isStopped;

                state.StateProgress.Push(StepStateNames.PerformingActions);

                return(state);
            }
Example #10
0
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                NodeCommandState state = Convert(this.State);

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StopNode.LookingUpState performing node query", this.State.OperationId);

                Node queriedNode = await FaultAnalysisServiceUtility.GetNodeInfoAsync(
                    this.State.OperationId,
                    this.FabricClient,
                    state.Info.NodeName,
                    this.action.Partition,
                    this.action.StateManager,
                    this.action.StoppedNodeTable,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StopNode.LookingUpState node query completed", this.State.OperationId);

                // Check for bad state
                if (queriedNode == null ||
                    queriedNode.NodeStatus == NodeStatus.Invalid ||
                    queriedNode.NodeStatus == NodeStatus.Unknown ||
                    queriedNode.NodeStatus == NodeStatus.Removed)
                {
                    // Fail the command
                    Exception nodeNotFoundException = FaultAnalysisServiceUtility.CreateException(
                        TraceType,
                        Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_NOT_FOUND,
                        string.Format(CultureInfo.InvariantCulture, "{0} - Node {1} does not exist", this.State.OperationId, state.Info.NodeName));
                    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - throwing fatal exception {1}", this.State.OperationId, nodeNotFoundException);
                    throw new FatalException("fatal", nodeNotFoundException);
                }

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - StopNode LookingUpState reading RD", this.State.OperationId);
                bool isStopped = await FaultAnalysisServiceUtility.ReadStoppedNodeStateAsync(
                    this.State.OperationId,
                    this.action.Partition,
                    this.action.StateManager,
                    this.action.StoppedNodeTable,
                    state.Info.NodeName,
                    cancellationToken).ConfigureAwait(false);

                if (queriedNode.NodeStatus == NodeStatus.Down && isStopped)
                {
                    // Node already stopped
                    Exception nodeAlreadyStopped = FaultAnalysisServiceUtility.CreateException(
                        TraceType,
                        Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_ALREADY_STOPPED,
                        string.Format(CultureInfo.InvariantCulture, "Node {0} is already stopped", state.Info.NodeName));
                    throw new FatalException("fatal", nodeAlreadyStopped);
                }
                else if (queriedNode.NodeStatus != NodeStatus.Down && isStopped)
                {
                    // FM says the node is up, so FAS has incorrect state, perhaps because of an out of band start from the original deprecated api.
                    // Correct the state, then continue to run this command normally.  It is valid.
                    await FaultAnalysisServiceUtility.SetStoppedNodeStateAsync(
                        this.action.State.OperationId,
                        this.action.Partition,
                        this.action.StateManager,
                        this.action.StoppedNodeTable,
                        queriedNode.NodeName,
                        false,
                        cancellationToken).ConfigureAwait(false);
                }
                else if (queriedNode.NodeStatus == NodeStatus.Down && !isStopped)
                {
                    // Node is down (as opposed to stopped)
                    Exception nodeIsDown = FaultAnalysisServiceUtility.CreateException(
                        TraceType,
                        Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_IS_DOWN,
                        string.Format(CultureInfo.InvariantCulture, "Node {0} is down", state.Info.NodeName));
                    throw new FatalException("fatal", nodeIsDown);
                }

                state.Info.InitialQueriedNodeStatus       = queriedNode.NodeStatus;
                state.Info.NodeWasInitiallyInStoppedState = isStopped;
                TestabilityTrace.TraceSource.WriteInfo(
                    StepBase.TraceType,
                    "{0} - StopNode LookingUpState InitialQueriedNodeStatus='{1}', NodeWasInitiallyInStoppedState='{2}'",
                    this.State.OperationId,
                    state.Info.InitialQueriedNodeStatus,
                    state.Info.NodeWasInitiallyInStoppedState);

                state.StateProgress.Push(StepStateNames.PerformingActions);
                return(state);
            }
Example #11
0
 internal static void ThrowAlreadyRunning(string message = default(string))
 {
     throw FaultAnalysisServiceUtility.CreateException(TraceType, NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_CHAOS_ALREADY_RUNNING, StringResources.ChaosError_ChaosAlreadyRunning);
 }
Example #12
0
 internal static void ThrowNotReady(string message)
 {
     throw FaultAnalysisServiceUtility.CreateException(TraceType, NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NOT_READY, message);
 }
Example #13
0
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                InvokeDataLossState state = Convert(this.State);

                PartitionSelector partitionSelector   = state.Info.PartitionSelector;
                DataLossMode      dataLossMode        = state.Info.DataLossMode;
                long   preDataLossNumber              = state.Info.DataLossNumber;
                string failoverManagerPrimaryNodeName = state.Info.NodeName;
                Guid   partitionId          = state.Info.PartitionId;
                string behaviorName         = state.Info.UnreliableTransportInfo.First().Item2;
                int    targetReplicaSetSize = state.Info.TargetReplicaSetSize;

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - applying UT, partitionId={1}", this.State.OperationId, partitionId);
                System.Fabric.Common.UnreliableTransportBehavior behavior = new System.Fabric.Common.UnreliableTransportBehavior("*", "DoReconfiguration");
                behavior.AddFilterForPartitionId(partitionId);

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                        failoverManagerPrimaryNodeName,
                        behaviorName,
                        behavior,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                List <StatefulServiceReplica> replicaList = new List <StatefulServiceReplica>();

                foreach (var replica in replicasResult)
                {
                    StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(statefulReplica == null, "Service Replica is not of stateful type even though service is stateful");
                    replicaList.Add(statefulReplica);
                }

                // Select target replicas based on the DataLosMode
                List <StatefulServiceReplica> targets = null;

                if (dataLossMode == DataLossMode.FullDataLoss)
                {
                    targets = GetReplicasForFullDataLoss(replicaList);
                }
                else if (dataLossMode == DataLossMode.PartialDataLoss)
                {
                    targets = FaultAnalysisServiceUtility.GetReplicasForPartialLoss(state.OperationId, replicaList);
                }
                else
                {
                    throw FaultAnalysisServiceUtility.CreateException(StepBase.TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_INVALIDARG, Strings.StringResources.Error_UnsupportedDataLossMode);
                }

                if (targets == null)
                {
                    // This will cause the command to rollback and retry
                    throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
                }

                foreach (var replica in targets)
                {
                    TestabilityTrace.TraceSource.WriteInfo(
                        StepBase.TraceType,
                        "{0} - Removing replica {1} in partition {2} with role {3} and status {4} to induce data loss",
                        this.State.OperationId,
                        replica.Id,
                        partitionId,
                        replica.ReplicaRole,
                        replica.ReplicaStatus);

                    await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => this.FabricClient.ServiceManager.RemoveReplicaAsync(
                            replica.NodeName,
                            partitionId,
                            replica.Id,
                            this.RequestTimeout,
                            cancellationToken),
                        FabricClientRetryErrors.RemoveReplicaErrors.Value,
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);
                }

                ActionTest.PerformInternalServiceFaultIfRequested(this.State.OperationId, serviceInternalFaultInfo, this.State, cancellationToken, true);

                await this.WaitForAllTargetReplicasToGetDroppedAsync(partitionId, targets, cancellationToken).ConfigureAwait(false);

                await RemoveUnreliableTransportAsync(this.FabricClient, failoverManagerPrimaryNodeName, behaviorName, this.RequestTimeout, this.OperationTimeout, cancellationToken).ConfigureAwait(false);

                bool          dataLossWasSuccessful = false;
                TimeoutHelper timeoutHelper         = new TimeoutHelper(TimeSpan.FromSeconds(30));

                do
                {
                    ServicePartitionList partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => this.FabricClient.QueryManager.GetPartitionListAsync(
                            this.partitionSelector.ServiceName,
                            null,
                            this.RequestTimeout,
                            cancellationToken),
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);

                    bool partitionFound     = false;
                    long postDataLossNumber = 0;
                    foreach (StatefulServicePartition partition in partitionsResult)
                    {
                        if (partition.PartitionInformation.Id == partitionId)
                        {
                            postDataLossNumber = partition.PrimaryEpoch.DataLossNumber;
                            partitionFound     = true;
                            break;
                        }
                    }

                    if (!partitionFound)
                    {
                        throw new FabricException(StringHelper.Format(StringResources.Error_PartitionNotFound), FabricErrorCode.PartitionNotFound);
                    }

                    TestabilityTrace.TraceSource.WriteInfo(
                        StepBase.TraceType,
                        "{0} - Checking data loss numbers for partition {1} with remaining time {2}. Current numbers {3}:{4}",
                        this.State.OperationId,
                        partitionId,
                        timeoutHelper.GetRemainingTime(),
                        preDataLossNumber,
                        postDataLossNumber);

                    if (postDataLossNumber != preDataLossNumber)
                    {
                        dataLossWasSuccessful = true;
                        break;
                    }

                    await System.Fabric.Common.AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(this.dataLossCheckPollIntervalInSeconds), cancellationToken).ConfigureAwait(false);
                }while (timeoutHelper.GetRemainingTime() > TimeSpan.Zero);

                if (!dataLossWasSuccessful)
                {
                    // This is only viewable internally for debug.  This will cause a retry of the whole flow.
                    string error = string.Format(
                        CultureInfo.InvariantCulture,
                        "{0} - Service could not induce data loss for service '{1}' partition '{2}' in '{3}' Please retry",
                        this.State.OperationId,
                        partitionSelector.ServiceName,
                        partitionId,
                        this.dataLossCheckWaitDurationInSeconds);
                    TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, error);
                    throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
                }

                state.StateProgress.Push(StepStateNames.CompletedSuccessfully);

                return(state);
            }
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "Inside CollectingState, service={0}", this.partitionSelector.ServiceName);
                RestartPartitionState state = Convert(this.State);

                // Get service info and validate if the parameters are valid
                ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        this.partitionSelector.ServiceName,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                if (result.Kind != ServiceDescriptionKind.Stateful && this.restartPartitionMode == RestartPartitionMode.OnlyActiveSecondaries)
                {
                    // The message in the first arg is only for debugging, it is not returned to the user.
                    string debugText = string.Format(CultureInfo.InvariantCulture, "RestartPartition: for stateless services only RestartPartitionMode.AllReplicasOrInstances is valid");
                    TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, debugText);
                    throw FaultAnalysisServiceUtility.CreateException(StepBase.TraceType, NativeTypes.FABRIC_ERROR_CODE.E_INVALIDARG, debugText);
                }

                bool hasPersistedState = false;

                if (result.Kind == ServiceDescriptionKind.Stateful)
                {
                    StatefulServiceDescription statefulDescription = result as StatefulServiceDescription;
                    ReleaseAssert.AssertIf(statefulDescription == null, "Stateful service description is not WinFabricStatefulServiceDescription");
                    hasPersistedState = statefulDescription.HasPersistedState;
                }

                SelectedPartition targetPartition = await FaultAnalysisServiceUtility.GetSelectedPartitionStateAsync(
                    this.FabricClient,
                    this.partitionSelector,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                Guid partitionId = targetPartition.PartitionId;

                // get replicas for target
                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // get replicas for fm in order to get the primary
                ServiceReplicaList failoverManagersReplicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        FASConstants.FmPartitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                string failoverManagerPrimaryNodeName = string.Empty;
                var    readyFMReplicas = failoverManagersReplicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();

                foreach (var replica in readyFMReplicas)
                {
                    StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(statefulReplica == null, "FM Replica is not a stateful replica");
                    if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                    {
                        failoverManagerPrimaryNodeName = replica.NodeName;
                    }
                }

                if (string.IsNullOrEmpty(failoverManagerPrimaryNodeName))
                {
                    throw new FabricException(StringHelper.Format(StringResources.Error_PartitionPrimaryNotReady, "FailoverManager"), FabricErrorCode.NotReady);
                }

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - FM primary is at node={1}", this.State.OperationId, failoverManagerPrimaryNodeName);
                string behaviorName = RestartingSelectedReplicas.UTBehaviorPrefixName + "_" + this.State.OperationId;
                List <Tuple <string, string> > unreliableTransportInfo = new List <Tuple <string, string> >();

                unreliableTransportInfo.Add(new Tuple <string, string>(failoverManagerPrimaryNodeName, behaviorName));

                state.StateProgress.Push(StepStateNames.PerformingActions);
                state.Info.PartitionId             = partitionId;
                state.Info.NodeName                = failoverManagerPrimaryNodeName;
                state.Info.HasPersistedState       = hasPersistedState;
                state.Info.UnreliableTransportInfo = unreliableTransportInfo;

                return(state);
            }
 private void ThrowEAbort()
 {
     throw FaultAnalysisServiceUtility.CreateException(TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_ABORT, "Operation cancelled");
 }
        public async Task StartNodeTransitionAsync(
            NodeTransitionDescription description,
            TimeSpan timeout,
            CancellationToken cancellationToken)
        {
            this.ThrowIfNotReady();

            TestabilityTrace.TraceSource.WriteInfo(
                TraceType,
                "FaultAnalysisServiceImpl StartNodeTransitionAsync operationId={0} NodeTransitionType={1} - {2}:{3}",
                description.OperationId,
                description.NodeTransitionType,
                description.NodeName,
                description.NodeInstanceId);
            try
            {
                if (description.NodeTransitionType == NodeTransitionType.Start)
                {
                    NodeStartDescription translatedDescription = (NodeStartDescription)description;
                    await this.MessageProcessor.ProcessStartNodeCommandAsync(
                        translatedDescription.OperationId,
                        translatedDescription.NodeName,
                        translatedDescription.NodeInstanceId,
                        timeout,
                        null).ConfigureAwait(false);
                }
                else if (description.NodeTransitionType == NodeTransitionType.Stop)
                {
                    NodeStopDescription translatedDescription = (NodeStopDescription)description;
                    if (translatedDescription.StopDurationInSeconds < this.minStopDurationInSeconds ||
                        translatedDescription.StopDurationInSeconds > this.maxStopDurationInSeconds)
                    {
                        TestabilityTrace.TraceSource.WriteWarning(
                            TraceType,
                            "Duration in seconds '{0}' is out of range.  Min='{1}' Max='{2}'",
                            translatedDescription.StopDurationInSeconds,
                            this.minStopDurationInSeconds,
                            this.maxStopDurationInSeconds);
                        throw FaultAnalysisServiceUtility.CreateException(
                                  TraceType,
                                  Interop.NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_INVALID_DURATION,
                                  "StopDurationInSeconds is out of range");
                    }

                    await this.MessageProcessor.ProcessStopNodeCommandAsync(
                        translatedDescription.OperationId,
                        translatedDescription.NodeName,
                        translatedDescription.NodeInstanceId,
                        translatedDescription.StopDurationInSeconds,
                        timeout,
                        null).ConfigureAwait(false);
                }
                else
                {
                    throw new NotImplementedException();
                }
            }
            catch (Exception e)
            {
                TestabilityTrace.TraceSource.WriteWarning(TraceType, "{0} - Exception occurred: {1}", description.OperationId, e.Message);
                FaultAnalysisServiceUtility.ThrowTransientExceptionIfRetryable(e);

                throw;
            }

            TestabilityTrace.TraceSource.WriteInfo(TraceType, "{0} - Intent saved for {1}", description.OperationId, ActionType.StartNode);
        }