/// <summary>
        /// Builds the step object that matches <paramref name="stateName"/> for an invoke quorum loss command.
        /// </summary>
        /// <param name="stateName">The state for which a step is requested.</param>
        /// <param name="fabricClient">Client handed to the created step.</param>
        /// <param name="actionState">Command state; it must be an <see cref="InvokeQuorumLossState"/>.</param>
        /// <param name="action">Action that supplies the partition selector and the quorum loss mode.</param>
        /// <param name="requestTimeout">Request timeout handed to the created step.</param>
        /// <param name="operationTimeout">Operation timeout handed to the created step.</param>
        /// <param name="cancellationToken">Not used by this method.</param>
        /// <returns>
        /// A step for LookingUpState or PerformingActions. For every other state an error is traced and
        /// ReleaseAssert.Failfast is invoked; null is returned if that call returns.
        /// </returns>
        /// <exception cref="InvalidCastException">Raised by Convert when <paramref name="actionState"/> is not an <see cref="InvokeQuorumLossState"/>.</exception>
        public static StepBase GetStep(
            StepStateNames stateName,
            FabricClient fabricClient,
            ActionStateBase actionState,
            InvokeQuorumLossAction action,
            TimeSpan requestTimeout,
            TimeSpan operationTimeout,
            CancellationToken cancellationToken)
        {
            // The conversion happens up front so that a wrong state type fails before any step is built.
            InvokeQuorumLossState quorumLossState = Convert(actionState);

            switch (stateName)
            {
                case StepStateNames.LookingUpState:
                    return new QuorumLossStepsFactory.LookingUpState(fabricClient, quorumLossState, requestTimeout, operationTimeout, action.PartitionSelector, action.QuorumLossMode);

                case StepStateNames.PerformingActions:
                    return new QuorumLossStepsFactory.PerformingActions(fabricClient, quorumLossState, requestTimeout, operationTimeout, action.PartitionSelector);

                case StepStateNames.CompletedSuccessfully:
                    // The command is already finished, so nobody should be asking for another step.
                    TestabilityTrace.TraceSource.WriteError(StepBase.TraceType, "{0} - GetStep() should not have been called when the state nme is CompletedSuccessfully", actionState.OperationId);
                    ReleaseAssert.Failfast("GetStep() should not have been called when the state nme is CompletedSuccessfully");
                    break;

                default:
                    string unexpectedStateMessage = string.Format(CultureInfo.InvariantCulture, "{0} - Unexpected state name={1}", actionState.OperationId, stateName);
                    TestabilityTrace.TraceSource.WriteError(StepBase.TraceType, "{0}", unexpectedStateMessage);
                    ReleaseAssert.Failfast(unexpectedStateMessage);
                    break;
            }

            return null;
        }
        /// <summary>
        /// Casts a generic action state to <see cref="InvokeQuorumLossState"/>.
        /// </summary>
        /// <param name="actionState">The state object to cast.</param>
        /// <returns>The same object typed as <see cref="InvokeQuorumLossState"/>.</returns>
        /// <exception cref="InvalidCastException">
        /// <paramref name="actionState"/> is null or is not an <see cref="InvokeQuorumLossState"/>.
        /// </exception>
        public static InvokeQuorumLossState Convert(ActionStateBase actionState)
        {
            InvokeQuorumLossState converted = actionState as InvokeQuorumLossState;

            if (converted != null)
            {
                return converted;
            }

            throw new InvalidCastException("State object could not be converted");
        }
        /// <summary>
        /// Looks up the persisted state of an invoke quorum loss command and converts it into a progress object.
        /// </summary>
        /// <param name="operationId">Identifier of the command whose progress is requested.</param>
        /// <param name="timeout">Timeout passed to the message processor.</param>
        /// <param name="cancellationToken">Cancellation token passed to the message processor.</param>
        /// <returns>The progress built from the stored state, selected partition and rollback error.</returns>
        /// <exception cref="InvalidCastException">
        /// The state stored for <paramref name="operationId"/> is not an <see cref="InvokeQuorumLossState"/>.
        /// </exception>
        /// <remarks>
        /// Every exception is traced as a warning, offered to
        /// FaultAnalysisServiceUtility.ThrowTransientExceptionIfRetryable and then rethrown.
        /// </remarks>
        public async Task<PartitionQuorumLossProgress> GetInvokeQuorumLossProgressAsync(
            Guid operationId,
            TimeSpan timeout,
            CancellationToken cancellationToken)
        {
            this.ThrowIfNotReady();
            PartitionQuorumLossProgress progress = null;

            try
            {
                ActionStateBase actionState = await this.MessageProcessor.ProcessGetProgressAsync(operationId, timeout, cancellationToken);

                // NOTE(review): stateName is not read below. The call is kept because Peek() throws on an
                // empty stack - confirm whether that check is intended before removing it.
                StepStateNames stateName = actionState.StateProgress.Peek();

                TestCommandProgressState state = FaultAnalysisServiceUtility.ConvertState(actionState, TraceType);

                // Fail with a descriptive exception instead of a NullReferenceException when the stored
                // state belongs to a different command type.
                InvokeQuorumLossState invokeQuorumLossState = actionState as InvokeQuorumLossState;
                if (invokeQuorumLossState == null)
                {
                    throw new InvalidCastException("State object could not be converted");
                }

                var selectedPartition = new SelectedPartition
                {
                    ServiceName = invokeQuorumLossState.Info.PartitionSelector.ServiceName,
                    PartitionId = invokeQuorumLossState.Info.PartitionId
                };

                PartitionQuorumLossResult result = new PartitionQuorumLossResult(selectedPartition, actionState.ErrorCausingRollback);

                progress = new PartitionQuorumLossProgress(state, result);

                TestabilityTrace.TraceSource.WriteInfo(
                    TraceType,
                    "{0} - {1} progress - {2}, Exception - {3}",
                    operationId,
                    ActionType.InvokeQuorumLoss,
                    progress.Result != null ? progress.Result.SelectedPartition.ToString() : FASConstants.UnavailableMessage,
                    (progress.Result != null && progress.Result.Exception != null) ? progress.Result.Exception.ToString() : FASConstants.UnavailableMessage);
            }
            catch (Exception e)
            {
                TestabilityTrace.TraceSource.WriteWarning(TraceType, "{0} - Exception: {1}", operationId, e.ToString());
                FaultAnalysisServiceUtility.ThrowTransientExceptionIfRetryable(e);

                throw;
            }

            return progress;
        }
        /// <summary>
        /// Validates the quorum loss mode, persists a new invoke quorum loss command and enqueues it.
        /// </summary>
        /// <remarks>
        /// This signature is provisional until the actual client interface is decided.
        /// Exceptions from persisting or enqueueing are traced as warnings and rethrown.
        /// </remarks>
        /// <param name="operationId">Identifier of the new command.</param>
        /// <param name="partitionSelector">Selector stored in the command state.</param>
        /// <param name="quorumLossMode">Mode stored in the command state; validated first.</param>
        /// <param name="quorumLossDuration">Duration stored in the command state.</param>
        /// <param name="timeout">Timeout passed to the action store when persisting the command.</param>
        /// <param name="serviceInternalFaultInfo">Fault information stored in the command state.</param>
        public async Task ProcessQuorumLossCommandAsync(
            Guid operationId,
            PartitionSelector partitionSelector,
            QuorumLossMode quorumLossMode,
            TimeSpan quorumLossDuration,
            TimeSpan timeout,
            ServiceInternalFaultInfo serviceInternalFaultInfo)
        {
            ThrowIfQuorumLossModeInvalid(quorumLossMode);

            InvokeQuorumLossState newCommandState = new InvokeQuorumLossState(
                operationId,
                serviceInternalFaultInfo,
                partitionSelector,
                quorumLossMode,
                quorumLossDuration);

            try
            {
                // Once this await completes the intent has been persisted.
                await this.actionStore.InitializeNewActionAsync(newCommandState, timeout);

                this.Enqueue(newCommandState);
            }
            catch (Exception ex)
            {
                TestabilityTrace.TraceSource.WriteWarning(TraceType, "{0} - Exception {1}", operationId, ex);
                throw;
            }
        }
Exemple #5
0
        /// <summary>
        /// Deserializes a persisted action state from its byte representation.
        /// </summary>
        /// <param name="bytes">Serialized state; the command type is read first and selects the concrete state type.</param>
        /// <returns>The deserialized state, or null when the command type is not one of the handled values.</returns>
        private ActionStateBase ReadData(byte[] bytes)
        {
            using (BinaryReader reader = new BinaryReader(new MemoryStream(bytes)))
            {
                // The first 4 bytes are the command type
                ActionType commandType = ActionStateBase.ReadCommandType(reader);

                switch (commandType)
                {
                    case ActionType.InvokeDataLoss:
                        return InvokeDataLossState.FromBytes(reader);

                    case ActionType.InvokeQuorumLoss:
                        return InvokeQuorumLossState.FromBytes(reader);

                    case ActionType.RestartPartition:
                        return RestartPartitionState.FromBytes(reader);

                    case ActionType.TestStuck:
                        return StuckState.FromBytes(reader);

                    case ActionType.TestRetryStep:
                        return TestRetryStepState.FromBytes(reader);

                    // Both node commands share one state type; the command type is passed through.
                    case ActionType.StartNode:
                    case ActionType.StopNode:
                        return NodeCommandState.FromBytes(reader, commandType);

                    default:
                        return null;
                }
            }
        }
        /// <summary>
        /// Removes every unreliable transport behavior recorded in the quorum loss state.
        /// </summary>
        /// <param name="fabricClient">Client used to issue the remove calls.</param>
        /// <param name="state">Command state; it must be an <see cref="InvokeQuorumLossState"/>.</param>
        /// <param name="requestTimeout">Timeout for each individual remove request.</param>
        /// <param name="operationTimeout">Overall timeout handed to the retry helper for each removal.</param>
        /// <param name="cancellationToken">Token observed by the remove calls and by the final delay.</param>
        /// <returns>A task that completes after all removals have finished and a further 5 second delay has elapsed.</returns>
        /// <exception cref="InvalidCastException">Raised by Convert when <paramref name="state"/> is not an <see cref="InvokeQuorumLossState"/>.</exception>
        /// <remarks>Nothing is done, and no delay is incurred, when the state holds no unreliable transport information.</remarks>
        public static async Task RemoveUTAsync(FabricClient fabricClient, ActionStateBase state, TimeSpan requestTimeout, TimeSpan operationTimeout, CancellationToken cancellationToken)
        {
            InvokeQuorumLossState invokeQuorumLossState = Convert(state);

            if (invokeQuorumLossState.Info.UnreliableTransportInfo == null)
            {
                return;
            }

            List<Task> tasks = new List<Task>();

            // Each tuple is (node name, behavior name). Removal only needs these two values, so no
            // UnreliableTransportBehavior object is constructed here.
            foreach (Tuple<string, string> info in invokeQuorumLossState.Info.UnreliableTransportInfo)
            {
                string nodeName = info.Item1;
                string behaviorName = info.Item2;

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - Cleaning up behavior={1}", state.OperationId, behaviorName);

                Task task = FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => fabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                        nodeName,
                        behaviorName,
                        requestTimeout,
                        cancellationToken),
                    FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                    operationTimeout,
                    cancellationToken);
                tasks.Add(task);
            }

            await Task.WhenAll(tasks).ConfigureAwait(false);

            // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
            // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
            await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);
        }
            /// <summary>
            /// Applies the recorded unreliable transport behaviors, restarts the target replicas, keeps the
            /// partition in quorum loss for the configured duration, verifies the partition status and
            /// finally removes the behaviors again.
            /// </summary>
            /// <param name="cancellationToken">Token observed by every asynchronous call made by this step.</param>
            /// <param name="serviceInternalFaultInfo">Passed to ActionTest.PerformInternalServiceFaultIfRequested.</param>
            /// <returns>The state with CompletedSuccessfully pushed onto its progress stack.</returns>
            /// <exception cref="FabricTransientException">
            /// The partition was never observed in quorum loss within the allowed number of checks.
            /// </exception>
            public override async Task<ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                InvokeQuorumLossState state = Convert(this.State);

                Guid partitionId = state.Info.PartitionId;
                List<Tuple<string, string>> unreliableTransportInfo = state.Info.UnreliableTransportInfo;
                List<long> targetReplicas = state.Info.ReplicaIds;

                var unreliableTransportTaskList = new List<Task>();

                // Each tuple is (node name, behavior name).
                foreach (Tuple<string, string> ut in unreliableTransportInfo)
                {
                    string nodeName = ut.Item1;
                    string behaviorName = ut.Item2;

                    System.Fabric.Common.UnreliableTransportBehavior behavior = new System.Fabric.Common.UnreliableTransportBehavior("*", "StatefulServiceReopen");
                    behavior.AddFilterForPartitionId(partitionId);

                    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - applying '{1}'", this.State.OperationId, behaviorName);

                    unreliableTransportTaskList.Add(FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => this.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                            nodeName,
                            behaviorName,
                            behavior,
                            this.RequestTimeout,
                            cancellationToken),
                        this.OperationTimeout,
                        cancellationToken));
                }

                await Task.WhenAll(unreliableTransportTaskList).ConfigureAwait(false);

                // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                List<Task> tasks = new List<Task>();

                foreach (long replicaId in targetReplicas)
                {
                    ReplicaSelector replicaSelector = ReplicaSelector.ReplicaIdOf(PartitionSelector.PartitionIdOf(this.partitionSelector.ServiceName, partitionId), replicaId);

                    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - faulting replica with id={1}", this.State.OperationId, replicaId);
                    Task task = FaultAnalysisServiceUtility.RestartReplicaAsync(this.FabricClient, replicaSelector, CompletionMode.DoNotVerify, this.RequestTimeout, this.OperationTimeout, cancellationToken);
                    tasks.Add(task);
                }

                await Task.WhenAll(tasks).ConfigureAwait(false);

                ActionTest.PerformInternalServiceFaultIfRequested(this.State.OperationId, serviceInternalFaultInfo, this.State, cancellationToken, true);

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - keeping partition in quorum loss for '{1}'", this.State.OperationId, state.Info.QuorumLossDuration);
                await Task.Delay(state.Info.QuorumLossDuration, cancellationToken).ConfigureAwait(false);

                bool conditionSatisfied = false;

                int quorumLossCheckRetries = FASConstants.QuorumLossCheckRetryCount;

                // Poll the partition status until it reports InQuorumLoss or the retries run out.
                // NOTE(review): the 5 second wait at the end of the loop body also runs after the status
                // has been observed - confirm whether that extra wait is intended.
                do
                {
                    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - checking PartitionStatus", this.State.OperationId);
                    ServicePartitionList partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => this.FabricClient.QueryManager.GetPartitionListAsync(
                            this.partitionSelector.ServiceName,
                            null,
                            this.RequestTimeout,
                            cancellationToken),
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);

                    foreach (StatefulServicePartition partition in partitionsResult)
                    {
                        if (partition.PartitionInformation.Id == partitionId)
                        {
                            if (partition.PartitionStatus == ServicePartitionStatus.InQuorumLoss)
                            {
                                conditionSatisfied = true;
                                break;
                            }
                        }
                    }

                    await AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
                }
                while (!conditionSatisfied && quorumLossCheckRetries-- > 0);

                if (!conditionSatisfied)
                {
                    string error = string.Format(CultureInfo.InvariantCulture, "{0} - Service could not induce quorum loss for service '{1}', partition '{2}'. Please retry", this.State.OperationId, this.partitionSelector.ServiceName, partitionId);

                    // The text is already formatted, so it is passed as an argument rather than as the
                    // format string; braces inside the service name can then not break the trace call.
                    TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, "{0}", error);

                    throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
                }

                await QuorumLossStepsFactory.RemoveUTAsync(this.FabricClient, this.State, this.RequestTimeout, this.OperationTimeout, cancellationToken).ConfigureAwait(false);

                state.StateProgress.Push(StepStateNames.CompletedSuccessfully);

                return state;
            }
 /// <summary>
 /// Creates the step; the client, state and timeouts go to the base constructor and the
 /// partition selector is kept on this instance.
 /// </summary>
 /// <param name="fabricClient">Client forwarded to the base step.</param>
 /// <param name="state">Quorum loss command state forwarded to the base step.</param>
 /// <param name="requestTimeout">Request timeout forwarded to the base step.</param>
 /// <param name="operationTimeout">Operation timeout forwarded to the base step.</param>
 /// <param name="partitionSelector">Selector stored in the partitionSelector field.</param>
 public PerformingActions(
     FabricClient fabricClient,
     InvokeQuorumLossState state,
     TimeSpan requestTimeout,
     TimeSpan operationTimeout,
     PartitionSelector partitionSelector)
     : base(fabricClient, state, requestTimeout, operationTimeout)
 {
     this.partitionSelector = partitionSelector;
 }
            /// <summary>
            /// Validates the target service, resolves the target partition and its replicas, and records
            /// the partition id, target replica ids and unreliable transport behavior names in the state.
            /// </summary>
            /// <param name="cancellationToken">Token observed by every query issued by this step.</param>
            /// <param name="serviceInternalFaultInfo">Not used by this step.</param>
            /// <returns>The state with PerformingActions pushed onto its progress stack.</returns>
            /// <exception cref="FabricInvalidForStatelessServicesException">The service is not stateful.</exception>
            /// <exception cref="FabricOnlyValidForStatefulPersistentServicesException">The service has no persisted state.</exception>
            /// <exception cref="FabricTransientException">No target replicas could be determined for QuorumReplicas mode.</exception>
            public override async Task<ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                InvokeQuorumLossState state = Convert(this.State);

                // get info about the service so we can check its kind and whether it has persisted state
                ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        this.partitionSelector.ServiceName,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                if (result.Kind != ServiceDescriptionKind.Stateful)
                {
                    // The message in the first arg is only for debugging, it is not returned to the user.
                    throw new FabricInvalidForStatelessServicesException("FabricInvalidForStatelessServicesException", FabricErrorCode.InvalidForStatelessServices);
                }

                StatefulServiceDescription statefulServiceDescription = result as StatefulServiceDescription;

                ReleaseAssert.AssertIf(statefulServiceDescription == null, string.Format(CultureInfo.InvariantCulture, "{0} - Service is not a stateful service", this.State.OperationId));

                if (!statefulServiceDescription.HasPersistedState)
                {
                    // The message in the first arg is only for debugging, it is not returned to the user.
                    throw new FabricOnlyValidForStatefulPersistentServicesException("This is only valid for stateful persistent services", FabricErrorCode.OnlyValidForStatefulPersistentServices);
                }

                SelectedPartition targetPartition = await FaultAnalysisServiceUtility.GetSelectedPartitionStateAsync(
                    this.FabricClient,
                    this.partitionSelector,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                Guid partitionId = targetPartition.PartitionId;

                // get data about replicas in that partition
                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                List<StatefulServiceReplica> tempReplicas = new List<StatefulServiceReplica>();

                foreach (var replica in replicasResult)
                {
                    StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(statefulReplica == null, "Expected stateful replica");
                    tempReplicas.Add(statefulReplica);
                }

                List<StatefulServiceReplica> targetReplicas = null;

                if (this.quorumLossMode == QuorumLossMode.AllReplicas)
                {
                    targetReplicas = tempReplicas.Where(r => r.ReplicaRole == ReplicaRole.Primary || r.ReplicaRole == ReplicaRole.ActiveSecondary).ToList();
                }
                else if (this.quorumLossMode == QuorumLossMode.QuorumReplicas)
                {
                    targetReplicas = FaultAnalysisServiceUtility.GetReplicasForPartialLoss(state.OperationId, tempReplicas);
                }
                else
                {
                    throw FaultAnalysisServiceUtility.CreateException(StepBase.TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_INVALIDARG, Strings.StringResources.Error_UnsupportedQuorumLossMode);
                }

                if (targetReplicas == null)
                {
                    // This will cause the command to rollback and retry
                    throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
                }

                // One (node name, behavior name) pair per target replica. Only the names are recorded
                // here, so no UnreliableTransportBehavior object is built in this step.
                List<Tuple<string, string>> unreliableTransportInfoList = targetReplicas
                    .Select(r => new Tuple<string, string>(r.NodeName, this.CreateBehaviorName(r.NodeName)))
                    .ToList();

                state.StateProgress.Push(StepStateNames.PerformingActions);

                state.Info.PartitionId = partitionId;
                state.Info.ReplicaIds = targetReplicas.Select(r => r.Id).ToList();
                state.Info.UnreliableTransportInfo = unreliableTransportInfoList;

                return state;
            }
 /// <summary>
 /// Creates the step; the client, state and timeouts go to the base constructor and the
 /// partition selector and quorum loss mode are kept on this instance.
 /// </summary>
 /// <param name="fabricClient">Client forwarded to the base step.</param>
 /// <param name="state">Quorum loss command state forwarded to the base step.</param>
 /// <param name="requestTimeout">Request timeout forwarded to the base step.</param>
 /// <param name="operationTimeout">Operation timeout forwarded to the base step.</param>
 /// <param name="partitionSelector">Selector stored in the partitionSelector field.</param>
 /// <param name="quorumLossMode">Mode stored in the quorumLossMode field.</param>
 public LookingUpState(
     FabricClient fabricClient,
     InvokeQuorumLossState state,
     TimeSpan requestTimeout,
     TimeSpan operationTimeout,
     PartitionSelector partitionSelector,
     QuorumLossMode quorumLossMode)
     : base(fabricClient, state, requestTimeout, operationTimeout)
 {
     this.quorumLossMode    = quorumLossMode;
     this.partitionSelector = partitionSelector;
 }
        /// <summary>
        /// Builds the action object for a stored command. When the command is still at IntentSaved,
        /// its progress is first advanced to LookingUpState and persisted.
        /// </summary>
        /// <param name="actionType">The kind of command to construct.</param>
        /// <param name="actionStateBase">The stored state; its concrete type must match <paramref name="actionType"/>.</param>
        /// <returns>The constructed action, or null (after an informational trace) for an unknown action type.</returns>
        /// <exception cref="InvalidCastException">
        /// <paramref name="actionStateBase"/> is not of the state type that belongs to <paramref name="actionType"/>.
        /// </exception>
        private async Task<FabricTestAction> ConstructActionAsync(ActionType actionType, ActionStateBase actionStateBase)
        {
            switch (actionType)
            {
                case ActionType.InvokeDataLoss:
                {
                    InvokeDataLossState dataLossState = (InvokeDataLossState)actionStateBase;
                    await this.MoveFromIntentSavedToLookingUpStateAsync(dataLossState);

                    return new InvokeDataLossAction(
                        this.stateManager,
                        this.Partition,
                        dataLossState,
                        dataLossState.Info.PartitionSelector,
                        dataLossState.Info.DataLossMode,
                        this.dataLossCheckWaitDurationInSeconds,
                        this.dataLossCheckPollIntervalInSeconds,
                        this.replicaDropWaitDurationInSeconds,
                        this.requestTimeout,
                        this.operationTimeout);
                }

                case ActionType.InvokeQuorumLoss:
                {
                    InvokeQuorumLossState quorumLossState = (InvokeQuorumLossState)actionStateBase;
                    await this.MoveFromIntentSavedToLookingUpStateAsync(quorumLossState);

                    // This is the case for resuming an action after a failover
                    return new InvokeQuorumLossAction(this.stateManager, this.Partition, quorumLossState, quorumLossState.Info.PartitionSelector, quorumLossState.Info.QuorumLossMode, quorumLossState.Info.QuorumLossDuration, this.requestTimeout, this.operationTimeout);
                }

                case ActionType.RestartPartition:
                {
                    RestartPartitionState restartPartitionState = (RestartPartitionState)actionStateBase;
                    await this.MoveFromIntentSavedToLookingUpStateAsync(restartPartitionState);

                    // This is the case for resuming an action after a failover
                    return new RestartPartitionAction(this.stateManager, this.Partition, restartPartitionState, restartPartitionState.Info.PartitionSelector, restartPartitionState.Info.RestartPartitionMode, this.requestTimeout, this.operationTimeout);
                }

                case ActionType.TestStuck:
                {
                    StuckState stuckState = (StuckState)actionStateBase;
                    await this.MoveFromIntentSavedToLookingUpStateAsync(stuckState);

                    return new StuckAction(this.stateManager, this.Partition, stuckState, this.requestTimeout, this.operationTimeout);
                }

                case ActionType.TestRetryStep:
                {
                    TestRetryStepState retryStepState = (TestRetryStepState)actionStateBase;
                    await this.MoveFromIntentSavedToLookingUpStateAsync(retryStepState);

                    return new TestRetryStepAction(this.stateManager, this.Partition, retryStepState, this.requestTimeout, this.operationTimeout);
                }

                case ActionType.StartNode:
                {
                    NodeCommandState startNodeState = (NodeCommandState)actionStateBase;

                    // The stopped node table is attached before the state is persisted.
                    startNodeState.StoppedNodeTable = this.stoppedNodeTable;
                    await this.MoveFromIntentSavedToLookingUpStateAsync(startNodeState);

                    return new StartNodeFromFASAction(this.stateManager, this.Partition, startNodeState, this.stoppedNodeTable, this.requestTimeout, this.operationTimeout);
                }

                case ActionType.StopNode:
                {
                    NodeCommandState stopNodeState = (NodeCommandState)actionStateBase;

                    // The stopped node table is attached before the state is persisted.
                    stopNodeState.StoppedNodeTable = this.stoppedNodeTable;
                    await this.MoveFromIntentSavedToLookingUpStateAsync(stopNodeState);

                    return new StopNodeFromFASAction(this.stateManager, this.Partition, stopNodeState, this.stoppedNodeTable, this.requestTimeout, this.operationTimeout);
                }

                default:
                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Unknown actionType");
                    return null;
            }
        }

        /// <summary>
        /// Pushes LookingUpState onto the state's progress stack and persists the state, but only when
        /// the current top of the stack is IntentSaved; otherwise does nothing.
        /// </summary>
        /// <param name="actionState">The command state to advance.</param>
        private async Task MoveFromIntentSavedToLookingUpStateAsync(ActionStateBase actionState)
        {
            if (actionState.StateProgress.Peek() != StepStateNames.IntentSaved)
            {
                return;
            }

            actionState.StateProgress.Push(StepStateNames.LookingUpState);
            await this.actionStore.UpdateActionStateAsync(actionState);

            TestabilityTrace.TraceSource.WriteInfo(TraceType, "action state has been updated");
        }