Пример #1
0
            public static async Task RemoveUnreliableTransportAsync(ActionStateBase state, FabricClient fabricClient, TimeSpan requestTimeout, TimeSpan operationTimeout, CancellationToken cancellationToken)
            {
                RestartPartitionState restartPartitionState = Convert(state);

                if (restartPartitionState.Info.UnreliableTransportInfo != null)
                {
                    string behaviorName = restartPartitionState.Info.UnreliableTransportInfo.First().Item2;
                    string failoverManagerPrimaryNodeName = restartPartitionState.Info.NodeName;
                    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - Enter Cleanup for '{1}', fmPrimaryNodeName={2}, behavior name={3}", state.OperationId, behaviorName, failoverManagerPrimaryNodeName, behaviorName);

                    await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => fabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                            failoverManagerPrimaryNodeName,
                            behaviorName,
                            requestTimeout,
                            cancellationToken),
                        FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                        operationTimeout,
                        cancellationToken).ConfigureAwait(false);

                    // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                    await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);
                }
            }
Пример #2
0
        public static StepBase GetStep(
            StepStateNames stateName,
            FabricClient fabricClient,
            ActionStateBase actionState,
            RestartPartitionAction action,
            TimeSpan requestTimeout,
            TimeSpan operationTimeout,
            CancellationToken cancellationToken)
        {
            StepBase step = null;
            RestartPartitionState restartPartitionState = Convert(actionState);

            if (stateName == StepStateNames.LookingUpState)
            {
                step = new LookingUpStep(fabricClient, restartPartitionState, requestTimeout, operationTimeout, action.PartitionSelector, action.RestartPartitionMode);
            }
            else if (stateName == StepStateNames.PerformingActions)
            {
                step = new RestartingSelectedReplicas(fabricClient, restartPartitionState, requestTimeout, operationTimeout, action.RestartPartitionMode);
            }
            else if (stateName == StepStateNames.CompletedSuccessfully)
            {
                // done - but then this method should not have been called
                ReleaseAssert.Failfast("GetStep() should not have been called when the state nme is CompletedSuccessfully");
            }
            else
            {
                ReleaseAssert.Failfast(string.Format(CultureInfo.InvariantCulture, "Unexpected state name={0}", stateName));
            }

            return(step);
        }
Пример #3
0
        public static RestartPartitionState Convert(ActionStateBase actionState)
        {
            RestartPartitionState restartPartitionState = actionState as RestartPartitionState;

            if (restartPartitionState == null)
            {
                throw new InvalidCastException("State object could not be converted");
            }

            return(restartPartitionState);
        }
        public async Task <PartitionRestartProgress> GetRestartPartitionProgressAsync(
            Guid operationId,
            TimeSpan timeout,
            CancellationToken cancellationToken)
        {
            this.ThrowIfNotReady();
            PartitionRestartProgress progress = null;

            try
            {
                TestabilityTrace.TraceSource.WriteInfo(TraceType, "GetRestartPartitionProgressAsync calling message processor");
                ActionStateBase actionState = await this.MessageProcessor.ProcessGetProgressAsync(operationId, timeout, cancellationToken);

                StepStateNames stateName = actionState.StateProgress.Peek();

                TestCommandProgressState state = FaultAnalysisServiceUtility.ConvertState(actionState, TraceType);
                RestartPartitionState    restartPartitionState = actionState as RestartPartitionState;
                TestabilityTrace.TraceSource.WriteInfo(
                    TraceType,
                    "RestartPartition - serviceName={0}, partitionId={1}",
                    restartPartitionState.Info.PartitionSelector.ServiceName.ToString(),
                    restartPartitionState.Info.PartitionId);

                var selectedPartition = new SelectedPartition
                {
                    ServiceName = restartPartitionState.Info.PartitionSelector.ServiceName,
                    PartitionId = restartPartitionState.Info.PartitionId
                };

                PartitionRestartResult result = new PartitionRestartResult(selectedPartition, actionState.ErrorCausingRollback);

                progress = new PartitionRestartProgress(state, result);
                TestabilityTrace.TraceSource.WriteInfo(
                    TraceType,
                    "{0} - {1} progress - {2}, Exception - {3}",
                    operationId,
                    ActionType.RestartPartition,
                    progress.Result != null ? progress.Result.SelectedPartition.ToString() : FASConstants.UnavailableMessage,
                    (progress.Result != null && progress.Result.Exception != null) ? progress.Result.Exception.ToString() : FASConstants.UnavailableMessage);
            }
            catch (Exception e)
            {
                TestabilityTrace.TraceSource.WriteWarning(TraceType, "{0} - Caught {1}", operationId, e.ToString());
                FaultAnalysisServiceUtility.ThrowTransientExceptionIfRetryable(e);

                throw;
            }

            return(progress);
        }
        // Use this method signature for now until the actual client interface is decided
        public async Task ProcessRestartPartitionCommandAsync(Guid operationId, PartitionSelector partitionSelector, RestartPartitionMode restartPartitionMode, TimeSpan timeout, ServiceInternalFaultInfo serviceInternalFaultInfo)
        {
            ThrowIfRestartPartitionModeInvalid(restartPartitionMode);

            RestartPartitionState actionState = new RestartPartitionState(operationId, serviceInternalFaultInfo, partitionSelector, restartPartitionMode);

            try
            {
                // After this call finishes the intent has been persisted
                await this.actionStore.InitializeNewActionAsync(actionState, timeout);

                this.Enqueue(actionState);
            }
            catch (Exception e)
            {
                TestabilityTrace.TraceSource.WriteWarning(TraceType, "{0} - Exception {1}", operationId, e);
                throw;
            }
        }
Пример #6
0
        private ActionStateBase ReadData(byte[] bytes)
        {
            ActionStateBase result = null;

            using (BinaryReader br = new BinaryReader(new MemoryStream(bytes)))
            {
                // The first 4 bytes are the command type
                ActionType a = ActionStateBase.ReadCommandType(br);

                if (a == ActionType.InvokeDataLoss)
                {
                    result = InvokeDataLossState.FromBytes(br);
                }
                else if (a == ActionType.InvokeQuorumLoss)
                {
                    result = InvokeQuorumLossState.FromBytes(br);
                }
                else if (a == ActionType.RestartPartition)
                {
                    result = RestartPartitionState.FromBytes(br);
                }
                else if (a == ActionType.TestStuck)
                {
                    result = StuckState.FromBytes(br);
                }
                else if (a == ActionType.TestRetryStep)
                {
                    result = TestRetryStepState.FromBytes(br);
                }
                else if (a == ActionType.StartNode)
                {
                    result = NodeCommandState.FromBytes(br, a);
                }
                else if (a == ActionType.StopNode)
                {
                    result = NodeCommandState.FromBytes(br, a);
                }
            }

            return(result);
        }
Пример #7
0
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "Inside CollectingState, service={0}", this.partitionSelector.ServiceName);
                RestartPartitionState state = Convert(this.State);

                // Get service info and validate if the parameters are valid
                ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        this.partitionSelector.ServiceName,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                if (result.Kind != ServiceDescriptionKind.Stateful && this.restartPartitionMode == RestartPartitionMode.OnlyActiveSecondaries)
                {
                    // The message in the first arg is only for debugging, it is not returned to the user.
                    string debugText = string.Format(CultureInfo.InvariantCulture, "RestartPartition: for stateless services only RestartPartitionMode.AllReplicasOrInstances is valid");
                    TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, debugText);
                    throw FaultAnalysisServiceUtility.CreateException(StepBase.TraceType, NativeTypes.FABRIC_ERROR_CODE.E_INVALIDARG, debugText);
                }

                bool hasPersistedState = false;

                if (result.Kind == ServiceDescriptionKind.Stateful)
                {
                    StatefulServiceDescription statefulDescription = result as StatefulServiceDescription;
                    ReleaseAssert.AssertIf(statefulDescription == null, "Stateful service description is not WinFabricStatefulServiceDescription");
                    hasPersistedState = statefulDescription.HasPersistedState;
                }

                SelectedPartition targetPartition = await FaultAnalysisServiceUtility.GetSelectedPartitionStateAsync(
                    this.FabricClient,
                    this.partitionSelector,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                Guid partitionId = targetPartition.PartitionId;

                // get replicas for target
                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // get replicas for fm in order to get the primary
                ServiceReplicaList failoverManagersReplicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        FASConstants.FmPartitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                string failoverManagerPrimaryNodeName = string.Empty;
                var    readyFMReplicas = failoverManagersReplicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();

                foreach (var replica in readyFMReplicas)
                {
                    StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(statefulReplica == null, "FM Replica is not a stateful replica");
                    if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                    {
                        failoverManagerPrimaryNodeName = replica.NodeName;
                    }
                }

                if (string.IsNullOrEmpty(failoverManagerPrimaryNodeName))
                {
                    throw new FabricException(StringHelper.Format(StringResources.Error_PartitionPrimaryNotReady, "FailoverManager"), FabricErrorCode.NotReady);
                }

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - FM primary is at node={1}", this.State.OperationId, failoverManagerPrimaryNodeName);
                string behaviorName = RestartingSelectedReplicas.UTBehaviorPrefixName + "_" + this.State.OperationId;
                List <Tuple <string, string> > unreliableTransportInfo = new List <Tuple <string, string> >();

                unreliableTransportInfo.Add(new Tuple <string, string>(failoverManagerPrimaryNodeName, behaviorName));

                state.StateProgress.Push(StepStateNames.PerformingActions);
                state.Info.PartitionId             = partitionId;
                state.Info.NodeName                = failoverManagerPrimaryNodeName;
                state.Info.HasPersistedState       = hasPersistedState;
                state.Info.UnreliableTransportInfo = unreliableTransportInfo;

                return(state);
            }
Пример #8
0
 public LookingUpStep(FabricClient fabricClient, RestartPartitionState state, TimeSpan requestTimeout, TimeSpan operationTimeout, PartitionSelector partitionSelector, RestartPartitionMode restartPartitionMode)
     : base(fabricClient, state, requestTimeout, operationTimeout)
 {
     this.partitionSelector    = partitionSelector;
     this.restartPartitionMode = restartPartitionMode;
 }
Пример #9
0
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                RestartPartitionState state = Convert(this.State);

                Guid   partitionId       = state.Info.PartitionId;
                bool   hasPersistedState = state.Info.HasPersistedState;
                string failoverManagerPrimaryNodeName = state.Info.NodeName;
                string behaviorName = state.Info.UnreliableTransportInfo.First().Item2;

                System.Fabric.Common.UnreliableTransportBehavior behavior = new System.Fabric.Common.UnreliableTransportBehavior("*", "DoReconfiguration");
                behavior.AddFilterForPartitionId(partitionId);

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                        failoverManagerPrimaryNodeName,
                        behaviorName,
                        behavior,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - applied UT on partitionId {1}, node={2}", this.State.OperationId, partitionId, failoverManagerPrimaryNodeName);
                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                var stableReplicasToRestart = replicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();

                foreach (var replica in stableReplicasToRestart)
                {
                    if (this.restartPartitionMode == RestartPartitionMode.OnlyActiveSecondaries)
                    {
                        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                        ReleaseAssert.AssertIf(statefulReplica == null, "Stateful service replica is not StatefulServiceReplica");
                        if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                        {
                            continue;
                        }
                    }

                    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - restarting replica partition={1}, node={2}, replica id={3}", this.State.OperationId, partitionId, replica.NodeName, replica.Id);
                    if (hasPersistedState)
                    {
                        await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => this.FabricClient.ServiceManager.RestartReplicaAsync(
                                replica.NodeName,
                                partitionId,
                                replica.Id,
                                this.RequestTimeout,
                                cancellationToken),
                            FabricClientRetryErrors.RestartReplicaErrors.Value,
                            this.OperationTimeout,
                            cancellationToken).ConfigureAwait(false);
                    }
                    else
                    {
                        await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => this.FabricClient.ServiceManager.RemoveReplicaAsync(
                                replica.NodeName,
                                partitionId,
                                replica.Id,
                                this.RequestTimeout,
                                cancellationToken),
                            FabricClientRetryErrors.RemoveReplicaErrors.Value,
                            this.OperationTimeout,
                            cancellationToken).ConfigureAwait(false);
                    }
                }

                ActionTest.PerformInternalServiceFaultIfRequested(this.State.OperationId, serviceInternalFaultInfo, this.State, cancellationToken, true);

                await RemoveUnreliableTransportAsync(this.State, this.FabricClient, this.RequestTimeout, this.OperationTimeout, cancellationToken);

                state.StateProgress.Push(StepStateNames.CompletedSuccessfully);

                return(state);
            }
Пример #10
0
 public RestartingSelectedReplicas(FabricClient fabricClient, RestartPartitionState state, TimeSpan requestTimeout, TimeSpan operationTimeout, RestartPartitionMode restartPartitionMode)
     : base(fabricClient, state, requestTimeout, operationTimeout)
 {
     this.restartPartitionMode = restartPartitionMode;
 }
        private async Task <FabricTestAction> ConstructActionAsync(ActionType actionType, ActionStateBase actionStateBase)
        {
            FabricTestAction action = null;

            if (actionType == ActionType.InvokeDataLoss)
            {
                InvokeDataLossState actionState = actionStateBase as InvokeDataLossState;

                StepStateNames currentState = actionState.StateProgress.Peek();
                if (currentState == StepStateNames.IntentSaved)
                {
                    actionState.StateProgress.Push(StepStateNames.LookingUpState);
                    await this.actionStore.UpdateActionStateAsync(actionState);

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "action state has been updated");
                }

                action = new InvokeDataLossAction(
                    this.stateManager,
                    this.Partition,
                    actionState,
                    actionState.Info.PartitionSelector,
                    actionState.Info.DataLossMode,
                    this.dataLossCheckWaitDurationInSeconds,
                    this.dataLossCheckPollIntervalInSeconds,
                    this.replicaDropWaitDurationInSeconds,
                    this.requestTimeout,
                    this.operationTimeout);
            }
            else if (actionType == ActionType.InvokeQuorumLoss)
            {
                InvokeQuorumLossState actionState = actionStateBase as InvokeQuorumLossState;

                StepStateNames currentState = actionState.StateProgress.Peek();
                if (currentState == StepStateNames.IntentSaved)
                {
                    actionState.StateProgress.Push(StepStateNames.LookingUpState);
                    await this.actionStore.UpdateActionStateAsync(actionState);

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "action state has been updated");
                }

                // This is the case for resuming an action after a failover
                action = new InvokeQuorumLossAction(this.stateManager, this.Partition, actionState, actionState.Info.PartitionSelector, actionState.Info.QuorumLossMode, actionState.Info.QuorumLossDuration, this.requestTimeout, this.operationTimeout);
            }
            else if (actionType == ActionType.RestartPartition)
            {
                RestartPartitionState actionState = actionStateBase as RestartPartitionState;

                StepStateNames currentState = actionState.StateProgress.Peek();
                if (currentState == StepStateNames.IntentSaved)
                {
                    actionState.StateProgress.Push(StepStateNames.LookingUpState);
                    await this.actionStore.UpdateActionStateAsync(actionState);

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "action state has been updated");
                }

                // This is the case for resuming an action after a failover
                action = new RestartPartitionAction(this.stateManager, this.Partition, actionState, actionState.Info.PartitionSelector, actionState.Info.RestartPartitionMode, this.requestTimeout, this.operationTimeout);
            }
            else if (actionType == ActionType.TestStuck)
            {
                StuckState actionState = actionStateBase as StuckState;

                StepStateNames currentState = actionState.StateProgress.Peek();
                if (currentState == StepStateNames.IntentSaved)
                {
                    actionState.StateProgress.Push(StepStateNames.LookingUpState);
                    await this.actionStore.UpdateActionStateAsync(actionState);

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "action state has been updated");
                }

                action = new StuckAction(this.stateManager, this.Partition, actionState, this.requestTimeout, this.operationTimeout);
            }
            else if (actionType == ActionType.TestRetryStep)
            {
                TestRetryStepState actionState = actionStateBase as TestRetryStepState;

                StepStateNames currentState = actionState.StateProgress.Peek();
                if (currentState == StepStateNames.IntentSaved)
                {
                    actionState.StateProgress.Push(StepStateNames.LookingUpState);
                    await this.actionStore.UpdateActionStateAsync(actionState);

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "action state has been updated");
                }

                action = new TestRetryStepAction(this.stateManager, this.Partition, actionState, this.requestTimeout, this.operationTimeout);
            }
            else if (actionType == ActionType.StartNode)
            {
                NodeCommandState actionState = actionStateBase as NodeCommandState;
                actionState.StoppedNodeTable = this.stoppedNodeTable;

                StepStateNames currentState = actionState.StateProgress.Peek();
                if (currentState == StepStateNames.IntentSaved)
                {
                    actionState.StateProgress.Push(StepStateNames.LookingUpState);
                    await this.actionStore.UpdateActionStateAsync(actionState);

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "action state has been updated");
                }

                action = new StartNodeFromFASAction(this.stateManager, this.Partition, actionState, this.stoppedNodeTable, this.requestTimeout, this.operationTimeout);
            }
            else if (actionType == ActionType.StopNode)
            {
                NodeCommandState actionState = actionStateBase as NodeCommandState;
                actionState.StoppedNodeTable = this.stoppedNodeTable;

                StepStateNames currentState = actionState.StateProgress.Peek();
                if (currentState == StepStateNames.IntentSaved)
                {
                    actionState.StateProgress.Push(StepStateNames.LookingUpState);
                    await this.actionStore.UpdateActionStateAsync(actionState);

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "action state has been updated");
                }

                action = new StopNodeFromFASAction(this.stateManager, this.Partition, actionState, this.stoppedNodeTable, this.requestTimeout, this.operationTimeout);
            }
            else
            {
                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Unknown actionType");
            }

            return(action);
        }