/// <summary>
        /// Resolves a replica selector into a concrete replica of a concrete partition.
        /// </summary>
        /// <param name="fabricClient">Client whose query manager is used to list the partition's replicas.</param>
        /// <param name="replicaSelector">Selector to resolve; must not be null.</param>
        /// <param name="requestTimeout">Timeout handed to each individual fabric request.</param>
        /// <param name="operationTimeout">Timeout handed to the partition lookup and to the retry helper.</param>
        /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
        /// <returns>
        /// A tuple of the <see cref="SelectedReplica"/> (replica id plus selected partition) and the
        /// <see cref="Replica"/> the selector picked.
        /// </returns>
        public static async Task<Tuple<SelectedReplica, Replica>> GetSelectedReplicaAsync(
            FabricClient fabricClient,
            ReplicaSelector replicaSelector,
            TimeSpan requestTimeout,
            TimeSpan operationTimeout,
            CancellationToken cancellationToken)
        {
            ThrowIf.Null(replicaSelector, "ReplicaSelector");

            // First pin down which partition the selector's partition part refers to.
            var chosenPartition = await FaultAnalysisServiceUtility.GetSelectedPartitionStateAsync(
                fabricClient,
                replicaSelector.PartitionSelector,
                requestTimeout,
                operationTimeout,
                cancellationToken).ConfigureAwait(false);

            Guid partitionId = chosenPartition.PartitionId;

            // Then list that partition's replicas; 0 is passed as the replica-id filter argument
            // (presumably "no filter" - confirm against the GetReplicaListAsync documentation).
            var replicaList = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                () => fabricClient.QueryManager.GetReplicaListAsync(partitionId, 0, requestTimeout, cancellationToken),
                operationTimeout,
                cancellationToken).ConfigureAwait(false);

            // Let the selector choose among the listed replicas; the final argument asks it to skip invalid replicas.
            Replica chosenReplica = replicaSelector.GetSelectedReplica(replicaList.ToArray(), new Random(), true);
            SelectedReplica selection = new SelectedReplica(chosenReplica.Id, chosenPartition);

            return Tuple.Create(selection, chosenReplica);
        }
Esempio n. 2
0
        /// <summary>
        /// Asks the fault manager to restart a node, identified through a selector for the primary
        /// replica of a randomly chosen partition of the given service.
        /// </summary>
        /// <param name="serviceName">Uri of the service in the format fabric:/[application]/[service name]</param>
        /// <returns>Task that completes when the awaited restart call completes.</returns>
        public async Task RestartNodeAsync(Uri serviceName)
        {
            // Selector chain: random partition of the service -> primary replica of that partition.
            ReplicaSelector primarySelector = ReplicaSelector.PrimaryOf(PartitionSelector.RandomOf(serviceName));

            // _client is used as-is; any connection/security setup has to happen where it is created.
            await _client.FaultManager
                .RestartNodeAsync(primarySelector, CompletionMode.Verify)
                .ConfigureAwait(false);
        }
 /// <summary>
 /// Creates a restart-deployed-code-package action for the given application and replica selector.
 /// The completion mode is initialised to <c>CompletionMode.Verify</c>.
 /// </summary>
 /// <param name="applicationName">Value stored in <c>ApplicationName</c>.</param>
 /// <param name="replicaSelector">Value stored in <c>ReplicaSelector</c>.</param>
 public RestartDeployedCodePackageAction(Uri applicationName, ReplicaSelector replicaSelector)
 {
     this.ApplicationName = applicationName;
     this.ReplicaSelector = replicaSelector;
     this.CompletionMode = CompletionMode.Verify;
 }
        /// <summary>
        /// Resolves <paramref name="replicaSelector"/> to a concrete replica and asks the service manager
        /// to restart that replica.
        /// </summary>
        /// <param name="fabricClient">Client used for the replica lookup and for the restart request.</param>
        /// <param name="replicaSelector">Selector describing the replica to restart; must not be null.</param>
        /// <param name="completionMode">Kept for signature compatibility; this method does not read it.</param>
        /// <param name="requestTimeout">Timeout handed to each individual fabric request.</param>
        /// <param name="operationTimeout">Overall time budget shared by the lookup and the restart, retries included.</param>
        /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
        /// <returns>A <see cref="RestartReplicaResult"/> wrapping the replica that was selected.</returns>
        /// <exception cref="InvalidOperationException">
        /// The lookup produced no selection or no replica.
        /// </exception>
        public static async Task <RestartReplicaResult> RestartReplicaAsync(
            FabricClient fabricClient,
            ReplicaSelector replicaSelector,
            CompletionMode completionMode,
            TimeSpan requestTimeout,
            TimeSpan operationTimeout,
            CancellationToken cancellationToken)
        {
            // A single helper tracks the overall budget so that the lookup and the restart together
            // cannot consume more than operationTimeout (previously each phase got the full amount).
            System.Fabric.Common.TimeoutHelper helper = new System.Fabric.Common.TimeoutHelper(operationTimeout);

            System.Fabric.Common.ThrowIf.Null(replicaSelector, "ReplicaSelector");

            Tuple <SelectedReplica, Replica> replicaStateActionResult = await FaultAnalysisServiceUtility.GetSelectedReplicaAsync(
                fabricClient,
                replicaSelector,
                requestTimeout,
                helper.GetRemainingTime(),
                cancellationToken).ConfigureAwait(false);

            // Guard the tuple itself as well as its first item, so a missing result surfaces as the
            // documented InvalidOperationException instead of a NullReferenceException.
            if (replicaStateActionResult == null)
            {
                throw new InvalidOperationException("replicaStateActionResult cannot be null");
            }

            SelectedReplica replicaSelectorResult = replicaStateActionResult.Item1;
            if (replicaSelectorResult == null)
            {
                throw new InvalidOperationException("replicaStateActionResult cannot be null");
            }

            Replica replicaStateResult = replicaStateActionResult.Item2;
            if (replicaStateResult == null)
            {
                throw new InvalidOperationException("replicaStateResult cannot be null");
            }

            Guid   partitionId = replicaSelectorResult.SelectedPartition.PartitionId;
            string nodeName    = replicaStateResult.NodeName;
            long   replicaId   = replicaStateResult.Id;

            // Refuse to issue a restart against an unset partition or replica id.
            ThrowIf.IsTrue(partitionId == Guid.Empty, "PartitionID");
            ThrowIf.IsTrue(replicaId == 0, "ReplicaID");

            await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                () => fabricClient.ServiceManager.RestartReplicaAsync(
                    nodeName,
                    partitionId,
                    replicaId,
                    requestTimeout,
                    cancellationToken),
                FabricClientRetryErrors.RestartReplicaErrors.Value,
                helper.GetRemainingTime(),
                cancellationToken).ConfigureAwait(false);

            return(new RestartReplicaResult(replicaSelectorResult));
        }
Esempio n. 5
0
        /// <summary>
        /// Placeholder: builds a selector for the primary replica of a random partition of
        /// <c>GlobalContext.ServiceName</c> and then reports success without issuing any fault.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        public async Task<bool> RestartGetSnowConditionsService()
        {
            PartitionSelector anyPartition = PartitionSelector.RandomOf(GlobalContext.ServiceName);
            ReplicaSelector primarySelector = ReplicaSelector.PrimaryOf(anyPartition);

            // Leftover experiments, kept for reference (selector is built above but not used yet):
            //PartitionSelector namedPartitionSelector = PartitionSelector.PartitionKeyOf(new Uri(GlobalContext.appName), "Partition1");
            //await fabricClient.ClusterManager.(new ServicePar); ; // stateful

            return true;
        }
 /// <summary>
 /// Forwards the restart-deployed-code-package request to the cluster connection.
 /// </summary>
 /// <param name="clusterConnection">Connection that performs the restart.</param>
 /// <param name="applicationName">Application name passed through to the connection.</param>
 /// <param name="replicaSelector">Replica selector passed through to the connection.</param>
 /// <returns>The task returned by the cluster connection.</returns>
 internal override Task<RestartDeployedCodePackageResult> InvokeCommandAsync(
     IClusterConnection clusterConnection,
     Uri applicationName,
     ReplicaSelector replicaSelector)
 {
     // Read the command settings in the same order the call consumes them.
     var timeoutSec = this.TimeoutSec;

     // Fall back to Verify when no completion mode was supplied on the command.
     var completionMode = this.CommandCompletionMode ?? CompletionMode.Verify;
     var cancellationToken = this.GetCancellationToken();

     return clusterConnection.RestartDeployedCodePackageAsync(
         replicaSelector,
         applicationName,
         timeoutSec,
         completionMode,
         cancellationToken);
 }
Esempio n. 7
0
        /// <summary>
        /// Builds a <see cref="RemoveReplicaAction"/> targeting the exact replica named by
        /// <paramref name="ragAction"/> (service uri, partition id and replica id).
        /// </summary>
        /// <param name="ragAction">Transition action supplying the service uri, partition id and replica id.</param>
        /// <returns>The remove-replica action for that replica.</returns>
        private FabricTestAction GetRemoveReplicaAction(ReplicaStateTransitionAction ragAction)
        {
            // The previous version formatted a "Generating Action" report string here and then
            // discarded it; that dead work has been removed.

            // Select the specific replica: partition by id, then replica by id within it.
            PartitionSelector partitionSelector = PartitionSelector.PartitionIdOf(ragAction.ServiceUri, ragAction.PartitionId);
            ReplicaSelector   replicaSelector   = ReplicaSelector.ReplicaIdOf(partitionSelector, ragAction.ReplicaId);

            RemoveReplicaAction removeReplicaAction = new RemoveReplicaAction(replicaSelector);

            return(removeReplicaAction);
        }
Esempio n. 8
0
 /// <summary>
 /// Restarts the selected replica through the cluster connection and returns its result.
 /// </summary>
 /// <param name="clusterConnection">
 /// Connection that performs the restart.
 /// </param>
 /// <param name="replicaSelector">
 /// Selector identifying the replica to restart.
 /// </param>
 /// <param name="completionMode">
 /// Completion mode passed through to the connection.
 /// </param>
 /// <param name="cancellationToken">
 /// Token passed through to the connection.
 /// </param>
 /// <returns>
 /// A <see cref="Task"/> producing the replica result of the restart.
 /// </returns>
 internal override async Task <ReplicaResult> InvokeCommandAsync(
     IClusterConnection clusterConnection,
     ReplicaSelector replicaSelector,
     CompletionMode completionMode,
     CancellationToken cancellationToken)
 {
     // Await the operation directly. The previous Task.FromResult(call.Result) blocked the
     // calling thread until completion and wrapped failures in an AggregateException.
     return await clusterConnection.RestartReplicaAsync(
         replicaSelector,
         this.TimeoutSec,
         completionMode,
         cancellationToken).ConfigureAwait(false);
 }
Esempio n. 9
0
        /// <summary>
        /// Runs the per-replica fault sequence against <paramref name="replicaSelector"/>:
        /// remove replica, restart replica (only when the service has persisted state), then
        /// restart the deployed code package. Each fault goes through InvokeAndValidateFaultAsync.
        /// </summary>
        /// <param name="replicaSelector">Selector for the replica or instance to fault.</param>
        /// <param name="replicaRole">Role label used only in the progress messages.</param>
        /// <param name="hasPersistedState">When false, the restart-replica step is skipped.</param>
        /// <param name="token">Token forwarded to every fault call.</param>
        private async Task TestReplicaFaultsAsync(
            ReplicaSelector replicaSelector,
            string replicaRole,
            bool hasPersistedState,
            CancellationToken token)
        {
            // 1. Remove the replica (third argument false = do not force removal).
            await this.InvokeAndValidateFaultAsync(
                StringHelper.Format("Removing replica state for {0}", replicaRole),
                () => this.FabricClient.FaultManager.RemoveReplicaAsync(
                    replicaSelector,
                    CompletionMode.Verify,
                    false,
                    this.failoverTestScenarioParameters.OperationTimeout,
                    token),
                token);

            // 2. Restart the replica - only attempted for services with persisted state.
            if (hasPersistedState)
            {
                await this.InvokeAndValidateFaultAsync(
                    StringHelper.Format("Restarting replica state for {0}", replicaRole),
                    () => this.FabricClient.FaultManager.RestartReplicaAsync(
                        replicaSelector,
                        CompletionMode.Verify,
                        this.failoverTestScenarioParameters.OperationTimeout,
                        token),
                    token);
            }

            // 3. Restart the code package hosting the replica.
            await this.InvokeAndValidateFaultAsync(
                StringHelper.Format("Restarting code package for {0} replica", replicaRole),
                () => this.FabricClient.FaultManager.RestartDeployedCodePackageAsync(
                    this.serviceDescription.ApplicationName,
                    replicaSelector,
                    CompletionMode.Verify,
                    this.failoverTestScenarioParameters.OperationTimeout,
                    token),
                token);
        }
Esempio n. 10
0
        /// <summary>
        /// Locates the primary replica of the single partition of the single service of the
        /// application whose type is <c>DefaultApplicationTypeName</c>, and issues a
        /// remove-replica fault against it without waiting for the fault to complete.
        /// </summary>
        /// <remarks>
        /// The lookups block on the query tasks (<c>.Result</c>), so query failures surface as
        /// <see cref="AggregateException"/>. The <c>Single</c> calls throw when there is not
        /// exactly one matching application, service, partition or primary replica.
        /// </remarks>
        private void KillPrimaryReplica()
        {
            // Kill the primary
            Application application =
                _fabricClient.QueryManager.GetApplicationListAsync()
                .Result.Single(a => a.ApplicationTypeName == DefaultApplicationTypeName);
            Service service =
                _fabricClient.QueryManager.GetServiceListAsync(application.ApplicationName).Result.Single();
            Partition partition =
                _fabricClient.QueryManager.GetPartitionListAsync(service.ServiceName).Result.Single();

            // OfType skips anything that is not a stateful replica; the previous "as" cast would
            // have produced null entries and a NullReferenceException inside the predicate.
            StatefulServiceReplica primaryReplica =
                _fabricClient.QueryManager.GetReplicaListAsync(partition.PartitionInformation.Id)
                .Result.OfType<StatefulServiceReplica>()
                .Single(statefulServiceReplica => statefulServiceReplica.ReplicaRole == ReplicaRole.Primary);

            LogHelper.Log("Killing the primary replica at node {0}", primaryReplica.NodeName);

            ReplicaSelector replicaSelector = ReplicaSelector.ReplicaIdOf(PartitionSelector.PartitionIdOf(service.ServiceName, partition.PartitionInformation.Id), primaryReplica.Id);

            // Still fire-and-forget (the method returns right away), but a failed fault is now
            // logged instead of being dropped as an unobserved task exception.
            _fabricClient.FaultManager.RemoveReplicaAsync(replicaSelector, CompletionMode.DoNotVerify, false)
                .ContinueWith(
                    faulted => LogHelper.Log("Removing the primary replica failed: {0}", faulted.Exception.ToString()),
                    System.Threading.Tasks.TaskContinuationOptions.OnlyOnFaulted);
        }
        /// <summary>
        /// Builds the replica selector a test expects for the given partition case and replica case.
        /// </summary>
        /// <param name="partitionCase">Partition case; resolved through ParitionSelectorTestHelper.GetExpectedPartitionSelector.</param>
        /// <param name="replicaCase">Replica case deciding which ReplicaSelector factory is used.</param>
        /// <returns>The expected selector, or <c>null</c> when <paramref name="replicaCase"/> matches no handled case.</returns>
        public static ReplicaSelector GetExpectedReplicaSelector(ParitionSelectorTestHelper.PartitionCase partitionCase, ReplicaCase replicaCase)
        {
            // The partition selector is always resolved first, even for unhandled replica cases.
            PartitionSelector expectedPartition = ParitionSelectorTestHelper.GetExpectedPartitionSelector(partitionCase);

            switch (replicaCase)
            {
                case ReplicaCase.ReplicaPrimary:
                    return ReplicaSelector.PrimaryOf(expectedPartition);

                case ReplicaCase.ReplicaRandomSecondary:
                    return ReplicaSelector.RandomSecondaryOf(expectedPartition);

                case ReplicaCase.ReplicaId:
                    return ReplicaSelector.ReplicaIdOf(expectedPartition, replicaInstance.Value);

                case ReplicaCase.ReplicaId_NoValue:
                    return ReplicaSelector.ReplicaIdOf(expectedPartition, 0);

                case ReplicaCase.ReplicaRandom:
                    return ReplicaSelector.RandomOf(expectedPartition);

                default:
                    return null;
            }
        }
Esempio n. 12
0
        /// <summary>
        /// This API supports the Service Fabric platform and is not meant to be called from your code
        /// </summary>
        /// <remarks>
        /// Flow, as implemented below: fetch the service description, validate the service, resolve the
        /// configured partition selector to one partition id, then repeat a fixed sequence of faults
        /// against that partition until <c>TimeToRun</c> has elapsed or cancellation is requested.
        /// Stateful services get per-replica faults on the primary and a random secondary, partition
        /// restarts, and primary/secondary moves; stateless services get per-instance faults and a
        /// restart of all instances.
        /// </remarks>
        /// <param name="token">This API supports the Service Fabric platform and is not meant to be called from your code</param>
        /// <returns>A task representing the scenario run.</returns>
        protected override async Task OnExecuteAsync(CancellationToken token)
        {
            // Fetch (with retry) the description of the service named by the configured partition selector.
            this.serviceDescription = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                () => this.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                    this.failoverTestScenarioParameters.PartitionSelector.ServiceName,
                    this.failoverTestScenarioParameters.RequestTimeout,
                    token),
                this.failoverTestScenarioParameters.OperationTimeout,
                token).ConfigureAwait(false);

            // Stays false for stateless services; for stateful ones it mirrors HasPersistedState.
            bool hasPersistedState = false;

            if (this.serviceDescription.IsStateful())
            {
                StatefulServiceDescription statefulDescription = this.serviceDescription as StatefulServiceDescription;
                ReleaseAssert.AssertIf(statefulDescription == null, "Stateful service description is not WinFabricStatefulServiceDescription");
                hasPersistedState = statefulDescription.HasPersistedState;
            }

            Log.WriteInfo(TraceType, "Validating Service health and availability");
            await this.FabricClient.TestManager.ValidateServiceAsync(
                this.failoverTestScenarioParameters.PartitionSelector.ServiceName,
                this.failoverTestScenarioParameters.MaxServiceStabilizationTimeout,
                token);

            // Resolve the (possibly non-specific) partition selector to one concrete partition id,
            // so that every fault below targets the same partition.
            Log.WriteInfo(TraceType, "Getting Selected Partition");
            var getPartitionStateAction = new GetSelectedPartitionStateAction(this.failoverTestScenarioParameters.PartitionSelector)
            {
                RequestTimeout = this.failoverTestScenarioParameters.RequestTimeout,
                ActionTimeout  = this.failoverTestScenarioParameters.OperationTimeout
            };

            await this.TestContext.ActionExecutor.RunAsync(getPartitionStateAction, token);

            Guid selectedPartitionId = getPartitionStateAction.Result.PartitionId;

            Log.WriteInfo(TraceType, "Running test for partition {0}", selectedPartitionId);

            this.ReportProgress("Selected partition {0} for testing failover", selectedPartitionId);

            PartitionSelector selectedPartition = PartitionSelector.PartitionIdOf(this.failoverTestScenarioParameters.PartitionSelector.ServiceName, selectedPartitionId);

            // Repeat the whole fault sequence while run time remains and cancellation has not been requested.
            // The condition is only checked between iterations, so an iteration in progress runs to the end
            // unless one of its awaited calls fails or observes the token.
            while (this.failoverTestScenarioParameters.TimeToRun - this.GetElapsedTime() > TimeSpan.Zero && !token.IsCancellationRequested)
            {
                if (this.serviceDescription.IsStateful())
                {
                    ReplicaSelector primaryReplicaSelector   = ReplicaSelector.PrimaryOf(selectedPartition);
                    ReplicaSelector secondaryReplicaSelector = ReplicaSelector.RandomSecondaryOf(selectedPartition);

                    // Make Primary go through RemoveReplica, RestartReplica and RestartCodePackage

                    await this.TestReplicaFaultsAsync(primaryReplicaSelector, "Primary", hasPersistedState, token);

                    // Make Secondary go through RemoveReplica, RestartReplica and RestartCodePackage

                    await this.TestReplicaFaultsAsync(secondaryReplicaSelector, "Secondary", hasPersistedState, token);
                }
                else
                {
                    ReplicaSelector randomInstanceSelector = ReplicaSelector.RandomOf(selectedPartition);

                    // Make Stateless Instance go through RemoveReplica, RestartReplica and RestartCodePackage
                    // (hasPersistedState is false here, so TestReplicaFaultsAsync is asked without persisted state)

                    await this.TestReplicaFaultsAsync(randomInstanceSelector, "Stateless Instance", hasPersistedState, token);
                }

                if (this.serviceDescription.IsStateful())
                {
                    // Restart all secondary replicas and make sure the replica set recovers

                    await this.InvokeAndValidateFaultAsync(
                        "Restarting all the secondary replicas",
                        () =>
                    {
                        // Warning 618 (use of an obsolete member) is suppressed around RestartPartitionAsync.
#pragma warning disable 618
                        return(this.FabricClient.TestManager.RestartPartitionAsync(
                                   selectedPartition,
                                   RestartPartitionMode.OnlyActiveSecondaries,
                                   this.failoverTestScenarioParameters.OperationTimeout,
                                   token));

#pragma warning restore 618
                    }, token);

                    // Restart all replicas if service is persisted

                    if (hasPersistedState)
                    {
                        await this.InvokeAndValidateFaultAsync(
                            "Restarting all replicas including Primary",
                            () =>
                        {
#pragma warning disable 618
                            return(this.FabricClient.TestManager.RestartPartitionAsync(
                                       selectedPartition,
                                       RestartPartitionMode.AllReplicasOrInstances,
                                       this.failoverTestScenarioParameters.OperationTimeout,
                                       token));

#pragma warning restore 618
                        }, token);
                    }

                    // Induce a primary move (once per loop iteration); an empty node name is passed as the target

                    await this.InvokeAndValidateFaultAsync(
                        "Move Primary to a different node",
                        () =>
                    {
                        return(this.FabricClient.FaultManager.MovePrimaryAsync(
                                   string.Empty,
                                   selectedPartition,
                                   true,
                                   this.failoverTestScenarioParameters.OperationTimeout,
                                   token));
                    }, token);

                    // Induce a secondary move (once per loop iteration); both node-name arguments are passed empty

                    await this.InvokeAndValidateFaultAsync(
                        "Move Secondary to a different node",
                        () =>
                    {
                        return(this.FabricClient.FaultManager.MoveSecondaryAsync(
                                   string.Empty,
                                   string.Empty,
                                   selectedPartition,
                                   true,
                                   this.failoverTestScenarioParameters.OperationTimeout,
                                   token));
                    }, token);
                }
                else
                {
                    // Restart all stateless instances

                    await this.InvokeAndValidateFaultAsync(
                        "Restarting all stateless instances for partition",
                        () =>
                    {
#pragma warning disable 618
                        return(this.FabricClient.TestManager.RestartPartitionAsync(
                                   selectedPartition,
                                   RestartPartitionMode.AllReplicasOrInstances,
                                   this.failoverTestScenarioParameters.OperationTimeout,
                                   token));

#pragma warning restore 618
                    }, token);
                }
            }
        }
        /// <summary>
        /// Builds a <see cref="ReplicaSelector"/> from a parameter-set name and the values supplied with it.
        /// </summary>
        /// <remarks>
        /// The partition part is chosen by the first matching substring of <paramref name="partitionSetName"/>:
        /// "PartitionId", "PartitionSingleton", "PartitionNamed", "PartitionUniformedInt"; a name that
        /// does not contain "Partition" at all selects a random partition. The replica part is chosen the
        /// same way: "ReplicaPrimary", "ReplicaRandomSecondary", "ReplicaId"; a name that does not contain
        /// "Replica" at all selects a random replica.
        /// </remarks>
        /// <param name="partitionSetName">Parameter-set name that is inspected for the substrings above.</param>
        /// <param name="partitionId">Partition id, used for the "PartitionId" form.</param>
        /// <param name="serviceName">Service the selectors refer to.</param>
        /// <param name="partitionKey">Named key, or the text of an integer key for "PartitionUniformedInt".</param>
        /// <param name="replicaOrInstanceId">Replica id for the "ReplicaId" form; 0 is used when null.</param>
        /// <returns>The replica selector described by the arguments.</returns>
        /// <exception cref="ArgumentException">
        /// The integer partition key does not parse, or no partition/replica form could be determined.
        /// </exception>
        internal static ReplicaSelector GetReplicaSelector(string partitionSetName, Guid partitionId, Uri serviceName, string partitionKey, long? replicaOrInstanceId)
        {
            // ---- partition part ----
            PartitionSelector chosenPartition = null;

            if (partitionSetName.Contains("PartitionId"))
            {
                chosenPartition = PartitionSelector.PartitionIdOf(serviceName, partitionId);
            }
            else if (partitionSetName.Contains("PartitionSingleton"))
            {
                chosenPartition = PartitionSelector.SingletonOf(serviceName);
            }
            else if (partitionSetName.Contains("PartitionNamed"))
            {
                chosenPartition = PartitionSelector.PartitionKeyOf(serviceName, partitionKey);
            }
            else if (partitionSetName.Contains("PartitionUniformedInt"))
            {
                long numericKey;
                bool parsed = long.TryParse(partitionKey, out numericKey);
                if (!parsed)
                {
                    throw new ArgumentException(StringResources.Error_InvalidPartitionKey);
                }

                chosenPartition = PartitionSelector.PartitionKeyOf(serviceName, numericKey);
            }
            else if (!partitionSetName.Contains("Partition"))
            {
                chosenPartition = PartitionSelector.RandomOf(serviceName);
            }

            if (chosenPartition == null)
            {
                throw new ArgumentException(StringResources.Error_CouldNotParsePartitionSelector);
            }

            // ---- replica part ----
            ReplicaSelector chosenReplica = null;

            if (partitionSetName.Contains("ReplicaPrimary"))
            {
                chosenReplica = ReplicaSelector.PrimaryOf(chosenPartition);
            }
            else if (partitionSetName.Contains("ReplicaRandomSecondary"))
            {
                chosenReplica = ReplicaSelector.RandomSecondaryOf(chosenPartition);
            }
            else if (partitionSetName.Contains("ReplicaId"))
            {
                chosenReplica = ReplicaSelector.ReplicaIdOf(chosenPartition, replicaOrInstanceId.GetValueOrDefault());
            }
            else if (!partitionSetName.Contains("Replica"))
            {
                chosenReplica = ReplicaSelector.RandomOf(chosenPartition);
            }

            if (chosenReplica == null)
            {
                throw new ArgumentException(StringResources.Error_CouldNotParseReplicaSelector);
            }

            return chosenReplica;
        }
Esempio n. 14
0
 /// <summary>
 /// Implemented by derived commands to issue the restart-deployed-code-package request.
 /// </summary>
 /// <param name="clusterConnection">Cluster connection the derived command should use.</param>
 /// <param name="uri">Uri argument for the request (the override visible in this file names it applicationName).</param>
 /// <param name="replicaSelector">Replica selector for the request.</param>
 /// <returns>A task producing the <see cref="RestartDeployedCodePackageResult"/>.</returns>
 internal abstract Task <RestartDeployedCodePackageResult> InvokeCommandAsync(
     IClusterConnection clusterConnection,
     Uri uri,
     ReplicaSelector replicaSelector);
Esempio n. 15
0
 /// <summary>
 /// Initialises a node-control request with an explicit completion mode.
 /// </summary>
 /// <param name="fabricClient">Passed to the base constructor.</param>
 /// <param name="replicaSelector">Value stored in <c>ReplicaSelector</c>.</param>
 /// <param name="completionMode">Value stored in <c>CompletionMode</c>.</param>
 /// <param name="timeout">Passed to the base constructor.</param>
 protected NodeControlRequest(
     IFabricClient fabricClient,
     ReplicaSelector replicaSelector,
     CompletionMode completionMode,
     TimeSpan timeout)
     : base(fabricClient, timeout)
 {
     this.ReplicaSelector = replicaSelector;
     this.CompletionMode = completionMode;
 }
Esempio n. 16
0
 /// <summary>
 /// Initialises a node-control request without an explicit completion mode;
 /// <c>CompletionMode.Invalid</c> is passed to the four-argument constructor.
 /// </summary>
 /// <param name="fabricClient">Forwarded to the four-argument constructor.</param>
 /// <param name="replicaSelector">Forwarded to the four-argument constructor, which stores it in <c>ReplicaSelector</c>.</param>
 /// <param name="timeout">Forwarded to the four-argument constructor.</param>
 protected NodeControlRequest(IFabricClient fabricClient, ReplicaSelector replicaSelector, TimeSpan timeout)
     : this(fabricClient, replicaSelector, CompletionMode.Invalid, timeout)
 {
     // Nothing to do here: the chained constructor already assigns ReplicaSelector,
     // so the duplicate assignment that used to live in this body was removed.
 }
 /// <summary>
 /// Fills the placeholders of an expected-output template: "###ReplicaID###" with the text of
 /// <c>replicaInstance</c> and "###ReplicaSelector###" with the text of <paramref name="replicaSelector"/>.
 /// </summary>
 /// <param name="output">Template text containing the placeholders.</param>
 /// <param name="replicaSelector">Selector whose string form replaces "###ReplicaSelector###".</param>
 /// <returns>The template with both placeholders substituted.</returns>
 public static string FormatOutput(string output, ReplicaSelector replicaSelector)
 {
     string withReplicaId = output.Replace("###ReplicaID###", replicaInstance.ToString());
     string withSelector = withReplicaId.Replace("###ReplicaSelector###", replicaSelector.ToString());
     return withSelector;
 }
Esempio n. 18
0
 /// <summary>
 /// Creates a restart-node action for the node identified through a replica selector.
 /// The completion mode is initialised to <c>CompletionMode.Verify</c>.
 /// </summary>
 /// <param name="replicaSelector">Value stored in <c>ReplicaSelector</c>.</param>
 /// <param name="createFabricDump">Value stored in <c>CreateFabricDump</c>.</param>
 public RestartNodeAction(
     ReplicaSelector replicaSelector,
     bool createFabricDump)
 {
     this.ReplicaSelector = replicaSelector;
     this.CreateFabricDump = createFabricDump;
     this.CompletionMode = CompletionMode.Verify;
 }
 /// <summary>
 /// Creates an action that carries the given replica selector.
 /// </summary>
 /// <param name="replicaSelector">Value stored in <c>ReplicaSelector</c>.</param>
 public GetSelectedReplicaStateAction(
     ReplicaSelector replicaSelector)
 {
     this.ReplicaSelector = replicaSelector;
 }
Esempio n. 20
0
 /// <summary>
 /// Creates a restart-node request; everything except the dump flag is handled by the base constructor.
 /// </summary>
 /// <param name="fabricClient">Forwarded to the base constructor.</param>
 /// <param name="replicaSelector">Forwarded to the base constructor.</param>
 /// <param name="createFabricDump">Value stored in the <c>createFabricDump</c> field.</param>
 /// <param name="completionMode">Forwarded to the base constructor.</param>
 /// <param name="timeout">Forwarded to the base constructor.</param>
 public RestartNodeRequest(
     IFabricClient fabricClient,
     ReplicaSelector replicaSelector,
     bool createFabricDump,
     CompletionMode completionMode,
     TimeSpan timeout)
     : base(fabricClient, replicaSelector, completionMode, timeout)
 {
     this.createFabricDump = createFabricDump;
 }
 /// <summary>
 /// Creates a remove-replica action for the given selector, with the completion mode set to
 /// <c>CompletionMode.Verify</c> and forced removal turned off.
 /// </summary>
 /// <param name="replicaSelector">Value stored in <c>ReplicaSelector</c>.</param>
 public RemoveReplicaAction(
     ReplicaSelector replicaSelector)
 {
     this.ReplicaSelector = replicaSelector;
     this.CompletionMode = CompletionMode.Verify;
     this.ForceRemove = false;
 }
Esempio n. 22
0
 /// <summary>
 /// Implemented by derived commands to issue their replica operation through the cluster connection.
 /// </summary>
 /// <param name="clusterConnection">
 /// Cluster connection the derived command should use.
 /// </param>
 /// <param name="replicaSelector">
 /// Selector identifying the replica the operation targets.
 /// </param>
 /// <param name="completionMode">
 /// Completion mode for the operation.
 /// </param>
 /// <param name="cancellationToken">
 /// Token for cancelling the operation.
 /// </param>
 /// <returns>
 /// A <see cref="Task"/> producing the operation's <see cref="ReplicaResult"/>.
 /// </returns>
 internal abstract Task <ReplicaResult> InvokeCommandAsync(
     IClusterConnection clusterConnection,
     ReplicaSelector replicaSelector,
     CompletionMode completionMode,
     CancellationToken cancellationToken);
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, InvokeQuorumLossAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.PartitionSelector, "PartitionSelector");

                var helper = new TimeoutHelper(action.ActionTimeout);

                // get info about the service so we can check type and trss
                ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        action.PartitionSelector.ServiceName,
                        action.RequestTimeout,
                        cancellationToken),
                    helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (result.Kind != ServiceDescriptionKind.Stateful)
                {
                    throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "QuorumLoss", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
                }

                StatefulServiceDescription statefulServiceDescription = result as StatefulServiceDescription;

                ReleaseAssert.AssertIf(statefulServiceDescription == null, "Service is not a stateful service");

                if (!statefulServiceDescription.HasPersistedState)
                {
                    throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "QuorumLoss", "Stateful Persistent", action.PartitionSelector.ServiceName, "Stateful In-Memory Only"));
                }

                // figure out /which/ partition to select
                var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
                {
                    RequestTimeout = action.RequestTimeout,
                    ActionTimeout  = helper.GetRemainingTime()
                };

                await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken);

                Guid partitionId = getPartitionStateAction.Result.PartitionId;

                // get data about replicas in that partition
                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        action.RequestTimeout,
                        cancellationToken),
                    helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                var removeUTRequestList = new List <Tuple <string, string> >();
                Dictionary <Tuple <string, string>, Task> removeUTTaskDictionary = new Dictionary <Tuple <string, string>, Task>();

                try
                {
                    var  stableReplicas                  = replicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();
                    var  stableReplicasToRemove          = new List <StatefulServiceReplica>();
                    long replicasToRestartWithoutPrimary =
                        action.QuorumLossMode == QuorumLossMode.AllReplicas
                            ? stableReplicas.Length - 1
                            : FabricCluster.GetWriteQuorumSize(replicasResult.Count);
                    foreach (var replica in stableReplicas)
                    {
                        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                        ReleaseAssert.AssertIf(statefulReplica == null, "Service Replica is not of stateful type even though service is stateful");
                        if (statefulReplica.ReplicaRole != ReplicaRole.Primary)
                        {
                            replicasToRestartWithoutPrimary--;
                        }

                        if (replicasToRestartWithoutPrimary >= 0 || statefulReplica.ReplicaRole == ReplicaRole.Primary)
                        {
                            stableReplicasToRemove.Add(statefulReplica);
                        }
                    }

                    // for selected replicas, block reopen so that when we restart the replica (NOT remove the replica) it doesn't come up
                    var utTaskList = new List <Task>();
                    foreach (var statefulReplica in stableReplicasToRemove)
                    {
                        string nodeName = statefulReplica.NodeName;
                        UnreliableTransportBehavior behavior = new UnreliableTransportBehavior("*", "StatefulServiceReopen");
                        behavior.AddFilterForPartitionId(partitionId);
                        string behaviorName = "BlockStatefulServiceReopen_" + nodeName;

                        removeUTRequestList.Add(new Tuple <string, string>(nodeName, behaviorName));
                        utTaskList.Add(
                            FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                () =>
                                testContext.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                                    nodeName,
                                    behaviorName,
                                    behavior,
                                    action.RequestTimeout,
                                    cancellationToken),
                                helper.GetRemainingTime(),
                                cancellationToken));
                    }

                    await Task.WhenAll(utTaskList).ConfigureAwait(false);

                    // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                    await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken);

                    var restartReplicaTaskList = new List <Task>();
                    foreach (var statefulReplica in stableReplicasToRemove)
                    {
                        ReplicaSelector replicaSelector = ReplicaSelector.ReplicaIdOf(PartitionSelector.PartitionIdOf(action.PartitionSelector.ServiceName, partitionId), statefulReplica.Id);

                        var restartReplicaAction = new RestartReplicaAction(replicaSelector)
                        {
                            CompletionMode = CompletionMode.DoNotVerify,
                            RequestTimeout = action.RequestTimeout,
                            ActionTimeout  = helper.GetRemainingTime()
                        };

                        restartReplicaTaskList.Add(testContext.ActionExecutor.RunAsync(restartReplicaAction, cancellationToken));
                    }

                    await Task.WhenAll(restartReplicaTaskList).ConfigureAwait(false);

                    await AsyncWaiter.WaitAsync(action.QuorumLossDuration, cancellationToken).ConfigureAwait(false);

                    // validate
                    ServicePartitionList partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.QueryManager.GetPartitionListAsync(
                            action.PartitionSelector.ServiceName,
                            null,
                            action.RequestTimeout,
                            cancellationToken),
                        FabricClientRetryErrors.GetPartitionListFabricErrors.Value,
                        helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    foreach (StatefulServicePartition partition in partitionsResult)
                    {
                        if (partition.PartitionInformation.Id == partitionId)
                        {
                            ReleaseAssert.AssertIf(partition.PartitionStatus != ServicePartitionStatus.InQuorumLoss, "Partition failed to be in Quorum Loss.");
                            break;
                        }
                    }

                    foreach (var removeUTParams in removeUTRequestList)
                    {
                        var  currentParams = removeUTParams;
                        Task task          = FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                                currentParams.Item1,  /*nodeName*/
                                currentParams.Item2,  /*behaviorName*/
                                action.RequestTimeout,
                                cancellationToken),
                            FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                            helper.GetRemainingTime(),
                            cancellationToken);

                        removeUTTaskDictionary[currentParams] = task;
                    }

                    await Task.WhenAll(removeUTTaskDictionary.Values).ConfigureAwait(false);

                    // TODO: Wait for some time so that the removal of this unreliable transport behavior can be read from the files.
                    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successully applied
                    await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken);
                }
                finally
                {
                    var removeUTTaskList = new List <Task>();

                    foreach (var removeUTRequest in removeUTTaskDictionary)
                    {
                        var currentRemoveUTRequest = removeUTRequest;
                        if (currentRemoveUTRequest.Value == null || currentRemoveUTRequest.Value.IsFaulted)
                        {
                            removeUTTaskList.Add(
                                FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                    () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                                        currentRemoveUTRequest.Key.Item1, /*nodeName*/
                                        currentRemoveUTRequest.Key.Item2, /*behaviorName*/
                                        action.RequestTimeout,
                                        cancellationToken),
                                    FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                                    helper.GetRemainingTime(),
                                    cancellationToken));
                        }
                    }

                    Task.WhenAll(removeUTTaskList).Wait(cancellationToken);

                    // TODO: Wait for some time so that the removal of this unreliable transport behavior can be read from the files.
                    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successully applied
                    Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).GetAwaiter().GetResult();
                }

                action.Result          = new InvokeQuorumLossResult(getPartitionStateAction.Result);
                this.ResultTraceString = StringHelper.Format("InvokeQuorumLossAction succeeded for {0} with QuorumLossMode = {1}", partitionId, action.QuorumLossMode);
            }
 /// <summary>
 /// Creates a restart-replica action for the supplied selector.
 /// The completion mode starts out as <c>CompletionMode.Verify</c>; callers may override it afterwards.
 /// </summary>
 /// <param name="replicaSelector">Selector stored on the <c>ReplicaSelector</c> property; it is not validated here.</param>
 public RestartReplicaAction(ReplicaSelector replicaSelector)
 {
     // The two assignments are independent of each other.
     this.CompletionMode = CompletionMode.Verify;
     this.ReplicaSelector = replicaSelector;
 }
            /// <summary>
            /// Runs the quorum loss step: applies the recorded unreliable transport behaviors, restarts the
            /// target replicas without verification, waits for the configured quorum loss duration, polls the
            /// partition status until it reports <c>ServicePartitionStatus.InQuorumLoss</c>, and finally removes
            /// the unreliable transport behaviors.
            /// </summary>
            /// <param name="cancellationToken">Token passed to every fabric call and delay issued by this step.</param>
            /// <param name="serviceInternalFaultInfo">Passed through to <c>ActionTest.PerformInternalServiceFaultIfRequested</c>.</param>
            /// <returns>The converted step state, with <c>StepStateNames.CompletedSuccessfully</c> pushed onto its progress stack.</returns>
            /// <exception cref="FabricTransientException">
            /// Thrown with <c>FabricErrorCode.NotReady</c> when the partition never reports quorum loss within the retry budget.
            /// </exception>
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                InvokeQuorumLossState state = Convert(this.State);

                Guid partitionId = state.Info.PartitionId;
                List <Tuple <string, string> > unreliableTransportInfo = state.Info.UnreliableTransportInfo;
                List <long> targetReplicas = state.Info.ReplicaIds;

                var unreliableTransportTaskList = new List <Task>();

                // Each tuple is (nodeName, behaviorName); apply the reopen-blocking behavior on every listed node.
                foreach (Tuple <string, string> ut in unreliableTransportInfo)
                {
                    string nodeName     = ut.Item1;
                    string behaviorName = ut.Item2;

                    System.Fabric.Common.UnreliableTransportBehavior behavior = new System.Fabric.Common.UnreliableTransportBehavior("*", "StatefulServiceReopen");
                    behavior.AddFilterForPartitionId(partitionId);

                    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - applying '{1}'", this.State.OperationId, behaviorName);

                    unreliableTransportTaskList.Add(FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                                        () => this.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                                                            nodeName,
                                                            behaviorName,
                                                            behavior,
                                                            this.RequestTimeout,
                                                            cancellationToken),
                                                        this.OperationTimeout,
                                                        cancellationToken));
                }

                await Task.WhenAll(unreliableTransportTaskList).ConfigureAwait(false);

                // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                List <Task> tasks = new List <Task>();

                // Restart every target replica concurrently; completion is deliberately not verified here.
                foreach (long replicaId in targetReplicas)
                {
                    ReplicaSelector replicaSelector = ReplicaSelector.ReplicaIdOf(PartitionSelector.PartitionIdOf(this.partitionSelector.ServiceName, partitionId), replicaId);

                    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - faulting replica with id={1}", this.State.OperationId, replicaId);
                    Task task = FaultAnalysisServiceUtility.RestartReplicaAsync(this.FabricClient, replicaSelector, CompletionMode.DoNotVerify, this.RequestTimeout, this.OperationTimeout, cancellationToken);
                    tasks.Add(task);
                }

                await Task.WhenAll(tasks).ConfigureAwait(false);

                ActionTest.PerformInternalServiceFaultIfRequested(this.State.OperationId, serviceInternalFaultInfo, this.State, cancellationToken, true);

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - keeping partition in quorum loss for '{1}'", this.State.OperationId, state.Info.QuorumLossDuration);
                await Task.Delay(state.Info.QuorumLossDuration, cancellationToken).ConfigureAwait(false);

                bool conditionSatisfied = false;

                int quorumLossCheckRetries = FASConstants.QuorumLossCheckRetryCount;

                // Poll the partition status: one initial check plus up to QuorumLossCheckRetryCount retries.
                do
                {
                    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - checking PartitionStatus", this.State.OperationId);
                    ServicePartitionList partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => this.FabricClient.QueryManager.GetPartitionListAsync(
                            this.partitionSelector.ServiceName,
                            null,
                            this.RequestTimeout,
                            cancellationToken),
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);

                    foreach (StatefulServicePartition partition in partitionsResult)
                    {
                        if (partition.PartitionInformation.Id == partitionId)
                        {
                            if (partition.PartitionStatus == ServicePartitionStatus.InQuorumLoss)
                            {
                                conditionSatisfied = true;
                                break;
                            }
                        }
                    }

                    if (conditionSatisfied)
                    {
                        // Quorum loss is confirmed: skip the back-off so the transport behaviors are removed
                        // right away instead of holding the partition in quorum loss for five extra seconds.
                        break;
                    }

                    await AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
                }while (quorumLossCheckRetries-- > 0);

                if (!conditionSatisfied)
                {
                    string error = string.Format(CultureInfo.InvariantCulture, "{0} - Service could not induce quorum loss for service '{1}', partition '{2}'. Please retry", this.State.OperationId, this.partitionSelector.ServiceName, partitionId);
                    TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, error);

                    throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
                }

                await QuorumLossStepsFactory.RemoveUTAsync(this.FabricClient, this.State, this.RequestTimeout, this.OperationTimeout, cancellationToken).ConfigureAwait(false);

                state.StateProgress.Push(StepStateNames.CompletedSuccessfully);

                return(state);
            }
        /// <summary>
        /// Looks up a replica by running a <c>GetSelectedReplicaStateAction</c> for
        /// <paramref name="replicaSelector"/> through the system fabric client's action executor.
        /// </summary>
        /// <param name="client">Testability client from which the system fabric client is obtained.</param>
        /// <param name="replicaSelector">Selector handed to the <c>GetSelectedReplicaStateAction</c>.</param>
        /// <returns>The <c>Replica</c> held in <c>Item2</c> of the action's result.</returns>
        public static async Task <Replica> GetReplicaAsync(this IFabricTestabilityClient client, ReplicaSelector replicaSelector)
        {
            var systemFabricClient = GetSystemFabricClient(client);
            GetSelectedReplicaStateAction replicaAction = new GetSelectedReplicaStateAction(replicaSelector);

            // Nothing after the await needs the caller's synchronization context; not capturing it
            // avoids a deadlock when a caller blocks on the returned task.
            await systemFabricClient.FabricClient.TestManager.TestContext.ActionExecutor.RunAsync(replicaAction).ConfigureAwait(false);

            return(replicaAction.Result.Item2);
        }