Exemplo n.º 1
0
        /// <summary>
        /// Queries the partitions of the service and caches the partition count, whether the
        /// service is stateful, and the target replica set size (the instance count for a
        /// stateless service). Running multiple times should be safe: once all three values
        /// are present, later calls return without querying again.
        /// </summary>
        /// <param name="ct">Cancellation token forwarded to the partition query.</param>
        /// <returns>A task that completes when the values have been loaded.</returns>
        private async Task LoadPartitionAndReplicaCountAsync(CancellationToken ct)
        {
            // Optimization. No need for more than one thread to enter this method.
            // ConfigureAwait(false) keeps this await consistent with the partition query below.
            await this.asyncMutex.WaitAsync().ConfigureAwait(false);

            try
            {
                if (this.isStateful.HasValue && this.partitionCount.HasValue && this.targetReplicaSetSize.HasValue)
                {
                    // values already loaded
                    return;
                }

                ServicePartitionList servicePartitions = await this.GetPartitionsAsync(ct).ConfigureAwait(false);

                // Make sure servicePartitions has at least one item.
                ThrowIf.NullOrEmpty(servicePartitions, "servicePartitions");

                // set PartitionCount
                ReleaseAssert.AssertIfNot(this.partitionCount.TrySetValue(servicePartitions.Count), "partitionCount has already been set to a different value");

                // set isStateful field; the first partition is used as the representative for the whole service
                Partition partition = servicePartitions[0];
                var statefulServicePartition = partition as StatefulServicePartition;
                bool stateful = statefulServicePartition != null;
                ReleaseAssert.AssertIfNot(this.isStateful.TrySetValue(stateful), "isStateful has already been set to a different value");

                // retrieve replicaCount
                if (stateful)
                {
                    ReleaseAssert.AssertIfNot(this.targetReplicaSetSize.TrySetValue((int)statefulServicePartition.TargetReplicaSetSize), "targetReplicaSetSize has already been set to a different value");
                }
                else
                {
                    var statelessServicePartition = partition as StatelessServicePartition;

                    // Fail with a descriptive message instead of a NullReferenceException if the
                    // partition is of neither known kind.
                    ReleaseAssert.AssertIf(statelessServicePartition == null, "Partition of service {0} is neither stateful nor stateless", this.serviceName);
                    ReleaseAssert.AssertIfNot(this.targetReplicaSetSize.TrySetValue((int)statelessServicePartition.InstanceCount), "targetReplicaSetSize has already been set to a different value");
                }
            }
            catch (Exception e)
            {
                // Trace the failure and let the original exception propagate with its stack intact.
                TestabilityTrace.TraceSource.WriteError(TraceSource, "Error while getting partitions for service {0}. Exception: {1}", this.serviceName, e.Message);
                throw;
            }
            finally
            {
                this.asyncMutex.Release();
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Compares the number of partitions found by a query with the cached expected partition count.
        /// </summary>
        /// <param name="totalPartitionsFound">Number of partitions returned by the latest query.</param>
        /// <returns><c>true</c> when the counts match; otherwise <c>false</c>.</returns>
        private bool ValidatePartitionCount(int totalPartitionsFound)
        {
            int expectedPartitionCount;

            // The expected count must have been loaded before validation runs.
            ReleaseAssert.AssertIfNot(this.partitionCount.TryGetValue(out expectedPartitionCount), "PartitionCount is null.");

            if (expectedPartitionCount != totalPartitionsFound)
            {
                // Traced for any mismatch, whether fewer or more partitions were found than expected.
                TestabilityTrace.TraceSource.WriteInfo(TraceSource, "Found only {0}/{1} Partitions for service {2}", totalPartitionsFound, expectedPartitionCount, this.serviceName);
                return false;
            }

            // Success
            TestabilityTrace.TraceSource.WriteInfo(TraceSource, "Validated that service '{0}' has {1} stable partitions.", this.serviceName, totalPartitionsFound);
            return true;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Computes how many replicas (or instances) can be expected given the cached target
        /// replica set size and the nodes that are currently up.
        /// </summary>
        /// <param name="nodes">Node information to inspect; enumerated exactly once.</param>
        /// <returns>
        /// The smaller of the up-node count and the target size when <paramref name="nodes"/> is
        /// non-empty; otherwise the target size. A target of -1 is replaced by the up-node count first.
        /// </returns>
        private int GetExpectedReplicaSetSize(IEnumerable <NodeInfo> nodes)
        {
            int replicaCount;

            // The target size must have been loaded before this is called.
            ReleaseAssert.AssertIfNot(this.targetReplicaSetSize.TryGetValue(out replicaCount), "TargetReplicaSetSize is null");

            ThrowIf.Null(nodes, "nodes");

            // Single pass over the sequence: a lazily evaluated source is not re-run and
            // cannot yield different answers for "any nodes" and "up nodes".
            bool anyNodes    = false;
            int  upNodeCount = 0;
            foreach (NodeInfo node in nodes)
            {
                anyNodes = true;
                if (node.IsNodeUp)
                {
                    ++upNodeCount;
                }
            }

            if (replicaCount == -1)
            {
                // This can only be the case for stateless service and this means we want to place on all nodes
                replicaCount = upNodeCount;
            }

            // Return min of nodes or replica count i.e. if we have 3 nodes and replica count of 5 we will only be able
            // to place 3/5 replicas and the caller's check handles this case
            return anyNodes ? Math.Min(upNodeCount, replicaCount) : replicaCount;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Adds a file through <c>NativeHelper.FCIAddFile</c> using the current FCI context.
        /// The entry name passed to the native call is the file name without its directory part.
        /// </summary>
        /// <param name="fileName">Path of the file to add.</param>
        /// <returns>The value returned by the native add call.</returns>
        /// <remarks>
        /// If the callbacks recorded an error in <c>customData.ErrorInfo</c>, that exception is
        /// thrown instead of returning the native result.
        /// </remarks>
        public bool AddFile(string fileName)
        {
            string contextMissingMessage = String.Format(CultureInfo.CurrentCulture, StringResources.Error_FciContext_Null, "AddFile");
            ReleaseAssert.AssertIf(this.fciContext == null, contextMissingMessage);

            var entryName = Path.GetFileName(fileName);

            // NOTE(review): the positional 'false' and '0' arguments are passed through unchanged;
            // their meaning is defined by NativeHelper.FCIAddFile - confirm against its declaration.
            var added = NativeHelper.FCIAddFile(this.fciContext, fileName, entryName, false, FciGetNextCabMethod, FciStatusMethod, FciGetOpenInfoMethod, 0);

            // Nothing was recorded by the callbacks: hand back the native result.
            if (this.customData == null || this.customData.ErrorInfo == null)
            {
                return added;
            }

            throw this.customData.ErrorInfo;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Queries the service locations and returns, per partition, its instances typed as
        /// <c>StatelessServiceInstance</c>. Asserts if any returned replica is not a stateless instance.
        /// </summary>
        /// <param name="timeout">Not used by this method; kept for callers that pass it.</param>
        /// <param name="ct">Cancellation token forwarded to the location query.</param>
        /// <returns>A map from each partition to its stateless instances, in query order.</returns>
        public async Task <Dictionary <Partition, StatelessServiceInstance[]> > QueryPartitionAndReplicaResultAsyncStateless(TimeSpan timeout, CancellationToken ct)
        {
            Dictionary <Partition, Replica[]> locations = await this.QueryLocationsAsync(ct).ConfigureAwait(false);
            var instancesByPartition = new Dictionary <Partition, StatelessServiceInstance[]>();

            foreach (var entry in locations)
            {
                Replica[] untyped = entry.Value;
                var       typed   = new StatelessServiceInstance[untyped.Length];

                for (int index = 0; index < untyped.Length; index++)
                {
                    Replica instance = untyped[index];
                    var     narrowed = instance as StatelessServiceInstance;
                    ReleaseAssert.AssertIf(narrowed == null, "Instance {0} should be of type stateless for Partition {1}", instance.Id, entry.Key.PartitionId());
                    typed[index] = narrowed;
                }

                instancesByPartition.Add(entry.Key, typed);
            }

            return instancesByPartition;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Queries the service locations and returns, per partition, its replicas typed as
        /// <c>StatefulServiceReplica</c>. Asserts if any returned replica is not a stateful replica.
        /// </summary>
        /// <param name="ct">Cancellation token forwarded to the location query.</param>
        /// <returns>A map from each partition to its stateful replicas, in query order.</returns>
        public async Task <Dictionary <Partition, StatefulServiceReplica[]> > QueryPartitionAndReplicaResultAsyncStateful(CancellationToken ct)
        {
            var locations           = await this.QueryLocationsAsync(ct).ConfigureAwait(false);
            var replicasByPartition = new Dictionary <Partition, StatefulServiceReplica[]>();

            foreach (var entry in locations)
            {
                Replica[] untyped = entry.Value;
                var       typed   = new StatefulServiceReplica[untyped.Length];

                for (int index = 0; index < untyped.Length; index++)
                {
                    Replica replica  = untyped[index];
                    var     narrowed = replica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(narrowed == null, "Replica {0} should be of type stateful for Partition {1}", replica.Id, entry.Key.PartitionId());
                    typed[index] = narrowed;
                }

                replicasByPartition.Add(entry.Key, typed);
            }

            return replicasByPartition;
        }
Exemplo n.º 7
0
            /// <summary>
            /// Moves a secondary replica of the selected partition from one node to another.
            /// </summary>
            /// <remarks>
            /// Unless <c>action.IgnoreConstraints</c> is set, the current replica list is queried first to
            /// reject a target node that already hosts the primary or an active secondary, to pick a random
            /// current secondary when none was given, and to verify the current node really hosts a secondary.
            /// </remarks>
            /// <param name="testContext">Context providing the fabric client, action executor and random source.</param>
            /// <param name="action">The move request; its <c>Result</c> is set on success.</param>
            /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
            /// <exception cref="InvalidOperationException">A replica is not stateful, or the replica set has no active secondary.</exception>
            /// <exception cref="FabricException">The supplied node names conflict with the current replica placement.</exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, MoveSecondaryAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.PartitionSelector, "PartitionSelector");

                this.helper = new TimeoutHelper(action.ActionTimeout);

                string newSecondaryNode     = action.NewSecondaryNodeName;
                string currentSecondaryNode = action.CurrentSecondaryNodeName;

                var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
                {
                    RequestTimeout = action.RequestTimeout,
                    ActionTimeout  = this.helper.GetRemainingTime()
                };

                await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken).ConfigureAwait(false);

                Guid partitionId = getPartitionStateAction.Result.PartitionId;

                if (!action.IgnoreConstraints)
                {
                    // Fetch the current replica placement so the node names can be validated.
                    ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                            partitionId,
                            0,
                            action.RequestTimeout,
                            cancellationToken),
                        this.helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    List <string> currentSecReplicaNodes = new List <string>();
                    foreach (var replica in replicasResult)
                    {
                        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                        if (statefulReplica == null)
                        {
                            throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "MoveSecondary", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
                        }

                        if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                        {
                            // The target node must not already host the primary.
                            if (!string.IsNullOrEmpty(newSecondaryNode) && newSecondaryNode == statefulReplica.NodeName)
                            {
                                throw new FabricException(
                                          StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, newSecondaryNode, "MoveSecondary", "Primary exists on node"),
                                          FabricErrorCode.AlreadyPrimaryReplica);
                            }
                        }
                        else if (statefulReplica.ReplicaRole == ReplicaRole.ActiveSecondary)
                        {
                            currentSecReplicaNodes.Add(statefulReplica.NodeName);

                            // The target node must not already host an active secondary.
                            if (!string.IsNullOrEmpty(newSecondaryNode) && newSecondaryNode == statefulReplica.NodeName)
                            {
                                throw new FabricException(
                                          StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, newSecondaryNode, "MoveSecondary", "Secondary exists on node"),
                                          FabricErrorCode.AlreadySecondaryReplica);
                            }
                        }
                    }

                    if (currentSecReplicaNodes.Count == 0)
                    {
                        throw new InvalidOperationException(StringResources.Error_NoSecondariesInReplicaSet);
                    }

                    if (string.IsNullOrEmpty(currentSecondaryNode))
                    {
                        // No source node given: pick one of the active secondaries at random.
                        int num = testContext.Random.Next(currentSecReplicaNodes.Count);
                        currentSecondaryNode = currentSecReplicaNodes[num];
                    }

                    if (!currentSecReplicaNodes.Contains(currentSecondaryNode))
                    {
                        // Report the node that failed the check (the current node, not the target node).
                        throw new FabricException(
                                  StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, currentSecondaryNode, "MoveSecondary", "Current node does not have a secondary replica"),
                                  FabricErrorCode.InvalidReplicaStateForReplicaOperation);
                    }
                }

                ReleaseAssert.AssertIf(string.IsNullOrEmpty(currentSecondaryNode), "Current node name cannot be null or empty.");
                ReleaseAssert.AssertIf(newSecondaryNode == currentSecondaryNode, "Current and New node names are same.");

                ActionTraceSource.WriteInfo(TraceSource, "Calling move secondary with current node {0}, new node {1}, partition {2}", currentSecondaryNode, string.IsNullOrEmpty(newSecondaryNode) ? "Random" : newSecondaryNode, partitionId);
                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.FaultManager.MoveSecondaryUsingNodeNameAsync(
                        currentSecondaryNode,
                        newSecondaryNode,
                        getPartitionStateAction.Result.ServiceName,
                        partitionId,
                        action.IgnoreConstraints,
                        action.RequestTimeout,
                        cancellationToken),
                    FabricClientRetryErrors.MoveSecondaryFabricErrors.Value,
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                action.Result          = new MoveSecondaryResult(currentSecondaryNode, newSecondaryNode, getPartitionStateAction.Result);
                this.ResultTraceString = StringHelper.Format(
                    "MoveSecondaryAction succeeded for moving Secondary for {0} from {1} to {2}.",
                    partitionId,
                    currentSecondaryNode,
                    newSecondaryNode);
            }
            /// <summary>
            /// Restarts a replica identified either directly by node name, partition id and replica id,
            /// or - when any of those is missing - by resolving <c>action.ReplicaSelector</c>.
            /// </summary>
            /// <param name="testContext">Context providing the fabric client and action executor.</param>
            /// <param name="action">The restart request; its <c>Result</c> is set on success.</param>
            /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, RestartReplicaAction action, CancellationToken cancellationToken)
            {
                TimeoutHelper helper = new TimeoutHelper(action.ActionTimeout);

                string          nodeName              = action.NodeName;
                Guid?           partitionId           = action.PartitionId;
                long?           replicaId             = action.ReplicaId;
                SelectedReplica replicaSelectorResult = SelectedReplica.None;

                if (string.IsNullOrEmpty(nodeName) ||
                    !partitionId.HasValue ||
                    !replicaId.HasValue)
                {
                    // The explicit identifiers are incomplete: resolve all three from the selector.
                    ThrowIf.Null(action.ReplicaSelector, "ReplicaSelector");

                    var getReplicaStateAction = new GetSelectedReplicaStateAction(action.ReplicaSelector)
                    {
                        RequestTimeout = action.RequestTimeout,
                        ActionTimeout  = helper.GetRemainingTime()
                    };

                    await testContext.ActionExecutor.RunAsync(getReplicaStateAction, cancellationToken).ConfigureAwait(false);

                    var replicaStateActionResult = getReplicaStateAction.Result;
                    ReleaseAssert.AssertIf(replicaStateActionResult == null, "replicaStateActionResult cannot be null");

                    replicaSelectorResult = replicaStateActionResult.Item1;
                    ReleaseAssert.AssertIf(replicaSelectorResult == null, "replicaSelectorResult cannot be null");

                    partitionId = replicaSelectorResult.SelectedPartition.PartitionId;

                    Replica replicaStateResult = replicaStateActionResult.Item2;
                    ReleaseAssert.AssertIf(replicaStateResult == null, "replicaStateResult cannot be null");

                    nodeName  = replicaStateResult.NodeName;
                    replicaId = replicaStateResult.Id;
                }

                ThrowIf.IsFalse(partitionId.HasValue, "PartitionID");
                ThrowIf.IsFalse(replicaId.HasValue, "ReplicaID");

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.ServiceManager.RestartReplicaAsync(
                        nodeName,
                        partitionId.Value,
                        replicaId.Value,
                        action.RequestTimeout,
                        cancellationToken),
                    FabricClientRetryErrors.RestartReplicaErrors.Value,
                    helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (action.CompletionMode == CompletionMode.Verify)
                {
                    // TODO: Check with failover team to see how to confirm that the replica actually restarted. We do not expose instance id for persisted replicas
                }

                action.Result          = new RestartReplicaResult(replicaSelectorResult);
                this.ResultTraceString = StringHelper.Format(
                    "RestartReplicaOrInstance succeeded by restarting replica {0}:{1} node {2} with CompletionMode {3}",
                    partitionId.Value,
                    replicaId.Value,
                    nodeName,
                    action.CompletionMode);
            }
Exemplo n.º 9
0
        /// <summary>
        /// Repeatedly queries the service until all of its partitions pass the enabled stability
        /// checks or <paramref name="maximumStabilizationTimeout"/> elapses.
        /// </summary>
        /// <remarks>
        /// Each iteration checks that every partition is Ready, optionally that no replica is InBuild
        /// and that the target replica set size is reached (per <c>checkFlags</c>), and that the number
        /// of partitions found matches the expected count. Stateful partitions in quorum loss are collected
        /// separately. A warning with the accumulated issues is traced on every tenth failed attempt.
        /// </remarks>
        /// <param name="maximumStabilizationTimeout">Total time allowed for the service to stabilize.</param>
        /// <param name="retryWait">Delay between two consecutive attempts.</param>
        /// <param name="ct">Token forwarded to every asynchronous call.</param>
        /// <returns>
        /// <c>ValidationReport.Default</c> on success; otherwise a report carrying the not-stable error text.
        /// </returns>
        /// <exception cref="FabricValidationException">
        /// Partitions are still in quorum loss after the loop and the quorum loss check is enabled.
        /// </exception>
        public async Task <ValidationReport> EnsureStabilityWithReportAsync(TimeSpan maximumStabilizationTimeout, TimeSpan retryWait, CancellationToken ct)
        {
            TestabilityTrace.TraceSource.WriteInfo(TraceSource, "Ensuring that '{0}' is online with timeout '{1}'.", this.serviceName, maximumStabilizationTimeout);

            bool checkQuorumLoss = (this.checkFlags & ValidationCheckFlag.CheckQuorumLoss) != 0;

            // Load basic information about this service.
            TestabilityTrace.TraceSource.WriteNoise(TraceSource, "Querying basic information for {0}.", this.serviceName);
            await this.LoadPartitionAndReplicaCountAsync(ct).ConfigureAwait(false);

            // NOTE(review): local time is handed to CheckReplicaSetSize unchanged; confirm how that
            // helper uses it before switching to UTC.
            DateTime      startTime = DateTime.Now;
            TimeoutHelper timer     = new TimeoutHelper(maximumStabilizationTimeout);
            bool          success   = false;

            List <Guid>   partitionsInQuorumLoss = new List <Guid>();
            StringBuilder errorString            = new StringBuilder();
            int           retryCount             = 1;

            while (!success && timer.GetRemainingTime() > TimeSpan.Zero)
            {
                TestabilityTrace.TraceSource.WriteInfo(TraceSource, "EnsureStabilityWithReportAsync(): retryCount='{0}', timer.GetRemainingTime()='{1}'", retryCount, timer.GetRemainingTime());

                var nodes = await this.TestContext.FabricCluster.GetLatestNodeInfoAsync(this.requestTimeout, this.operationTimeout, ct).ConfigureAwait(false);

                // Empty error string and list of partitions in quorum loss
                partitionsInQuorumLoss.Clear();
                errorString.Clear();

                // Assume success for this attempt; any failed check below flips it back.
                success = true;
                int totalPartitionsFound = 0;

                bool stateful;
                ReleaseAssert.AssertIfNot(this.isStateful.TryGetValue(out stateful), "isStateful flag is not available");
                bool checkTarget  = (this.checkFlags & ValidationCheckFlag.CheckTargetReplicaSetSize) != 0;
                bool checkInBuild = (this.checkFlags & ValidationCheckFlag.CheckInBuildReplica) != 0;

                if (stateful)
                {
                    var partitionDictionary = await this.QueryPartitionAndReplicaResultAsyncStateful(ct).ConfigureAwait(false);

                    totalPartitionsFound = partitionDictionary.Count;

                    foreach (KeyValuePair <Partition, StatefulServiceReplica[]> partition in partitionDictionary)
                    {
                        bool partitionIsReady = partition.Key.PartitionStatus == ServicePartitionStatus.Ready;
                        if (!partitionIsReady)
                        {
                            var message = StringHelper.Format("Partition '{0}' is not Ready", partition.Key.PartitionId());
                            TestabilityTrace.TraceSource.WriteInfo(TraceSource, "{0}", message);
                            errorString.AppendLine(message);
                        }

                        if (partition.Key.PartitionStatus != ServicePartitionStatus.InQuorumLoss)
                        {
                            int validCount      = 0;
                            int inBuildReplicas = 0;
                            foreach (StatefulServiceReplica replica in partition.Value)
                            {
                                // Only Ready primaries and active secondaries count towards the target size.
                                if (replica.ReplicaStatus == ServiceReplicaStatus.Ready &&
                                    (replica.ReplicaRole == ReplicaRole.Primary || replica.ReplicaRole == ReplicaRole.ActiveSecondary))
                                {
                                    ++validCount;
                                }

                                if (replica.ReplicaStatus == ServiceReplicaStatus.InBuild)
                                {
                                    ++inBuildReplicas;
                                    var message = StringHelper.Format("Replica {0} for partition '{1}' is InBuild", replica.Id, partition.Key.PartitionId());
                                    TestabilityTrace.TraceSource.WriteInfo(TraceSource, "{0}", message);
                                    errorString.AppendLine(message);
                                }
                            }

                            bool targetAchieved = this.CheckReplicaSetSize(partition.Key.PartitionInformation.Id, validCount, startTime, nodes, errorString);
                            if (!partitionIsReady ||
                                (checkInBuild && inBuildReplicas > 0) ||
                                (checkTarget && !targetAchieved))
                            {
                                success = false;
                            }
                        }
                        else
                        {
                            // Quorum loss is evaluated after the partition loop, gated by checkQuorumLoss.
                            partitionsInQuorumLoss.Add(partition.Key.PartitionInformation.Id);
                        }
                    }
                }
                else
                {
                    int targetInstanceCount = 0;
                    ReleaseAssert.AssertIf(!this.targetReplicaSetSize.TryGetValue(out targetInstanceCount), "targetReplicaSetSize for service: {0} should have been populated at this point.", this.serviceName);

                    bool placementConstraintsDefined = false;
                    try
                    {
                        // Get the service description to find out if there are placement constraints on the service
                        ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => this.TestContext.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                                this.serviceName,
                                this.requestTimeout,
                                ct),
                            this.operationTimeout,
                            ct).ConfigureAwait(false);

                        ThrowIf.IsTrue(result == null, "A description must be associated with the service: {0}", this.serviceName);

                        placementConstraintsDefined = !string.IsNullOrEmpty(result.PlacementConstraints);
                    }
                    catch (UnauthorizedAccessException)
                    {
                        // Fall back to the service group description when the service description query is refused.
                        ServiceGroupDescription groupDescription = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => this.TestContext.FabricClient.ServiceGroupManager.GetServiceGroupDescriptionAsync(
                                this.serviceName,
                                this.requestTimeout,
                                ct),
                            this.operationTimeout,
                            ct).ConfigureAwait(false);

                        ThrowIf.IsTrue(groupDescription == null, "A description must be associated with the service group: {0}", this.serviceName);

                        placementConstraintsDefined = !string.IsNullOrEmpty(groupDescription.ServiceDescription.PlacementConstraints);
                    }

                    // If a stateless service has instance count == -1 and it has placement constraints such
                    // that the possible number of instances cannot match the total number of nodes,
                    // we need to find out the number of eligible nodes for the service which is tracked by RDBug 8993319.
                    // Until RDBug 8993319 is fixed, we take the presence of placement constraints into consideration to make the
                    // validation more accurate.
                    if (targetInstanceCount == -1 && placementConstraintsDefined)
                    {
                        checkTarget = false;
                    }

                    var partitionDictionary = await this.QueryPartitionAndReplicaResultAsyncStateless(timer.GetRemainingTime(), ct).ConfigureAwait(false);

                    totalPartitionsFound = partitionDictionary.Count;

                    foreach (KeyValuePair <Partition, StatelessServiceInstance[]> partition in partitionDictionary)
                    {
                        bool partitionIsReady = partition.Key.PartitionStatus == ServicePartitionStatus.Ready;
                        if (!partitionIsReady)
                        {
                            var message = StringHelper.Format("Partition '{0}' is not Ready", partition.Key.PartitionId());
                            TestabilityTrace.TraceSource.WriteInfo(TraceSource, "{0}", message);
                            errorString.AppendLine(message);
                        }

                        int validCount = 0;
                        foreach (StatelessServiceInstance instance in partition.Value)
                        {
                            if (instance.ReplicaStatus == ServiceReplicaStatus.Ready)
                            {
                                ++validCount;
                            }
                        }

                        bool targetAchieved = this.CheckReplicaSetSize(partition.Key.PartitionInformation.Id, validCount, startTime, nodes, errorString);
                        if (!partitionIsReady ||
                            (checkTarget && !targetAchieved))
                        {
                            success = false;
                        }
                    }
                }

                if (!this.ValidatePartitionCount(totalPartitionsFound))
                {
                    success = false;
                }

                if (partitionsInQuorumLoss.Count > 0 && checkQuorumLoss)
                {
                    string partitionIds = string.Join(",", partitionsInQuorumLoss.ToArray());
                    var    message      = StringHelper.Format("Partitions '{0}' in quorum loss for service {1}", partitionIds, this.serviceName);
                    TestabilityTrace.TraceSource.WriteInfo(TraceSource, "{0}", message);
                    errorString.AppendLine(message);
                    success = false;
                }

                if (!success)
                {
                    // Only surface the accumulated issues as a warning every tenth attempt.
                    if (retryCount % 10 == 0)
                    {
                        TestabilityTrace.TraceSource.WriteWarning(TraceSource, "Service {0} validation failed due to issues below, will retry: \n{1}", this.serviceName, errorString);
                    }

                    // Delay before querying again so we allow some time for state to change - don't spam the node
                    await AsyncWaiter.WaitAsync(retryWait, ct).ConfigureAwait(false);
                }

                retryCount++;
            }

            if (partitionsInQuorumLoss.Count > 0)
            {
                string partitionIds = string.Join(",", partitionsInQuorumLoss.ToArray());
                TestabilityTrace.TraceSource.WriteInfo(TraceSource, "Partitions in quorum loss for service {0} are '{1}'", this.serviceName, partitionIds);

                if (checkQuorumLoss)
                {
                    throw new FabricValidationException(StringHelper.Format(StringResources.Error_PartitionsInQuorumLoss, partitionIds, this.serviceName));
                }
            }

            if (!success)
            {
                // NOTE(review): the first constructor argument is passed as 'true' on failure; presumably
                // it flags the report as failed - confirm against ValidationReport.
                return new ValidationReport(
                           true,
                           StringHelper.Format(StringResources.Error_ServiceNotStable, this.serviceName, maximumStabilizationTimeout, errorString));
            }

            return ValidationReport.Default;
        }
Exemplo n.º 10
0
        /// <summary>
        /// Repeatedly queries the aggregated health of the service until it is acceptable under the
        /// enabled checks or <paramref name="maximumStabilizationTimeout"/> elapses.
        /// </summary>
        /// <remarks>
        /// Error and Warning states fail the attempt only when the matching flag in <c>checkFlags</c> is
        /// set; Invalid and Unknown states always fail it. A warning with the unhealthy items is traced on
        /// every tenth failed attempt.
        /// </remarks>
        /// <param name="maximumStabilizationTimeout">Total time allowed for the service to become healthy.</param>
        /// <param name="retryWait">Delay between two consecutive attempts.</param>
        /// <param name="ct">Token forwarded to every asynchronous call, including the retry delay.</param>
        /// <returns>
        /// <c>ValidationReport.Default</c> on success; otherwise a report carrying the not-healthy error text.
        /// </returns>
        public async Task <ValidationReport> ValidateHealthWithReportAsync(TimeSpan maximumStabilizationTimeout, TimeSpan retryWait, CancellationToken ct)
        {
            TestabilityTrace.TraceSource.WriteInfo(TraceSource, "Validating that '{0}' is healthy with timeout '{1}'.", this.serviceName, maximumStabilizationTimeout);

            TimeoutHelper timer      = new TimeoutHelper(maximumStabilizationTimeout);
            bool          success    = false;
            string        healthinfo = string.Empty;
            int           retryCount = 1;

            while (!success && timer.GetRemainingTime() > TimeSpan.Zero)
            {
                TestabilityTrace.TraceSource.WriteInfo(TraceSource, "ValidateHealthWithReportAsync(): retryCount='{0}', timer.GetRemainingTime()='{1}'", retryCount, timer.GetRemainingTime());

                healthinfo = string.Empty;

                if (this.TestContext == null)
                {
                    Console.WriteLine("testcontext is null");
                }

                ReleaseAssert.AssertIfNull(this.TestContext, "test context");
                ReleaseAssert.AssertIfNull(this.serviceName, "serviceName");
                ReleaseAssert.AssertIfNull(FabricClientRetryErrors.GetEntityHealthFabricErrors.Value, "health error code");

                ApplicationHealthPolicy healthPolicy = new ApplicationHealthPolicy();

                var serviceHealthResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () =>
                    this.TestContext.FabricClient.HealthManager.GetServiceHealthAsync(
                        this.serviceName,
                        healthPolicy,
                        this.requestTimeout,
                        ct),
                    FabricClientRetryErrors.GetEntityHealthFabricErrors.Value,
                    timer.GetRemainingTime(),
                    ct).ConfigureAwait(false);

                bool checkError   = (this.checkFlags & ValidationCheckFlag.CheckError) != 0;
                bool checkWarning = (this.checkFlags & ValidationCheckFlag.CheckWarning) != 0;

                if ((checkError && serviceHealthResult.AggregatedHealthState == HealthState.Error) ||
                    (checkWarning && serviceHealthResult.AggregatedHealthState == HealthState.Warning) ||
                    serviceHealthResult.AggregatedHealthState == HealthState.Invalid ||
                    serviceHealthResult.AggregatedHealthState == HealthState.Unknown)
                {
                    TestabilityTrace.TraceSource.WriteInfo(TraceSource, "{0} health state is {1}. Will Retry check", this.serviceName, serviceHealthResult.AggregatedHealthState);
                    healthinfo = await this.GetUnhealthyItemsAsync(serviceHealthResult, timer, ct).ConfigureAwait(false);

                    // Pass the text as an argument, never as the format string: health descriptions
                    // may contain braces that would otherwise be parsed as format placeholders.
                    TestabilityTrace.TraceSource.WriteInfo(TraceSource, "{0}", healthinfo);
                }
                else
                {
                    success = true;
                }

                if (!success)
                {
                    // Only surface the unhealthy items as a warning every tenth attempt.
                    if (retryCount % 10 == 0)
                    {
                        TestabilityTrace.TraceSource.WriteWarning(TraceSource, "Service {0} health validation failed due to issues below, will retry: \n{1}", this.serviceName, healthinfo);
                    }

                    // Delay before querying again so we allow some time for state to change - don't spam the node.
                    // The token is passed so that cancellation is honored while waiting.
                    await AsyncWaiter.WaitAsync(retryWait, ct).ConfigureAwait(false);
                }

                retryCount++;
            }

            if (!success)
            {
                // NOTE(review): the first constructor argument is passed as 'true' on failure; presumably
                // it flags the report as failed - confirm against ValidationReport.
                return new ValidationReport(true, StringHelper.Format(StringResources.Error_ServiceNotHealthy, this.serviceName, maximumStabilizationTimeout, healthinfo));
            }

            return ValidationReport.Default;
        }
            /// <summary>
            /// Restarts a deployed code package on a node. When the action does not fully identify
            /// the target (node name, service manifest name or code package name is missing), the
            /// target is resolved through the action's replica selector. With
            /// <c>CompletionMode.Verify</c> the method polls until the chosen entry point reports a
            /// larger code package instance id in the <c>Started</c> state, or the action timeout expires.
            /// </summary>
            /// <param name="testContext">Supplies the fabric client and the action executor used by this method.</param>
            /// <param name="action">The restart request; its <c>Result</c> is populated before returning.</param>
            /// <param name="cancellationToken">Token forwarded to every fabric call and to the polling delay.</param>
            /// <exception cref="FabricException">
            /// Thrown with <c>FabricErrorCode.ReplicaDoesNotExist</c> when no deployed replica on the
            /// resolved node matches the selected partition.
            /// </exception>
            /// <exception cref="TimeoutException">
            /// Thrown in verify mode when the restart was not observed before the action timeout ran out.
            /// </exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, RestartDeployedCodePackageAction action, CancellationToken cancellationToken)
            {
                this.helper = new TimeoutHelper(action.ActionTimeout);

                string targetNode = action.NodeName;
                Uri appName = action.ApplicationName;
                string manifestName = action.ServiceManifestName;
                string activationId = action.ServicePackageActivationId;
                string packageName = action.CodePackageName;
                SelectedReplica selectedReplicaInfo = SelectedReplica.None;

                ThrowIf.Null(appName, "ApplicationName");

                // The caller may name the target explicitly; otherwise it is derived from a replica.
                bool targetFullySpecified = !string.IsNullOrEmpty(targetNode)
                                            && !string.IsNullOrEmpty(manifestName)
                                            && !string.IsNullOrEmpty(packageName);

                if (!targetFullySpecified)
                {
                    ThrowIf.Null(action.ReplicaSelector, "ReplicaSelector");

                    var replicaStateQuery = new GetSelectedReplicaStateAction(action.ReplicaSelector);
                    replicaStateQuery.RequestTimeout = action.RequestTimeout;
                    replicaStateQuery.ActionTimeout = this.helper.GetRemainingTime();

                    await testContext.ActionExecutor.RunAsync(replicaStateQuery, cancellationToken).ConfigureAwait(false);

                    var replicaStateQueryResult = replicaStateQuery.Result;
                    ReleaseAssert.AssertIf(replicaStateQueryResult == null, "replicaStateActionResult cannot be null");

                    selectedReplicaInfo = replicaStateQueryResult.Item1;
                    ReleaseAssert.AssertIf(
                        selectedReplicaInfo == null || selectedReplicaInfo.SelectedPartition == null,
                        "replicaSelectorResult cannot be null or for a non-null replicaSelectorResult, the selected partition must be non-null");
                    Guid partitionId = selectedReplicaInfo.SelectedPartition.PartitionId;

                    Replica replicaState = replicaStateQueryResult.Item2;
                    ReleaseAssert.AssertIf(replicaState == null, "replicaStateResult cannot be null");

                    // The node hosting the selected replica becomes the restart target.
                    targetNode = replicaState.NodeName;

                    var replicasOnNode = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync<DeployedServiceReplicaList>(
                        () => testContext.FabricClient.QueryManager.GetDeployedReplicaListAsync(
                            targetNode,
                            appName,
                            null,
                            partitionId,
                            action.RequestTimeout,
                            cancellationToken),
                        this.helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    DeployedServiceReplica deployedReplica = replicasOnNode.FirstOrDefault(r => r.Partitionid == partitionId);
                    if (deployedReplica == null)
                    {
                        string notFoundMessage = StringHelper.Format(StringResources.Error_DidNotFindDeployedReplicaOnNode, partitionId, targetNode);
                        throw new FabricException(notFoundMessage, FabricErrorCode.ReplicaDoesNotExist);
                    }

                    // Take the package coordinates from the deployed replica that was found.
                    manifestName = deployedReplica.ServiceManifestName;
                    activationId = deployedReplica.ServicePackageActivationId;
                    packageName = deployedReplica.CodePackageName;
                }

                ActionTraceSource.WriteInfo(TraceSource, "SelectedReplica: serviceManifestName: {0}, servicePackageActivationId: {1}, codePackageName: {2}", manifestName, activationId, packageName);

                DeployedCodePackage packageBeforeRestart = await this.GetCodePackageInfoAsync(
                    testContext,
                    targetNode,
                    appName,
                    manifestName,
                    activationId,
                    packageName,
                    action,
                    cancellationToken).ConfigureAwait(false);

                var codepackageEntrypointToRestart = GetCodepackageEntrypointToRestart(action, packageBeforeRestart);

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.FaultManager.RestartDeployedCodePackageUsingNodeNameAsync(
                        targetNode,
                        appName,
                        manifestName,
                        activationId,
                        packageName,
                        codepackageEntrypointToRestart.EntryPoint.CodePackageInstanceId,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                // Instance id the entry point had when the restart was requested.
                var instanceIdBeforeRestart = codepackageEntrypointToRestart.EntryPoint.CodePackageInstanceId;

                if (action.CompletionMode == CompletionMode.Verify)
                {
                    TimeSpan pollInterval = TimeSpan.FromSeconds(5);
                    bool restartObserved = false;

                    while (this.helper.GetRemainingTime() > TimeSpan.Zero)
                    {
                        var packageAfterRestart = await this.GetCodePackageInfoAsync(
                            testContext,
                            targetNode,
                            appName,
                            manifestName,
                            activationId,
                            packageName,
                            action,
                            cancellationToken).ConfigureAwait(false);

                        if (packageAfterRestart != null)
                        {
                            // Look at the same kind of entry point (main or setup) that was restarted.
                            var currentEntryPoint = codepackageEntrypointToRestart.EntryPointType == EntryPointType.Main
                                ? packageAfterRestart.EntryPoint
                                : packageAfterRestart.SetupEntryPoint;

                            restartObserved = currentEntryPoint != null
                                              && currentEntryPoint.CodePackageInstanceId > instanceIdBeforeRestart
                                              && currentEntryPoint.EntryPointStatus == EntryPointStatus.Started;
                        }

                        if (restartObserved)
                        {
                            break;
                        }

                        ActionTraceSource.WriteInfo(TraceSource, "CodePackage = {0}:{1}:{2} not yet restarted. Retrying...", targetNode, appName, packageName);
                        await AsyncWaiter.WaitAsync(pollInterval, cancellationToken).ConfigureAwait(false);
                    }

                    if (!restartObserved)
                    {
                        throw new TimeoutException(
                            StringHelper.Format(
                                StringResources.Error_TestabilityActionTimeout,
                                "RestartDeployedCodePackage",
                                appName));
                    }
                }

                action.Result = new RestartDeployedCodePackageResult(
                    targetNode,
                    appName,
                    manifestName,
                    activationId,
                    packageName,
                    instanceIdBeforeRestart,
                    selectedReplicaInfo);

                ResultTraceString = StringHelper.Format("RestartCodePackageAction succeeded for {0}:{1}:{2} with CompletionMode = {3}", targetNode, appName, packageName, action.CompletionMode);
            }