예제 #1
0
        /// <summary>
        /// Acquires a runspace and wraps it in a <see cref="Token"/>. A new runspace is created when the
        /// stack of available runspaces is empty and fewer than <c>GroupSizeMax</c> have been created;
        /// otherwise the call waits until one can be popped from the stack.
        /// </summary>
        /// <returns>A token wrapping the acquired runspace.</returns>
        /// <remarks>
        /// The wait is bounded by <c>MaxRunspaceWaitTime</c>; when that budget is exhausted without
        /// obtaining a runspace, <c>ThrowIf.IsTrue</c> is invoked with a diagnostic message.
        /// </remarks>
        public RunspaceGroup.Token Take()
        {
            Runspace runspace  = null;
            bool     createNew = false;

            // Reserve a slot under the lock; the (potentially slow) creation itself happens outside it.
            lock (this.thisLock)
            {
                if (this.runspaces.IsEmpty && this.currentStackSize < GroupSizeMax)
                {
                    createNew = true;
                    this.currentStackSize++;
                }
            }

            if (createNew)
            {
                try
                {
                    runspace = this.CreateRunspace();
                }
                catch
                {
                    // Creation failed: give back the slot reserved above, otherwise the group
                    // would permanently lose capacity for every failed creation attempt.
                    lock (this.thisLock)
                    {
                        this.currentStackSize--;
                    }

                    throw;
                }
            }

            TimeoutHelper helper = new TimeoutHelper(MaxRunspaceWaitTime);

            while (runspace == null)
            {
                if (!this.runspaces.TryPop(out runspace))
                {
                    this.runSpaceAvailableEvent.WaitOne(TimeSpan.FromSeconds(5));

                    // Only evaluate the timeout when no runspace was obtained. Checking it after a
                    // successful pop could throw while holding a runspace that is then never returned.
                    ThrowIf.IsTrue(helper.GetRemainingTime() == TimeSpan.Zero, "Could not get a RunSpace in {0} minutes. Check if something is stuck", MaxRunspaceWaitTime);
                }
            }

            return(new Token(this, runspace));
        }
예제 #2
0
        /// <summary>
        /// Creates a validation report from a failure flag and its accompanying reason.
        /// </summary>
        /// <param name="failed">True when the validation failed.</param>
        /// <param name="reason">Explanation of the failure; must be non-empty when <paramref name="failed"/> is true.</param>
        public ValidationReport(bool failed, string reason)
        {
            // A failed report without an explanation is rejected before any state is stored.
            bool reasonMissing = string.IsNullOrEmpty(reason);
            ThrowIf.IsTrue(failed && reasonMissing, "If validation fails, you must provide a reason for the failure.");

            this.failureReason    = reason;
            this.validationFailed = failed;
        }
예제 #3
0
        /// <summary>
        /// Repeatedly queries the partitions and replicas (or instances) of the service until one pass
        /// satisfies every check enabled in <c>checkFlags</c>, or until the stabilization timeout runs out.
        /// </summary>
        /// <param name="maximumStabilizationTimeout">Time budget for the retry loop.</param>
        /// <param name="retryWait">Delay awaited after a failed pass before the next one starts.</param>
        /// <param name="ct">Cancellation token handed to every awaited call in this method.</param>
        /// <returns>
        /// <c>ValidationReport.Default</c> when a pass succeeds; otherwise a failed report whose reason is
        /// built from the service name, the timeout and the messages collected during the last pass.
        /// </returns>
        /// <exception cref="FabricValidationException">
        /// Thrown after the loop when the last pass recorded partitions in quorum loss and the
        /// <c>CheckQuorumLoss</c> flag is set.
        /// </exception>
        public async Task <ValidationReport> EnsureStabilityWithReportAsync(TimeSpan maximumStabilizationTimeout, TimeSpan retryWait, CancellationToken ct)
        {
            TestabilityTrace.TraceSource.WriteInfo(TraceSource, "Ensuring that '{0}' is online with timeout '{1}'.", this.serviceName, maximumStabilizationTimeout);

            bool checkQuorumLoss = (this.checkFlags & ValidationCheckFlag.CheckQuorumLoss) != 0;

            // Load basic information about this service.
            TestabilityTrace.TraceSource.WriteNoise(TraceSource, "Querying basic information for {0}.", this.serviceName);
            await this.LoadPartitionAndReplicaCountAsync(ct);

            // The timer is created only after the initial load above has completed.
            // NOTE(review): presumably TimeoutHelper starts counting at construction, in which case the
            // load is not charged against maximumStabilizationTimeout - confirm.
            DateTime      startTime = DateTime.Now;
            TimeoutHelper timer     = new TimeoutHelper(maximumStabilizationTimeout);
            bool          success   = false;

            // Both collections are reset at the top of every pass, so after the loop they describe the last pass only.
            List <Guid>   partitionsInQuorumLoss = new List <Guid>();
            StringBuilder errorString            = new StringBuilder();
            int           retryCount             = 1;

            while (!success && timer.GetRemainingTime() > TimeSpan.Zero)
            {
                TestabilityTrace.TraceSource.WriteInfo(TraceSource, "EnsureStabilityWithReportAsync(): retryCount='{0}', timer.GetRemainingTime()='{1}'", retryCount, timer.GetRemainingTime());

                var nodes = await this.TestContext.FabricCluster.GetLatestNodeInfoAsync(this.requestTimeout, this.operationTimeout, ct);

                // Empty error string and list of partitions in quorum loss
                partitionsInQuorumLoss.Clear();
                errorString.Clear();

                // Start each pass optimistic; any failed check below flips this back to false.
                success = true;
                int totalPartitionsFound = 0;

                bool stateful;
                ReleaseAssert.AssertIfNot(this.isStateful.TryGetValue(out stateful), "isStateful flag is not available");
                // Re-derived on every pass because the stateless branch may switch checkTarget off.
                bool checkTarget  = (this.checkFlags & ValidationCheckFlag.CheckTargetReplicaSetSize) != 0;
                bool checkInBuild = (this.checkFlags & ValidationCheckFlag.CheckInBuildReplica) != 0;

                if (stateful)
                {
                    var partitionDictionary = await this.QueryPartitionAndReplicaResultAsyncStateful(ct);

                    totalPartitionsFound = partitionDictionary.Count();

                    foreach (KeyValuePair <Partition, StatefulServiceReplica[]> partition in partitionDictionary)
                    {
                        bool partitionIsReady = partition.Key.PartitionStatus == ServicePartitionStatus.Ready;
                        if (!partitionIsReady)
                        {
                            var message = StringHelper.Format("Partition '{0}' is not Ready", partition.Key.PartitionId());
                            TestabilityTrace.TraceSource.WriteInfo(TraceSource, "{0}", message);
                            errorString.AppendLine(message);
                        }

                        // Partitions in quorum loss skip the replica checks and are only collected (else branch);
                        // whether they fail the pass is decided after the loop via checkQuorumLoss.
                        if (partition.Key.PartitionStatus != ServicePartitionStatus.InQuorumLoss)
                        {
                            // validCount: Ready replicas in the Primary or ActiveSecondary role.
                            int validCount      = 0;
                            int inBuildReplicas = 0;
                            foreach (StatefulServiceReplica replica in partition.Value)
                            {
                                if (replica.ReplicaStatus == ServiceReplicaStatus.Ready &&
                                    (replica.ReplicaRole == ReplicaRole.Primary || replica.ReplicaRole == ReplicaRole.ActiveSecondary))
                                {
                                    ++validCount;
                                }

                                // InBuild replicas are always reported in errorString; they only fail the pass when checkInBuild is set.
                                if (replica.ReplicaStatus == ServiceReplicaStatus.InBuild)
                                {
                                    ++inBuildReplicas;
                                    var message = StringHelper.Format("Replica {0} for partition '{1}' is InBuild", replica.Id, partition.Key.PartitionId());
                                    TestabilityTrace.TraceSource.WriteInfo(TraceSource, "{0}", message);
                                    errorString.AppendLine(message);
                                }
                            }

                            // CheckReplicaSetSize is called even when checkTarget is off; its result is only honored when checkTarget is set.
                            // NOTE(review): it receives errorString, presumably to append its own messages - confirm.
                            bool targetAchieved = this.CheckReplicaSetSize(partition.Key.PartitionInformation.Id, validCount, startTime, nodes, errorString);
                            if (!partitionIsReady ||
                                (checkInBuild && inBuildReplicas > 0) ||
                                (checkTarget && !targetAchieved))
                            {
                                success = false;
                            }
                        }
                        else
                        {
                            partitionsInQuorumLoss.Add(partition.Key.PartitionInformation.Id);
                        }
                    }
                }
                else
                {
                    int targetInstanceCount = 0;
                    ReleaseAssert.AssertIf(!this.targetReplicaSetSize.TryGetValue(out targetInstanceCount), "targetReplicaSetSize for service: {0} should have been populated at this point.", this.serviceName);

                    bool placementConstraintsDefined = false;
                    try
                    {
                        // Get the service description to find out if there are placement constraints on the service
                        ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => this.TestContext.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                                this.serviceName,
                                this.requestTimeout,
                                ct),
                            this.operationTimeout,
                            ct).ConfigureAwait(false);

                        ThrowIf.IsTrue(result == null, "A description must be associated with the service: {0}", this.serviceName);

                        placementConstraintsDefined = !string.IsNullOrEmpty(result.PlacementConstraints);
                    }
                    catch (UnauthorizedAccessException)
                    {
                        // Fallback: read the placement constraints from the service group description instead.
                        // NOTE(review): presumably this exception is how a service group surfaces from the
                        // service description query above - confirm.
                        ServiceGroupDescription groupDescription = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => this.TestContext.FabricClient.ServiceGroupManager.GetServiceGroupDescriptionAsync(
                                this.serviceName,
                                this.requestTimeout,
                                ct),
                            this.operationTimeout,
                            ct).ConfigureAwait(false);

                        ThrowIf.IsTrue(groupDescription == null, "A description must be associated with the service group: {0}", this.serviceName);

                        placementConstraintsDefined = !string.IsNullOrEmpty(groupDescription.ServiceDescription.PlacementConstraints);
                    }

                    // If a stateless service has instance count == -1 and it has placement constraints such
                    // that the possible number of instances cannot match the total number of nodes,
                    // we need to find out the number of eligible nodes for the service which is tracked by RDBug 8993319.
                    // Until RDBug 8993319 is fixed, we take the presence of placement constraints into consideration to make the
                    // validation more accurate.
                    if (targetInstanceCount == -1 && placementConstraintsDefined)
                    {
                        checkTarget = false;
                    }

                    var partitionDictionary = await this.QueryPartitionAndReplicaResultAsyncStateless(timer.GetRemainingTime(), ct);

                    totalPartitionsFound = partitionDictionary.Count();

                    foreach (KeyValuePair <Partition, StatelessServiceInstance[]> partition in partitionDictionary)
                    {
                        bool partitionIsReady = partition.Key.PartitionStatus == ServicePartitionStatus.Ready;
                        if (!partitionIsReady)
                        {
                            var message = StringHelper.Format("Partition '{0}' is not Ready", partition.Key.PartitionId());
                            TestabilityTrace.TraceSource.WriteInfo(TraceSource, "{0}", message);
                            errorString.AppendLine(message);
                        }

                        // For stateless services every Ready instance counts; no role or quorum-loss handling in this branch.
                        int validCount = 0;
                        foreach (StatelessServiceInstance instance in partition.Value)
                        {
                            if (instance.ReplicaStatus == ServiceReplicaStatus.Ready)
                            {
                                ++validCount;
                            }
                        }

                        bool targetAchieved = this.CheckReplicaSetSize(partition.Key.PartitionInformation.Id, validCount, startTime, nodes, errorString);
                        if (!partitionIsReady ||
                            (checkTarget && !targetAchieved))
                        {
                            success = false;
                        }
                    }
                }

                // The number of partitions returned by the query must also pass ValidatePartitionCount.
                if (!this.ValidatePartitionCount(totalPartitionsFound))
                {
                    success = false;
                }

                // Quorum loss fails the pass only when the CheckQuorumLoss flag is set.
                if (partitionsInQuorumLoss.Count > 0 && checkQuorumLoss)
                {
                    string paritionIds = string.Join(",", partitionsInQuorumLoss.ToArray());
                    var    message     = StringHelper.Format("Partitions '{0}' in quorum loss for service {1}", paritionIds, this.serviceName);
                    TestabilityTrace.TraceSource.WriteInfo(TraceSource, "{0}", message);
                    errorString.AppendLine(message);
                    success = false;
                }

                if (!success)
                {
                    // Emit the accumulated errors as a warning only on every 10th pass to limit trace volume.
                    if (retryCount % 10 == 0)
                    {
                        TestabilityTrace.TraceSource.WriteWarning(TraceSource, "Service {0} validation failed due to issues below, will retry: \n{1}", this.serviceName, errorString);
                    }

                    // Delay before querying again so we allow some time for state to change - don't spam the node
                    await AsyncWaiter.WaitAsync(retryWait, ct).ConfigureAwait(false);
                }

                retryCount++;
            }

            // Reflects the last pass only. With checkQuorumLoss set, a non-empty list here means the loop
            // ended unsuccessfully, and the failure is raised as an exception instead of a report.
            if (partitionsInQuorumLoss.Count > 0)
            {
                string partitionIds = string.Join(",", partitionsInQuorumLoss.ToArray());
                TestabilityTrace.TraceSource.WriteInfo(TraceSource, "Partitions in quorum loss for service {0} are '{1}'", this.serviceName, partitionIds);

                if (checkQuorumLoss)
                {
                    throw new FabricValidationException(StringHelper.Format(StringResources.Error_PartitionsInQuorumLoss, partitionIds, this.serviceName));
                }
            }

            if (!success)
            {
                // Also reached when the loop body never ran (no remaining time on entry); errorString is then empty.
                return(new ValidationReport(
                           true,
                           StringHelper.Format(StringResources.Error_ServiceNotStable, this.serviceName, maximumStabilizationTimeout, errorString)));
            }
            else
            {
                return(ValidationReport.Default);
            }
        }