/// <summary>
/// Resolves the partition named by the action's replica selector, queries the replicas of that
/// partition, lets the selector pick one of them, and stores the outcome on <c>action.Result</c>.
/// </summary>
/// <param name="testContext">Provides the action executor, the fabric client and the random source used here.</param>
/// <param name="action">Carries the replica selector and the timeouts; receives the result tuple.</param>
/// <param name="cancellationToken">Token forwarded to every asynchronous call made by this method.</param>
protected override async Task ExecuteActionAsync(FabricTestContext testContext, GetSelectedReplicaStateAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.ReplicaSelector, "ReplicaSelector");

                // Single budget for the whole action: every step below only gets what is left of ActionTimeout.
                var timeoutBudget = new TimeoutHelper(action.ActionTimeout);

                // Step 1: resolve the partition through the dedicated partition-state action.
                var partitionStateAction = new GetSelectedPartitionStateAction(action.ReplicaSelector.PartitionSelector);
                partitionStateAction.RequestTimeout = action.RequestTimeout;
                partitionStateAction.ActionTimeout  = timeoutBudget.GetRemainingTime();

                await testContext.ActionExecutor.RunAsync(partitionStateAction, cancellationToken);

                Guid selectedPartitionId = partitionStateAction.Result.PartitionId;

                // Step 2: list the replicas of that partition (replica id filter 0), with retries.
                // TODO: make these actions which store state locally as well.
                ServiceReplicaList partitionReplicas = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync<ServiceReplicaList>(
                    () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                        selectedPartitionId,
                        0,
                        action.RequestTimeout,
                        cancellationToken),
                    timeoutBudget.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                // Step 3: let the selector choose among the returned replicas, skipping invalid ones.
                Replica[] candidates = partitionReplicas.ToArray();
                Replica   chosen     = action.ReplicaSelector.GetSelectedReplica(candidates, testContext.Random, true /*skip invalid replicas*/);

                var selection = new SelectedReplica(chosen.Id, partitionStateAction.Result);

                action.Result     = Tuple.Create(selection, chosen);
                ResultTraceString = StringHelper.Format("ReplicaSelector Selected  Replica {0}", chosen.Id);
            }
예제 #2
0
        /// <summary>
        /// Waits until the given stateful service has its full complement of ready replicas.
        /// </summary>
        /// <remarks>
        /// The expected total is the service's target replica set size multiplied by the number of
        /// partitions declared by its partition scheme (uniform Int64 range or named; any other scheme
        /// counts as one partition). The partition list is read once; ready replicas are then
        /// re-counted after each <c>this.interval</c> delay until the expected total is reached.
        /// There is no upper bound on the wait.
        /// </remarks>
        /// <param name="serviceInstanceUri">Name of the service instance to wait for.</param>
        /// <exception cref="InvalidOperationException">The service description is not a stateful one.</exception>
        public async Task WaitForStatefulService(Uri serviceInstanceUri)
        {
            StatefulServiceDescription description =
                await this.Client.ServiceManager.GetServiceDescriptionAsync(serviceInstanceUri) as StatefulServiceDescription;

            // The 'as' cast yields null for a stateless service; fail with a clear message instead of
            // a NullReferenceException on the first property access.
            if (description == null)
            {
                throw new InvalidOperationException("Service '" + serviceInstanceUri + "' is not a stateful service.");
            }

            // Number of partitions implied by the scheme. Named schemes were previously not accounted
            // for, which made the wait finish after a single partition's worth of replicas.
            int partitionCount = 1;
            var uniformScheme  = description.PartitionSchemeDescription as UniformInt64RangePartitionSchemeDescription;
            var namedScheme    = description.PartitionSchemeDescription as NamedPartitionSchemeDescription;

            if (uniformScheme != null)
            {
                partitionCount = uniformScheme.PartitionCount;
            }
            else if (namedScheme != null)
            {
                partitionCount = namedScheme.PartitionNames.Count;
            }

            int targetTotalReplicas = description.TargetReplicaSetSize * partitionCount;

            ServicePartitionList partitions = await this.Client.QueryManager.GetPartitionListAsync(serviceInstanceUri);

            int replicaTotal = 0;

            while (replicaTotal < targetTotalReplicas)
            {
                // Pause first, then take a fresh count across all partitions.
                await Task.Delay(this.interval);

                replicaTotal = 0;
                foreach (Partition partition in partitions)
                {
                    ServiceReplicaList replicaList = await this.Client.QueryManager.GetReplicaListAsync(partition.PartitionInformation.Id);

                    replicaTotal += replicaList.Count(x => x.ReplicaStatus == System.Fabric.Query.ServiceReplicaStatus.Ready);
                }
            }
        }
        /// <summary>
        /// Resolves the partition targeted by <paramref name="replicaSelector"/>, lists its replicas and
        /// returns the replica picked by the selector together with its <see cref="SelectedReplica"/> descriptor.
        /// </summary>
        /// <param name="fabricClient">Client used for the partition lookup and the replica query.</param>
        /// <param name="replicaSelector">Selector describing which replica to pick; must not be null.</param>
        /// <param name="requestTimeout">Timeout passed to each individual request.</param>
        /// <param name="operationTimeout">Timeout passed to each of the two retried operations.</param>
        /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
        /// <returns>The selection descriptor and the selected replica.</returns>
        public static async Task<Tuple<SelectedReplica, Replica>> GetSelectedReplicaAsync(
            FabricClient fabricClient,
            ReplicaSelector replicaSelector,
            TimeSpan requestTimeout,
            TimeSpan operationTimeout,
            CancellationToken cancellationToken)
        {
            ThrowIf.Null(replicaSelector, "ReplicaSelector");

            // Resolve the partition the selector points at.
            SelectedPartition partition = await FaultAnalysisServiceUtility.GetSelectedPartitionStateAsync(
                fabricClient,
                replicaSelector.PartitionSelector,
                requestTimeout,
                operationTimeout,
                cancellationToken).ConfigureAwait(false);

            Guid targetPartitionId = partition.PartitionId;

            // Query that partition's replicas (replica id filter 0), with retries.
            ServiceReplicaList candidates = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                () => fabricClient.QueryManager.GetReplicaListAsync(
                    targetPartitionId,
                    0,
                    requestTimeout,
                    cancellationToken),
                operationTimeout,
                cancellationToken).ConfigureAwait(false);

            // Let the selector choose, skipping invalid replicas.
            Replica[] pool   = candidates.ToArray();
            Replica   chosen = replicaSelector.GetSelectedReplica(pool, new Random(), true /*skip invalid replicas*/);

            var descriptor = new SelectedReplica(chosen.Id, partition);

            return Tuple.Create(descriptor, chosen);
        }
예제 #4
0
        /// <summary>
        /// Collects every page of the replica query for a partition into a single list.
        /// </summary>
        /// <param name="partitionId">Identifier of the partition whose replicas are listed.</param>
        /// <param name="ct">Token forwarded to the query and to the retry helper.</param>
        /// <returns>A list containing the replicas from all pages that were returned.</returns>
        public async Task<ServiceReplicaList> GetReplicasAsync(Guid partitionId, CancellationToken ct)
        {
            ServiceReplicaList allReplicas       = new ServiceReplicaList();
            string             continuationToken = null;

            do
            {
                // The lambda reads continuationToken when it is invoked, so each iteration (and each
                // retry inside the helper) asks for the page following the last one received.
                ServiceReplicaList page = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () =>
                    this.TestContext.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        ServiceReplicaStatusFilter.Default,
                        continuationToken,
                        this.requestTimeout,
                        ct),
                    this.operationTimeout,
                    ct).ConfigureAwait(false);

                allReplicas.AddRangeNullSafe(page);

                // A null page is tolerated by AddRangeNullSafe above, so it must not be dereferenced
                // here either; treat it as "no further pages".
                continuationToken = page != null ? page.ContinuationToken : null;
            } while (!string.IsNullOrEmpty(continuationToken));

            return allReplicas;
        }
예제 #5
0
        /// <summary>
        /// Finds the node hosting an active secondary replica of the partition
        /// <c>00000000-0000-0000-0000-000000005000</c>.
        /// </summary>
        /// <remarks>
        /// The replica query is retried once per second for up to two minutes; any exception from the
        /// query is treated as "not ready yet".
        /// </remarks>
        /// <returns>The matching node, or null when no node in the node list has the replica's node name.</returns>
        /// <exception cref="InvalidOperationException">
        /// The replica list could not be obtained in time, or it contains no active secondary.
        /// </exception>
        internal static async Task<Node> GetNodeWithFASSecondary()
        {
            // Await instead of blocking on .Result inside an async method.
            NodeList           nodeList = await ActionTest.GetNodeListAsync();
            ServiceReplicaList list     = null;

            System.Fabric.Common.TimeoutHelper timeoutHelper = new System.Fabric.Common.TimeoutHelper(TimeSpan.FromMinutes(2));

            // FabricClient is disposable; release it as soon as the query loop is done.
            using (FabricClient fc = new FabricClient())
            {
                do
                {
                    bool queryFailed = false;

                    try
                    {
                        list = await fc.QueryManager.GetReplicaListAsync(new Guid("00000000-0000-0000-0000-000000005000"));
                    }
                    catch (Exception)
                    {
                        // Deliberate best effort: the query is simply retried until the deadline.
                        queryFailed = true;
                    }

                    if (queryFailed)
                    {
                        // Back off without blocking a thread. The await sits outside the catch block
                        // so the code does not depend on await-in-catch support.
                        await Task.Delay(TimeSpan.FromSeconds(1));
                    }
                }while (list == null && timeoutHelper.GetRemainingTime() > TimeSpan.Zero);
            }

            if (list == null)
            {
                throw new InvalidOperationException("Could not resolve FAS primary");
            }

            Replica replica = list.FirstOrDefault(r => ((StatefulServiceReplica)r).ReplicaRole == ReplicaRole.ActiveSecondary);

            // Without this check a missing secondary surfaced as a NullReferenceException inside the
            // node predicate below.
            if (replica == null)
            {
                throw new InvalidOperationException("Could not find an active secondary replica of FAS");
            }

            return nodeList.FirstOrDefault(n => n.NodeName == replica.NodeName);
        }
예제 #6
0
        /// <summary>
        /// Returns the health state, as text, of the first replica returned by the replica query for
        /// the given partition and replica id.
        /// </summary>
        /// <param name="partitionId">Identifier of the partition to query.</param>
        /// <param name="replicaId">Replica id passed to the query.</param>
        /// <returns>The health state name, or null when the query returns no replica.</returns>
        public async Task<string> GetReplicaHealthAsync(Guid partitionId, long replicaId)
        {
            ServiceReplicaList matches = await this.fabricClient.QueryManager.GetReplicaListAsync(partitionId, replicaId);

            Replica first = matches.FirstOrDefault();
            if (first == null)
            {
                // Nothing came back for this partition/replica pair.
                return null;
            }

            return first.HealthState.ToString();
        }
예제 #7
0
        /// <summary>
        /// Builds a map from each partition returned by <c>GetPartitionsAsync</c> to the replicas
        /// returned for it by <c>GetReplicasAsync</c>.
        /// </summary>
        /// <param name="ct">Token forwarded to the partition and replica queries.</param>
        /// <returns>A dictionary keyed by partition whose values are that partition's replicas.</returns>
        public async Task<Dictionary<Partition, Replica[]>> QueryLocationsAsync(CancellationToken ct)
        {
            ServicePartitionList partitions = await this.GetPartitionsAsync(ct).ConfigureAwait(false);

            var locations = new Dictionary<Partition, Replica[]>();

            foreach (Partition current in partitions)
            {
                // Replicas are fetched one partition at a time.
                ServiceReplicaList replicas = await this.GetReplicasAsync(current.PartitionInformation.Id, ct).ConfigureAwait(false);

                // Snapshot the query result into an array for the caller.
                Replica[] snapshot = new List<Replica>(replicas).ToArray();
                locations.Add(current, snapshot);
            }

            return locations;
        }
예제 #8
0
        /// <summary>
        /// Enumerates all pages of the replica list for a partition and converts every replica into a
        /// <see cref="ReplicaWrapper"/>.
        /// </summary>
        /// <param name="partitionId">Identifier of the partition whose replicas are listed.</param>
        /// <param name="timeout">Overall time budget shared by all page requests.</param>
        /// <param name="cancellationToken">Checked before each page request and forwarded to it.</param>
        /// <returns>The wrapped replicas from every page.</returns>
        /// <exception cref="TimeoutException">The budget ran out before the last page was requested.</exception>
        public async Task<IEnumerable<ReplicaWrapper>> GetReplicaListAsync(Guid partitionId, TimeSpan timeout, CancellationToken cancellationToken)
        {
            var wrappers = new List<ReplicaWrapper>();
            ServiceReplicaList page = null;

            // Measures how much of the overall budget has been consumed so far.
            var elapsedTimer = ValueStopwatch.StartNew();

            do
            {
                cancellationToken.ThrowIfCancellationRequested();

                // Each page request only gets what is left of the overall budget.
                var budgetLeft = timeout - elapsedTimer.Elapsed;
                if (budgetLeft.Ticks < 0)
                {
                    throw new TimeoutException($"Unable to enumerate all replicas pages in the allotted time budget of {timeout.TotalSeconds} seconds");
                }

                // The first request carries a null continuation token; later ones carry the token of
                // the page received in the previous iteration.
                page = await ExceptionsHelper.TranslateCancellations(
                    () => _queryClient.GetReplicaListAsync(
                        partitionId: partitionId,
                        continuationToken: page?.ContinuationToken,
                        timeout: budgetLeft,
                        cancellationToken: cancellationToken),
                    cancellationToken);

                foreach (var replica in page)
                {
                    // Only stateful replicas have a role; everything else reports none.
                    ReplicaRole? role = null;
                    if (replica.ServiceKind == ServiceKind.Stateful)
                    {
                        role = ((StatefulServiceReplica)replica).ReplicaRole;
                    }

                    var wrapper = new ReplicaWrapper
                    {
                        Id             = replica.Id,
                        ReplicaAddress = replica.ReplicaAddress,
                        ReplicaStatus  = replica.ReplicaStatus,
                        HealthState    = replica.HealthState,
                        ServiceKind    = replica.ServiceKind,
                        Role           = role,
                    };

                    wrappers.Add(wrapper);
                }
            }while (!string.IsNullOrEmpty(page?.ContinuationToken));

            return wrappers;
        }
예제 #9
0
            /// <summary>
            /// Polls the replica list of a partition until <c>AllTargetReplicasDropped</c> reports that
            /// all target replicas are gone, or until <c>replicaDropWaitDurationInSeconds</c> has elapsed.
            /// </summary>
            /// <param name="partitionId">Identifier of the partition being polled.</param>
            /// <param name="targets">Replicas that are expected to be dropped.</param>
            /// <param name="cancellationToken">Token forwarded to the query and to the delay between polls.</param>
            /// <exception cref="FabricTransientException">
            /// Thrown with <c>FabricErrorCode.NotReady</c> when the targets are still present after the wait.
            /// </exception>
            private async Task WaitForAllTargetReplicasToGetDroppedAsync(Guid partitionId, List<StatefulServiceReplica> targets, CancellationToken cancellationToken)
            {
                bool          allTargetReplicasRemoved = false;
                TimeoutHelper timeoutHelper            = new TimeoutHelper(TimeSpan.FromSeconds(this.replicaDropWaitDurationInSeconds));

                do
                {
                    ServiceReplicaList queriedReplicasAfterFault = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => this.FabricClient.QueryManager.GetReplicaListAsync(
                            partitionId,
                            0,
                            this.RequestTimeout,
                            cancellationToken),
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);

                    allTargetReplicasRemoved = this.AllTargetReplicasDropped(targets, queriedReplicasAfterFault);
                    if (!allTargetReplicasRemoved)
                    {
                        // Short pause between polls.
                        await Task.Delay(TimeSpan.FromMilliseconds(500), cancellationToken).ConfigureAwait(false);
                    }
                }while (!allTargetReplicasRemoved && timeoutHelper.GetRemainingTime() > TimeSpan.Zero);

                if (!allTargetReplicasRemoved)
                {
                    // Report the duration that actually bounded this wait (the replica-drop wait),
                    // not the unrelated data-loss check duration.
                    string error = string.Format(
                        CultureInfo.InvariantCulture,
                        "{0} - Service could not drop all replicas for '{1}' partition '{2}' in '{3}'.  See traces above for which replicas did not get dropped.  Retrying",
                        this.State.OperationId,
                        this.partitionSelector.ServiceName,
                        partitionId,
                        this.replicaDropWaitDurationInSeconds);

                    TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, error);

                    // This will cause a retry of the whole operation
                    throw new FabricTransientException("The operation could not be performed, retrying.  Issue: " + error, FabricErrorCode.NotReady);
                }
            }
예제 #10
0
        /// <summary>
        /// Runs <c>TestUsingSerializer</c> on a <see cref="ServiceReplicaList"/> that has a continuation
        /// token and contains one stateful replica and one stateless instance built from random values.
        /// </summary>
        public void PagedServiceReplicaListSerializationTest()
        {
            var pagedList = new ServiceReplicaList { ContinuationToken = "432543" };

            // Stateful entry: random values are drawn in constructor-argument order.
            var statefulStatus  = this.random.CreateRandom<ServiceReplicaStatus>();
            var statefulHealth  = this.random.CreateRandom<HealthState>();
            var statefulAddress = this.random.CreateRandom<Uri>().ToString();
            var statefulNode    = this.random.CreateRandom<string>();
            var statefulId      = this.random.CreateRandom<long>();

            var statefulReplica = new StatefulServiceReplica(
                statefulStatus,
                statefulHealth,
                ReplicaRole.IdleSecondary,
                statefulAddress,
                statefulNode,
                statefulId,
                TimeSpan.FromSeconds(2));
            pagedList.Add(statefulReplica);

            // Stateless entry: same draw order, no role argument.
            var statelessStatus  = this.random.CreateRandom<ServiceReplicaStatus>();
            var statelessHealth  = this.random.CreateRandom<HealthState>();
            var statelessAddress = this.random.CreateRandom<Uri>().ToString();
            var statelessNode    = this.random.CreateRandom<string>();
            var statelessId      = this.random.CreateRandom<long>();

            var statelessInstance = new StatelessServiceInstance(
                statelessStatus,
                statelessHealth,
                statelessAddress,
                statelessNode,
                statelessId,
                TimeSpan.FromSeconds(4));
            pagedList.Add(statelessInstance);

            TestUsingSerializer(this.Serializer, pagedList);
        }
        /// <summary>
        /// Get the complete list of replicas for a partition, following continuation tokens across pages.
        /// </summary>
        /// <remarks>
        /// A page request that times out, fails with a transient fabric error, or returns null consumes
        /// one of five retries and is then repeated with the same continuation token. When the retries
        /// are exhausted the replicas gathered so far are returned.
        /// </remarks>
        /// <param name="partition">Guid containing the partition identifier.</param>
        /// <returns>List of Replica instances.</returns>
        private async Task<IList<Replica>> GetCompleteReplicaListAsync(Guid partition)
        {
            string         ct         = null;
            int            retryCount = 5;
            bool           morePages  = true;
            List<Replica>  items      = new List<Replica>();

            // Loop on an explicit "more pages" flag rather than on the token itself: the token is still
            // null after a failed FIRST request, and looping on it ended the loop before any retry.
            while (morePages && retryCount > 0)
            {
                try
                {
                    // Get the list of replicas and add them to the item list.
                    ServiceReplicaList rList =
                        await this.Client.QueryManager.GetReplicaListAsync(partition, ct, this._timeout, this._token).ConfigureAwait(false);

                    if (null != rList)
                    {
                        items.AddRange(rList);
                        ct        = rList.ContinuationToken;
                        morePages = !string.IsNullOrEmpty(ct);
                    }
                    else
                    {
                        retryCount--;
                    }
                }
                catch (TimeoutException)
                {
                    retryCount--;
                }
                catch (FabricTransientException)
                {
                    retryCount--;
                }
            }

            return items;
        }
        /// <summary>
        /// Builds an <see cref="EndpointInstance"/> for every HTTP(S) endpoint published in the replica
        /// addresses of one partition.
        /// </summary>
        /// <remarks>
        /// Each replica address is expected to be a JSON object with an "Endpoints" object that maps
        /// endpoint names to addresses. Extraction is best effort: replicas with an empty or unparsable
        /// address or without an "Endpoints" object, endpoints whose address does not start with "http",
        /// and endpoints whose address is not a valid absolute URI are skipped.
        /// </remarks>
        /// <param name="replicas">Replicas of <paramref name="partition"/>.</param>
        /// <param name="partition">Partition the replicas belong to; decides which endpoint type is created.</param>
        /// <param name="service">Service name; its <c>AbsolutePath</c> is passed to the endpoint type factories.</param>
        /// <returns>The endpoint instances that could be extracted; possibly empty.</returns>
        /// <exception cref="ArgumentOutOfRangeException">The partition kind is not one of the known values.</exception>
        private static List<EndpointInstance> ExtractEndpoints(ServiceReplicaList replicas, Partition partition,
                                                               Uri service)
        {
            List<EndpointInstance> endpointInstances = new List<EndpointInstance>();

            foreach (var replica in replicas)
            {
                // Also covers a null address, which previously threw on ".Length".
                if (string.IsNullOrEmpty(replica.ReplicaAddress))
                {
                    continue;
                }

                JObject addresses;
                try
                {
                    addresses = JObject.Parse(replica.ReplicaAddress);
                }
                catch
                {
                    // Not a JSON object: nothing to extract from this replica.
                    continue;
                }

                // Valid JSON without an "Endpoints" object is skipped like unparsable JSON instead of
                // aborting the extraction for every other replica.
                var endpoints = addresses["Endpoints"] as JObject;
                if (endpoints == null)
                {
                    continue;
                }

                bool isOnlyEndpoint = endpoints.Count == 1;

                foreach (var endpoint in endpoints)
                {
                    var endpointName    = endpoint.Key;
                    var endpointAddress = endpoint.Value.ToString();

                    // Scheme prefix check is not linguistic, so compare ordinally.
                    if (!endpointAddress.StartsWith("http", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    EndpointType endpointType = null;

                    if (partition.ServiceKind == ServiceKind.Stateful)
                    {
                        var statefulRole = ((StatefulServiceReplica)replica).ReplicaRole;
                        ServiceEndpointRole role;
                        switch (statefulRole)
                        {
                        case ReplicaRole.Primary:
                            role = ServiceEndpointRole.StatefulPrimary;
                            break;

                        case ReplicaRole.ActiveSecondary:
                            role = ServiceEndpointRole.StatefulSecondary;
                            break;

                        default:
                            role = ServiceEndpointRole.Invalid;
                            break;
                        }

                        switch (partition.PartitionInformation.Kind)
                        {
                        case ServicePartitionKind.Singleton:
                            endpointType = EndpointType.CreateSingleton(service.AbsolutePath,
                                                                        partition.PartitionInformation.Id, endpointName, isOnlyEndpoint, role);
                            break;

                        case ServicePartitionKind.Int64Range:
                            var rangePartitionInformation =
                                (Int64RangePartitionInformation)partition.PartitionInformation;
                            endpointType = EndpointType.CreateInt64Partitioned(service.AbsolutePath,
                                                                               partition.PartitionInformation.Id, endpointName, isOnlyEndpoint, role,
                                                                               rangePartitionInformation.LowKey, rangePartitionInformation.HighKey);
                            break;

                        case ServicePartitionKind.Named:
                            var namedPartitionInformation =
                                (NamedPartitionInformation)partition.PartitionInformation;
                            endpointType = EndpointType.CreateNamedPartitioned(service.AbsolutePath,
                                                                               partition.PartitionInformation.Id, endpointName, isOnlyEndpoint, role,
                                                                               namedPartitionInformation.Name);
                            break;

                        case ServicePartitionKind.Invalid:
                            // No endpoint type for an invalid partition; the endpoint is dropped below.
                            break;

                        default:
                            throw new ArgumentOutOfRangeException();
                        }
                    }
                    else
                    {
                        endpointType = EndpointType.CreateStateless(service.AbsolutePath,
                                                                    partition.PartitionInformation.Id, endpointName, isOnlyEndpoint);
                    }

                    if (endpointType == null)
                    {
                        continue;
                    }

                    // A malformed address is skipped rather than failing the whole extraction.
                    Uri endpointUri;
                    if (!Uri.TryCreate(endpointAddress, UriKind.Absolute, out endpointUri))
                    {
                        continue;
                    }

                    endpointInstances.Add(new EndpointInstance(endpointType, endpointUri, replica.NodeName));
                }
            }

            return endpointInstances;
        }
예제 #13
0
            /// <summary>
            /// Puts a partition of a stateful, persisted service into quorum loss for
            /// <c>action.QuorumLossDuration</c>.
            /// </summary>
            /// <remarks>
            /// Steps: validate that the service is stateful with persisted state; select a partition and
            /// list its replicas; install an unreliable transport behavior that blocks
            /// "StatefulServiceReopen" on the node of every chosen replica; restart those replicas; wait
            /// for the requested duration; assert that the partition reports <c>InQuorumLoss</c>; remove
            /// the transport behaviors. The <c>finally</c> block removes any behavior that was requested
            /// but not yet successfully removed, including when an earlier step failed.
            /// </remarks>
            /// <param name="testContext">Provides the fabric client and the action executor.</param>
            /// <param name="action">Carries the partition selector, quorum loss mode, duration and timeouts; receives the result.</param>
            /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
            /// <exception cref="InvalidOperationException">The service is stateless or has no persisted state.</exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, InvokeQuorumLossAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.PartitionSelector, "PartitionSelector");

                // Single budget for the whole action: every step only gets what is left of ActionTimeout.
                var helper = new TimeoutHelper(action.ActionTimeout);

                // get info about the service so we can check type and trss
                ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        action.PartitionSelector.ServiceName,
                        action.RequestTimeout,
                        cancellationToken),
                    helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (result.Kind != ServiceDescriptionKind.Stateful)
                {
                    throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "QuorumLoss", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
                }

                StatefulServiceDescription statefulServiceDescription = result as StatefulServiceDescription;

                ReleaseAssert.AssertIf(statefulServiceDescription == null, "Service is not a stateful service");

                if (!statefulServiceDescription.HasPersistedState)
                {
                    throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "QuorumLoss", "Stateful Persistent", action.PartitionSelector.ServiceName, "Stateful In-Memory Only"));
                }

                // figure out /which/ partition to select
                var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
                {
                    RequestTimeout = action.RequestTimeout,
                    ActionTimeout  = helper.GetRemainingTime()
                };

                await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken);

                Guid partitionId = getPartitionStateAction.Result.PartitionId;

                // get data about replicas in that partition
                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        action.RequestTimeout,
                        cancellationToken),
                    helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                // (nodeName, behaviorName) of every transport behavior this action asks to install.
                var removeUTRequestList = new List<Tuple<string, string>>();

                // Removal task per request, filled only once the regular removal step is reached.
                Dictionary<Tuple<string, string>, Task> removeUTTaskDictionary = new Dictionary<Tuple<string, string>, Task>();

                try
                {
                    var stableReplicas         = replicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();
                    var stableReplicasToRemove = new List<StatefulServiceReplica>();

                    // Budget of non-primary replicas to take down: all ready replicas but one in
                    // AllReplicas mode, otherwise the write quorum size of the full replica count.
                    long replicasToRestartWithoutPrimary =
                        action.QuorumLossMode == QuorumLossMode.AllReplicas
                            ? stableReplicas.Length - 1
                            : FabricCluster.GetWriteQuorumSize(replicasResult.Count);
                    foreach (var replica in stableReplicas)
                    {
                        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                        ReleaseAssert.AssertIf(statefulReplica == null, "Service Replica is not of stateful type even though service is stateful");
                        if (statefulReplica.ReplicaRole != ReplicaRole.Primary)
                        {
                            replicasToRestartWithoutPrimary--;
                        }

                        // The primary is always included; non-primaries only while the budget lasts.
                        if (replicasToRestartWithoutPrimary >= 0 || statefulReplica.ReplicaRole == ReplicaRole.Primary)
                        {
                            stableReplicasToRemove.Add(statefulReplica);
                        }
                    }

                    // for selected replicas, block reopen so that when we restart the replica (NOT remove the replica) it doesn't come up
                    var utTaskList = new List<Task>();
                    foreach (var statefulReplica in stableReplicasToRemove)
                    {
                        string nodeName = statefulReplica.NodeName;
                        UnreliableTransportBehavior behavior = new UnreliableTransportBehavior("*", "StatefulServiceReopen");
                        behavior.AddFilterForPartitionId(partitionId);
                        string behaviorName = "BlockStatefulServiceReopen_" + nodeName;

                        // Record the request BEFORE issuing it so cleanup knows about it even if the
                        // call below, or any later step, fails.
                        removeUTRequestList.Add(new Tuple<string, string>(nodeName, behaviorName));
                        utTaskList.Add(
                            FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                () =>
                                testContext.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                                    nodeName,
                                    behaviorName,
                                    behavior,
                                    action.RequestTimeout,
                                    cancellationToken),
                                helper.GetRemainingTime(),
                                cancellationToken));
                    }

                    await Task.WhenAll(utTaskList).ConfigureAwait(false);

                    // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                    await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken);

                    var restartReplicaTaskList = new List<Task>();
                    foreach (var statefulReplica in stableReplicasToRemove)
                    {
                        ReplicaSelector replicaSelector = ReplicaSelector.ReplicaIdOf(PartitionSelector.PartitionIdOf(action.PartitionSelector.ServiceName, partitionId), statefulReplica.Id);

                        var restartReplicaAction = new RestartReplicaAction(replicaSelector)
                        {
                            CompletionMode = CompletionMode.DoNotVerify,
                            RequestTimeout = action.RequestTimeout,
                            ActionTimeout  = helper.GetRemainingTime()
                        };

                        restartReplicaTaskList.Add(testContext.ActionExecutor.RunAsync(restartReplicaAction, cancellationToken));
                    }

                    await Task.WhenAll(restartReplicaTaskList).ConfigureAwait(false);

                    // Hold the partition in quorum loss for the requested duration.
                    await AsyncWaiter.WaitAsync(action.QuorumLossDuration, cancellationToken).ConfigureAwait(false);

                    // validate
                    ServicePartitionList partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.QueryManager.GetPartitionListAsync(
                            action.PartitionSelector.ServiceName,
                            null,
                            action.RequestTimeout,
                            cancellationToken),
                        FabricClientRetryErrors.GetPartitionListFabricErrors.Value,
                        helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    foreach (StatefulServicePartition partition in partitionsResult)
                    {
                        if (partition.PartitionInformation.Id == partitionId)
                        {
                            ReleaseAssert.AssertIf(partition.PartitionStatus != ServicePartitionStatus.InQuorumLoss, "Partition failed to be in Quorum Loss.");
                            break;
                        }
                    }

                    foreach (var removeUTParams in removeUTRequestList)
                    {
                        var  currentParams = removeUTParams;
                        Task task          = FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                                currentParams.Item1,  /*nodeName*/
                                currentParams.Item2,  /*behaviorName*/
                                action.RequestTimeout,
                                cancellationToken),
                            FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                            helper.GetRemainingTime(),
                            cancellationToken);

                        removeUTTaskDictionary[currentParams] = task;
                    }

                    await Task.WhenAll(removeUTTaskDictionary.Values).ConfigureAwait(false);

                    // TODO: Wait for some time so that the removal of this unreliable transport behavior can be read from the files.
                    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                    await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken);
                }
                finally
                {
                    // Drive cleanup from the REQUEST list. The task dictionary is empty whenever a step
                    // before the regular removal failed, and iterating it left the reopen-blocking
                    // behaviors installed on the nodes.
                    var removeUTTaskList = new List<Task>();

                    foreach (var removeUTRequest in removeUTRequestList)
                    {
                        var currentRemoveUTRequest = removeUTRequest;

                        Task earlierRemoval;
                        bool alreadyRemoved =
                            removeUTTaskDictionary.TryGetValue(currentRemoveUTRequest, out earlierRemoval) &&
                            earlierRemoval != null &&
                            !earlierRemoval.IsFaulted;

                        if (!alreadyRemoved)
                        {
                            removeUTTaskList.Add(
                                FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                    () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                                        currentRemoveUTRequest.Item1, /*nodeName*/
                                        currentRemoveUTRequest.Item2, /*behaviorName*/
                                        action.RequestTimeout,
                                        cancellationToken),
                                    FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                                    helper.GetRemainingTime(),
                                    cancellationToken));
                        }
                    }

                    // Only pay for the waits when a removal was actually issued here.
                    // NOTE(review): these are blocking waits, kept as in the original so the block
                    // does not depend on await-in-finally support; convert to await if the project's
                    // language version allows it.
                    if (removeUTTaskList.Count > 0)
                    {
                        Task.WhenAll(removeUTTaskList).Wait(cancellationToken);

                        // TODO: Wait for some time so that the removal of this unreliable transport behavior can be read from the files.
                        // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                        Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).GetAwaiter().GetResult();
                    }
                }

                action.Result          = new InvokeQuorumLossResult(getPartitionStateAction.Result);
                this.ResultTraceString = StringHelper.Format("InvokeQuorumLossAction succeeded for {0} with QuorumLossMode = {1}", partitionId, action.QuorumLossMode);
            }
예제 #14
0
            /// <summary>
            /// Restarts (or removes) the ready replicas of the partition chosen by <c>action.PartitionSelector</c>
            /// while a "BlockDoReconfiguration" unreliable transport behavior is installed on the node that hosts
            /// the Failover Manager primary.
            /// </summary>
            /// <remarks>
            /// Steps, in order:
            /// 1. Fetch the service description and reject <c>OnlyActiveSecondaries</c> for non-stateful services.
            /// 2. Resolve the target partition and query its replicas.
            /// 3. Query the FM partition replicas and pick the node of the ready primary.
            /// 4. Add an unreliable transport behavior ("*", "DoReconfiguration") filtered to the target partition.
            /// 5. For each ready replica call RestartReplicaAsync (service has persisted state) or
            ///    RemoveReplicaAsync (otherwise); primaries are skipped in <c>OnlyActiveSecondaries</c> mode.
            /// 6. Remove the transport behavior again. If step 5 threw, the removal is attempted in the
            ///    <c>finally</c> block so the behavior is not left installed.
            /// No explicit validation of the restart is performed.
            /// </remarks>
            /// <param name="testContext">Provides the FabricClient and the action executor used for all calls.</param>
            /// <param name="action">The restart request; its <c>Result</c> is set on success.</param>
            /// <param name="cancellationToken">Token forwarded to every fabric call.</param>
            /// <returns>A task that completes when the replicas were signalled and the behavior was removed.</returns>
            /// <exception cref="InvalidOperationException">
            /// <c>OnlyActiveSecondaries</c> was requested for a service that is not stateful.
            /// </exception>
            /// <exception cref="FabricException">No ready primary replica of the Failover Manager was found.</exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, RestartPartitionAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.PartitionSelector, "partitionSelector");

                this.helper = new TimeoutHelper(action.ActionTimeout);

                // get service info so we can validate if the operation is valid
                ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        action.PartitionSelector.ServiceName,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (result.Kind != ServiceDescriptionKind.Stateful && action.RestartPartitionMode == RestartPartitionMode.OnlyActiveSecondaries)
                {
                    throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "RestartPartitionMode.OnlyActiveSecondaries", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
                }

                // Decides below between RestartReplicaAsync (persisted) and RemoveReplicaAsync (everything else).
                bool hasPersistedState = false;

                if (result.Kind == ServiceDescriptionKind.Stateful)
                {
                    StatefulServiceDescription statefulDescription = result as StatefulServiceDescription;
                    ReleaseAssert.AssertIf(statefulDescription == null, "Stateful service description is not WinFabricStatefulServiceDescription");
                    hasPersistedState = statefulDescription.HasPersistedState;
                }

                // now actually select a partition
                var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
                {
                    RequestTimeout = action.RequestTimeout,
                    ActionTimeout  = this.helper.GetRemainingTime()
                };

                // ConfigureAwait(false) matches every other await in this method.
                await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken).ConfigureAwait(false);

                Guid partitionId = getPartitionStateAction.Result.PartitionId;

                // get replicas for target
                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                // get replicas for fm in order to get the primary
                ServiceReplicaList fmReplicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                        Constants.FmPartitionId,
                        0,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                string fmPrimaryNodeName = string.Empty;
                var    readyFMReplicas   = fmReplicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();

                foreach (var replica in readyFMReplicas)
                {
                    StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(statefulReplica == null, "FM Replica is not a stateful replica");
                    if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                    {
                        fmPrimaryNodeName = replica.NodeName;
                    }
                }

                if (string.IsNullOrEmpty(fmPrimaryNodeName))
                {
                    throw new FabricException(StringHelper.Format(StringResources.Error_PartitionPrimaryNotReady, "FailoverManager"), FabricErrorCode.NotReady);
                }

                ////------------------------------------------------------
                // target ut at the fm primary only
                UnreliableTransportBehavior behavior = new UnreliableTransportBehavior("*", "DoReconfiguration");

                behavior.AddFilterForPartitionId(partitionId);
                string behaviorName = "BlockDoReconfiguration";

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                        fmPrimaryNodeName,
                        behaviorName,
                        behavior,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                // Set just before the regular removal attempt so the finally block does not attempt it a second time.
                bool triedToRemoveBehavior = false;

                // inspect the actual replicas to restart, only operate on stable ones
                try
                {
                    var stableReplicasToRestart = replicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();

                    foreach (var replica in stableReplicasToRestart)
                    {
                        var currentReplica = replica;
                        if (action.RestartPartitionMode == RestartPartitionMode.OnlyActiveSecondaries)
                        {
                            StatefulServiceReplica statefulReplica = currentReplica as StatefulServiceReplica;
                            ReleaseAssert.AssertIf(statefulReplica == null, "Stateful service replica is not StatefulServiceReplica");
                            if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                            {
                                continue;
                            }
                        }

                        if (hasPersistedState)
                        {
                            await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                () => testContext.FabricClient.FaultManager.RestartReplicaAsync(
                                    currentReplica.NodeName,
                                    partitionId,
                                    currentReplica.Id,
                                    CompletionMode.DoNotVerify,
                                    action.RequestTimeout.TotalSeconds,
                                    cancellationToken),
                                this.helper.GetRemainingTime(),
                                cancellationToken).ConfigureAwait(false);
                        }
                        else
                        {
                            await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                () => testContext.FabricClient.FaultManager.RemoveReplicaAsync(
                                    currentReplica.NodeName,
                                    partitionId,
                                    currentReplica.Id,
                                    CompletionMode.DoNotVerify,
                                    false, /*force remove*/
                                    action.RequestTimeout.TotalSeconds,
                                    cancellationToken),
                                this.helper.GetRemainingTime(),
                                cancellationToken).ConfigureAwait(false);
                        }
                    }

                    triedToRemoveBehavior = true;
                    await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                            fmPrimaryNodeName,
                            behaviorName,
                            action.RequestTimeout,
                            cancellationToken),
                        FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                        this.helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                    await Task.Delay(TimeSpan.FromSeconds(5.0)).ConfigureAwait(false);
                }
                finally
                {
                    // TODO: Provide a way to clear all behaviors just in case.
                    if (!triedToRemoveBehavior)
                    {
                        // Reached only when the try block threw before the regular removal was attempted.
                        // The cleanup is awaited (rather than blocked on with GetResult) so no thread is
                        // tied up while waiting; the original exception is still rethrown once the finally
                        // block completes.
                        ActionTraceSource.WriteWarning(TraceType, "Exception after adding behavior to block messages. Removing behavior synchronously");
                        await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                                fmPrimaryNodeName,
                                behaviorName,
                                action.RequestTimeout,
                                cancellationToken),
                            FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                            this.helper.GetRemainingTime(),
                            cancellationToken).ConfigureAwait(false);

                        // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                        // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                        await Task.Delay(TimeSpan.FromSeconds(5.0)).ConfigureAwait(false);
                    }
                }

                // -- note there's no explicit validation

                // action result
                action.Result     = new RestartPartitionResult(getPartitionStateAction.Result);
                ResultTraceString = StringHelper.Format("RestartPartitionAction succeeded for {0} with RestartPartitionMode = {1}", partitionId, action.RestartPartitionMode);
            }
        /// <summary>
        /// Rebuilds the proxy configuration from the applications, services, partitions and replicas
        /// returned by the fabric client, then swaps it into <c>_config</c> and signals the previous
        /// configuration instance that it changed.
        /// </summary>
        /// <remarks>
        /// For every service one <c>Cluster</c> (id = service name without the "fabric:/" prefix) and two
        /// routes ("catch-all" and "root-match") are produced; every replica endpoint becomes a destination.
        /// All paged queries are followed using the continuation token of the list that is being paged.
        /// The method never throws: failures are written to <see cref="System.Diagnostics.Trace"/> and the
        /// previously published configuration is left in place.
        /// </remarks>
        /// <returns>A task that completes when the refresh attempt has finished.</returns>
        public static async Task Update()
        {
            try
            {
                var routes   = new ConcurrentBag<ProxyRoute>();
                var clusters = new ConcurrentBag<Cluster>();

                // Token of the last application page; fed into the next query so paging advances.
                string appContinuationToken = null;
                do
                {
                    ApplicationList apps = await _fabricClient.QueryManager.GetApplicationPagedListAsync(new System.Fabric.Description.ApplicationQueryDescription()
                    {
                        MaxResults        = Int32.MaxValue,
                        ContinuationToken = appContinuationToken
                    });
                    appContinuationToken = apps.ContinuationToken;

                    await apps.AsyncParallelForEach(async app =>
                    {
                        // Token of the last service page of this application.
                        string serviceContinuationToken = null;

                        do
                        {
                            ServiceList services = await _fabricClient.QueryManager.GetServicePagedListAsync(new System.Fabric.Description.ServiceQueryDescription(app.ApplicationName)
                            {
                                MaxResults        = Int32.MaxValue,
                                ContinuationToken = serviceContinuationToken
                            });
                            serviceContinuationToken = services.ContinuationToken;

                            await services.AsyncParallelForEach(async service =>
                            {
                                var cluster     = new Cluster();
                                var serviceName = service.ServiceName.ToString().Replace("fabric:/", "");
                                cluster.Id      = serviceName;
                                clusters.Add(cluster);
                                var destinations = new ConcurrentDictionary<string, Destination>();

                                { // Add Catch All
                                    var route        = new ProxyRoute();
                                    route.RouteId    = serviceName + ":catch-all";
                                    route.ClusterId  = serviceName;
                                    route.Match.Path = serviceName + "/{**catch-all}";
                                    route.Transforms = new List<IDictionary<string, string>>();
                                    route.AddTransformPathRemovePrefix(new AspNetCore.Http.PathString("/" + serviceName));
                                    route.AddTransformRequestHeader("X-Forwarded-PathBase", "/" + serviceName);

                                    routes.Add(route);
                                }
                                { // Add root match
                                    var route        = new ProxyRoute();
                                    route.RouteId    = serviceName + ":root-match";
                                    route.ClusterId  = serviceName;
                                    route.Match.Path = serviceName;
                                    route.Transforms = new List<IDictionary<string, string>>();
                                    route.AddTransformPathRemovePrefix(new AspNetCore.Http.PathString("/" + serviceName));
                                    route.AddTransformRequestHeader("X-Forwarded-PathBase", "/" + serviceName);
                                    routes.Add(route);
                                }

                                ServicePartitionList partitions = null;

                                do
                                {
                                    // Follow-up pages must query the same service with the partition list's own token.
                                    partitions = partitions == null ?
                                                 await _fabricClient.QueryManager.GetPartitionListAsync(service.ServiceName) :
                                                 await _fabricClient.QueryManager.GetPartitionListAsync(service.ServiceName, partitions.ContinuationToken);

                                    await partitions.AsyncParallelForEach(async partition =>
                                    {
                                        var partitionId             = partition.PartitionInformation.Id;
                                        ServiceReplicaList replicas = null;

                                        do
                                        {
                                            // Follow-up pages use the replica list's own token.
                                            replicas = replicas == null ?
                                                       await _fabricClient.QueryManager.GetReplicaListAsync(partitionId) :
                                                       await _fabricClient.QueryManager.GetReplicaListAsync(partitionId, replicas.ContinuationToken);

                                            await replicas.AsyncParallelForEach(async replica =>
                                            {
                                                // A null/empty address cannot be deserialized; skip the replica
                                                // instead of letting the exception abort the whole refresh.
                                                if (string.IsNullOrEmpty(replica.ReplicaAddress))
                                                {
                                                    return;
                                                }

                                                var endpointSet = JsonSerializer.Deserialize<ReplicaAddress>(replica.ReplicaAddress);
                                                if (endpointSet?.Endpoints == null)
                                                {
                                                    return;
                                                }

                                                foreach (var endpoint in endpointSet.Endpoints)
                                                {
                                                    var destination     = new Destination();
                                                    destination.Address = endpoint.Value;

                                                    // NOTE(review): the key does not include the endpoint name, so TryAdd keeps
                                                    // only the first endpoint of a replica - confirm this is intended.
                                                    destinations.TryAdd($"{partitionId}:{replica.Id}", destination);
                                                }
                                            });
                                        }while (!string.IsNullOrEmpty(replicas.ContinuationToken));
                                    });
                                }while (!string.IsNullOrEmpty(partitions.ContinuationToken));

                                foreach (var dest in destinations)
                                {
                                    cluster.Destinations.Add(dest);
                                }
                            });
                        }while (!string.IsNullOrEmpty(serviceContinuationToken));
                    });
                }while (!string.IsNullOrEmpty(appContinuationToken));


                var config    = new ServiceFabricConfig(clusters.ToList(), routes.ToList());
                var oldConfig = _config;
                _config = config;

                // There is no previous configuration to signal on the very first successful update.
                oldConfig?.SignalChange();
            }
            catch (Exception ex)
            {
                // Keep the best-effort contract (callers never see an exception) but stop hiding the failure.
                System.Diagnostics.Trace.TraceError("Service Fabric proxy configuration update failed: {0}", ex);
            }
        }
예제 #16
0
            /// <summary>
            /// Moves a secondary replica of the selected partition from one node to another by calling
            /// <c>FaultManager.MoveSecondaryUsingNodeNameAsync</c>.
            /// </summary>
            /// <remarks>
            /// Unless <c>action.IgnoreConstraints</c> is set, the current replica set is queried first to
            /// (a) reject a target node that already hosts the primary or an active secondary,
            /// (b) pick a random active secondary when no current node was supplied, and
            /// (c) verify that the current node really hosts an active secondary.
            /// When constraints are ignored none of these checks run, so the caller must supply the current
            /// secondary node name; otherwise the release assert below fires.
            /// </remarks>
            /// <param name="testContext">Provides the FabricClient, the action executor and the random source.</param>
            /// <param name="action">The move request; its <c>Result</c> is set on success.</param>
            /// <param name="cancellationToken">Token forwarded to every fabric call.</param>
            /// <returns>A task that completes when the move request has been accepted.</returns>
            /// <exception cref="InvalidOperationException">
            /// A replica of the partition is not stateful, or the replica set has no active secondary.
            /// </exception>
            /// <exception cref="FabricException">
            /// The new node already hosts the primary (AlreadyPrimaryReplica) or an active secondary
            /// (AlreadySecondaryReplica), or the current node hosts no active secondary
            /// (InvalidReplicaStateForReplicaOperation).
            /// </exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, MoveSecondaryAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.PartitionSelector, "PartitionSelector");

                this.helper = new TimeoutHelper(action.ActionTimeout);

                string newSecondaryNode     = action.NewSecondaryNodeName;
                string currentSecondaryNode = action.CurrentSecondaryNodeName;

                var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
                {
                    RequestTimeout = action.RequestTimeout,
                    ActionTimeout  = this.helper.GetRemainingTime()
                };

                await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken).ConfigureAwait(false);

                Guid partitionId = getPartitionStateAction.Result.PartitionId;

                if (!action.IgnoreConstraints)
                {
                    // get current primary replica node name.
                    ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                            partitionId,
                            0,
                            action.RequestTimeout,
                            cancellationToken),
                        this.helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    string       currentPrimaryNodeInfo = string.Empty;
                    List<string> currentSecReplicaNodes = new List<string>();
                    foreach (var replica in replicasResult)
                    {
                        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                        if (statefulReplica == null)
                        {
                            throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "MoveSecondary", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
                        }

                        if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                        {
                            currentPrimaryNodeInfo = statefulReplica.NodeName;
                            if (!string.IsNullOrEmpty(newSecondaryNode) && newSecondaryNode == statefulReplica.NodeName)
                            {
                                throw new FabricException(
                                          StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, newSecondaryNode, "MoveSecondary", "Primary exists on node"),
                                          FabricErrorCode.AlreadyPrimaryReplica);
                            }
                        }
                        else if (statefulReplica.ReplicaRole == ReplicaRole.ActiveSecondary)
                        {
                            currentSecReplicaNodes.Add(statefulReplica.NodeName);
                            if (!string.IsNullOrEmpty(newSecondaryNode) && newSecondaryNode == statefulReplica.NodeName)
                            {
                                throw new FabricException(
                                          StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, newSecondaryNode, "MoveSecondary", "Secondary exists on node"),
                                          FabricErrorCode.AlreadySecondaryReplica);
                            }
                        }
                    }

                    if (currentSecReplicaNodes.Count == 0)
                    {
                        throw new InvalidOperationException(StringResources.Error_NoSecondariesInReplicaSet);
                    }

                    if (string.IsNullOrEmpty(currentSecondaryNode))
                    {
                        // No source node requested: pick one of the active secondaries at random.
                        int num = testContext.Random.Next(currentSecReplicaNodes.Count);
                        currentSecondaryNode = currentSecReplicaNodes[num];
                    }

                    if (!currentSecReplicaNodes.Contains(currentSecondaryNode))
                    {
                        // Report the node that failed the check (the current node, not the new one).
                        throw new FabricException(
                                  StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, currentSecondaryNode, "MoveSecondary", "Current node does not have a secondary replica"),
                                  FabricErrorCode.InvalidReplicaStateForReplicaOperation);
                    }
                }

                ReleaseAssert.AssertIf(string.IsNullOrEmpty(currentSecondaryNode), "Current node name cannot be null or empty.");
                ReleaseAssert.AssertIf(newSecondaryNode == currentSecondaryNode, "Current and New node names are same.");

                // An empty new node name is traced as "Random".
                ActionTraceSource.WriteInfo(TraceSource, "Calling move secondary with current node {0}, new node {1}, partition {2}", currentSecondaryNode, string.IsNullOrEmpty(newSecondaryNode) ? "Random" : newSecondaryNode, partitionId);
                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.FaultManager.MoveSecondaryUsingNodeNameAsync(
                        currentSecondaryNode,
                        newSecondaryNode,
                        getPartitionStateAction.Result.ServiceName,
                        partitionId,
                        action.IgnoreConstraints,
                        action.RequestTimeout,
                        cancellationToken),
                    FabricClientRetryErrors.MoveSecondaryFabricErrors.Value,
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                action.Result          = new MoveSecondaryResult(currentSecondaryNode, newSecondaryNode, getPartitionStateAction.Result);
                this.ResultTraceString = StringHelper.Format(
                    "MoveSecondaryAction succeeded for moving Secondary for {0} from {1} to {2}.",
                    partitionId,
                    currentSecondaryNode,
                    newSecondaryNode);
            }
예제 #17
0
            /// <summary>
            /// Collects the information needed for a data-loss fault and records it on the command state:
            /// the target partition id, its current data loss number, the node hosting the Failover Manager
            /// primary, the unreliable transport behavior name to use, and the target replica set size.
            /// </summary>
            /// <param name="cancellationToken">Token forwarded to every fabric call.</param>
            /// <param name="serviceInternalFaultInfo">Not read by this step.</param>
            /// <returns>The state, with <c>PerformingActions</c> pushed onto its progress stack.</returns>
            /// <exception cref="FabricInvalidForStatelessServicesException">The service is not stateful.</exception>
            /// <exception cref="FabricException">
            /// The selected partition is not in the service's partition list (PartitionNotFound), or no ready
            /// Failover Manager primary replica was found (NotReady).
            /// </exception>
            public override async Task<ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                InvokeDataLossState dataLossState = Convert(this.State);

                // Data loss only applies to stateful services, so check the service kind first.
                ServiceDescription serviceDescription = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        this.partitionSelector.ServiceName,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                if (serviceDescription.Kind != ServiceDescriptionKind.Stateful)
                {
                    // The message in the first arg is only for debugging, it is not returned to the user.
                    throw new FabricInvalidForStatelessServicesException("FabricInvalidForStatelessServicesException", FabricErrorCode.InvalidForStatelessServices);
                }

                int replicaSetSize = (serviceDescription as StatefulServiceDescription).TargetReplicaSetSize;

                SelectedPartition selectedPartition = await FaultAnalysisServiceUtility.GetSelectedPartitionStateAsync(
                    this.FabricClient,
                    this.partitionSelector,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                Guid partitionId = selectedPartition.PartitionId;

                ServicePartitionList servicePartitions = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetPartitionListAsync(
                        this.partitionSelector.ServiceName,
                        null,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // Stays null until the selected partition is located in the list.
                long? dataLossNumberBeforeFault = null;

                foreach (StatefulServicePartition candidate in servicePartitions)
                {
                    if (candidate.PartitionInformation.Id != partitionId)
                    {
                        continue;
                    }

                    dataLossNumberBeforeFault = candidate.PrimaryEpoch.DataLossNumber;
                    break;
                }

                if (!dataLossNumberBeforeFault.HasValue)
                {
                    throw new FabricException(StringHelper.Format(StringResources.Error_PartitionNotFound), FabricErrorCode.PartitionNotFound);
                }

                ServiceReplicaList fmReplicas = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        FASConstants.FmPartitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // Only replicas in Ready status are considered when looking for the FM primary.
                string fmPrimaryNode = string.Empty;

                foreach (var fmReplica in fmReplicas)
                {
                    if (fmReplica.ReplicaStatus != ServiceReplicaStatus.Ready)
                    {
                        continue;
                    }

                    var statefulFmReplica = fmReplica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(statefulFmReplica == null, "FM Replica is not a stateful replica");
                    if (statefulFmReplica.ReplicaRole == ReplicaRole.Primary)
                    {
                        fmPrimaryNode = fmReplica.NodeName;
                    }
                }

                if (string.IsNullOrEmpty(fmPrimaryNode))
                {
                    throw new FabricException(StringHelper.Format(StringResources.Error_PartitionPrimaryNotReady, "FailoverManager"), FabricErrorCode.NotReady);
                }

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - FM primary location={1}", this.State.OperationId, fmPrimaryNode);

                // The behavior name carries the operation id.
                string utBehaviorName = "BlockDoReconfiguration_" + this.State.OperationId;
                var    utTargets      = new List<Tuple<string, string>>
                {
                    Tuple.Create(fmPrimaryNode, utBehaviorName)
                };

                dataLossState.StateProgress.Push(StepStateNames.PerformingActions);
                dataLossState.Info.DataLossNumber          = dataLossNumberBeforeFault.Value;
                dataLossState.Info.NodeName                = fmPrimaryNode;
                dataLossState.Info.PartitionId             = partitionId;
                dataLossState.Info.UnreliableTransportInfo = utTargets;
                dataLossState.Info.TargetReplicaSetSize    = replicaSetSize;
                return dataLossState;
            }
예제 #18
0
            /// <summary>
            /// Reports whether every target replica is either missing from the post-fault query result or
            /// listed there with status <c>Dropped</c>.
            /// </summary>
            /// <param name="targetReplicas">The replicas that were targeted by the fault.</param>
            /// <param name="queriedReplicasAfterFault">The replica list queried after the fault was applied.</param>
            /// <returns>
            /// <c>false</c> as soon as a target replica is found in the query result with a status other than
            /// <c>Dropped</c> (the offending replica is traced); otherwise <c>true</c>.
            /// </returns>
            private bool AllTargetReplicasDropped(List<StatefulServiceReplica> targetReplicas, ServiceReplicaList queriedReplicasAfterFault)
            {
                foreach (StatefulServiceReplica target in targetReplicas)
                {
                    foreach (StatefulServiceReplica queried in queriedReplicasAfterFault)
                    {
                        // A queried replica is acceptable when it is a different replica, or when it is
                        // the target and already Dropped. A target that is absent from the query result
                        // never matches and is therefore acceptable as well.
                        bool isTarget = target.ReplicaId == queried.ReplicaId;
                        if (!isTarget || queried.ReplicaStatus == ServiceReplicaStatus.Dropped)
                        {
                            continue;
                        }

                        // The target is still present and not Dropped: the caller should retry the query.
                        TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - Replica with id={1} is not Dropped and has status={2}", this.State.OperationId, queried.ReplicaId, queried.ReplicaStatus);
                        return false;
                    }
                }

                return true;
            }
예제 #19
0
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                InvokeDataLossState state = Convert(this.State);

                PartitionSelector partitionSelector   = state.Info.PartitionSelector;
                DataLossMode      dataLossMode        = state.Info.DataLossMode;
                long   preDataLossNumber              = state.Info.DataLossNumber;
                string failoverManagerPrimaryNodeName = state.Info.NodeName;
                Guid   partitionId          = state.Info.PartitionId;
                string behaviorName         = state.Info.UnreliableTransportInfo.First().Item2;
                int    targetReplicaSetSize = state.Info.TargetReplicaSetSize;

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - applying UT, partitionId={1}", this.State.OperationId, partitionId);
                System.Fabric.Common.UnreliableTransportBehavior behavior = new System.Fabric.Common.UnreliableTransportBehavior("*", "DoReconfiguration");
                behavior.AddFilterForPartitionId(partitionId);

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                        failoverManagerPrimaryNodeName,
                        behaviorName,
                        behavior,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                List <StatefulServiceReplica> replicaList = new List <StatefulServiceReplica>();

                foreach (var replica in replicasResult)
                {
                    StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(statefulReplica == null, "Service Replica is not of stateful type even though service is stateful");
                    replicaList.Add(statefulReplica);
                }

                // Select target replicas based on the DataLosMode
                List <StatefulServiceReplica> targets = null;

                if (dataLossMode == DataLossMode.FullDataLoss)
                {
                    targets = GetReplicasForFullDataLoss(replicaList);
                }
                else if (dataLossMode == DataLossMode.PartialDataLoss)
                {
                    targets = FaultAnalysisServiceUtility.GetReplicasForPartialLoss(state.OperationId, replicaList);
                }
                else
                {
                    throw FaultAnalysisServiceUtility.CreateException(StepBase.TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_INVALIDARG, Strings.StringResources.Error_UnsupportedDataLossMode);
                }

                if (targets == null)
                {
                    // This will cause the command to rollback and retry
                    throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
                }

                foreach (var replica in targets)
                {
                    TestabilityTrace.TraceSource.WriteInfo(
                        StepBase.TraceType,
                        "{0} - Removing replica {1} in partition {2} with role {3} and status {4} to induce data loss",
                        this.State.OperationId,
                        replica.Id,
                        partitionId,
                        replica.ReplicaRole,
                        replica.ReplicaStatus);

                    await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => this.FabricClient.ServiceManager.RemoveReplicaAsync(
                            replica.NodeName,
                            partitionId,
                            replica.Id,
                            this.RequestTimeout,
                            cancellationToken),
                        FabricClientRetryErrors.RemoveReplicaErrors.Value,
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);
                }

                ActionTest.PerformInternalServiceFaultIfRequested(this.State.OperationId, serviceInternalFaultInfo, this.State, cancellationToken, true);

                await this.WaitForAllTargetReplicasToGetDroppedAsync(partitionId, targets, cancellationToken).ConfigureAwait(false);

                await RemoveUnreliableTransportAsync(this.FabricClient, failoverManagerPrimaryNodeName, behaviorName, this.RequestTimeout, this.OperationTimeout, cancellationToken).ConfigureAwait(false);

                bool          dataLossWasSuccessful = false;
                TimeoutHelper timeoutHelper         = new TimeoutHelper(TimeSpan.FromSeconds(30));

                do
                {
                    ServicePartitionList partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => this.FabricClient.QueryManager.GetPartitionListAsync(
                            this.partitionSelector.ServiceName,
                            null,
                            this.RequestTimeout,
                            cancellationToken),
                        this.OperationTimeout,
                        cancellationToken).ConfigureAwait(false);

                    bool partitionFound     = false;
                    long postDataLossNumber = 0;
                    foreach (StatefulServicePartition partition in partitionsResult)
                    {
                        if (partition.PartitionInformation.Id == partitionId)
                        {
                            postDataLossNumber = partition.PrimaryEpoch.DataLossNumber;
                            partitionFound     = true;
                            break;
                        }
                    }

                    if (!partitionFound)
                    {
                        throw new FabricException(StringHelper.Format(StringResources.Error_PartitionNotFound), FabricErrorCode.PartitionNotFound);
                    }

                    TestabilityTrace.TraceSource.WriteInfo(
                        StepBase.TraceType,
                        "{0} - Checking data loss numbers for partition {1} with remaining time {2}. Current numbers {3}:{4}",
                        this.State.OperationId,
                        partitionId,
                        timeoutHelper.GetRemainingTime(),
                        preDataLossNumber,
                        postDataLossNumber);

                    if (postDataLossNumber != preDataLossNumber)
                    {
                        dataLossWasSuccessful = true;
                        break;
                    }

                    await System.Fabric.Common.AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(this.dataLossCheckPollIntervalInSeconds), cancellationToken).ConfigureAwait(false);
                }while (timeoutHelper.GetRemainingTime() > TimeSpan.Zero);

                if (!dataLossWasSuccessful)
                {
                    // This is only viewable internally for debug.  This will cause a retry of the whole flow.
                    string error = string.Format(
                        CultureInfo.InvariantCulture,
                        "{0} - Service could not induce data loss for service '{1}' partition '{2}' in '{3}' Please retry",
                        this.State.OperationId,
                        partitionSelector.ServiceName,
                        partitionId,
                        this.dataLossCheckWaitDurationInSeconds);
                    TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, error);
                    throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
                }

                state.StateProgress.Push(StepStateNames.CompletedSuccessfully);

                return(state);
            }
예제 #20
0
            /// <summary>
            /// Resolves the partition selected by <paramref name="action"/>, optionally validates the requested
            /// target node against the partition's current primary, and then issues the move-primary request.
            /// </summary>
            /// <param name="testContext">Supplies the action executor, cluster node information and fabric client used here.</param>
            /// <param name="action">
            /// The move-primary request. <c>NodeName</c> is the requested target (an empty value is traced as "Random"
            /// and passed to the move call unchanged); <c>Result</c> is set once the move call returns.
            /// </param>
            /// <param name="cancellationToken">Forwarded to every asynchronous call made by this method.</param>
            /// <exception cref="InvalidOperationException">
            /// Constraints are enforced and either no node information is available or a replica of the partition
            /// is not a stateful replica.
            /// </exception>
            /// <exception cref="FabricException">
            /// Constraints are enforced and either the requested node already hosts the primary
            /// (<c>AlreadyPrimaryReplica</c>) or no primary replica could be matched to a known node (<c>NotReady</c>).
            /// </exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, MovePrimaryAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.PartitionSelector, "PartitionSelector");

                // Every call below is handed whatever is left of the action timeout.
                this.helper = new TimeoutHelper(action.ActionTimeout);

                string newPrimaryNodeName = action.NodeName;

                var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
                {
                    RequestTimeout = action.RequestTimeout,
                    ActionTimeout  = this.helper.GetRemainingTime()
                };

                await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken).ConfigureAwait(false);

                Guid partitionId = getPartitionStateAction.Result.PartitionId;

                if (!action.IgnoreConstraints)
                {
                    // Validation only: locate the current primary and reject a request that targets the node it is already on.
                    var nodesInfo = await testContext.FabricCluster.GetLatestNodeInfoAsync(action.RequestTimeout, this.helper.GetRemainingTime(), cancellationToken).ConfigureAwait(false);

                    if (nodesInfo == null || !nodesInfo.Any())
                    {
                        throw new InvalidOperationException(StringHelper.Format(StringResources.Error_NotEnoughNodesForTestabilityAction, "MovePrimary"));
                    }

                    ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                            partitionId,
                            0,
                            action.RequestTimeout,
                            cancellationToken),
                        this.helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    NodeInfo currentPrimaryNodeInfo = null;
                    foreach (var replica in replicasResult)
                    {
                        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                        if (statefulReplica == null)
                        {
                            throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "MovePrimary", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
                        }

                        if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                        {
                            // Stays null when the primary's node is absent from the node list; reported as NotReady below.
                            currentPrimaryNodeInfo = nodesInfo.FirstOrDefault(n => n.NodeName == statefulReplica.NodeName);
                            if (!string.IsNullOrEmpty(newPrimaryNodeName) && newPrimaryNodeName == statefulReplica.NodeName)
                            {
                                throw new FabricException(
                                          StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, newPrimaryNodeName, "MovePrimary", "Primary already exists on node"),
                                          FabricErrorCode.AlreadyPrimaryReplica);
                            }

                            // Only the first replica reporting the Primary role is considered.
                            break;
                        }
                    }

                    if (currentPrimaryNodeInfo == null)
                    {
                        throw new FabricException(StringHelper.Format(StringResources.Error_PartitionPrimaryNotReady, action.PartitionSelector + ":" + partitionId), FabricErrorCode.NotReady);
                    }

                    string currentPrimaryNodeName = currentPrimaryNodeInfo.NodeName;

                    // Defensive re-check against the node record; the in-loop check above already covers a non-empty target.
                    if (newPrimaryNodeName == currentPrimaryNodeName)
                    {
                        throw new FabricException(
                                  StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, newPrimaryNodeName, "MovePrimary", "Primary already exists on node"),
                                  FabricErrorCode.AlreadyPrimaryReplica);
                    }
                }

                ActionTraceSource.WriteInfo(TraceSource, "Calling move primary with node {0}, partition {1}", string.IsNullOrEmpty(newPrimaryNodeName) ? "Random" : newPrimaryNodeName, partitionId);
                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.FaultManager.MovePrimaryUsingNodeNameAsync(
                        newPrimaryNodeName,
                        getPartitionStateAction.Result.ServiceName,
                        partitionId,
                        action.IgnoreConstraints,
                        action.RequestTimeout,
                        cancellationToken),
                    FabricClientRetryErrors.MovePrimaryFabricErrors.Value,
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                action.Result = new MovePrimaryResult(newPrimaryNodeName, getPartitionStateAction.Result);

                ResultTraceString = StringHelper.Format("MovePrimaryAction succeeded for moving Primary for {0}  to node  {1}.", partitionId, newPrimaryNodeName);
            }
예제 #21
0
            /// <summary>
            /// Restarts (or, for partitions without persisted state, removes) the ready replicas of the partition
            /// recorded in this step's state while an unreliable transport behavior filtering "DoReconfiguration"
            /// for that partition is applied on the recorded failover manager primary node.
            /// </summary>
            /// <param name="cancellationToken">Forwarded to every asynchronous call made by this method.</param>
            /// <param name="serviceInternalFaultInfo">Passed to <c>ActionTest.PerformInternalServiceFaultIfRequested</c>.</param>
            /// <returns>The step state, with <c>StepStateNames.CompletedSuccessfully</c> pushed onto its progress stack.</returns>
            public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                RestartPartitionState state = Convert(this.State);

                Guid   partitionId       = state.Info.PartitionId;
                bool   hasPersistedState = state.Info.HasPersistedState;
                string failoverManagerPrimaryNodeName = state.Info.NodeName;
                string behaviorName = state.Info.UnreliableTransportInfo.First().Item2;

                System.Fabric.Common.UnreliableTransportBehavior behavior = new System.Fabric.Common.UnreliableTransportBehavior("*", "DoReconfiguration");
                behavior.AddFilterForPartitionId(partitionId);

                // NOTE(review): if any call between here and RemoveUnreliableTransportAsync throws, this method does not
                // remove the behavior itself - presumably the step's rollback path does; confirm.
                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                        failoverManagerPrimaryNodeName,
                        behaviorName,
                        behavior,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                // Until then, a fixed delay stands in for confirmation that the behavior is active.
                await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - applied UT on partitionId {1}, node={2}", this.State.OperationId, partitionId, failoverManagerPrimaryNodeName);
                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // Only replicas that currently report Ready are touched.
                var stableReplicasToRestart = replicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();

                foreach (var replica in stableReplicasToRestart)
                {
                    if (this.restartPartitionMode == RestartPartitionMode.OnlyActiveSecondaries)
                    {
                        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                        ReleaseAssert.AssertIf(statefulReplica == null, "Stateful service replica is not StatefulServiceReplica");

                        // NOTE(review): only the Primary role is skipped, so any other non-primary role that is Ready
                        // is restarted as well - confirm that matches the intent of OnlyActiveSecondaries.
                        if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                        {
                            continue;
                        }
                    }

                    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - restarting replica partition={1}, node={2}, replica id={3}", this.State.OperationId, partitionId, replica.NodeName, replica.Id);
                    if (hasPersistedState)
                    {
                        await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => this.FabricClient.ServiceManager.RestartReplicaAsync(
                                replica.NodeName,
                                partitionId,
                                replica.Id,
                                this.RequestTimeout,
                                cancellationToken),
                            FabricClientRetryErrors.RestartReplicaErrors.Value,
                            this.OperationTimeout,
                            cancellationToken).ConfigureAwait(false);
                    }
                    else
                    {
                        // Without persisted state the replica is removed rather than restarted.
                        await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => this.FabricClient.ServiceManager.RemoveReplicaAsync(
                                replica.NodeName,
                                partitionId,
                                replica.Id,
                                this.RequestTimeout,
                                cancellationToken),
                            FabricClientRetryErrors.RemoveReplicaErrors.Value,
                            this.OperationTimeout,
                            cancellationToken).ConfigureAwait(false);
                    }
                }

                ActionTest.PerformInternalServiceFaultIfRequested(this.State.OperationId, serviceInternalFaultInfo, this.State, cancellationToken, true);

                // ConfigureAwait(false) here matches every other await in this method.
                await RemoveUnreliableTransportAsync(this.State, this.FabricClient, this.RequestTimeout, this.OperationTimeout, cancellationToken).ConfigureAwait(false);

                state.StateProgress.Push(StepStateNames.CompletedSuccessfully);

                return(state);
            }
예제 #22
0
            /// <summary>
            /// Induces data loss on the partition selected by <paramref name="action"/>. Each attempt adds a
            /// "DoReconfiguration" unreliable transport behavior for the partition on the failover manager primary
            /// node, removes ready replicas of the partition, removes the behavior again, and then polls the
            /// partition's primary epoch until its data loss number differs from the value read up front.
            /// Attempts repeat until the number changes or the action timeout is exhausted.
            /// </summary>
            /// <param name="testContext">Supplies the action executor and fabric client used here.</param>
            /// <param name="action">The data loss request; its <c>Result</c> is set on success.</param>
            /// <param name="cancellationToken">Forwarded to every asynchronous call made by this method.</param>
            /// <exception cref="InvalidOperationException">The selected service is not stateful.</exception>
            /// <exception cref="FabricException">
            /// The partition is missing from the partition list (<c>PartitionNotFound</c>) or no ready failover manager
            /// primary replica was found (<c>NotReady</c>).
            /// </exception>
            /// <exception cref="TimeoutException">The action timeout elapsed before a new attempt could start.</exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, InvokeDataLossAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.PartitionSelector, "PartitionSelector");

                // Every call below is handed whatever is left of the action timeout.
                var helper = new TimeoutHelper(action.ActionTimeout);

                ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        action.PartitionSelector.ServiceName,
                        action.RequestTimeout,
                        cancellationToken),
                    helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (result.Kind != ServiceDescriptionKind.Stateful)
                {
                    throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "DataLoss", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
                }

                var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
                {
                    RequestTimeout = action.RequestTimeout,
                    ActionTimeout  = helper.GetRemainingTime()
                };

                await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken).ConfigureAwait(false);

                Guid partitionId = getPartitionStateAction.Result.PartitionId;

                // Baseline data loss number; success is detected as any change from this value.
                long preDataLossNumber = 0;

                ServicePartitionList partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.QueryManager.GetPartitionListAsync(
                        action.PartitionSelector.ServiceName,
                        null,
                        action.RequestTimeout,
                        cancellationToken),
                    helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                bool partitionFound = false;

                foreach (StatefulServicePartition partition in partitionsResult)
                {
                    if (partition.PartitionInformation.Id == partitionId)
                    {
                        preDataLossNumber = partition.PrimaryEpoch.DataLossNumber;
                        partitionFound    = true;
                        break;
                    }
                }

                if (!partitionFound)
                {
                    throw new FabricException(StringHelper.Format(StringResources.Error_PartitionNotFound), FabricErrorCode.PartitionNotFound);
                }

                long postDataLossNumber = preDataLossNumber;

                do
                {
                    ActionTraceSource.WriteInfo(
                        TraceType,
                        "InvokeDataLossAction action pending time:{0}",
                        helper.GetRemainingTime());

                    // The overall timeout is only enforced here, at the start of each attempt.
                    if (helper.GetRemainingTime() <= TimeSpan.Zero)
                    {
                        throw new TimeoutException(StringHelper.Format(StringResources.Error_TestabilityActionTimeout, "InvokeDataLoss", partitionId));
                    }

                    ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                            partitionId,
                            0,
                            action.RequestTimeout,
                            cancellationToken),
                        helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    ServiceReplicaList fmReplicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                            Constants.FmPartitionId,
                            0,
                            action.RequestTimeout,
                            cancellationToken),
                        helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    // Find the node hosting the ready failover manager primary; the behavior is applied on that node.
                    string fmPrimaryNodeName = string.Empty;
                    var    readyFMReplicas   = fmReplicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();
                    foreach (var replica in readyFMReplicas)
                    {
                        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                        ReleaseAssert.AssertIf(statefulReplica == null, "FM Replica is not a stateful replica");
                        if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                        {
                            fmPrimaryNodeName = replica.NodeName;
                        }
                    }

                    if (string.IsNullOrEmpty(fmPrimaryNodeName))
                    {
                        throw new FabricException(StringHelper.Format(StringResources.Error_PartitionPrimaryNotReady, "FailoverManager"), FabricErrorCode.NotReady);
                    }

                    UnreliableTransportBehavior behavior = new UnreliableTransportBehavior("*", "DoReconfiguration");
                    behavior.AddFilterForPartitionId(partitionId);
                    string behaviorName = "BlockDoReconfiguration";

                    await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                            fmPrimaryNodeName,
                            behaviorName,
                            behavior,
                            action.RequestTimeout,
                            cancellationToken),
                        helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                    await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                    // Set immediately before the regular removal is attempted, so the failure path below only
                    // removes the behavior when that regular removal was never reached.
                    bool triedToRemovedBehavior = false;

                    // A failure inside the try block is captured rather than handled in a finally block so that the
                    // cleanup can be awaited instead of blocking a thread; it is rethrown with its original stack
                    // once the cleanup has run.
                    System.Runtime.ExceptionServices.ExceptionDispatchInfo pendingFailure = null;

                    try
                    {
                        var stableReplicasToRemove = replicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();

                        ActionTraceSource.WriteInfo(TraceType, "Total number of replicas found {0}:{1}", replicasResult.Count(), stableReplicasToRemove.Length);

                        // Budget of non-primary replicas to remove: all but one of the ready replicas for full data
                        // loss, otherwise (ready + 1) / 2 - 1. A primary replica is always removed in addition.
                        int replicasToRestartWithoutPrimary =
                            action.DataLossMode == DataLossMode.FullDataLoss
                                ? stableReplicasToRemove.Length - 1
                                : (stableReplicasToRemove.Length + 1) / 2 - 1;

                        foreach (var replica in stableReplicasToRemove)
                        {
                            var currentReplica = replica;
                            StatefulServiceReplica statefulReplica = currentReplica as StatefulServiceReplica;
                            ReleaseAssert.AssertIf(statefulReplica == null, "Service Replica is not of stateful type even though service is stateful");

                            ActionTraceSource.WriteInfo(
                                TraceType,
                                "Inspecting replica {0}:{1} with role {2} and status {3} to induce data loss",
                                currentReplica.Id,
                                partitionId,
                                statefulReplica.ReplicaRole,
                                statefulReplica.ReplicaStatus);

                            // The budget is decremented before it is tested, so a non-primary replica is removed
                            // only while the budget has not gone negative.
                            if (statefulReplica.ReplicaRole != ReplicaRole.Primary)
                            {
                                replicasToRestartWithoutPrimary--;
                            }

                            if (replicasToRestartWithoutPrimary >= 0 || statefulReplica.ReplicaRole == ReplicaRole.Primary)
                            {
                                ActionTraceSource.WriteInfo(TraceType, "Removing replica {0}:{1} to induce data loss", currentReplica.Id, partitionId);

                                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                    () => testContext.FabricClient.FaultManager.RemoveReplicaAsync(
                                        currentReplica.NodeName,
                                        partitionId,
                                        currentReplica.Id,
                                        CompletionMode.DoNotVerify,
                                        false, /*force remove*/
                                        action.RequestTimeout.TotalSeconds,
                                        cancellationToken),
                                    helper.GetRemainingTime(),
                                    cancellationToken).ConfigureAwait(false);
                            }
                        }

                        triedToRemovedBehavior = true;
                        await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                                fmPrimaryNodeName,
                                behaviorName,
                                action.RequestTimeout,
                                cancellationToken),
                            FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                            helper.GetRemainingTime(),
                            cancellationToken).ConfigureAwait(false);

                        // TODO: Wait for some time so that the removal of this unreliable transport behavior can be read from the files.
                        // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                        await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                        // Check up to 5 times whether the data loss number has changed; otherwise do the entire process again.
                        const int maxRetryCount = 5;
                        int       retryCount    = 0;
                        do
                        {
                            partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                () => testContext.FabricClient.QueryManager.GetPartitionListAsync(
                                    action.PartitionSelector.ServiceName,
                                    null,
                                    action.RequestTimeout,
                                    cancellationToken),
                                FabricClientRetryErrors.GetPartitionListFabricErrors.Value,
                                helper.GetRemainingTime(),
                                cancellationToken).ConfigureAwait(false);

                            partitionFound = false;
                            foreach (StatefulServicePartition partition in partitionsResult)
                            {
                                if (partition.PartitionInformation.Id == partitionId)
                                {
                                    postDataLossNumber = partition.PrimaryEpoch.DataLossNumber;
                                    partitionFound     = true;
                                    break;
                                }
                            }

                            if (!partitionFound)
                            {
                                throw new FabricException(StringHelper.Format(StringResources.Error_PartitionNotFound), FabricErrorCode.PartitionNotFound);
                            }

                            ActionTraceSource.WriteInfo(
                                TraceType,
                                "Checking data loss numbers for partition {0} with retryCount {1}. Current numbers {2}:{3}",
                                partitionId,
                                retryCount,
                                preDataLossNumber,
                                postDataLossNumber);

                            if (postDataLossNumber != preDataLossNumber)
                            {
                                break;
                            }

                            await AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);

                            ++retryCount;
                        } while (retryCount < maxRetryCount);
                    }
                    catch (Exception e)
                    {
                        pendingFailure = System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(e);
                    }

                    if (pendingFailure != null)
                    {
                        if (!triedToRemovedBehavior)
                        {
                            // The failure happened before the regular removal was attempted. As with a finally block,
                            // an exception thrown by this cleanup replaces the captured one.
                            ActionTraceSource.WriteWarning(TraceType, "Exception after adding behavior to block messages. Removing behavior");
                            await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                                    fmPrimaryNodeName,
                                    behaviorName,
                                    action.RequestTimeout,
                                    cancellationToken),
                                FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                                helper.GetRemainingTime(),
                                cancellationToken).ConfigureAwait(false);

                            // TODO: Wait for some time so that the removal of this unreliable transport behavior can be read from the files.
                            // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                            await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);
                        }

                        // Rethrows the captured exception with its original stack trace.
                        pendingFailure.Throw();
                    }
                }while (postDataLossNumber == preDataLossNumber);

                ActionTraceSource.WriteInfo(
                    TraceType,
                    "InvokeDataLossAction action completed postDataLossNumber:{0}, preDataLossNumber:{1}",
                    postDataLossNumber, preDataLossNumber);

                action.Result          = new InvokeDataLossResult(getPartitionStateAction.Result);
                this.ResultTraceString = StringHelper.Format("InvokeDataLossAction succeeded for {0} with DatalossMode = {1}", partitionId, action.DataLossMode);
            }
예제 #23
0
            /// <summary>
            /// Gathers what the restart-partition command needs before it acts: checks the requested
            /// restart mode against the service kind, resolves the target partition, and finds the
            /// node that hosts the ready Failover Manager primary replica.
            /// </summary>
            /// <param name="cancellationToken">Token passed through to every fabric client call made here.</param>
            /// <param name="serviceInternalFaultInfo">Not read by this step.</param>
            /// <returns>
            /// The converted restart-partition state, with <c>PerformingActions</c> pushed onto its
            /// progress stack and its info fields (partition id, FM primary node, persistence flag,
            /// unreliable transport info) filled in.
            /// </returns>
            /// <exception cref="FabricException">
            /// Thrown with <c>FabricErrorCode.NotReady</c> when no ready FM replica reports the primary role.
            /// </exception>
            public override async Task<ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "Inside CollectingState, service={0}", this.partitionSelector.ServiceName);
                RestartPartitionState restartState = Convert(this.State);

                // Look up the service so the requested mode can be validated against its kind.
                ServiceDescription serviceDescription = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        this.partitionSelector.ServiceName,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                bool isStateful = serviceDescription.Kind == ServiceDescriptionKind.Stateful;

                if (!isStateful && this.restartPartitionMode == RestartPartitionMode.OnlyActiveSecondaries)
                {
                    // This text is for debugging only; it is not what the user sees.
                    string debugText = string.Format(CultureInfo.InvariantCulture, "RestartPartition: for stateless services only RestartPartitionMode.AllReplicasOrInstances is valid");
                    TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, debugText);
                    throw FaultAnalysisServiceUtility.CreateException(StepBase.TraceType, NativeTypes.FABRIC_ERROR_CODE.E_INVALIDARG, debugText);
                }

                // Stateless services keep the default of "no persisted state".
                bool hasPersistedState = false;
                if (isStateful)
                {
                    StatefulServiceDescription statefulDescription = serviceDescription as StatefulServiceDescription;
                    ReleaseAssert.AssertIf(statefulDescription == null, "Stateful service description is not WinFabricStatefulServiceDescription");
                    hasPersistedState = statefulDescription.HasPersistedState;
                }

                SelectedPartition selectedPartition = await FaultAnalysisServiceUtility.GetSelectedPartitionStateAsync(
                    this.FabricClient,
                    this.partitionSelector,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                Guid partitionId = selectedPartition.PartitionId;

                // Query the replicas of the target partition. The returned list is not read below;
                // the call itself is kept so any failure it raises still surfaces from this step.
                ServiceReplicaList targetPartitionReplicas = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // Query the Failover Manager partition so its primary can be located.
                ServiceReplicaList fmReplicas = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        FASConstants.FmPartitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // Only replicas in the Ready status are considered. Every ready replica is visited
                // (no early exit), so the last one reporting Primary determines the node name.
                string fmPrimaryNodeName = string.Empty;
                foreach (var fmReplica in fmReplicas.Where(candidate => candidate.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray())
                {
                    StatefulServiceReplica statefulFmReplica = fmReplica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(statefulFmReplica == null, "FM Replica is not a stateful replica");
                    if (statefulFmReplica.ReplicaRole == ReplicaRole.Primary)
                    {
                        fmPrimaryNodeName = fmReplica.NodeName;
                    }
                }

                if (string.IsNullOrEmpty(fmPrimaryNodeName))
                {
                    throw new FabricException(StringHelper.Format(StringResources.Error_PartitionPrimaryNotReady, "FailoverManager"), FabricErrorCode.NotReady);
                }

                TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - FM primary is at node={1}", this.State.OperationId, fmPrimaryNodeName);

                // One (node, behavior name) pair: the behavior name is the shared prefix plus this operation's id.
                string behaviorName = RestartingSelectedReplicas.UTBehaviorPrefixName + "_" + this.State.OperationId;
                List<Tuple<string, string>> transportInfo = new List<Tuple<string, string>>
                {
                    new Tuple<string, string>(fmPrimaryNodeName, behaviorName)
                };

                restartState.StateProgress.Push(StepStateNames.PerformingActions);
                restartState.Info.PartitionId = partitionId;
                restartState.Info.NodeName = fmPrimaryNodeName;
                restartState.Info.HasPersistedState = hasPersistedState;
                restartState.Info.UnreliableTransportInfo = transportInfo;

                return restartState;
            }
예제 #24
0
            /// <summary>
            /// Collects the information needed to induce quorum loss: verifies that the service is
            /// stateful with persisted state, resolves the target partition, chooses the replicas to
            /// target according to the configured quorum loss mode, and records a
            /// (node name, behavior name) pair for each chosen replica.
            /// </summary>
            /// <param name="cancellationToken">Token passed through to every fabric client call made here.</param>
            /// <param name="serviceInternalFaultInfo">Not read by this step.</param>
            /// <returns>
            /// The converted quorum-loss state, with <c>PerformingActions</c> pushed onto its progress
            /// stack and its partition id, replica ids and unreliable transport info filled in.
            /// </returns>
            /// <exception cref="FabricInvalidForStatelessServicesException">The service is not stateful.</exception>
            /// <exception cref="FabricOnlyValidForStatefulPersistentServicesException">The service has no persisted state.</exception>
            /// <exception cref="FabricTransientException">
            /// No replica set could be selected for <c>QuorumLossMode.QuorumReplicas</c>; the in-code
            /// note states this makes the command roll back and retry.
            /// </exception>
            public override async Task<ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
            {
                InvokeQuorumLossState state = Convert(this.State);

                // Fetch the service description so its kind and persistence can be validated.
                ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        this.partitionSelector.ServiceName,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                if (result.Kind != ServiceDescriptionKind.Stateful)
                {
                    // The message in the first arg is only for debugging, it is not returned to the user.
                    throw new FabricInvalidForStatelessServicesException("FabricInvalidForStatelessServicesException", FabricErrorCode.InvalidForStatelessServices);
                }

                StatefulServiceDescription statefulServiceDescription = result as StatefulServiceDescription;

                ReleaseAssert.AssertIf(statefulServiceDescription == null, string.Format(CultureInfo.InvariantCulture, "{0} - Service is not a stateful service", this.State.OperationId));

                if (!statefulServiceDescription.HasPersistedState)
                {
                    // The message in the first arg is only for debugging, it is not returned to the user.
                    throw new FabricOnlyValidForStatefulPersistentServicesException("This is only valid for stateful persistent services", FabricErrorCode.OnlyValidForStatefulPersistentServices);
                }

                SelectedPartition targetPartition = await FaultAnalysisServiceUtility.GetSelectedPartitionStateAsync(
                    this.FabricClient,
                    this.partitionSelector,
                    this.RequestTimeout,
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                Guid partitionId = targetPartition.PartitionId;

                // Get data about the replicas in that partition.
                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        this.RequestTimeout,
                        cancellationToken),
                    this.OperationTimeout,
                    cancellationToken).ConfigureAwait(false);

                // Every replica of a stateful service is expected to be a stateful replica.
                List<StatefulServiceReplica> tempReplicas = new List<StatefulServiceReplica>();

                foreach (var replica in replicasResult)
                {
                    StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(statefulReplica == null, "Expected stateful replica");
                    tempReplicas.Add(statefulReplica);
                }

                List<StatefulServiceReplica> targetReplicas = null;

                if (this.quorumLossMode == QuorumLossMode.AllReplicas)
                {
                    targetReplicas = tempReplicas.Where(r => r.ReplicaRole == ReplicaRole.Primary || r.ReplicaRole == ReplicaRole.ActiveSecondary).ToList();
                }
                else if (this.quorumLossMode == QuorumLossMode.QuorumReplicas)
                {
                    targetReplicas = FaultAnalysisServiceUtility.GetReplicasForPartialLoss(state.OperationId, tempReplicas);
                }
                else
                {
                    throw FaultAnalysisServiceUtility.CreateException(StepBase.TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_INVALIDARG, Strings.StringResources.Error_UnsupportedQuorumLossMode);
                }

                if (targetReplicas == null)
                {
                    // This will cause the command to rollback and retry
                    throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
                }

                // Record which behavior name belongs to which node. Only the names are stored in
                // this step; no transport behavior object is built or applied here.
                List<Tuple<string, string>> unreliableTransportInfoList = new List<Tuple<string, string>>(targetReplicas.Count);

                foreach (StatefulServiceReplica targetReplica in targetReplicas)
                {
                    string nodeName = targetReplica.NodeName;
                    unreliableTransportInfoList.Add(new Tuple<string, string>(nodeName, this.CreateBehaviorName(nodeName)));
                }

                state.StateProgress.Push(StepStateNames.PerformingActions);

                state.Info.PartitionId             = partitionId;
                state.Info.ReplicaIds              = targetReplicas.Select(r => r.Id).ToList();
                state.Info.UnreliableTransportInfo = unreliableTransportInfoList;

                return state;
            }
예제 #25
0
        /// <summary>
        /// Assigns a value to every FABRIC_* member used by this class, either by asking
        /// <c>this.random</c> for a random instance of the type or by calling one of the
        /// Create* helper methods, and fills the list members with a few entries each.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably each CreateRandom call advances <c>this.random</c>, so the
        /// statements are kept in a fixed order to keep generated values stable - confirm.
        /// </remarks>
        public void Init()
        {
            // Application Type
            FABRIC_APPLICATION_TYPE_QUERY_RESULT_ITEM_ = this.random.CreateRandom<ApplicationType>();

            var applicationTypes = new ApplicationTypeList();
            applicationTypes.Add(this.random.CreateRandom<ApplicationType>());
            FABRIC_APPLICATION_TYPE_QUERY_RESULT_ITEM_LIST_ = applicationTypes;

            FABRIC_APPLICATION_TYPE_QUERY_RESULT_ITEM_PAGED_LIST_ = new ApplicationTypePagedList { ContinuationToken = "ContinuationToken342741" };
            for (int i = 0; i < 3; i++)
            {
                FABRIC_APPLICATION_TYPE_QUERY_RESULT_ITEM_PAGED_LIST_.Add(this.random.CreateRandom<ApplicationType>());
            }

            // Application
            FABRIC_APPLICATION_QUERY_RESULT_ITEM_ = this.random.CreateRandom<Application>();

            var applications = new ApplicationList();
            applications.Add(this.random.CreateRandom<Application>());
            applications.Add(this.random.CreateRandom<Application>());
            FABRIC_APPLICATION_QUERY_RESULT_ITEM_LIST_ = applications;

            // Service Type
            FABRIC_SERVICE_TYPE_DESCRIPTION_ = this.random.CreateRandom<ServiceTypeDescription>();
            FABRIC_SERVICE_TYPE_QUERY_RESULT_ITEM_ = this.random.CreateRandom<ServiceType>();

            var serviceTypes = new ServiceTypeList();
            serviceTypes.Add(this.random.CreateRandom<ServiceType>());
            FABRIC_SERVICE_TYPE_QUERY_RESULT_LIST_ = serviceTypes;

            // Replica
            FABRIC_SERVICE_REPLICA_QUERY_RESULT_ITEM_ = this.CreateReplica();

            FABRIC_SERVICE_REPLICA_LIST_RESULT_ = new ServiceReplicaList { ContinuationToken = "4387284" };
            FABRIC_SERVICE_REPLICA_LIST_RESULT_.Add(this.CreateReplica());
            FABRIC_SERVICE_REPLICA_LIST_RESULT_.Add(this.CreateReplica());

            // Partition
            FABRIC_SERVICE_PARTITION_INFORMATION_ = this.random.CreateRandom<ServicePartitionInformation>();
            FABRIC_SERVICE_PARTITION_QUERY_RESULT_ITEM_ = this.CreatePartition();

            FABRIC_SERVICE_PARTITION_LIST_RESULT_ = new ServicePartitionList { ContinuationToken = Guid.NewGuid().ToString() };
            FABRIC_SERVICE_PARTITION_LIST_RESULT_.Add(this.CreatePartition());
            FABRIC_SERVICE_PARTITION_LIST_RESULT_.Add(this.CreatePartition());

            // Service
            FABRIC_SERVICE_QUERY_RESULT_ITEM_ = this.CreateServiceQueryItem();

            var services = new ServiceList();
            services.Add(this.CreateServiceQueryItem());
            services.Add(this.CreateServiceQueryItem());
            FABRIC_SERVICE_QUERY_RESULT_LIST_ = services;

            // Node
            FABRIC_NODE_DEACTIVATION_QUERY_RESULT_ITEM_ = this.random.CreateRandom<NodeDeactivationResult>();
            FABRIC_NODE_DEACTIVATION_TASK_ = this.random.CreateRandom<NodeDeactivationTask>();
            FABRIC_NODE_QUERY_RESULT_ITEM_ = this.random.CreateRandom<Node>();

            // NodeList with continuation token
            FABRIC_NODE_LIST_QUERY_RESULT_ = new NodeList { ContinuationToken = "ContinuationToken34274" };
            for (int i = 0; i < 3; i++)
            {
                FABRIC_NODE_LIST_QUERY_RESULT_.Add(this.random.CreateRandom<Node>());
            }

            // Deployed Application
            FABRIC_DEPLOYED_APPLICATION_QUERY_RESULT_ITEM_ = this.random.CreateRandom<DeployedApplication>();

            // This list reuses the single item created just above rather than drawing a new one.
            var deployedApplications = new DeployedApplicationList();
            deployedApplications.Add(FABRIC_DEPLOYED_APPLICATION_QUERY_RESULT_ITEM_);
            FABRIC_DEPLOYED_APPLICATION_QUERY_RESULT_ITEM_LIST_ = deployedApplications;

            FABRIC_DEPLOYED_APPLICATION_QUERY_RESULT_ITEM_PAGED_LIST_ = new DeployedApplicationPagedList { ContinuationToken = "ContinuationToken342741" };
            for (int i = 0; i < 3; i++)
            {
                FABRIC_DEPLOYED_APPLICATION_QUERY_RESULT_ITEM_PAGED_LIST_.Add(this.random.CreateRandom<DeployedApplication>());
            }

            // Deployed Service
            FABRIC_DEPLOYED_SERVICE_REPLICA_QUERY_RESULT_ITEM_ = this.random.CreateRandom<DeployedStatefulServiceReplica>();
            FABRIC_DEPLOYED_SERVICE_REPLICA_QUERY_RESULT_ITEM2_ = this.random.CreateRandom<DeployedStatelessServiceInstance>();
            FABRIC_DEPLOYED_SERVICE_REPLICA_QUERY_RESULT_ITEM3_ = this.random.CreateRandom<DeployedServiceReplica>();
            FABRIC_DEPLOYED_SERVICE_REPLICA_DETAIL_QUERY_RESULT_ITEM_ = this.random.CreateRandom<DeployedStatefulServiceReplicaDetail>();
            FABRIC_DEPLOYED_SERVICE_REPLICA_DETAIL_QUERY_RESULT_ITEM2_ = this.random.CreateRandom<DeployedStatelessServiceInstanceDetail>();

            // Code Package
            FABRIC_CODE_PACKAGE_ENTRY_POINT_STATISTICS_ = this.random.CreateRandom<CodePackageEntryPointStatistics>();
            FABRIC_CODE_PACKAGE_ENTRY_POINT_ = this.random.CreateRandom<CodePackageEntryPoint>();
            FABRIC_DEPLOYED_CODE_PACKAGE_QUERY_RESULT_ITEM_ = this.random.CreateRandom<DeployedCodePackage>();

            // As with deployed applications, the list holds the item created just above.
            var deployedCodePackages = new DeployedCodePackageList();
            deployedCodePackages.Add(FABRIC_DEPLOYED_CODE_PACKAGE_QUERY_RESULT_ITEM_);
            FABRIC_DEPLOYED_CODE_PACKAGE_QUERY_RESULT_ITEM_LIST_ = deployedCodePackages;

            FABRIC_DEPLOYED_SERVICE_PACKAGE_QUERY_RESULT_ITEM_ = this.random.CreateRandom<DeployedServicePackage>();

            // Replicator
            FABRIC_PRIMARY_REPLICATOR_STATUS_QUERY_RESULT_ = this.random.CreateRandom<PrimaryReplicatorStatus>();
            FABRIC_SECONDARY_REPLICATOR_STATUS_QUERY_RESULT_ = this.random.CreateRandom<SecondaryReplicatorStatus>();

            // Load
            FABRIC_LOAD_METRIC_INFORMATION_ = this.random.CreateRandom<LoadMetricInformation>();
            FABRIC_LOAD_METRIC_REPORT_ = this.random.CreateRandom<LoadMetricReport>();
            FABRIC_NODE_LOAD_METRIC_INFORMATION_ = this.random.CreateRandom<NodeLoadMetricInformation>();
            FABRIC_NODE_LOAD_INFORMATION_ = this.random.CreateRandom<NodeLoadInformation>();
            FABRIC_PARTITION_LOAD_INFORMATION_ = this.random.CreateRandom<PartitionLoadInformation>();
            FABRIC_REPLICA_LOAD_INFORMATION_ = this.random.CreateRandom<ReplicaLoadInformation>();

            // Service Group
            FABRIC_SERVICE_GROUP_TYPE_MEMBER_DESCRIPTION_ = this.random.CreateRandom<ServiceGroupTypeMemberDescription>();
            FABRIC_SERVICE_GROUP_MEMBER_TYPE_QUERY_RESULT_ITEM_ = this.random.CreateRandom<ServiceGroupMemberType>();
        }