Пример #1
0
            /// <summary>
            /// Starts the node named by <c>action.NodeName</c> and, in <c>CompletionMode.Verify</c>, waits until it is reported up.
            /// </summary>
            /// <param name="testContext">Context providing the fabric client used to issue the start request.</param>
            /// <param name="action">The start-node request; its <c>Result</c> is populated on success.</param>
            /// <param name="cancellationToken">Token forwarded to every asynchronous call made here.</param>
            /// <exception cref="FabricException">
            /// Thrown with <c>FabricErrorCode.NodeNotFound</c> when the node instance has to be looked up and no node info is returned.
            /// </exception>
            /// <exception cref="TimeoutException">
            /// Thrown in verify mode when the action timeout elapses before the node is observed up with a newer instance id.
            /// </exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, StartNodeAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.NodeName, "NodeName");
                this.helper = new TimeoutHelper(action.ActionTimeout);
                string     nodeName     = action.NodeName;
                BigInteger nodeInstance = action.NodeInstance;

                // A node instance of -1 is resolved to the instance id currently reported for the node.
                if (nodeInstance == BigInteger.MinusOne)
                {
                    var nodeInfo = await this.GetCurrentNodeInfoAsync(testContext, action, cancellationToken).ConfigureAwait(false);

                    if (nodeInfo == null)
                    {
                        throw new FabricException(StringResources.Error_NodeNotFound, FabricErrorCode.NodeNotFound);
                    }

                    nodeInstance = nodeInfo.NodeInstanceId;
                }

                // Issue the start request through the retry helper, bounded by what is left of the action timeout.
                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.FaultManager.StartNodeUsingNodeNameAsync(
                        nodeName,
                        nodeInstance,
                        action.IPAddressOrFQDN,
                        action.ClusterConnectionPort,
                        action.RequestTimeout, cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (action.CompletionMode == CompletionMode.Verify)
                {
                    // Poll until the node is up with an instance id strictly greater than the one that was started,
                    // or until the action timeout runs out.
                    bool success = false;
                    while (this.helper.GetRemainingTime() > TimeSpan.Zero)
                    {
                        var nodeInfo = await this.GetCurrentNodeInfoAsync(testContext, action, cancellationToken).ConfigureAwait(false);

                        if (nodeInfo != null && nodeInfo.NodeInstanceId > nodeInstance && nodeInfo.IsNodeUp)
                        {
                            success = true;
                            break;
                        }

                        ActionTraceSource.WriteInfo(TraceSource, "NodeName = {0} not yet Started. Retrying...", action.NodeName);
                        await AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
                    }

                    if (!success)
                    {
                        throw new TimeoutException(StringHelper.Format(StringResources.Error_TestabilityActionTimeout,
                                                                       "StartNode",
                                                                       action.NodeName + ":" + action.NodeInstance));
                    }
                }

                action.Result          = new StartNodeResult(action.NodeName, nodeInstance);
                this.ResultTraceString = StringHelper.Format("StartNodeAction succeeded for {0}:{1} with CompletionMode = {2}", action.NodeName, nodeInstance, action.CompletionMode);
            }
Пример #2
0
            /// <summary>
            /// Removes all unreliable transport behaviors (name <c>"*"</c>) from every node that is currently up.
            /// </summary>
            /// <param name="testContext">Context providing the cluster view and the fabric client.</param>
            /// <param name="action">The clean-test-state request supplying the request and action timeouts.</param>
            /// <param name="cancellationToken">Token forwarded to every asynchronous call made here.</param>
            protected override async Task ExecuteActionAsync(
                FabricTestContext testContext,
                CleanTestStateAction action,
                CancellationToken cancellationToken)
            {
                this.helper = new TimeoutHelper(action.ActionTimeout);

                var nodes = await testContext.FabricCluster.GetLatestNodeInfoAsync(action.RequestTimeout, this.helper.GetRemainingTime(), cancellationToken).ConfigureAwait(false);

                foreach (var nodeInfo in nodes)
                {
                    if (nodeInfo.IsNodeUp)
                    {
                        // Copy the loop variable so the lambda below captures a per-iteration value.
                        var info = nodeInfo;
                        await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                                info.NodeName,
                                "*",
                                action.RequestTimeout,
                                cancellationToken),
                            this.helper.GetRemainingTime(),
                            cancellationToken).ConfigureAwait(false);

                        // TODO: Wait for some time so that the removal of this unreliable transport behavior can be read from the files.
                        // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                        await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                        ActionTraceSource.WriteInfo(TraceType, "Test state cleaned for node:{0}", nodeInfo.NodeName);
                    }
                    else
                    {
                        // Nodes that are not up are skipped; only a trace message is written for them.
                        ActionTraceSource.WriteInfo(TraceType, "Test clean failed to start node {0}", nodeInfo.NodeName);
                    }
                }

                ResultTraceString = StringHelper.Format("CleanTestStateAction succeeded");
            }
            /// <summary>
            /// Restarts (persisted services) or removes (non-persisted services) the ready replicas of a selected partition
            /// while a "DoReconfiguration" unreliable transport behavior is applied on the Failover Manager primary node.
            /// </summary>
            /// <param name="testContext">Context providing the fabric client and the action executor.</param>
            /// <param name="action">The restart-partition request; its <c>Result</c> is populated on success.</param>
            /// <param name="cancellationToken">Token forwarded to the asynchronous calls made here.</param>
            /// <exception cref="InvalidOperationException">
            /// Thrown when <c>RestartPartitionMode.OnlyActiveSecondaries</c> is requested for a service that is not stateful.
            /// </exception>
            /// <exception cref="FabricException">
            /// Thrown with <c>FabricErrorCode.NotReady</c> when no ready primary replica of the Failover Manager partition is found.
            /// </exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, RestartPartitionAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.PartitionSelector, "partitionSelector");

                this.helper = new TimeoutHelper(action.ActionTimeout);

                // get service info so we can validate if the operation is valid
                ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        action.PartitionSelector.ServiceName,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (result.Kind != ServiceDescriptionKind.Stateful && action.RestartPartitionMode == RestartPartitionMode.OnlyActiveSecondaries)
                {
                    throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "RestartPartitionMode.OnlyActiveSecondaries", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
                }

                // Decides below whether replicas are restarted (persisted state) or removed (no persisted state).
                bool hasPersistedState = false;

                if (result.Kind == ServiceDescriptionKind.Stateful)
                {
                    StatefulServiceDescription statefulDescription = result as StatefulServiceDescription;
                    ReleaseAssert.AssertIf(statefulDescription == null, "Stateful service description is not WinFabricStatefulServiceDescription");
                    hasPersistedState = statefulDescription.HasPersistedState;
                }

                // now actually select a partition
                var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
                {
                    RequestTimeout = action.RequestTimeout,
                    ActionTimeout  = helper.GetRemainingTime()
                };

                await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken).ConfigureAwait(false);

                Guid partitionId = getPartitionStateAction.Result.PartitionId;

                // get replicas for target
                ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                        partitionId,
                        0,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                // get replicas for fm in order to get the primary
                ServiceReplicaList fmReplicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                        Constants.FmPartitionId,
                        0,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                string fmPrimaryNodeName = string.Empty;
                var    readyFMReplicas   = fmReplicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();

                foreach (var replica in readyFMReplicas)
                {
                    StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                    ReleaseAssert.AssertIf(statefulReplica == null, "FM Replica is not a stateful replica");
                    if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                    {
                        fmPrimaryNodeName = replica.NodeName;
                    }
                }

                if (string.IsNullOrEmpty(fmPrimaryNodeName))
                {
                    throw new FabricException(StringHelper.Format(StringResources.Error_PartitionPrimaryNotReady, "FailoverManager"), FabricErrorCode.NotReady);
                }

                ////------------------------------------------------------
                // target ut at the fm primary only
                UnreliableTransportBehavior behavior = new UnreliableTransportBehavior("*", "DoReconfiguration");

                behavior.AddFilterForPartitionId(partitionId);
                string behaviorName = "BlockDoReconfiguration";

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                        fmPrimaryNodeName,
                        behaviorName,
                        behavior,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                // Set just before the normal-path removal is attempted; the finally block only performs
                // its own removal when this is still false (i.e. an exception occurred earlier in the try).
                bool triedToRemovedBehavior = false;

                // inspect the actual replicas to restart, only operate on stable ones
                try
                {
                    var stableReplicasToRestart = replicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();

                    foreach (var replica in stableReplicasToRestart)
                    {
                        // Copy the loop variable so the lambdas below capture a per-iteration value.
                        var currentReplica = replica;
                        if (action.RestartPartitionMode == RestartPartitionMode.OnlyActiveSecondaries)
                        {
                            StatefulServiceReplica statefulReplica = currentReplica as StatefulServiceReplica;
                            ReleaseAssert.AssertIf(statefulReplica == null, "Stateful service replica is not StatefulServiceReplica");

                            // In this mode the primary is left alone; every other ready replica is processed.
                            if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                            {
                                continue;
                            }
                        }

                        if (hasPersistedState)
                        {
                            await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                () => testContext.FabricClient.FaultManager.RestartReplicaAsync(
                                    currentReplica.NodeName,
                                    partitionId,
                                    currentReplica.Id,
                                    CompletionMode.DoNotVerify,
                                    action.RequestTimeout.TotalSeconds,
                                    cancellationToken),
                                this.helper.GetRemainingTime(),
                                cancellationToken).ConfigureAwait(false);
                        }
                        else
                        {
                            await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                () => testContext.FabricClient.FaultManager.RemoveReplicaAsync(
                                    currentReplica.NodeName,
                                    partitionId,
                                    currentReplica.Id,
                                    CompletionMode.DoNotVerify,
                                    false, /*force remove*/
                                    action.RequestTimeout.TotalSeconds,
                                    cancellationToken),
                                this.helper.GetRemainingTime(),
                                cancellationToken).ConfigureAwait(false);
                        }
                    }

                    triedToRemovedBehavior = true;
                    await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                            fmPrimaryNodeName,
                            behaviorName,
                            action.RequestTimeout,
                            cancellationToken),
                        FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                        this.helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                    await Task.Delay(TimeSpan.FromSeconds(5.0)).ConfigureAwait(false);
                }
                finally
                {
                    // TODO: Provide a way to clear all behaviors just in case.
                    if (!triedToRemovedBehavior)
                    {
                        // NOTE(review): this cleanup blocks the calling thread and reuses the same cancellationToken and
                        // remaining action time; if the failure was itself a cancellation or timeout the removal may not
                        // succeed - confirm this is the intended best-effort behavior.
                        ActionTraceSource.WriteWarning(TraceType, "Exception after adding behavior to block messages. Removing behavior synchronously");
                        FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                                fmPrimaryNodeName,
                                behaviorName,
                                action.RequestTimeout,
                                cancellationToken),
                            FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                            this.helper.GetRemainingTime(),
                            cancellationToken).GetAwaiter().GetResult();

                        // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                        // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                        Task.Delay(TimeSpan.FromSeconds(5.0)).GetAwaiter().GetResult();
                    }
                }

                // -- note there's no explicit validation

                // action result
                action.Result     = new RestartPartitionResult(getPartitionStateAction.Result);
                ResultTraceString = StringHelper.Format("RestartPartitionAction succeeded for {0} with RestartPartitionMode = {1}", partitionId, action.RestartPartitionMode);
            }
Пример #4
0
            /// <summary>
            /// Moves a secondary replica of the selected partition from one node to another.
            /// </summary>
            /// <remarks>
            /// Unless <c>action.IgnoreConstraints</c> is set, the current replica set is queried first to validate the
            /// requested node names; when no current secondary node is given, one is picked at random from the nodes
            /// hosting an active secondary. An empty new-secondary node name is traced as "Random" and passed through as-is.
            /// </remarks>
            /// <param name="testContext">Context providing the fabric client, the action executor and the random source.</param>
            /// <param name="action">The move-secondary request; its <c>Result</c> is populated on success.</param>
            /// <param name="cancellationToken">Token forwarded to every asynchronous call made here.</param>
            /// <exception cref="InvalidOperationException">
            /// Thrown when a replica of the partition is not stateful, or when the partition has no active secondary replicas.
            /// </exception>
            /// <exception cref="FabricException">
            /// Thrown when the requested new node already hosts the primary or an active secondary, or when the requested
            /// current node does not host an active secondary.
            /// </exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, MoveSecondaryAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.PartitionSelector, "PartitionSelector");

                this.helper = new TimeoutHelper(action.ActionTimeout);

                string newSecondaryNode     = action.NewSecondaryNodeName;
                string currentSecondaryNode = action.CurrentSecondaryNodeName;

                var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
                {
                    RequestTimeout = action.RequestTimeout,
                    ActionTimeout  = this.helper.GetRemainingTime()
                };

                await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken).ConfigureAwait(false);

                Guid partitionId = getPartitionStateAction.Result.PartitionId;

                if (!action.IgnoreConstraints)
                {
                    // Query the replica set so the requested node names can be validated against replica roles.
                    ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                            partitionId,
                            0,
                            action.RequestTimeout,
                            cancellationToken),
                        this.helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    List<string> currentSecReplicaNodes = new List<string>();
                    foreach (var replica in replicasResult)
                    {
                        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                        if (statefulReplica == null)
                        {
                            throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "MoveSecondary", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
                        }

                        if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                        {
                            // The destination must not be the node that already hosts the primary.
                            if (!string.IsNullOrEmpty(newSecondaryNode) && newSecondaryNode == statefulReplica.NodeName)
                            {
                                throw new FabricException(
                                          StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, newSecondaryNode, "MoveSecondary", "Primary exists on node"),
                                          FabricErrorCode.AlreadyPrimaryReplica);
                            }
                        }
                        else if (statefulReplica.ReplicaRole == ReplicaRole.ActiveSecondary)
                        {
                            currentSecReplicaNodes.Add(statefulReplica.NodeName);

                            // The destination must not already host an active secondary of this partition.
                            if (!string.IsNullOrEmpty(newSecondaryNode) && newSecondaryNode == statefulReplica.NodeName)
                            {
                                throw new FabricException(
                                          StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, newSecondaryNode, "MoveSecondary", "Secondary exists on node"),
                                          FabricErrorCode.AlreadySecondaryReplica);
                            }
                        }
                    }

                    if (currentSecReplicaNodes.Count == 0)
                    {
                        throw new InvalidOperationException(StringResources.Error_NoSecondariesInReplicaSet);
                    }

                    // No source node requested: pick one of the nodes hosting an active secondary at random.
                    if (string.IsNullOrEmpty(currentSecondaryNode))
                    {
                        int num = testContext.Random.Next(currentSecReplicaNodes.Count);
                        currentSecondaryNode = currentSecReplicaNodes[num];
                    }

                    if (!currentSecReplicaNodes.Contains(currentSecondaryNode))
                    {
                        // Report the source node that failed validation, not the destination node.
                        throw new FabricException(
                                  StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, currentSecondaryNode, "MoveSecondary", "Current node does not have a secondary replica"),
                                  FabricErrorCode.InvalidReplicaStateForReplicaOperation);
                    }
                }

                ReleaseAssert.AssertIf(string.IsNullOrEmpty(currentSecondaryNode), "Current node name cannot be null or empty.");
                ReleaseAssert.AssertIf(newSecondaryNode == currentSecondaryNode, "Current and New node names are same.");

                ActionTraceSource.WriteInfo(TraceSource, "Calling move secondary with current node {0}, new node {1}, partition {2}", currentSecondaryNode, string.IsNullOrEmpty(newSecondaryNode) ? "Random" : newSecondaryNode, partitionId);
                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.FaultManager.MoveSecondaryUsingNodeNameAsync(
                        currentSecondaryNode,
                        newSecondaryNode,
                        getPartitionStateAction.Result.ServiceName,
                        partitionId,
                        action.IgnoreConstraints,
                        action.RequestTimeout,
                        cancellationToken),
                    FabricClientRetryErrors.MoveSecondaryFabricErrors.Value,
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                action.Result          = new MoveSecondaryResult(currentSecondaryNode, newSecondaryNode, getPartitionStateAction.Result);
                this.ResultTraceString = StringHelper.Format(
                    "MoveSecondaryAction succeeded for moving Secondary for {0} from {1} to {2}.",
                    partitionId,
                    currentSecondaryNode,
                    newSecondaryNode);
            }
Пример #5
0
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, InvokeDataLossAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.PartitionSelector, "PartitionSelector");

                var helper = new TimeoutHelper(action.ActionTimeout);

                ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.ServiceManager.GetServiceDescriptionAsync(
                        action.PartitionSelector.ServiceName,
                        action.RequestTimeout,
                        cancellationToken),
                    helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (result.Kind != ServiceDescriptionKind.Stateful)
                {
                    throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "DataLoss", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
                }

                var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
                {
                    RequestTimeout = action.RequestTimeout,
                    ActionTimeout  = helper.GetRemainingTime()
                };

                await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken).ConfigureAwait(false);

                Guid partitionId = getPartitionStateAction.Result.PartitionId;

                long preDataLossNumber = 0;

                ServicePartitionList partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.QueryManager.GetPartitionListAsync(
                        action.PartitionSelector.ServiceName,
                        null,
                        action.RequestTimeout,
                        cancellationToken),
                    helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                bool partitionFound = false;

                foreach (StatefulServicePartition partition in partitionsResult)
                {
                    if (partition.PartitionInformation.Id == partitionId)
                    {
                        preDataLossNumber = partition.PrimaryEpoch.DataLossNumber;
                        partitionFound    = true;
                        break;
                    }
                }

                if (!partitionFound)
                {
                    throw new FabricException(StringHelper.Format(StringResources.Error_PartitionNotFound), FabricErrorCode.PartitionNotFound);
                }

                long postDataLossNumber = preDataLossNumber;

                do
                {
                    ActionTraceSource.WriteInfo(
                        TraceType,
                        "InvokeDataLossAction action pending time:{0}",
                        helper.GetRemainingTime());

                    if (helper.GetRemainingTime() <= TimeSpan.Zero)
                    {
                        throw new TimeoutException(StringHelper.Format(StringResources.Error_TestabilityActionTimeout, "InvokeDataLoss", partitionId));
                    }

                    ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                            partitionId,
                            0,
                            action.RequestTimeout,
                            cancellationToken),
                        helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    ServiceReplicaList fmReplicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                            Constants.FmPartitionId,
                            0,
                            action.RequestTimeout,
                            cancellationToken),
                        helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    string fmPrimaryNodeName = string.Empty;
                    var    readyFMReplicas   = fmReplicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();
                    foreach (var replica in readyFMReplicas)
                    {
                        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                        ReleaseAssert.AssertIf(statefulReplica == null, "FM Replica is not a stateful replica");
                        if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                        {
                            fmPrimaryNodeName = replica.NodeName;
                        }
                    }

                    if (string.IsNullOrEmpty(fmPrimaryNodeName))
                    {
                        throw new FabricException(StringHelper.Format(StringResources.Error_PartitionPrimaryNotReady, "FailoverManager"), FabricErrorCode.NotReady);
                    }

                    UnreliableTransportBehavior behavior = new UnreliableTransportBehavior("*", "DoReconfiguration");
                    behavior.AddFilterForPartitionId(partitionId);
                    string behaviorName = "BlockDoReconfiguration";

                    await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                            fmPrimaryNodeName,
                            behaviorName,
                            behavior,
                            action.RequestTimeout,
                            cancellationToken),
                        helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
                    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
                    await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                    bool triedToRemovedBehavior = false;

                    try
                    {
                        var stableReplicasToRemove = replicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();

                        ActionTraceSource.WriteInfo(TraceType, "Total number of replicas found {0}:{1}", replicasResult.Count(), stableReplicasToRemove.Count());

                        int replicasToRestartWithoutPrimary =
                            action.DataLossMode == DataLossMode.FullDataLoss
                                ? stableReplicasToRemove.Length - 1
                                : (stableReplicasToRemove.Length + 1) / 2 - 1;

                        foreach (var replica in stableReplicasToRemove)
                        {
                            var currentReplica = replica;
                            StatefulServiceReplica statefulReplica = currentReplica as StatefulServiceReplica;
                            ReleaseAssert.AssertIf(statefulReplica == null, "Service Replica is not of stateful type even though service is stateful");

                            ActionTraceSource.WriteInfo(
                                TraceType,
                                "Inspecting replica {0}:{1} with role {2} and status {3} to induce data loss",
                                currentReplica.Id,
                                partitionId,
                                statefulReplica.ReplicaRole,
                                statefulReplica.ReplicaStatus);

                            if (statefulReplica.ReplicaRole != ReplicaRole.Primary)
                            {
                                replicasToRestartWithoutPrimary--;
                            }

                            if (replicasToRestartWithoutPrimary >= 0 || statefulReplica.ReplicaRole == ReplicaRole.Primary)
                            {
                                ActionTraceSource.WriteInfo(TraceType, "Removing replica {0}:{1} to induce data loss", currentReplica.Id, partitionId);

                                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                    () => testContext.FabricClient.FaultManager.RemoveReplicaAsync(
                                        currentReplica.NodeName,
                                        partitionId,
                                        currentReplica.Id,
                                        CompletionMode.DoNotVerify,
                                        false, /*force remove*/
                                        action.RequestTimeout.TotalSeconds,
                                        cancellationToken),
                                    helper.GetRemainingTime(),
                                    cancellationToken).ConfigureAwait(false);
                            }
                        }

                        triedToRemovedBehavior = true;
                        await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                                fmPrimaryNodeName,
                                behaviorName,
                                action.RequestTimeout,
                                cancellationToken),
                            FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                            helper.GetRemainingTime(),
                            cancellationToken).ConfigureAwait(false);

                        // TODO: Wait for some time so that the removal of this unreliable transport behavior can be read from the files.
                        // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successully applied
                        await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

                        // retry check for whether data loss number has increased 5 times else do the entire process again
                        const int maxRetryCount = 5;
                        int       retryCount    = 0;
                        do
                        {
                            partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                () => testContext.FabricClient.QueryManager.GetPartitionListAsync(
                                    action.PartitionSelector.ServiceName,
                                    null,
                                    action.RequestTimeout,
                                    cancellationToken),
                                FabricClientRetryErrors.GetPartitionListFabricErrors.Value,
                                helper.GetRemainingTime(),
                                cancellationToken).ConfigureAwait(false);

                            partitionFound = false;
                            foreach (StatefulServicePartition partition in partitionsResult)
                            {
                                if (partition.PartitionInformation.Id == partitionId)
                                {
                                    postDataLossNumber = partition.PrimaryEpoch.DataLossNumber;
                                    partitionFound     = true;
                                    break;
                                }
                            }

                            if (!partitionFound)
                            {
                                throw new FabricException(StringHelper.Format(StringResources.Error_PartitionNotFound), FabricErrorCode.PartitionNotFound);
                            }

                            ActionTraceSource.WriteInfo(
                                TraceType,
                                "Checking data loss numbers for partition {0} with retryCount {1}. Current numbers {2}:{3}",
                                partitionId,
                                retryCount,
                                preDataLossNumber,
                                postDataLossNumber);

                            if (postDataLossNumber != preDataLossNumber)
                            {
                                break;
                            }

                            await AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken);

                            ++retryCount;
                        } while (retryCount < maxRetryCount);
                    }
                    finally
                    {
                        if (!triedToRemovedBehavior)
                        {
                            ActionTraceSource.WriteWarning(TraceType, "Exception after adding behavior to block messages. Removing behavior synchronously");
                            FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                                () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                                    fmPrimaryNodeName,
                                    behaviorName,
                                    action.RequestTimeout,
                                    cancellationToken),
                                FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                                helper.GetRemainingTime(),
                                cancellationToken).GetAwaiter().GetResult();

                            // TODO: Wait for some time so that the removal of this unreliable transport behavior can be read from the files.
                            // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successully applied
                            Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).GetAwaiter().GetResult();
                        }
                    }
                }while (postDataLossNumber == preDataLossNumber);

                ActionTraceSource.WriteInfo(
                    TraceType,
                    "InvokeDataLossAction action completed postDataLossNumber:{0}, preDataLossNumber:{1}",
                    postDataLossNumber, preDataLossNumber);

                action.Result          = new InvokeDataLossResult(getPartitionStateAction.Result);
                this.ResultTraceString = StringHelper.Format("InvokeDataLossAction succeeded for {0} with DatalossMode = {1}", partitionId, action.DataLossMode);
            }
            /// <summary>
            /// Removes a single replica and, when requested, waits until the removal is visible in query results.
            /// </summary>
            /// <remarks>
            /// If the action does not supply all of NodeName, PartitionId and ReplicaId, the target is resolved
            /// by running a <c>GetSelectedReplicaStateAction</c> over <c>action.ReplicaSelector</c>; in that case
            /// node name, partition id and replica id are all taken from the selector result.
            /// With <c>CompletionMode.Verify</c> the replica list is polled (5 second pause between polls) until the
            /// replica is no longer returned or is reported as Dropped.
            /// </remarks>
            /// <param name="testContext">Context providing the action executor and the fabric client.</param>
            /// <param name="action">The remove-replica request; its Result is set on success.</param>
            /// <param name="cancellationToken">Token forwarded to every fabric call and wait.</param>
            /// <exception cref="TimeoutException">
            /// In Verify mode, the replica was not observed as removed before <c>action.ActionTimeout</c> elapsed.
            /// </exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, RemoveReplicaAction action, CancellationToken cancellationToken)
            {
                TimeoutHelper helper = new TimeoutHelper(action.ActionTimeout);

                string          nodeName              = action.NodeName;
                Guid?           partitionId           = action.PartitionId;
                long?           replicaId             = action.ReplicaId;
                SelectedReplica replicaSelectorResult = SelectedReplica.None;

                if (string.IsNullOrEmpty(nodeName) ||
                    !partitionId.HasValue ||
                    !replicaId.HasValue)
                {
                    // The explicit target is incomplete, so fall back to the replica selector.
                    ThrowIf.Null(action.ReplicaSelector, "ReplicaSelector");

                    var getReplicaStateAction = new GetSelectedReplicaStateAction(action.ReplicaSelector)
                    {
                        RequestTimeout = action.RequestTimeout,
                        ActionTimeout  = helper.GetRemainingTime()
                    };

                    await testContext.ActionExecutor.RunAsync(getReplicaStateAction, cancellationToken).ConfigureAwait(false);

                    var replicaStateActionResult = getReplicaStateAction.Result;
                    ReleaseAssert.AssertIf(replicaStateActionResult == null, "replicaStateActionResult cannot be null");
                    replicaSelectorResult = replicaStateActionResult.Item1;

                    // Assert before dereferencing SelectedPartition so a missing selection surfaces with a clear message.
                    ReleaseAssert.AssertIf(replicaSelectorResult == null || replicaSelectorResult.SelectedPartition == null,
                                           "replicaSelectorResult cannot be null or for a non-null replicaSelectorResult, the selected partition must be non-null");

                    partitionId = replicaSelectorResult.SelectedPartition.PartitionId;

                    Replica replicaStateResult = replicaStateActionResult.Item2;
                    ReleaseAssert.AssertIf(replicaStateResult == null, "replicaStateResult cannot be null");

                    nodeName  = replicaStateResult.NodeName;
                    replicaId = replicaStateResult.Id;
                }

                ThrowIf.IsFalse(partitionId.HasValue, "PartitionID");
                ThrowIf.IsFalse(replicaId.HasValue, "ReplicaID");

                bool forceRemove = action.ForceRemove;

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.ServiceManager.RemoveReplicaAsync(
                        nodeName,
                        partitionId.Value,
                        replicaId.Value,
                        forceRemove,
                        action.RequestTimeout,
                        cancellationToken),
                    FabricClientRetryErrors.RemoveReplicaErrors.Value,
                    helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (action.CompletionMode == CompletionMode.Verify)
                {
                    // Check that replica on selected node has been removed i.e. the replica id does not exist anymore.
                    bool success = false;
                    while (helper.GetRemainingTime() > TimeSpan.Zero)
                    {
                        var replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                                partitionId.Value,
                                replicaId.Value,
                                action.RequestTimeout,
                                cancellationToken),
                            helper.GetRemainingTime(),
                            cancellationToken).ConfigureAwait(false);

                        bool dropped = replicasResult.Count == 0;
                        if (!dropped)
                        {
                            // Since we added a replica filter the result should contain the replica or none
                            ReleaseAssert.AssertIf(replicasResult.Count > 1, "More than 1 replica returned with replica filter {0}:{1}", partitionId.Value, replicaId.Value);
                            ReleaseAssert.AssertIf(replicasResult[0].Id != replicaId, "Incorrect replica Id {0} returned by query instead of {1}", replicasResult[0].Id, replicaId);
                            dropped = replicasResult[0].ReplicaStatus == ServiceReplicaStatus.Dropped;
                        }

                        if (dropped)
                        {
                            success = true;
                            break;
                        }

                        ActionTraceSource.WriteInfo(TraceSource, "Replica = {0}:{1} not yet completely removed. Retrying...", partitionId.Value, replicaId.Value);
                        await AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
                    }

                    if (!success)
                    {
                        throw new TimeoutException(StringHelper.Format(StringResources.Error_TestabilityActionTimeout,
                                                                       "RemoveReplica",
                                                                       StringHelper.Format("{0}:{1}", partitionId.Value, replicaId.Value)));
                    }
                }

                action.Result     = new RemoveReplicaResult(replicaSelectorResult);
                ResultTraceString = StringHelper.Format(
                    "RemoveReplicaOrInstance succeeded by removing replica {0}:{1} on node {2} with CompletionMode {3}",
                    partitionId.Value,
                    replicaId.Value,
                    nodeName,
                    action.CompletionMode);
            }
            /// <summary>
            /// Restarts one deployed code package and, in Verify mode, waits until a newer instance of the
            /// restarted entry point is reported as Started.
            /// </summary>
            /// <remarks>
            /// The application name is always required. When node name, service manifest name or code package name
            /// is missing, the target is derived from <c>action.ReplicaSelector</c>: the selected replica gives the
            /// node, and the deployed replica on that node gives manifest, activation id and code package.
            /// </remarks>
            /// <param name="testContext">Context providing the action executor and the fabric client.</param>
            /// <param name="action">The restart request; its Result is set on success.</param>
            /// <param name="cancellationToken">Token forwarded to every fabric call and wait.</param>
            /// <exception cref="FabricException">No deployed replica for the selected partition was found on the node.</exception>
            /// <exception cref="TimeoutException">In Verify mode, the restart was not observed before the action timeout elapsed.</exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, RestartDeployedCodePackageAction action, CancellationToken cancellationToken)
            {
                this.helper = new TimeoutHelper(action.ActionTimeout);

                string          targetNode    = action.NodeName;
                Uri             appName       = action.ApplicationName;
                string          manifestName  = action.ServiceManifestName;
                string          activationId  = action.ServicePackageActivationId;
                string          packageName   = action.CodePackageName;
                SelectedReplica chosenReplica = SelectedReplica.None;

                ThrowIf.Null(appName, "ApplicationName");

                bool targetFullySpecified = !string.IsNullOrEmpty(targetNode)
                                            && !string.IsNullOrEmpty(manifestName)
                                            && !string.IsNullOrEmpty(packageName);

                if (!targetFullySpecified)
                {
                    // Something is missing from the explicit target: resolve it through the replica selector.
                    ThrowIf.Null(action.ReplicaSelector, "ReplicaSelector");

                    var replicaStateQuery = new GetSelectedReplicaStateAction(action.ReplicaSelector);
                    replicaStateQuery.RequestTimeout = action.RequestTimeout;
                    replicaStateQuery.ActionTimeout  = this.helper.GetRemainingTime();

                    await testContext.ActionExecutor.RunAsync(replicaStateQuery, cancellationToken).ConfigureAwait(false);

                    var queryOutcome = replicaStateQuery.Result;
                    ReleaseAssert.AssertIf(queryOutcome == null, "replicaStateActionResult cannot be null");

                    chosenReplica = queryOutcome.Item1;
                    ReleaseAssert.AssertIf(
                        chosenReplica == null || chosenReplica.SelectedPartition == null,
                        "replicaSelectorResult cannot be null or for a non-null replicaSelectorResult, the selected partition must be non-null");
                    Guid selectedPartitionId = chosenReplica.SelectedPartition.PartitionId;

                    Replica replicaState = queryOutcome.Item2;
                    ReleaseAssert.AssertIf(replicaState == null, "replicaStateResult cannot be null");

                    targetNode = replicaState.NodeName;

                    var replicasOnNode = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync <DeployedServiceReplicaList>(
                        () => testContext.FabricClient.QueryManager.GetDeployedReplicaListAsync(
                            targetNode,
                            appName,
                            null,
                            selectedPartitionId,
                            action.RequestTimeout,
                            cancellationToken),
                        this.helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    // Pick the first deployed replica that belongs to the selected partition.
                    DeployedServiceReplica deployedMatch = null;
                    foreach (DeployedServiceReplica candidate in replicasOnNode)
                    {
                        if (candidate.Partitionid == selectedPartitionId)
                        {
                            deployedMatch = candidate;
                            break;
                        }
                    }

                    if (deployedMatch == null)
                    {
                        throw new FabricException(
                                  StringHelper.Format(StringResources.Error_DidNotFindDeployedReplicaOnNode, selectedPartitionId, targetNode),
                                  FabricErrorCode.ReplicaDoesNotExist);
                    }

                    manifestName = deployedMatch.ServiceManifestName;
                    activationId = deployedMatch.ServicePackageActivationId;
                    packageName  = deployedMatch.CodePackageName;
                }

                ActionTraceSource.WriteInfo(TraceSource, "SelectedReplica: serviceManifestName: {0}, servicePackageActivationId: {1}, codePackageName: {2}", manifestName, activationId, packageName);

                DeployedCodePackage packageBeforeRestart = await this.GetCodePackageInfoAsync(
                    testContext,
                    targetNode,
                    appName,
                    manifestName,
                    activationId,
                    packageName,
                    action,
                    cancellationToken).ConfigureAwait(false);

                var entryPointTarget = GetCodepackageEntrypointToRestart(action, packageBeforeRestart);

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.FaultManager.RestartDeployedCodePackageUsingNodeNameAsync(
                        targetNode,
                        appName,
                        manifestName,
                        activationId,
                        packageName,
                        entryPointTarget.EntryPoint.CodePackageInstanceId,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (action.CompletionMode == CompletionMode.Verify)
                {
                    bool restartObserved = false;
                    while (this.helper.GetRemainingTime() > TimeSpan.Zero)
                    {
                        var packageNow = await this.GetCodePackageInfoAsync(
                            testContext,
                            targetNode,
                            appName,
                            manifestName,
                            activationId,
                            packageName,
                            action,
                            cancellationToken).ConfigureAwait(false);

                        if (packageNow != null)
                        {
                            // Look at the same kind of entry point (main vs. setup) that was restarted.
                            var entryPointNow = packageNow.SetupEntryPoint;
                            if (entryPointTarget.EntryPointType == EntryPointType.Main)
                            {
                                entryPointNow = packageNow.EntryPoint;
                            }

                            // Restarted means: a newer instance id than the one we restarted, and it is running.
                            if (entryPointNow != null
                                && entryPointNow.CodePackageInstanceId > entryPointTarget.EntryPoint.CodePackageInstanceId
                                && entryPointNow.EntryPointStatus == EntryPointStatus.Started)
                            {
                                restartObserved = true;
                                break;
                            }
                        }

                        ActionTraceSource.WriteInfo(TraceSource, "CodePackage = {0}:{1}:{2} not yet restarted. Retrying...", targetNode, appName, packageName);
                        await AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
                    }

                    if (!restartObserved)
                    {
                        string timeoutMessage = StringHelper.Format(
                            StringResources.Error_TestabilityActionTimeout,
                            "RestartDeployedCodePackage",
                            appName);
                        throw new TimeoutException(timeoutMessage);
                    }
                }

                action.Result = new RestartDeployedCodePackageResult(
                    targetNode,
                    appName,
                    manifestName,
                    activationId,
                    packageName,
                    entryPointTarget.EntryPoint.CodePackageInstanceId,
                    chosenReplica);

                ResultTraceString = StringHelper.Format("RestartCodePackageAction succeeded for {0}:{1}:{2} with CompletionMode = {3}", targetNode, appName, packageName, action.CompletionMode);
            }
Пример #8
0
            /// <summary>
            /// Restarts a node and, in Verify mode, waits until the node reports a higher instance id and is up.
            /// </summary>
            /// <remarks>
            /// When <c>action.NodeName</c> is empty the node is taken from the replica chosen by
            /// <c>action.ReplicaSelector</c>. When the node instance is <c>BigInteger.MinusOne</c> (or the node was
            /// resolved through the selector) the current instance id is looked up before issuing the restart.
            /// </remarks>
            /// <param name="testContext">Context providing the action executor and the fabric client.</param>
            /// <param name="action">The restart request; its Result is set on success.</param>
            /// <param name="cancellationToken">Token forwarded to every fabric call and wait.</param>
            /// <exception cref="FabricException">The node's current info could not be obtained (NodeNotFound).</exception>
            /// <exception cref="TimeoutException">In Verify mode, the restart was not observed before the action timeout elapsed.</exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, RestartNodeAction action, CancellationToken cancellationToken)
            {
                ActionTraceSource.WriteInfo(TraceSource, "Enter RestartNodeAction/ExecuteActionAsync: operationTimeout='{0}', requestTimeout='{1}'", action.ActionTimeout, action.RequestTimeout);

                this.helper = new TimeoutHelper(action.ActionTimeout);
                SelectedReplica selectedReplica  = SelectedReplica.None;
                string          nodeName         = action.NodeName;
                BigInteger      nodeInstance     = action.NodeInstance;
                bool            createFabricDump = action.CreateFabricDump;

                if (string.IsNullOrEmpty(nodeName))
                {
                    ThrowIf.Null(action.ReplicaSelector, "ReplicaSelector");

                    var getReplicaStateAction = new GetSelectedReplicaStateAction(action.ReplicaSelector)
                    {
                        RequestTimeout = action.RequestTimeout,
                        ActionTimeout  = this.helper.GetRemainingTime()
                    };

                    await testContext.ActionExecutor.RunAsync(getReplicaStateAction, cancellationToken).ConfigureAwait(false);

                    var replicaStateActionResult = getReplicaStateAction.Result;
                    ReleaseAssert.AssertIf(replicaStateActionResult == null, "replicaStateActionResult cannot be null");
                    selectedReplica = replicaStateActionResult.Item1;
                    Replica replicaStateResult = replicaStateActionResult.Item2;
                    ReleaseAssert.AssertIf(replicaStateResult == null, "replicaStateResult cannot be null");

                    nodeName     = replicaStateResult.NodeName;

                    // The caller-supplied instance (if any) was not tied to this node, so force a lookup below.
                    nodeInstance = BigInteger.MinusOne;
                }

                if (nodeInstance == BigInteger.MinusOne)
                {
                    var nodeInfo = await this.GetCurrentNodeInfoAsync(testContext, nodeName, action, cancellationToken).ConfigureAwait(false);

                    // Defensive guard, mirroring the StartNode handler.
                    // NOTE(review): confirm whether this GetCurrentNodeInfoAsync overload can return null or already throws.
                    if (nodeInfo == null)
                    {
                        throw new FabricException(StringResources.Error_NodeNotFound, FabricErrorCode.NodeNotFound);
                    }

                    nodeInstance = nodeInfo.NodeInstanceId;
                }

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.FaultManager.RestartNodeUsingNodeNameAsync(
                        nodeName,
                        nodeInstance,
                        createFabricDump,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (action.CompletionMode == CompletionMode.Verify)
                {
                    bool success = false;
                    while (this.helper.GetRemainingTime() > TimeSpan.Zero)
                    {
                        var nodeInfo = await this.GetCurrentNodeInfoAsync(testContext, nodeName, action, cancellationToken).ConfigureAwait(false);

                        // A missing node info is treated as "not restarted yet" and retried, rather than dereferenced.
                        if (nodeInfo != null && nodeInfo.NodeInstanceId > nodeInstance && nodeInfo.IsNodeUp)
                        {
                            success = true;
                            break;
                        }

                        ActionTraceSource.WriteInfo(TraceSource, "NodeName = {0} not yet restarted. '{1}' seconds remain. Retrying...", nodeName, this.helper.GetRemainingTime().TotalSeconds);
                        await AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
                    }

                    if (!success)
                    {
                        throw new TimeoutException(StringHelper.Format(StringResources.Error_TestabilityActionTimeout,
                                                                       "RestartNode",
                                                                       nodeName));
                    }
                }

                // create result
                action.Result = new RestartNodeResult(selectedReplica, new NodeResult(nodeName, nodeInstance));

                ResultTraceString = StringHelper.Format("RestartNodeAction succeeded for {0}:{1} with CompletionMode = {2}", nodeName, nodeInstance, action.CompletionMode);
            }
Пример #9
0
            /// <summary>
            /// Moves the primary replica of the selected partition to <c>action.NodeName</c>, or to a node chosen
            /// by the service when no node name is given.
            /// </summary>
            /// <remarks>
            /// Unless <c>action.IgnoreConstraints</c> is set, the current primary is located first and the request is
            /// rejected when the requested node already hosts it.
            /// </remarks>
            /// <param name="testContext">Context providing the action executor, cluster info and the fabric client.</param>
            /// <param name="action">The move-primary request; its Result is set on success.</param>
            /// <param name="cancellationToken">Token forwarded to every fabric call.</param>
            /// <exception cref="InvalidOperationException">No node info is available, or the service is not stateful.</exception>
            /// <exception cref="FabricException">
            /// The partition has no primary on a known node (NotReady), or the requested node already hosts the
            /// primary (AlreadyPrimaryReplica).
            /// </exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, MovePrimaryAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.PartitionSelector, "PartitionSelector");

                this.helper = new TimeoutHelper(action.ActionTimeout);

                string newPrimaryNodeName = action.NodeName;

                var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
                {
                    RequestTimeout = action.RequestTimeout,
                    ActionTimeout  = this.helper.GetRemainingTime()
                };

                await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken).ConfigureAwait(false);

                Guid partitionId = getPartitionStateAction.Result.PartitionId;

                if (!action.IgnoreConstraints)
                {
                    // select random node where replica's primary not present
                    var nodesInfo = await testContext.FabricCluster.GetLatestNodeInfoAsync(action.RequestTimeout, this.helper.GetRemainingTime(), cancellationToken).ConfigureAwait(false);

                    // Any() stops at the first element instead of counting the whole sequence.
                    if (nodesInfo == null || !nodesInfo.Any())
                    {
                        throw new InvalidOperationException(StringHelper.Format(StringResources.Error_NotEnoughNodesForTestabilityAction, "MovePrimary"));
                    }

                    ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                            partitionId,
                            0,
                            action.RequestTimeout,
                            cancellationToken),
                        this.helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    NodeInfo currentPrimaryNodeInfo = null;
                    foreach (var replica in replicasResult)
                    {
                        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                        if (statefulReplica == null)
                        {
                            throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "MovePrimary", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
                        }

                        if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                        {
                            currentPrimaryNodeInfo = nodesInfo.FirstOrDefault(n => n.NodeName == statefulReplica.NodeName);
                            if (!string.IsNullOrEmpty(newPrimaryNodeName) && newPrimaryNodeName == statefulReplica.NodeName)
                            {
                                throw new FabricException(
                                          StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, newPrimaryNodeName, "MovePrimary", "Primary already exists on node"),
                                          FabricErrorCode.AlreadyPrimaryReplica);
                            }

                            // Only the first primary found is considered.
                            break;
                        }
                    }

                    if (currentPrimaryNodeInfo == null)
                    {
                        // Either no replica is primary, or the primary's node is not in the node list.
                        throw new FabricException(StringHelper.Format(StringResources.Error_PartitionPrimaryNotReady, action.PartitionSelector + ":" + partitionId), FabricErrorCode.NotReady);
                    }

                    string currentPrimaryNodeName = currentPrimaryNodeInfo.NodeName;

                    if (newPrimaryNodeName == currentPrimaryNodeName)
                    {
                        throw new FabricException(
                                  StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, newPrimaryNodeName, "MovePrimary", "Primary already exists on node"),
                                  FabricErrorCode.AlreadyPrimaryReplica);
                    }
                }

                ActionTraceSource.WriteInfo(TraceSource, "Calling move primary with node {0}, partition {1}", string.IsNullOrEmpty(newPrimaryNodeName) ? "Random" : newPrimaryNodeName, partitionId);
                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.FaultManager.MovePrimaryUsingNodeNameAsync(
                        newPrimaryNodeName,
                        getPartitionStateAction.Result.ServiceName,
                        partitionId,
                        action.IgnoreConstraints,
                        action.RequestTimeout,
                        cancellationToken),
                    FabricClientRetryErrors.MovePrimaryFabricErrors.Value,
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                action.Result = new MovePrimaryResult(newPrimaryNodeName, getPartitionStateAction.Result);

                ResultTraceString = StringHelper.Format("MovePrimaryAction succeeded for moving Primary for {0}  to node  {1}.", partitionId, newPrimaryNodeName);
            }