Ejemplo n.º 1
0
        /// <summary>
        /// Locates the node that hosts an active secondary replica of partition
        /// 00000000-0000-0000-0000-000000005000 (the FAS partition, judging by the method name).
        /// The replica query is retried roughly once per second for up to two minutes.
        /// </summary>
        /// <returns>
        /// The node whose name matches the secondary replica's node name, or null when the node list
        /// contains no node with that name.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The replica list could not be obtained before the timeout elapsed, or it contains no
        /// replica in the <see cref="ReplicaRole.ActiveSecondary"/> role.
        /// </exception>
        internal static async Task <Node> GetNodeWithFASSecondary()
        {
            // Await rather than block on .Result: blocking inside an async method ties up a thread
            // and wraps failures in an AggregateException.
            NodeList           nodeList = await ActionTest.GetNodeListAsync();
            ServiceReplicaList list     = null;

            System.Fabric.Common.TimeoutHelper timeoutHelper = new System.Fabric.Common.TimeoutHelper(TimeSpan.FromMinutes(2));

            // FabricClient is disposable; release it as soon as the query loop is done.
            using (FabricClient fc = new FabricClient())
            {
                do
                {
                    try
                    {
                        list = await fc.QueryManager.GetReplicaListAsync(new Guid("00000000-0000-0000-0000-000000005000"));
                    }
                    catch (Exception)
                    {
                        // Best effort: any failure is retried until the timeout below expires.
                    }

                    if (list == null)
                    {
                        // The pause lives outside the catch block so it can be awaited on every
                        // compiler version instead of blocking the thread with Wait().
                        await Task.Delay(TimeSpan.FromSeconds(1));
                    }
                }while (list == null && timeoutHelper.GetRemainingTime() > TimeSpan.Zero);
            }

            if (list == null)
            {
                throw new InvalidOperationException("Could not resolve FAS primary");
            }

            Replica replica = list.FirstOrDefault(r => ((StatefulServiceReplica)r).ReplicaRole == ReplicaRole.ActiveSecondary);

            if (replica == null)
            {
                // Without this guard the lookup below would fail with a NullReferenceException.
                throw new InvalidOperationException("Could not find an active secondary replica for FAS");
            }

            return(nodeList.FirstOrDefault(n => n.NodeName == replica.NodeName));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Polls the action store until its action count equals <paramref name="targetCount"/>.
        /// The count is re-read every five seconds; once three minutes have elapsed without a match
        /// the failure is traced as an error and the process is failed fast.
        /// </summary>
        /// <param name="targetCount">The exact action count to wait for.</param>
        private void WaitForActionCount(long targetCount)
        {
            var  deadline = new System.Fabric.Common.TimeoutHelper(TimeSpan.FromMinutes(3));
            long observed;

            while (true)
            {
                observed = this.actionStore.GetActionCountAsync(false).GetAwaiter().GetResult();
                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Current action count='{0}', target action count='{1}'", observed, targetCount);

                if (observed == targetCount)
                {
                    // Target reached: nothing left to do.
                    return;
                }

                // Pause first, then look at the clock, so the final attempt is also followed by a wait.
                Task.Delay(TimeSpan.FromSeconds(5)).Wait();

                if (deadline.GetRemainingTime() <= TimeSpan.Zero)
                {
                    break;
                }
            }

            // Only reachable when the deadline passed without ever observing the target count.
            string failure = string.Format(CultureInfo.InvariantCulture, "Did not reach expected target action count='{0}', current action count='{1}'", targetCount, observed);
            TestabilityTrace.TraceSource.WriteError(TraceType, failure);
            System.Fabric.Common.ReleaseAssert.Failfast(failure);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Checks whether <paramref name="uri"/> can be reached.
        /// A file URI is reachable when the file exists. Any other URI is probed with a web request,
        /// which is retried after each <see cref="WebException"/> until the operation timeout expires.
        /// </summary>
        /// <param name="uri">The URI to probe; must not be null.</param>
        /// <param name="requestMethod">The request method assigned to each web request.</param>
        /// <param name="operationTimeoutInMs">Overall time budget, in milliseconds, across all attempts.</param>
        /// <param name="requestTimeoutInMs">Per-request timeout in milliseconds (not applied when DotNetCoreClrLinux is defined).</param>
        /// <param name="retryIntervalInMs">Pause, in milliseconds, between failed attempts.</param>
        /// <returns>
        /// For file URIs, whether the file exists. For an HTTP response, whether the status is
        /// <see cref="HttpStatusCode.OK"/>. For any other response, whether its content length is positive.
        /// False when the host is blank or no response arrived before the timeout.
        /// </returns>
        internal static async Task <bool> IsUriReachableAsync(
            Uri uri,
            string requestMethod     = DMConstants.HttpMethodHead,
            int operationTimeoutInMs = DMConstants.UriReachableTimeoutInMs,
            int requestTimeoutInMs   = DMConstants.UriRequestTimeoutInMs,
            int retryIntervalInMs    = DMConstants.UriReachableRetryIntervalInMs)
        {
            ReleaseAssert.AssertIf(uri == null, "uri cannot be null for IsUriReachableAsync.");
            if (uri.IsFile)
            {
                FileInfo fi = new FileInfo(uri.LocalPath);
                return(fi.Exists);
            }

            if (string.IsNullOrWhiteSpace(uri.Host))
            {
                return(false);
            }

            var timeout = new System.Fabric.Common.TimeoutHelper(TimeSpan.FromMilliseconds(operationTimeoutInMs));
            while (!System.Fabric.Common.TimeoutHelper.HasExpired(timeout))
            {
                WebRequest request = WebRequest.Create(uri);
#if !DotNetCoreClrLinux
                request.Timeout = requestTimeoutInMs;
#endif
                request.Method = requestMethod;
                try
                {
                    using (WebResponse response = await request.GetResponseAsync().ConfigureAwait(false))
                    {
                        // Any response ends the probe; only a WebException leads to another attempt.
                        HttpWebResponse httpResponse = response as HttpWebResponse;
                        if (httpResponse != null)
                        {
                            return(httpResponse.StatusCode == HttpStatusCode.OK);
                        }

                        return(response.ContentLength > 0);
                    }
                }
                catch (WebException ex)
                {
                    SFDeployerTrace.WriteNoise(StringResources.Error_SFUriUnreachable_Formatted, uri, ex.Message);
                }

                // Asynchronous pause: Thread.Sleep would block the calling thread inside an async method.
                await Task.Delay(retryIntervalInMs).ConfigureAwait(false);
            }

            return(false);
        }
Ejemplo n.º 4
0
        // This doesn't run in automation, but it is being kept here so it can be run as a small test.
        // See FaultAnalysisServiceTruncate.test for a test on this code path.
        //
        // Starts six commands, waits for the stored action count to reach
        // FASConstants.TestMaxStoredActionCountValue, then polls the completed PartitionRestart commands
        // until exactly that many remain. Fails fast when fewer remain, or when the expected count is
        // not observed within three cleanup intervals.
        private void TestTruncate()
        {
            this.StartActionIfItHasNotBeenStarted(Command.StuckAction);
            this.StartActionIfItHasNotBeenStarted(Command.FailoverManagerDataLoss);
            this.StartActionIfItHasNotBeenStarted(Command.InvokeDataLossMidActionTestFatal);
            this.StartActionIfItHasNotBeenStarted(Command.InvokeDataLossMidActionTestTransient);

            this.StartActionIfItHasNotBeenStarted(Command.RestartPartitionMidActionTestFatal);
            this.StartActionIfItHasNotBeenStarted(Command.RestartPartitionMidActionTestTransient);

            this.WaitForActionCount(FASConstants.TestMaxStoredActionCountValue);

            // Confirm this action is still stuck - ie that an action not in terminal state is not removed
            this.mockClient.WaitForState(MockClient.MockClientCommandInfo[Command.StuckAction], Actions.Steps.StepStateNames.LookingUpState);
            this.mockClient.WaitForState(MockClient.MockClientCommandInfo[Command.RestartPartitionMidActionTestTransient], Actions.Steps.StepStateNames.CompletedSuccessfully);

            // At this point there should be 1 command in the actionTable, StuckAction, and somewhere between 2 (Constants.TestMaxStoredActionCountValue) and 5 (the total number of possible
            // completed commands) commands in the historyTable.  In steady state, after truncates have run, the historyTable should have 2 (Constants.TestMaxStoredActionCountValue) commands remaining,
            // and they should be the ones that completed last.  Since this test only allows 1 action at a time, this will always be the 2 that were started last -
            // the RestartPartition ones.
            bool conditionSatisfied = false;
            var  timeoutHelper      = new System.Fabric.Common.TimeoutHelper(TimeSpan.FromSeconds(3 * FASConstants.TestStoredActionCleanupIntervalInSeconds));

            do
            {
                TestCommandListDescription queryDescription = new TestCommandListDescription(Query.TestCommandStateFilter.CompletedSuccessfully, Query.TestCommandTypeFilter.PartitionRestart);
                TestCommandQueryResult     queryResult      = this.mockClient.GetTestCommandListAsync(queryDescription).GetAwaiter().GetResult();
                List <TestCommandStatus>   result           = queryResult.Items;

                if (result.Count < FASConstants.TestMaxStoredActionCountValue)
                {
                    string error = string.Format(
                        CultureInfo.InvariantCulture,
                        "Number of commands in the historyTable {0} is below TestMaxStoredActionCountValue (config 'DefaultMaxStoredActionCount')",
                        result.Count);
                    TestabilityTrace.TraceSource.WriteError(TraceType, error);
                    System.Fabric.Common.ReleaseAssert.Failfast(error);
                }

                int partitionRestartCount = result.Count(c => c.TestCommandType == TestCommandType.PartitionRestart);
                if (partitionRestartCount != FASConstants.TestMaxStoredActionCountValue)
                {
                    // Trace the count that was actually compared, not the size of the whole result list.
                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Number of PartitionRestart results is {0}, expecting {1}, retrying", partitionRestartCount, FASConstants.TestMaxStoredActionCountValue);

                    // In a do/while, 'continue' jumps to the loop condition, so the timeout is still honored.
                    continue;
                }

                conditionSatisfied = true;
            }while (!conditionSatisfied && timeoutHelper.GetRemainingTime() > TimeSpan.Zero);

            // Fail only when the expected state was never observed; failing unconditionally here would
            // make the test fail even on success and leave the exit trace below unreachable.
            if (!conditionSatisfied)
            {
                System.Fabric.Common.ReleaseAssert.Failfast(string.Format(CultureInfo.InvariantCulture, "Did not reach expected target action, see traces above filtered by type~ActionTest'"));
            }

            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Exiting TestTruncate");
        }
        /// <summary>
        /// Polls the cluster node list until every node named in <paramref name="addedNodes"/> reports
        /// <see cref="System.Fabric.Query.NodeStatus.Up"/>, pausing five seconds between attempts.
        /// </summary>
        /// <param name="addedNodes">Descriptions of the nodes whose activation is awaited.</param>
        /// <remarks>
        /// Returns normally as soon as all added nodes are up, or when cancellation is observed by the
        /// loop condition. If the polling timeout expires first, <c>ThrowIfExpired</c> is invoked on the
        /// timeout helper. A cancellation that arrives during the pause surfaces from <c>Task.Delay</c>.
        /// </remarks>
        private async Task PollAddedNodesActivatedAsync(List <NodeDescription> addedNodes)
        {
            System.Fabric.Common.TimeoutHelper timeoutHelper = new System.Fabric.Common.TimeoutHelper(Constants.FabricPollActivationNodesTimeoutInMinutes);

            while (!System.Fabric.Common.TimeoutHelper.HasExpired(timeoutHelper) && !this.cancellationToken.IsCancellationRequested)
            {
                UpgradeOrchestrationTrace.TraceSource.WriteInfo(TraceType, "Checking activation status of added nodes.");
                try
                {
                    System.Fabric.Query.NodeList nodes = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () =>
                        this.fabricClient.QueryManager.GetNodeListAsync(
                            null,
                            Constants.FabricQueryTimeoutInMinutes,
                            this.cancellationToken),
                        Constants.FabricQueryRetryTimeoutInMinutes).ConfigureAwait(false);

                    bool isActivationComplete = true;
                    for (int i = 0; i < nodes.Count; ++i)
                    {
                        if (addedNodes.Any(addedNode => addedNode.NodeName == nodes[i].NodeName) &&
                            nodes[i].NodeStatus != System.Fabric.Query.NodeStatus.Up)
                        {
                            UpgradeOrchestrationTrace.TraceSource.WriteInfo(TraceType, "Node {0} has not been activated", nodes[i].IpAddressOrFQDN);
                            isActivationComplete = false;
                            break;
                        }
                    }

                    if (isActivationComplete)
                    {
                        UpgradeOrchestrationTrace.TraceSource.WriteInfo(TraceType, "All nodes to be added are up. Continuing with upgrade");

                        // Return directly: success must not be turned into a timeout error just because
                        // the deadline passed while this final query was in flight.
                        return;
                    }

                    UpgradeOrchestrationTrace.TraceSource.WriteInfo(TraceType, "Waiting for five seconds before polling activation status again.");
                }
                catch (FabricTransientException fte)
                {
                    UpgradeOrchestrationTrace.TraceSource.WriteError(TraceType, "Retrying Polling Activated Node because of retryable exception {0}", fte);
                }

                // The pause sits after the try/catch so that a transient failure is also followed by a
                // wait instead of an immediate retry in a tight loop.
                await Task.Delay(TimeSpan.FromSeconds(5), this.cancellationToken).ConfigureAwait(false);
            }

            timeoutHelper.ThrowIfExpired();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Reads the text behind <paramref name="uri"/>, retrying after any failure until
        /// <paramref name="operationTimeout"/> has elapsed. File URIs are read from disk; every other
        /// URI is downloaded with a <see cref="WebClient"/>.
        /// </summary>
        /// <param name="uri">Location of the content.</param>
        /// <param name="retryInterval">Pause between a failed attempt and the next one.</param>
        /// <param name="operationTimeout">Total time budget across all attempts.</param>
        /// <param name="cancellationToken">Checked before each attempt and observed during the pause.</param>
        /// <returns>The content obtained by the first successful attempt.</returns>
        /// <exception cref="FabricValidationException">No attempt succeeded before the timeout expired.</exception>
        internal static async Task <string> GetContentsFromUriAsyncWithRetry(Uri uri, TimeSpan retryInterval, TimeSpan operationTimeout, CancellationToken cancellationToken)
        {
            var deadline = new System.Fabric.Common.TimeoutHelper(operationTimeout);

            while (true)
            {
                if (System.Fabric.Common.TimeoutHelper.HasExpired(deadline))
                {
                    break;
                }

                cancellationToken.ThrowIfCancellationRequested();

                try
                {
                    if (!uri.IsFile)
                    {
                        using (var client = new WebClient())
                        {
                            return(await client.DownloadStringTaskAsync(uri).ConfigureAwait(false));
                        }
                    }

                    return(File.ReadAllText(uri.LocalPath));
                }
                catch (Exception failure)
                {
                    // Every failure is traced as a warning and retried after the pause below.
                    SFDeployerTrace.WriteWarning(StringResources.Error_SFUriNotDownloaded, uri, failure.ToString());
                }

                await Task.Delay(retryInterval, cancellationToken).ConfigureAwait(false);
            }

            // The time budget ran out without a successful read.
            SFDeployerTrace.WriteError(StringResources.Error_SFTimedOut, operationTimeout);
            throw new FabricValidationException(string.Format(StringResources.Error_SFTimedOut, operationTimeout), FabricErrorCode.OperationCanceled);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Runs Chaos iterations in a loop until cancellation is requested on this instance's token.
        /// Each pass: waits out any residual time recorded for the next iteration, checks whether the
        /// cluster is ready for faults, validates cluster health, then either records a validation
        /// failure (and backs off) or executes a fault iteration, and finally waits
        /// <c>waitTime</c> before the next pass.
        /// </summary>
        /// <remarks>
        /// Only exceptions thrown inside the try block below are captured and converted into events;
        /// awaits outside it (the state manager calls and the <c>Task.Delay</c> calls that take the
        /// cancellation token) are not wrapped by this method.
        /// </remarks>
        internal async Task ExecuteIterationsWithPauseAsync()
        {
            TestabilityTrace.TraceSource.WriteNoise("StartTrek", "Enter ExecuteIterationsWithPauseAsync, datetimeutc={0}", DateTime.UtcNow);

            this.stopwatch = new Stopwatch();
            this.stopwatch.Start();

            Exception capturedException = null;

            // Current pause between passes; starts at the configured value and grows on failures.
            TimeSpan waitTime = this.ChaosParameters.WaitTimeBetweenIterations;

            while (!this.cancellationToken.IsCancellationRequested)
            {
                // If this is not the beginning of a fresh Chaos run, before starting a new iteration,
                // consult the NextIterationTimeStampRD to find out if there is some
                // residual wait time from the previous iteration, if there is then wait that amount
                var nextIterationTimeStampUtc = await this.StateManager.GetUtcTimeStampAsync(
                    FASConstants.NextItearationTimeStampRDName,
                    FASConstants.NextItearationTimeStampKey,
                    this.partition,
                    this.cancellationToken).ConfigureAwait(false);

                var residualWaitTime = nextIterationTimeStampUtc.Subtract(DateTime.UtcNow);

                if (residualWaitTime > TimeSpan.Zero)
                {
                    await this.StateManager.RegisterChaosEventAndUpdateChaosStatusAsync(
                        new WaitingEvent(DateTime.UtcNow, StringHelper.Format(StringResources.ChaosInfo_ResidualWaitingFromPreviousIteration, residualWaitTime)),
                        ChaosStatus.Running,
                        this.partition,
                        this.cancellationToken,
                        () =>
                    {
                        TestabilityTrace.TraceSource.WriteInfo(TraceType, "Registering WaitingEvent for waiting '{0}' left over from the previous iteration.", residualWaitTime);
                    }).ConfigureAwait(false);

                    await Task.Delay(residualWaitTime, this.cancellationToken).ConfigureAwait(false);
                }

                try
                {
                    if (await this.IsClusterReadyForFaultsAsync(this.cancellationToken).ConfigureAwait(false))
                    {
                        // A single time budget shared by the cluster and service validations below.
                        System.Fabric.Common.TimeoutHelper timer = new System.Fabric.Common.TimeoutHelper(this.ChaosParameters.MaxClusterStabilizationTimeout);

                        StringBuilder validationReport = new StringBuilder();

                        var clusterReport =
                            await
                            this.validationHelper.ValidateClusterHealthAsync(
                                timer.GetRemainingTime(),
                                this.cancellationToken).ConfigureAwait(false);

                        if (clusterReport.ValidationFailed)
                        {
                            // Backoff: the wait doubles on each failure for as long as it is still below
                            // DefaultMaximumBackoffForChaosIterations; once it has reached that value it stops growing.
                            waitTime += (waitTime >= FASConstants.DefaultMaximumBackoffForChaosIterations) ? TimeSpan.Zero : waitTime;

                            var serviceReport =
                                await
                                this.validationHelper.ValidateAllServicesAsync(
                                    timer.GetRemainingTime(),
                                    this.cancellationToken).ConfigureAwait(false);

                            if (serviceReport.ValidationFailed)
                            {
                                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Even though some services are unhealthy or unstable, going to induce faults, because the cluster is healthy.");

                                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Failure reason: \n'{0}'", serviceReport.FailureReason);

                                validationReport.Append(serviceReport.FailureReason);
                            }

                            // The waiting notice and the cluster failure reason are placed ahead of any
                            // service failure reason already appended to the report.
                            StringBuilder reportBuilder = new StringBuilder();
                            reportBuilder.Append(StringHelper.Format(StringResources.ChaosInfo_WaitingNotice, waitTime));
                            reportBuilder.AppendLine();
                            reportBuilder.AppendLine(clusterReport.FailureReason);

                            validationReport.Insert(0, reportBuilder.ToString());

                            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Cluster validation failed for '{0}'.", clusterReport.FailureReason);

                            var validationFailedEvent = new ValidationFailedEvent(DateTime.UtcNow, validationReport.ToString());

                            // record validation failed event
                            await this.StateManager.RegisterChaosEventAndUpdateChaosStatusAsync(
                                validationFailedEvent,
                                ChaosStatus.Running,
                                this.partition,
                                this.cancellationToken,
                                () =>
                            {
                                FabricEvents.Events.ChaosValidationFailed(
                                    Guid.NewGuid().ToString(),
                                    validationFailedEvent.TimeStampUtc.Ticks,
                                    validationFailedEvent.Reason);
                            }).ConfigureAwait(false);

                            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Pausing for '{0}' before performing next check.", waitTime);
                        }
                        else
                        {
                            // Validation passed: drop any accumulated backoff.
                            waitTime = this.ChaosParameters.WaitTimeBetweenIterations;

                            await this.StateManager.RegisterCurrentStatusAsync(
                                ChaosStatus.Running,
                                this.partition,
                                this.cancellationToken).ConfigureAwait(false);

                            // Store the next iteration's timestamp before running faults; the top of
                            // the loop reads it back to compute the residual wait.
                            var timestampOfNextIteration = DateTime.UtcNow.Add(waitTime);

                            await this.StateManager.SetUtcTimeStampAsync(
                                FASConstants.NextItearationTimeStampRDName,
                                FASConstants.NextItearationTimeStampKey,
                                timestampOfNextIteration,
                                this.partition,
                                this.cancellationToken).ConfigureAwait(false);

                            await this.ExecuteFaultIterationAsync(this.cancellationToken).ConfigureAwait(false);

                            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Pausing for '{0}' before executing next iteration.", waitTime);
                        }
                    }
                }
                catch (Exception exception)
                {
                    // Keep the exception for the handling block that follows this catch.
                    capturedException = exception;

                    // Backoff: the wait doubles on each failure for as long as it is still below
                    // DefaultMaximumBackoffForChaosIterations; once it has reached that value it stops growing.
                    waitTime += (waitTime >= FASConstants.DefaultMaximumBackoffForChaosIterations) ? TimeSpan.Zero : waitTime;

                    var timestampOfNextIteration = DateTime.UtcNow.Add(waitTime);

                    await this.StateManager.SetUtcTimeStampAsync(
                        FASConstants.NextItearationTimeStampRDName,
                        FASConstants.NextItearationTimeStampKey,
                        timestampOfNextIteration,
                        this.partition,
                        this.cancellationToken).ConfigureAwait(false);

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Exception occurred in the ChaosTestScenario loop: {0}, cancellationreq: {1}", capturedException, this.cancellationToken.IsCancellationRequested);
                }

                if (capturedException != null)
                {
                    // Unwrap an AggregateException to the inner exception of its flattened form.
                    var ae = capturedException as AggregateException;
                    if (ae != null)
                    {
                        capturedException = ae.Flatten().InnerException;
                    }

                    // Exceptions that ChaosUtil does not classify as expected are recorded as a TestErrorEvent.
                    if (!ChaosUtil.IsExpectedException(capturedException))
                    {
                        string testErrorEventMessage = capturedException.Message;

                        // For FabricChaosEngineException a fixed resource string replaces the exception's own message.
                        if (capturedException is FabricChaosEngineException)
                        {
                            testErrorEventMessage = StringResources.ChaosError_UnexpectedInChaosEngine;
                        }

                        await this.StateManager.RegisterChaosEventAndUpdateChaosStatusAsync(
                            new TestErrorEvent(DateTime.UtcNow, testErrorEventMessage),
                            ChaosStatus.Running,
                            this.partition,
                            this.cancellationToken).ConfigureAwait(false);

                        TestabilityTrace.TraceSource.WriteInfo(TraceType, "Unexpected exception '{0}' was turned into TestErrorEvent.", capturedException);
                    }

                    // handled the exception, now clear it
                    capturedException = null;
                }

                // In test mode, when the Chaos context contains FailoverAfterChaosFaultsKey, a transient
                // fault is reported on this partition.
                if (this.testMode && (this.ChaosParameters.Context != null && this.ChaosParameters.Context.ContainsKey(ChaosConstants.FailoverAfterChaosFaultsKey)))
                {
                    this.partition.ReportFault(FaultType.Transient);
                }

                await this.StateManager.RegisterChaosEventAndUpdateChaosStatusAsync(
                    new WaitingEvent(DateTime.UtcNow, StringHelper.Format(StringResources.ChaosInfo_WaitingBetweenIterations, waitTime)),
                    ChaosStatus.Running,
                    this.partition,
                    this.cancellationToken,
                    () =>
                {
                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Registering WaitingEvent for waiting '{0}' between iterations.", waitTime);
                }).ConfigureAwait(false);

                await Task.Delay(waitTime, this.cancellationToken).ConfigureAwait(false);

                // Counted on every pass that reaches this point, whether or not faults were executed.
                this.IterationsCompleted++;
            }

            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Session has completed. \nTotal iterations: {0}. Total elapsed time: {1}", this.IterationsCompleted, this.GetElapsedTime());
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Probes every healthy machine in parallel by calling <c>Utility.GetTempPath</c> on it.
        /// Retryable failures are retried every five seconds until the shared retry timeout expires;
        /// a machine whose probe ultimately fails is marked unhealthy.
        /// </summary>
        /// <param name="machineHealthContainer">Supplies the machine names and receives the unhealthy marks.</param>
        /// <returns>The container's <c>EnoughHealthyMachines()</c> result after all probes have finished.</returns>
        private static bool CheckRPCAccess(MachineHealthContainer machineHealthContainer)
        {
            // One retry budget shared by all machines probed in parallel.
            var retryTimeout = new System.Fabric.Common.TimeoutHelper(DMConstants.BpaRpcRetryTimeout);

            SFDeployerTrace.WriteNoise(StringResources.Info_SFRpcInfo);

            Parallel.ForEach <string>(
                machineHealthContainer.GetHealthyMachineNames(),
                (string machine) =>
            {
                bool healthy  = true;
                bool tryAgain = true;

                while (tryAgain)
                {
                    tryAgain = false;

                    try
                    {
                        Utility.GetTempPath(machine);
                    }
                    catch (Exception ex)
                    {
                        bool   retryable;
                        string message = DescribeRpcFailure(machine, ex, out retryable);

                        // The expiry check is evaluated on every failure, retryable or not.
                        bool expired = System.Fabric.Common.TimeoutHelper.HasExpired(retryTimeout);
                        tryAgain = retryable && !expired;

                        if (!tryAgain)
                        {
                            SFDeployerTrace.WriteError(message);
                            healthy = false;
                            continue;
                        }

                        SFDeployerTrace.WriteWarning(message);

                        StandaloneUtility.OpenRemoteRegistryNamedPipe(machine, retryTimeout.GetRemainingTime());

                        Thread.Sleep(TimeSpan.FromSeconds(5));
                    }
                }

                if (!healthy)
                {
                    machineHealthContainer.MarkMachineAsUnhealthy(machine);
                }
            });

            return(machineHealthContainer.EnoughHealthyMachines());
        }

        /// <summary>
        /// Builds the trace message for a failed RPC probe of <paramref name="machine"/> and reports
        /// whether the failure is one that is worth retrying.
        /// </summary>
        /// <param name="machine">Name of the machine that was probed.</param>
        /// <param name="failure">The exception thrown by the probe.</param>
        /// <param name="retryable">Set to true only for the IO failures ERROR_BAD_NETPATH and RPC_S_SERVER_TOO_BUSY.</param>
        /// <returns>The formatted message for the failure.</returns>
        private static string DescribeRpcFailure(string machine, Exception failure, out bool retryable)
        {
            retryable = false;

            if (failure is System.IO.IOException)
            {
                // If new failures are discovered: https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx
                if (failure.HResult == 53)
                {
                    // ERROR_BAD_NETPATH
                    string netpathMessage = string.Format(StringResources.Error_SFRpcIoNetpath, machine, failure.HResult);
                    retryable = true;
                    return(netpathMessage);
                }

                if (failure.HResult == 1723)
                {
                    // RPC_S_SERVER_TOO_BUSY
                    string busyMessage = string.Format(StringResources.Error_SFRpcIoTooBusy, machine, failure.HResult);
                    retryable = true;
                    return(busyMessage);
                }

                if (failure.HResult == 1727)
                {
                    // RPC_S_CALL_FAILED_DNE
                    return(string.Format(StringResources.Error_SFRpcIoFailedDne, machine, failure.HResult));
                }

                return(string.Format(StringResources.Error_SFRpcIoGeneric, machine, failure.HResult));
            }

            if (failure is System.Security.SecurityException)
            {
                // -2146233078 is COR_E_SECURITY
                string securityFormat = failure.HResult == -2146233078
                                        ? StringResources.Error_SFRpcSecAccess
                                        : StringResources.Error_SFRpcSecGeneric;
                return(string.Format(securityFormat, machine, failure.HResult));
            }

            if (failure is NullReferenceException)
            {
                // -2146232828 is COR_E_TARGETINVOCATION
                string nullFormat = failure.HResult == -2146232828
                                    ? StringResources.Error_SFRpcNullRegAccess
                                    : StringResources.Error_SFRpcNullGeneric;
                return(string.Format(nullFormat, machine, failure.HResult));
            }

            // This is to catch coding errors.
            return(string.Format(StringResources.Error_SFRpcGeneric, machine, failure.HResult));
        }
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, RestartDeployedCodePackageAction action, CancellationToken cancellationToken)
            {
                this.helper = new TimeoutHelper(action.ActionTimeout);

                string          nodeName                   = action.NodeName;
                Uri             applicationName            = action.ApplicationName;
                string          serviceManifestName        = action.ServiceManifestName;
                string          servicePackageActivationId = action.ServicePackageActivationId;
                string          codePackageName            = action.CodePackageName;
                SelectedReplica replicaSelectorResult      = SelectedReplica.None;

                ThrowIf.Null(applicationName, "ApplicationName");

                if (string.IsNullOrEmpty(nodeName) ||
                    string.IsNullOrEmpty(serviceManifestName) ||
                    string.IsNullOrEmpty(codePackageName))
                {
                    ThrowIf.Null(action.ReplicaSelector, "ReplicaSelector");

                    var getReplicaStateAction = new GetSelectedReplicaStateAction(action.ReplicaSelector)
                    {
                        RequestTimeout = action.RequestTimeout,
                        ActionTimeout  = this.helper.GetRemainingTime()
                    };

                    await testContext.ActionExecutor.RunAsync(getReplicaStateAction, cancellationToken).ConfigureAwait(false);

                    var replicaStateActionResult = getReplicaStateAction.Result;
                    ReleaseAssert.AssertIf(replicaStateActionResult == null, "replicaStateActionResult cannot be null");
                    replicaSelectorResult = replicaStateActionResult.Item1;
                    ReleaseAssert.AssertIf(replicaSelectorResult == null || replicaSelectorResult.SelectedPartition == null,
                                           "replicaSelectorResult cannot be null or for a non-null replicaSelectorResult, the selected partition must be non-null");
                    Guid partitionId = replicaStateActionResult.Item1.SelectedPartition.PartitionId;

                    Replica replicaStateResult = replicaStateActionResult.Item2;
                    ReleaseAssert.AssertIf(replicaStateResult == null, "replicaStateResult cannot be null");

                    nodeName = replicaStateResult.NodeName;

                    var deployedReplicaListResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync <DeployedServiceReplicaList>(
                        () => testContext.FabricClient.QueryManager.GetDeployedReplicaListAsync(
                            nodeName,
                            applicationName,
                            null,
                            partitionId,
                            action.RequestTimeout,
                            cancellationToken),
                        this.helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    DeployedServiceReplica selectedReplica = deployedReplicaListResult.FirstOrDefault(r => r.Partitionid == partitionId);
                    if (selectedReplica == null)
                    {
                        throw new FabricException(
                                  StringHelper.Format(StringResources.Error_DidNotFindDeployedReplicaOnNode, partitionId, nodeName),
                                  FabricErrorCode.ReplicaDoesNotExist);
                    }

                    serviceManifestName        = selectedReplica.ServiceManifestName;
                    servicePackageActivationId = selectedReplica.ServicePackageActivationId;
                    codePackageName            = selectedReplica.CodePackageName;
                }

                ActionTraceSource.WriteInfo(TraceSource, "SelectedReplica: serviceManifestName: {0}, servicePackageActivationId: {1}, codePackageName: {2}", serviceManifestName, servicePackageActivationId, codePackageName);

                DeployedCodePackage deployedCodePackageListResult = await this.GetCodePackageInfoAsync(testContext, nodeName, applicationName, serviceManifestName, servicePackageActivationId, codePackageName, action, cancellationToken).ConfigureAwait(false);

                var codepackageEntrypointToRestart = GetCodepackageEntrypointToRestart(action, deployedCodePackageListResult);

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.FaultManager.RestartDeployedCodePackageUsingNodeNameAsync(
                        nodeName,
                        applicationName,
                        serviceManifestName,
                        servicePackageActivationId,
                        codePackageName,
                        codepackageEntrypointToRestart.EntryPoint.CodePackageInstanceId,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (action.CompletionMode == CompletionMode.Verify)
                {
                    bool success = false;
                    while (this.helper.GetRemainingTime() > TimeSpan.Zero)
                    {
                        var deployedCodePackageListResultAfterRestart = await this.GetCodePackageInfoAsync(testContext, nodeName, applicationName, serviceManifestName, servicePackageActivationId, codePackageName, action, cancellationToken).ConfigureAwait(false);

                        if (deployedCodePackageListResultAfterRestart != null)
                        {
                            var entryPointAfterRestart = codepackageEntrypointToRestart.EntryPointType == EntryPointType.Main ? deployedCodePackageListResultAfterRestart.EntryPoint : deployedCodePackageListResultAfterRestart.SetupEntryPoint;
                            if (entryPointAfterRestart != null && entryPointAfterRestart.CodePackageInstanceId > codepackageEntrypointToRestart.EntryPoint.CodePackageInstanceId && entryPointAfterRestart.EntryPointStatus == EntryPointStatus.Started)
                            {
                                success = true;
                                break;
                            }
                        }

                        ActionTraceSource.WriteInfo(TraceSource, "CodePackage = {0}:{1}:{2} not yet restarted. Retrying...", nodeName, applicationName, codePackageName);
                        await AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
                    }

                    if (!success)
                    {
                        throw new TimeoutException(StringHelper.Format(StringResources.Error_TestabilityActionTimeout,
                                                                       "RestartDeployedCodePackage",
                                                                       applicationName));
                    }
                }

                action.Result = new RestartDeployedCodePackageResult(
                    nodeName,
                    applicationName,
                    serviceManifestName,
                    servicePackageActivationId,
                    codePackageName,
                    codepackageEntrypointToRestart.EntryPoint.CodePackageInstanceId,
                    replicaSelectorResult);

                ResultTraceString = StringHelper.Format("RestartCodePackageAction succeeded for {0}:{1}:{2} with CompletionMode = {3}", nodeName, applicationName, codePackageName, action.CompletionMode);
            }
Ejemplo n.º 10
0
            /// <summary>
            /// Restarts a node and, when <c>action.CompletionMode</c> is <c>CompletionMode.Verify</c>,
            /// polls until the node reports a higher instance id and is up again.
            /// </summary>
            /// <remarks>
            /// The target is <c>action.NodeName</c> when it is non-empty; otherwise it is the node hosting the
            /// replica resolved through <c>action.ReplicaSelector</c>. All calls share one time budget
            /// (<c>action.ActionTimeout</c>) tracked by <c>this.helper</c>.
            /// </remarks>
            /// <param name="testContext">Context supplying the FabricClient and the action executor.</param>
            /// <param name="action">The restart request; its <c>Result</c> is populated on success.</param>
            /// <param name="cancellationToken">Token forwarded to every asynchronous call made here.</param>
            /// <exception cref="TimeoutException">
            /// Thrown in Verify mode when the time budget runs out before the restart is observed.
            /// </exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, RestartNodeAction action, CancellationToken cancellationToken)
            {
                ActionTraceSource.WriteInfo(TraceSource, "Enter RestartNodeAction/ExecuteActionAsync: operationTimeout='{0}', requestTimeout='{1}'", action.ActionTimeout, action.RequestTimeout);

                this.helper = new TimeoutHelper(action.ActionTimeout);
                SelectedReplica selectedReplica  = SelectedReplica.None;
                string          nodeName         = action.NodeName;
                BigInteger      nodeInstance     = action.NodeInstance;
                bool            createFabricDump = action.CreateFabricDump;

                if (string.IsNullOrEmpty(nodeName))
                {
                    // No explicit node: resolve it from the replica selector instead.
                    ThrowIf.Null(action.ReplicaSelector, "ReplicaSelector");

                    var getReplicaStateAction = new GetSelectedReplicaStateAction(action.ReplicaSelector)
                    {
                        RequestTimeout = action.RequestTimeout,
                        ActionTimeout  = this.helper.GetRemainingTime()
                    };

                    await testContext.ActionExecutor.RunAsync(getReplicaStateAction, cancellationToken).ConfigureAwait(false);

                    var replicaStateActionResult = getReplicaStateAction.Result;
                    ReleaseAssert.AssertIf(replicaStateActionResult == null, "replicaStateActionResult cannot be null");
                    selectedReplica = replicaStateActionResult.Item1;
                    Replica replicaStateResult = replicaStateActionResult.Item2;
                    ReleaseAssert.AssertIf(replicaStateResult == null, "replicaStateResult cannot be null");

                    nodeName     = replicaStateResult.NodeName;

                    // Any caller-supplied instance id belongs to a different node selection, so discard it
                    // and let the lookup below fetch the current one.
                    nodeInstance = BigInteger.MinusOne;
                }

                if (nodeInstance == BigInteger.MinusOne)
                {
                    // MinusOne is treated as "not specified": use the node's current instance id.
                    var nodeInfo = await this.GetCurrentNodeInfoAsync(testContext, nodeName, action, cancellationToken).ConfigureAwait(false);

                    nodeInstance = nodeInfo.NodeInstanceId;
                }

                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.FaultManager.RestartNodeUsingNodeNameAsync(
                        nodeName,
                        nodeInstance,
                        createFabricDump,
                        action.RequestTimeout,
                        cancellationToken),
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (action.CompletionMode == CompletionMode.Verify)
                {
                    bool success = false;
                    while (this.helper.GetRemainingTime() > TimeSpan.Zero)
                    {
                        var nodeInfo = await this.GetCurrentNodeInfoAsync(testContext, nodeName, action, cancellationToken).ConfigureAwait(false);

                        // Restart is considered complete once the instance id has advanced past the one
                        // that was restarted and the node reports itself as up.
                        if (nodeInfo.NodeInstanceId > nodeInstance && nodeInfo.IsNodeUp)
                        {
                            success = true;
                            break;
                        }

                        ActionTraceSource.WriteInfo(TraceSource, "NodeName = {0} not yet restarted. '{1}' seconds remain. Retrying...", nodeName, this.helper.GetRemainingTime().TotalSeconds);

                        // ConfigureAwait(false) keeps this await consistent with every other await in the method.
                        await AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
                    }

                    if (!success)
                    {
                        throw new TimeoutException(StringHelper.Format(StringResources.Error_TestabilityActionTimeout,
                                                                       "RestartNode",
                                                                       nodeName));
                    }
                }

                // The result carries the instance id that was restarted, not the post-restart one.
                action.Result = new RestartNodeResult(selectedReplica, new NodeResult(nodeName, nodeInstance));

                ResultTraceString = StringHelper.Format("RestartNodeAction succeeded for {0}:{1} with CompletionMode = {2}", nodeName, nodeInstance, action.CompletionMode);
            }
Ejemplo n.º 11
0
            /// <summary>
            /// Moves the primary replica of the partition chosen by <c>action.PartitionSelector</c> to
            /// <c>action.NodeName</c>. An empty node name is passed through unchanged to the fault manager
            /// and is traced as "Random".
            /// </summary>
            /// <remarks>
            /// Unless <c>action.IgnoreConstraints</c> is set, the current primary is located first, so that a
            /// request targeting the node that already hosts it is rejected before the move is issued.
            /// All calls share one time budget (<c>action.ActionTimeout</c>) tracked by <c>this.helper</c>.
            /// </remarks>
            /// <param name="testContext">Context supplying the FabricClient, cluster info and action executor.</param>
            /// <param name="action">The move request; its <c>Result</c> is populated on success.</param>
            /// <param name="cancellationToken">Token forwarded to every asynchronous call made here.</param>
            /// <exception cref="InvalidOperationException">
            /// Thrown when no node information is available, or when a replica of the partition is not a
            /// <c>StatefulServiceReplica</c>.
            /// </exception>
            /// <exception cref="FabricException">
            /// Thrown with <c>FabricErrorCode.AlreadyPrimaryReplica</c> when the requested node already hosts
            /// the primary, or with <c>FabricErrorCode.NotReady</c> when no primary could be matched to a known node.
            /// </exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, MovePrimaryAction action, CancellationToken cancellationToken)
            {
                ThrowIf.Null(action.PartitionSelector, "PartitionSelector");

                this.helper = new TimeoutHelper(action.ActionTimeout);

                string newPrimaryNodeName = action.NodeName;

                var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
                {
                    RequestTimeout = action.RequestTimeout,
                    ActionTimeout  = this.helper.GetRemainingTime()
                };

                await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken).ConfigureAwait(false);

                Guid partitionId = getPartitionStateAction.Result.PartitionId;

                if (!action.IgnoreConstraints)
                {
                    // Fetch the known nodes so the current primary's node can be identified and validated.
                    var nodesInfo = await testContext.FabricCluster.GetLatestNodeInfoAsync(action.RequestTimeout, this.helper.GetRemainingTime(), cancellationToken).ConfigureAwait(false);

                    // Any() stops at the first element; Count() would walk the whole sequence.
                    if (nodesInfo == null || !nodesInfo.Any())
                    {
                        throw new InvalidOperationException(StringHelper.Format(StringResources.Error_NotEnoughNodesForTestabilityAction, "MovePrimary"));
                    }

                    ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
                            partitionId,
                            0,
                            action.RequestTimeout,
                            cancellationToken),
                        this.helper.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    NodeInfo currentPrimaryNodeInfo = null;
                    foreach (var replica in replicasResult)
                    {
                        // Every replica visited before the primary must be stateful; anything else means
                        // the selected service is not a stateful service.
                        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
                        if (statefulReplica == null)
                        {
                            throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "MovePrimary", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
                        }

                        if (statefulReplica.ReplicaRole == ReplicaRole.Primary)
                        {
                            currentPrimaryNodeInfo = nodesInfo.FirstOrDefault(n => n.NodeName == statefulReplica.NodeName);
                            if (!string.IsNullOrEmpty(newPrimaryNodeName) && newPrimaryNodeName == statefulReplica.NodeName)
                            {
                                throw new FabricException(
                                          StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, newPrimaryNodeName, "MovePrimary", "Primary already exists on node"),
                                          FabricErrorCode.AlreadyPrimaryReplica);
                            }

                            // Only the primary matters here; stop scanning once it is found.
                            break;
                        }
                    }

                    if (currentPrimaryNodeInfo == null)
                    {
                        throw new FabricException(StringHelper.Format(StringResources.Error_PartitionPrimaryNotReady, action.PartitionSelector + ":" + partitionId), FabricErrorCode.NotReady);
                    }

                    string currentPrimaryNodeName = currentPrimaryNodeInfo.NodeName;

                    // Defensive re-check against the resolved node info. For a non-empty requested node
                    // the loop above has already rejected this case.
                    if (newPrimaryNodeName == currentPrimaryNodeName)
                    {
                        throw new FabricException(
                                  StringHelper.Format(StringResources.Error_InvalidNodeNameProvided, newPrimaryNodeName, "MovePrimary", "Primary already exists on node"),
                                  FabricErrorCode.AlreadyPrimaryReplica);
                    }
                }

                ActionTraceSource.WriteInfo(TraceSource, "Calling move primary with node {0}, partition {1}", string.IsNullOrEmpty(newPrimaryNodeName) ? "Random" : newPrimaryNodeName, partitionId);
                await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.FaultManager.MovePrimaryUsingNodeNameAsync(
                        newPrimaryNodeName,
                        getPartitionStateAction.Result.ServiceName,
                        partitionId,
                        action.IgnoreConstraints,
                        action.RequestTimeout,
                        cancellationToken),
                    FabricClientRetryErrors.MovePrimaryFabricErrors.Value,
                    this.helper.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                action.Result = new MovePrimaryResult(newPrimaryNodeName, getPartitionStateAction.Result);

                ResultTraceString = StringHelper.Format("MovePrimaryAction succeeded for moving Primary for {0}  to node  {1}.", partitionId, newPrimaryNodeName);
            }