Example #1
0
            /// <summary>
            /// Looks up API fault information in the event store for the time window
            /// described by <paramref name="action"/> and stores it in <c>action.Result</c>.
            /// </summary>
            /// <param name="testContext">Context whose <c>ExtensionProperties</c> must contain an <c>EventStoreConnection</c> under the connection parameter name.</param>
            /// <param name="action">Supplies <c>StartTime</c> and <c>EndTime</c>; receives the lookup result.</param>
            /// <param name="cancellationToken">Token handed to the task that runs the lookup.</param>
            /// <exception cref="InvalidOperationException">The connection entry is missing or is not an <c>EventStoreConnection</c>.</exception>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, ApiFaultInformationAction action, CancellationToken cancellationToken)
            {
                object value;
                var storeConnection = testContext.ExtensionProperties.TryGetValue(connectionPrameterName, out value)
                    ? value as EventStoreConnection
                    : null;

                if (storeConnection == null)
                {
                    throw new InvalidOperationException(StringResources.EventStoreError_ConnectionRequired);
                }

                // Task.Run always targets the default (thread-pool) scheduler, whereas
                // Task.Factory.StartNew would silently pick up whatever TaskScheduler.Current
                // happens to be at the call site.
                var faultInformation = await Task.Run<ApiFaultInformation>(
                    () => EventStoreHelper.GetApiFaultInformation(
                        storeConnection,
                        action.StartTime,
                        action.EndTime),
                    cancellationToken).ConfigureAwait(false);

                ResultTraceString = StringHelper.Format("ApiFaultInformationAction succeeded.");
                action.Result     = faultInformation;
            }
            /// <summary>
            /// Queries the current fabric upgrade progress and sets <c>action.Result</c> to
            /// <c>true</c> unless the upgrade state is one of the two "completed" states
            /// (rolling back completed or rolling forward completed).
            /// </summary>
            /// <param name="testContext">Context providing the <c>FabricClient</c> used for the query.</param>
            /// <param name="action">Supplies the request/action timeouts and receives the boolean result.</param>
            /// <param name="cancellationToken">Token forwarded to the query and to the retry helper.</param>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, IsClusterUpgradingAction action, CancellationToken cancellationToken)
            {
                var upgradeProgress = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.ClusterManager.GetFabricUpgradeProgressAsync(action.RequestTimeout, cancellationToken),
                    FabricClientRetryErrors.UpgradeFabricErrors.Value,
                    action.ActionTimeout,
                    cancellationToken).ConfigureAwait(false);

                ReleaseAssert.AssertIfNull(upgradeProgress, "currentProgress");

                // The cluster counts as "upgrading" in every state except the two terminal ones.
                var state             = upgradeProgress.UpgradeState;
                var reachedFinalState = state == FabricUpgradeState.RollingBackCompleted ||
                                        state == FabricUpgradeState.RollingForwardCompleted;

                action.Result     = !reachedFinalState;
                ResultTraceString = "IsClusterUpgradingAction succeeded";
            }
            /// <summary>
            /// Fetches the infrastructure service status and sets <c>action.Result</c> to
            /// <c>true</c> when any reported job has a detailed status contained in
            /// <c>ChaosConstants.UpgradingInfrastructureJobStatus</c>; otherwise <c>false</c>.
            /// </summary>
            /// <param name="testContext">Context providing the <c>FabricClient</c> used for the status query.</param>
            /// <param name="action">Supplies the action/request timeouts and receives the boolean result.</param>
            /// <param name="cancellationToken">Token forwarded to the status query.</param>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, IsInfrastructureUpgradingAction action, CancellationToken cancellationToken)
            {
                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Checking if InfrastructureUpgrade is going on.");

                var status = await this.GetInfrastructureServiceStatus(
                    testContext.FabricClient,
                    action.ActionTimeout,
                    action.RequestTimeout,
                    cancellationToken).ConfigureAwait(false);

                // An empty status collection yields false; entries whose job collection or
                // job array is missing are ignored; the first entry with a matching job wins.
                action.Result = status.Any(
                    entry =>
                    {
                        var jobs = entry.Value.Jobs?.Jobs;

                        return jobs != null &&
                               jobs.Any(job => ChaosConstants.UpgradingInfrastructureJobStatus.Contains(job.DetailedStatus));
                    });

                // NOTE(review): unlike the other ExecuteActionAsync overloads in this file,
                // ResultTraceString is not assigned here -- confirm whether that is intentional.
            }
            /// <summary>
            /// Builds a <c>ClusterStateSnapshot</c> from the latest node information, the
            /// application list and, for every node that is up, the applications deployed on it.
            /// Afterwards the partition view recorded from the FailoverManager is compared with
            /// the view recorded from the nodes.
            /// </summary>
            /// <param name="testContext">Context providing the fabric cluster and client; stored in <c>this.testContext</c>.</param>
            /// <param name="action">Supplies the request/action timeouts and the <c>ShouldFaultSystem</c> flag.</param>
            /// <param name="cancellationToken">Token forwarded to every query issued here.</param>
            /// <returns>
            /// The populated snapshot, or <c>null</c> when
            /// <c>TryAssociateDeployedReplicaWithDeployedCodepackageAsync</c> reports failure for a deployed application.
            /// </returns>
            /// <exception cref="ChaosInconsistentClusterSnapshotException">
            /// The partition set seen by the FailoverManager differs from the set seen on the nodes.
            /// </exception>
            private async Task<ClusterStateSnapshot> CaptureClusterStateSnapshotAndPopulateEntitiesAsync(
                FabricTestContext testContext,
                GetClusterStateSnapshotAction action,
                CancellationToken cancellationToken)
            {
                // Reset all per-capture state; each call starts from a clean slate.
                this.PartitionMapFromFM    = new HashSet<string>(StringComparer.InvariantCulture);
                this.PartitionMapFromNodes = new HashSet<string>(StringComparer.InvariantCulture);

                this.requestTimeOut           = action.RequestTimeout;
                this.timer                    = new TimeoutHelper(action.ActionTimeout);
                this.testContext              = testContext;
                this.deployedSystemReplicaMap = new Dictionary<NodeInfo, DeployedServiceReplicaList>();

                var nodes = await this.testContext.FabricCluster.GetLatestNodeInfoAsync(this.requestTimeOut, this.timer.GetRemainingTime(), cancellationToken).ConfigureAwait(false);

                var clusterSnapshot = new ClusterStateSnapshot(false, action.ShouldFaultSystem);
                var nodeInfos       = nodes as IList<NodeInfo> ?? nodes.ToList();

                clusterSnapshot.Nodes.AddNodes(nodeInfos);
                clusterSnapshot.PopulateNodeMaps(nodes);

                // Get all current active applications
                var appListResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.testContext.FabricClient.QueryManager.GetApplicationListAsync(
                        null,
                        string.Empty,
                        this.requestTimeOut,
                        cancellationToken),
                    this.timer.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (appListResult != null)
                {
                    foreach (var appResultItem in appListResult)
                    {
                        var applicationEntity = clusterSnapshot.Applications.AddApplication(appResultItem);
                        await this.PopulateApplicationEntityAsync(applicationEntity, cancellationToken).ConfigureAwait(false);
                    }

                    var systemApplicationEntity = clusterSnapshot.Applications.AddApplication(SystemApplication);
                    await this.PopulateApplicationEntityAsync(systemApplicationEntity, cancellationToken).ConfigureAwait(false);
                }

                foreach (var node in nodeInfos)
                {
                    var node1 = node.Clone();
                    if (!node1.IsNodeUp)
                    {
                        continue;
                    }

                    var retryableErrorsForGetDeployedApplicationList = new FabricClientRetryErrors();
                    retryableErrorsForGetDeployedApplicationList.RetryableFabricErrorCodes.Add(FabricErrorCode.InvalidAddress);

                    var deployedApplicationList = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => this.testContext.FabricClient.QueryManager.GetDeployedApplicationListAsync(
                            node1.NodeName,
                            null,
                            this.requestTimeOut,
                            cancellationToken),
                        retryableErrorsForGetDeployedApplicationList,
                        this.timer.GetRemainingTime(),
                        cancellationToken).ConfigureAwait(false);

                    // The query result may be null. Normalise it to an empty list up front so the
                    // loops below never dereference null, even when no system service is deployed
                    // on this node.
                    if (deployedApplicationList == null)
                    {
                        deployedApplicationList = new DeployedApplicationList();
                    }

                    // Add system app entity in the deployed application list
                    // so that we get the deployed replica list for the node
                    if (await this.HasDeployedSystemServiceAsync(node1, cancellationToken).ConfigureAwait(false))
                    {
                        deployedApplicationList.Add(DeployedSystemApplication);
                    }

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Node: {0} has the following apps deployed...", node1);

                    foreach (var app in deployedApplicationList)
                    {
                        TestabilityTrace.TraceSource.WriteInfo(TraceType, "Deployed app = {0}", app.ApplicationName.OriginalString);
                    }

                    foreach (var app in deployedApplicationList)
                    {
                        var application       = app;
                        var applicationEntity = clusterSnapshot.Applications.FirstOrDefault(a => a.Application.ApplicationName == application.ApplicationName);
                        if (applicationEntity == null)
                        {
                            continue;
                        }

                        var associated = await this.TryAssociateDeployedReplicaWithDeployedCodepackageAsync(
                            node1,
                            applicationEntity,
                            cancellationToken).ConfigureAwait(false);

                        if (!associated)
                        {
                            // Signals the caller that this capture attempt produced no usable snapshot.
                            return null;
                        }
                    } // iterate through the deployed apps
                }     // iterate through the nodes

                // Information acquired through queries could go stale due to the cluster dynamism.
                // This happened while the cluster snapshot was being taken -- making the snapshot internally inconsistent.
                // The fix is to ignore the inconsistent snapshot and capture it again.
                //
                // If FailoverManager's point of view coincides with that of the Nodes, return the snapshot;
                // otherwise, throw ChaosInconsistentClusterSnapshotException to indicate that the snapshot
                // should be captured afresh.
                //
                if (!this.PartitionMapFromFM.SetEquals(this.PartitionMapFromNodes))
                {
                    var exceptionMessageBuilder = new StringBuilder();

                    // Keep the full FM view: PartitionMapFromFM is reduced in place just below,
                    // but the original contents are still needed to compute the node-only entries.
                    var copyOfFmInfo = new HashSet<string>(this.PartitionMapFromFM);

                    this.PartitionMapFromFM.ExceptWith(this.PartitionMapFromNodes);

                    if (this.PartitionMapFromFM.Any())
                    {
                        exceptionMessageBuilder.AppendLine("FM has the following extra information:");

                        foreach (var pinfo in this.PartitionMapFromFM)
                        {
                            exceptionMessageBuilder.AppendLine(string.Format(CultureInfo.InvariantCulture, ReplicaViewPrintFormat, Tab, pinfo));
                        }
                    }

                    this.PartitionMapFromNodes.ExceptWith(copyOfFmInfo);

                    if (this.PartitionMapFromNodes.Any())
                    {
                        exceptionMessageBuilder.AppendLine("Nodes has the following partitions deployed, which FM does not know about:");

                        foreach (var pinfo in this.PartitionMapFromNodes)
                        {
                            exceptionMessageBuilder.AppendLine(string.Format(CultureInfo.InvariantCulture, ReplicaViewPrintFormat, Tab, pinfo));
                        }
                    }

                    var inconsistencyMessage = exceptionMessageBuilder.ToString();

                    // Pass the message as an argument, not as the format string, so any braces in
                    // the partition descriptions cannot be misread as format placeholders.
                    TestabilityTrace.TraceSource.WriteWarning(TraceType, "{0}", inconsistencyMessage);

                    throw new ChaosInconsistentClusterSnapshotException(inconsistencyMessage);
                }

                return clusterSnapshot;
            }
            /// <summary>
            /// Repeatedly tries to capture a cluster state snapshot until one is obtained,
            /// then applies the chaos target filter, marks unsafe entities, optionally emits a
            /// telemetry event, and stores the snapshot in <c>action.Result</c>.
            /// </summary>
            /// <remarks>
            /// <c>FabricException</c> and <c>ChaosInconsistentClusterSnapshotException</c> raised by a
            /// capture attempt are recorded (message and occurrence count) and the capture is retried
            /// after a back-off delay. When <c>action.MaximumNumberOfRetries</c> attempts have been made
            /// without obtaining a snapshot, the failure is reported through
            /// <c>ChaosUtility.ThrowOrAssertIfTrue</c>.
            /// </remarks>
            /// <param name="testContext">Context forwarded to the capture routine.</param>
            /// <param name="action">Supplies retry limit, target filter and telemetry sampling probability; receives the snapshot.</param>
            /// <param name="cancellationToken">Token forwarded to the back-off delay and to the capture routine.</param>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, GetClusterStateSnapshotAction action, CancellationToken cancellationToken)
            {
                // Exception message -> number of times it was observed across attempts.
                var exceptionHistory = new Dictionary<string, int>();

                int retries = 0;

                GetClusterStateSnapshotAction.ServiceCount   = 0;
                GetClusterStateSnapshotAction.PartitionCount = 0;
                GetClusterStateSnapshotAction.ReplicaCount   = 0;

                Stopwatch stopWatch = Stopwatch.StartNew();

                ClusterStateSnapshot clusterSnapshot = null;

                do
                {
                    ++retries;

                    await Task.Delay(Constants.DefaultChaosSnapshotRecaptureBackoffInterval, cancellationToken).ConfigureAwait(false);

                    try
                    {
                        clusterSnapshot = await this.CaptureClusterStateSnapshotAndPopulateEntitiesAsync(
                            testContext,
                            action,
                            cancellationToken).ConfigureAwait(false);
                    }
                    catch (Exception exception) when (exception is FabricException || exception is ChaosInconsistentClusterSnapshotException)
                    {
                        string exceptionString = exception.Message;

                        // Single lookup: a missing key leaves the count at 0.
                        int occurrences;
                        exceptionHistory.TryGetValue(exceptionString, out occurrences);
                        exceptionHistory[exceptionString] = occurrences + 1;
                    }

                    // Only report exhaustion when this attempt did NOT produce a snapshot; a snapshot
                    // obtained on the last permitted attempt is a success and must not be discarded.
                    bool retriesExhausted = clusterSnapshot == null && retries >= action.MaximumNumberOfRetries;

                    if (retriesExhausted)
                    {
                        string allExceptions = string.Join(ExceptionDelimeter, exceptionHistory);

                        TestabilityTrace.TraceSource.WriteWarning(TraceType, "While taking a consistent cluster snapshot, following exceptions occurred: {0}", allExceptions);

                        ChaosUtility.ThrowOrAssertIfTrue(
                            ChaosConstants.GetClusterSnapshotAction_MaximumNumberOfRetriesAchieved_TelemetryId,
                            retriesExhausted,
                            string.Format(StringResources.ChaosEngineError_GetClusterSnapshotAction_MaximumNumberOfRetriesAchieved, action.MaximumNumberOfRetries, allExceptions));
                    }
                }
                while (clusterSnapshot == null);

                stopWatch.Stop();

                var elapsedInGatherSnapshot = stopWatch.Elapsed;

                // Time the filtering/marking phase separately from the capture phase.
                stopWatch = Stopwatch.StartNew();

                clusterSnapshot.ApplyChaosTargetFilter(action.ChaosTargetFilter);

                clusterSnapshot.MarkAllUnsafeEntities();

                stopWatch.Stop();

                var elapsedInMarkAllUnsafe = stopWatch.Elapsed;

                // Telemetry is sampled: emitted only when the random draw falls below the configured probability.
                if (UniformRandomNumberGenerator.NextDouble() < action.TelemetrySamplingProbability)
                {
                    FabricEvents.Events.ChaosSnapshot(
                        Guid.NewGuid().ToString(),
                        clusterSnapshot.Nodes.Count,
                        clusterSnapshot.Applications.Count,
                        GetClusterStateSnapshotAction.ServiceCount,
                        GetClusterStateSnapshotAction.PartitionCount,
                        GetClusterStateSnapshotAction.ReplicaCount,
                        elapsedInGatherSnapshot.TotalSeconds,
                        elapsedInMarkAllUnsafe.TotalSeconds,
                        retries);
                }

                TestabilityTrace.TraceSource.WriteInfo(TraceType, "For '{0}' nodes, '{1}' apps, '{2}' services, '{3}' partitions, '{4}' replicas, snapshot took '{5}', mark unsafe took '{6}', took '{7}' retries.",
                                                       clusterSnapshot.Nodes.Count,
                                                       clusterSnapshot.Applications.Count,
                                                       GetClusterStateSnapshotAction.ServiceCount,
                                                       GetClusterStateSnapshotAction.PartitionCount,
                                                       GetClusterStateSnapshotAction.ReplicaCount,
                                                       elapsedInGatherSnapshot,
                                                       elapsedInMarkAllUnsafe,
                                                       retries);

                action.Result     = clusterSnapshot;
                ResultTraceString = "GetClusterStateSnapshotAction succeeded";
            }