Esempio n. 1
0
        /// <summary>
        /// Produces fault actions for every category returned by <c>GenerateFaultCategories</c>,
        /// routing each category to the action generator that owns it.
        /// </summary>
        /// <param name="stateSnapshot">Cluster state forwarded to the selected generator.</param>
        /// <param name="activityId">Identifier forwarded to the selected generator.</param>
        /// <returns>The actions of all categories, concatenated in category order.</returns>
        /// <exception cref="ArgumentException">A category has no generator associated with it.</exception>
        private IList <StateTransitionAction> GenerateRandomFaults(ClusterStateSnapshot stateSnapshot, Guid activityId = default(Guid))
        {
            var collectedFaults = new List<StateTransitionAction>();

            foreach (var category in this.GenerateFaultCategories())
            {
                // Pick the generator first; the actual generation call is shared below.
                ActionGeneratorBase generator;

                switch (category)
                {
                case FaultCategory.NodeFaults:
                    generator = this.nodeFaultActionsManager;
                    break;

                case FaultCategory.ServiceFaults:
                    generator = this.serviceFaultActionsManager;
                    break;

                case FaultCategory.SystemFaults:
                    generator = this.systemFaultActionGenerator;
                    break;

                default:
                    throw new ArgumentException("Unknown category:" + category);
                }

                collectedFaults.AddRange(this.GenerateActionsUsingActionsManager(generator, stateSnapshot, activityId));
            }

            return collectedFaults;
        }
Esempio n. 2
0
 /// <summary>
 /// Creates an entity for <paramref name="application"/> with empty service and
 /// code-package lists, remembering the snapshot it belongs to.
 /// </summary>
 /// <param name="application">The application this entity wraps.</param>
 /// <param name="clusterSnapshot">The snapshot stored in <c>ClusterSnapshot</c>.</param>
 public ApplicationEntity(Application application, ClusterStateSnapshot clusterSnapshot)
 {
     this.Application = application;

     // Both child collections start out empty.
     this.ServiceList = new List<ServiceEntity>();
     this.CodePackages = new List<CodePackageEntity>();

     this.ClusterSnapshot = clusterSnapshot;
 }
Esempio n. 3
0
        /// <summary>
        /// Chooses an action category, generates the actions for it and enqueues them.
        /// </summary>
        /// <remarks>
        /// While <c>initialSetup</c> is set and at least one workload script is configured, the
        /// workload category is forced and the flag is cleared; otherwise the category comes from
        /// <c>actionCategoryChooser</c>.
        /// </remarks>
        /// <param name="stateSnapshot">Cluster state forwarded to the generators.</param>
        /// <param name="activityId">Identifier forwarded to the generators.</param>
        /// <exception cref="ArgumentException">The chosen category is neither workload nor faults.</exception>
        protected override void GenerateAndEnqueueRandomActions(ClusterStateSnapshot stateSnapshot, Guid activityId = default(Guid))
        {
            ActionCategory chosenCategory;

            bool forceWorkload = this.initialSetup && this.testParameters.WorkloadParameters.WorkloadScripts.Count > 0;
            if (forceWorkload)
            {
                this.initialSetup = false;
                chosenCategory = ActionCategory.WorkLoad;
            }
            else
            {
                chosenCategory = (ActionCategory)this.actionCategoryChooser.NextRoll();
            }

            IList<StateTransitionAction> produced;

            switch (chosenCategory)
            {
            case ActionCategory.WorkLoad:
                produced = this.GenerateActionsUsingActionsManager(this.workloadActionsManager, stateSnapshot, activityId);
                break;

            case ActionCategory.Faults:
                produced = this.GenerateRandomFaults(stateSnapshot, activityId);
                break;

            default:
                throw new ArgumentException("Unknown category:" + chosenCategory);
            }

            // Copy into a fresh list before handing the actions to the queue.
            var actionsToEnqueue = new List<StateTransitionAction>();
            actionsToEnqueue.AddRange(produced);

            this.EnqueueActions(actionsToEnqueue);
        }
        /// <summary>
        /// Generates and enqueues actions for the given state, then returns whatever
        /// <c>DequeueActions</c> yields.
        /// </summary>
        /// <param name="stateInfo">Cluster state forwarded to the generation step.</param>
        /// <param name="activityId">Identifier forwarded to the generation step.</param>
        /// <returns>The dequeued actions.</returns>
        public IList <StateTransitionAction> GetNextActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid))
        {
            // Fill the queue first, then drain it.
            this.GenerateAndEnqueueRandomActions(stateInfo, activityId);

            List<StateTransitionAction> dequeued = this.DequeueActions();
            return dequeued;
        }
Esempio n. 5
0
        /// <summary>
        /// Gathers the pending actions of the node-fault, service-fault and workload
        /// managers into a single list, in that order.
        /// </summary>
        /// <param name="stateInfo">Cluster state forwarded to each manager.</param>
        /// <param name="activityId">Identifier forwarded to each manager.</param>
        /// <returns>A new list holding the pending actions of the three managers.</returns>
        internal override IList <StateTransitionAction> GetPendingActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid))
        {
            var allPending = new List<StateTransitionAction>();

            var fromNodeFaults = this.nodeFaultActionsManager.GetPendingActions(stateInfo, activityId);
            allPending.AddRange(fromNodeFaults);

            var fromServiceFaults = this.serviceFaultActionsManager.GetPendingActions(stateInfo, activityId);
            allPending.AddRange(fromServiceFaults);

            var fromWorkload = this.workloadActionsManager.GetPendingActions(stateInfo, activityId);
            allPending.AddRange(fromWorkload);

            return allPending;
        }
Esempio n. 6
0
        /// <summary>
        /// Asks <paramref name="actionManager"/> for its next actions and logs how many were returned.
        /// </summary>
        /// <param name="actionManager">Generator whose <c>GetNextActions</c> is invoked.</param>
        /// <param name="stateSnapshot">Cluster state forwarded to the generator.</param>
        /// <param name="activityId">Identifier forwarded to the generator and written to the log.</param>
        /// <returns>The list returned by the generator, unmodified.</returns>
        private IList <StateTransitionAction> GenerateActionsUsingActionsManager(
            ActionGeneratorBase actionManager,
            ClusterStateSnapshot stateSnapshot,
            Guid activityId = default(Guid))
        {
            IList<StateTransitionAction> produced = actionManager.GetNextActions(stateSnapshot, activityId);

            // Trace the size of the batch before handing it back.
            this.Log.WriteInfo(this.TraceType, "{0}: Returning {1} actions.", activityId, produced.Count);

            return produced;
        }
            /// <summary>
            /// Reports whether the Failover Manager, Cluster Manager and Naming services have a
            /// replica among the system-service replicas deployed on <paramref name="node"/>.
            /// </summary>
            /// <param name="node">Node whose deployed system-service replicas are inspected.</param>
            /// <param name="application">Application being examined; anything other than the system application yields all <c>false</c>.</param>
            /// <param name="cancellationToken">Token forwarded to the replica-list query.</param>
            /// <returns>
            /// A tuple of (FM present, CM present, NS present). Service names are compared with
            /// ordinal, case-insensitive semantics.
            /// </returns>
            private async Task <Tuple <bool, bool, bool> > GetPresenceOfCodePackagelessSystemServiceAsync(
                NodeInfo node,
                ApplicationEntity application,
                CancellationToken cancellationToken)
            {
                // Non-system applications are reported as hosting none of the three services.
                if (!ClusterStateSnapshot.IsSystemApplication(application))
                {
                    return Tuple.Create(false, false, false);
                }

                var replicasOnNode = await this.GetDeployedSystemServiceReplicaListAsync(node, cancellationToken).ConfigureAwait(false);

                bool foundFm = false;
                bool foundCm = false;
                bool foundNs = false;

                foreach (var replica in replicasOnNode)
                {
                    // Evaluated lazily: the service name is only read while a flag is still unset.
                    Func<string, bool> isNamed =
                        expected => replica.ServiceName.OriginalString.Equals(expected, StringComparison.OrdinalIgnoreCase);

                    if (!foundFm && isNamed(Constants.FailoverManagerServiceName))
                    {
                        foundFm = true;
                    }
                    else if (!foundCm && isNamed(Constants.ClusterManagerServiceName))
                    {
                        foundCm = true;
                    }
                    else if (!foundNs && isNamed(Constants.NamingServiceName))
                    {
                        foundNs = true;
                    }
                }

                return Tuple.Create(foundFm, foundCm, foundNs);
            }
 /// <summary>
 /// Stores the supplied snapshot on this instance and then creates and enqueues
 /// service fault actions for the given activity.
 /// </summary>
 /// <param name="stateSnapshot">Cluster state saved to the <c>stateSnapshot</c> field.</param>
 /// <param name="activityId">Identifier passed to <c>CreateAndEnqueueServiceFaultActions</c>.</param>
 protected override void GenerateAndEnqueueRandomActions(ClusterStateSnapshot stateSnapshot, Guid activityId = default(Guid))
 {
     // Save the snapshot before enqueuing: the helper below receives only the activity id,
     // so it presumably reads the snapshot from the field (confirm against its implementation).
     this.stateSnapshot = stateSnapshot;
     this.CreateAndEnqueueServiceFaultActions(activityId);
 }
 /// <summary>
 /// Implemented by derived generators to produce actions for the given cluster state.
 /// Judging by the name, implementations are expected to enqueue what they generate.
 /// </summary>
 /// <param name="stateInfo">Cluster state the actions are generated from.</param>
 /// <param name="activityId">Identifier of the calling activity; defaults to the empty GUID.</param>
 protected abstract void GenerateAndEnqueueRandomActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid));
Esempio n. 10
0
 /// <summary>
 /// Default implementation: reports no pending actions. Both parameters are ignored here.
 /// </summary>
 /// <param name="stateInfo">Unused by this implementation.</param>
 /// <param name="activityId">Unused by this implementation.</param>
 /// <returns>A new, empty list on every call.</returns>
 internal virtual IList <StateTransitionAction> GetPendingActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid))
 {
     IList<StateTransitionAction> nothingPending = new List<StateTransitionAction>();
     return nothingPending;
 }
Esempio n. 11
0
 /// <summary>
 /// Unpacks the node list, unsafe-mode flag and snapshot id from <paramref name="stateInfo"/>
 /// and forwards them to the overload that takes those values individually.
 /// </summary>
 /// <param name="stateInfo">Snapshot the values are read from.</param>
 /// <param name="activityId">Identifier forwarded unchanged.</param>
 protected override void GenerateAndEnqueueRandomActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid))
 {
     var nodes = stateInfo.Nodes;
     var unsafeModeEnabled = stateInfo.UnsafeModeEnabled;
     var snapshotId = stateInfo.ClusterSnapshotId;

     this.GenerateAndEnqueueRandomActions(nodes, unsafeModeEnabled, snapshotId, activityId);
 }
Esempio n. 12
0
 /// <summary>
 /// Forwards the snapshot's workload list to the overload that generates actions from it.
 /// </summary>
 /// <param name="stateSnapShot">Snapshot whose <c>WorkloadList</c> is used.</param>
 /// <param name="activityId">Identifier forwarded unchanged.</param>
 protected override void GenerateAndEnqueueRandomActions(ClusterStateSnapshot stateSnapShot, Guid activityId = default(Guid))
 {
     var workloads = stateSnapShot.WorkloadList;
     this.GenerateAndEnqueueRandomActions(workloads, activityId);
 }
Esempio n. 13
0
        /// <summary>
        /// Returns the first replica in <c>ReplicaList</c> whose node matches
        /// <paramref name="nodeName"/> according to <c>ClusterStateSnapshot.MatchNodesByNameOrId</c>.
        /// </summary>
        /// <param name="nodeName">Node to look for; validated as not null or empty.</param>
        /// <returns>The matching replica entity, or the default value when none matches.</returns>
        internal ReplicaEntity FindReplicaEntityGivenNodeName(string nodeName)
        {
            Requires.Argument("nodeName", nodeName).NotNullOrEmpty();

            Func<ReplicaEntity, bool> residesOnNode =
                candidate => ClusterStateSnapshot.MatchNodesByNameOrId(candidate.Replica.NodeName, nodeName);

            return this.ReplicaList.FirstOrDefault(residesOnNode);
        }
            /// <summary>
            /// Queries the cluster for its nodes, applications and per-node deployed applications,
            /// and assembles a <see cref="ClusterStateSnapshot"/> from the results.
            /// </summary>
            /// <remarks>
            /// The per-capture fields (partition maps, timeouts, test context, deployed-replica map)
            /// are reset at the start of every call. After the queries, the partition view recorded
            /// for the Failover Manager is compared with the view recorded from the nodes; the two
            /// sets are presumably filled by the populate/associate helpers called here (they are
            /// not written in this method).
            /// </remarks>
            /// <param name="testContext">Context providing the cluster and fabric client used for the queries.</param>
            /// <param name="action">Supplies the request timeout, overall action timeout and the fault-system flag.</param>
            /// <param name="cancellationToken">Token forwarded to every query.</param>
            /// <returns>
            /// The populated snapshot, or <c>null</c> when a deployed replica could not be associated
            /// with its deployed code package.
            /// </returns>
            /// <exception cref="ChaosInconsistentClusterSnapshotException">
            /// The Failover Manager's partition view differs from the nodes' view; the message lists the differences.
            /// </exception>
            private async Task <ClusterStateSnapshot> CaptureClusterStateSnapshotAndPopulateEntitiesAsync(
                FabricTestContext testContext,
                GetClusterStateSnapshotAction action,
                CancellationToken cancellationToken)
            {
                this.PartitionMapFromFM    = new HashSet <string>(StringComparer.InvariantCulture);
                this.PartitionMapFromNodes = new HashSet <string>(StringComparer.InvariantCulture);

                this.requestTimeOut           = action.RequestTimeout;
                this.timer                    = new TimeoutHelper(action.ActionTimeout);
                this.testContext              = testContext;
                this.deployedSystemReplicaMap = new Dictionary <NodeInfo, DeployedServiceReplicaList>();

                var nodes = await this.testContext.FabricCluster.GetLatestNodeInfoAsync(this.requestTimeOut, this.timer.GetRemainingTime(), cancellationToken).ConfigureAwait(false);

                var clusterSnapshot = new ClusterStateSnapshot(false, action.ShouldFaultSystem);
                var nodeInfos       = nodes as IList <NodeInfo> ?? nodes.ToList();

                clusterSnapshot.Nodes.AddNodes(nodeInfos);
                clusterSnapshot.PopulateNodeMaps(nodes);

                // Get all current active applications
                var appListResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => this.testContext.FabricClient.QueryManager.GetApplicationListAsync(
                        null,
                        string.Empty,
                        this.requestTimeOut,
                        cancellationToken),
                    this.timer.GetRemainingTime(),
                    cancellationToken).ConfigureAwait(false);

                if (appListResult != null)
                {
                    foreach (var appResultItem in appListResult)
                    {
                        var applicationEntity = clusterSnapshot.Applications.AddApplication(appResultItem);
                        await this.PopulateApplicationEntityAsync(applicationEntity, cancellationToken).ConfigureAwait(false);
                    }

                    // The system application is added explicitly, after the queried applications.
                    var systemApplicationEntity = clusterSnapshot.Applications.AddApplication(SystemApplication);
                    await this.PopulateApplicationEntityAsync(systemApplicationEntity, cancellationToken).ConfigureAwait(false);
                }

                foreach (var node in nodeInfos)
                {
                    // Work on a copy that is safe to capture in the query lambda below.
                    var node1 = node.Clone();
                    if (node1.IsNodeUp)
                    {
                        var retryableErrorsForGetDeployedApplicationList = new FabricClientRetryErrors();
                        retryableErrorsForGetDeployedApplicationList.RetryableFabricErrorCodes.Add(FabricErrorCode.InvalidAddress);

                        var deployedApplicationList = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                            () => this.testContext.FabricClient.QueryManager.GetDeployedApplicationListAsync(
                                node1.NodeName,
                                null,
                                this.requestTimeOut,
                                cancellationToken),
                            retryableErrorsForGetDeployedApplicationList,
                            this.timer.GetRemainingTime(),
                            cancellationToken).ConfigureAwait(false);

                        // The query result is treated as possibly null. Normalise it to an empty list
                        // up front so the enumerations below are safe whether or not the node hosts a
                        // system service (previously only that branch guarded against null).
                        if (deployedApplicationList == null)
                        {
                            deployedApplicationList = new DeployedApplicationList();
                        }

                        // Add system app entity in the deployed application list
                        // so that we get the deployed replica list for the node
                        if (await this.HasDeployedSystemServiceAsync(node1, cancellationToken).ConfigureAwait(false))
                        {
                            deployedApplicationList.Add(DeployedSystemApplication);
                        }

                        TestabilityTrace.TraceSource.WriteInfo(TraceType, "Node: {0} has the following apps deployed...", node1);

                        foreach (var app in deployedApplicationList)
                        {
                            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Deployed app = {0}", app.ApplicationName.OriginalString);
                        }

                        foreach (var app in deployedApplicationList)
                        {
                            var application       = app;
                            var applicationEntity = clusterSnapshot.Applications.FirstOrDefault(a => a.Application.ApplicationName == application.ApplicationName);
                            if (applicationEntity != null)
                            {
                                // A failed association abandons this capture; the caller sees null.
                                if (!await this.TryAssociateDeployedReplicaWithDeployedCodepackageAsync(
                                        node1,
                                        applicationEntity,
                                        cancellationToken).ConfigureAwait(false))
                                {
                                    return(null);
                                }
                            }
                        } // iterate through the deployed apps
                    }     // if a node is up
                }         // iterate through the nodes

                // Information acquired through queries could go stale due to the cluster dynamism.
                // This happened while the cluster snapshot was being taken -- making the snapshot internally inconsistent.
                // The fix is to ignore the inconsistent snapshot and capture it again.
                //
                // If FailoverManager's point of view coincides with that of the Nodes, return the snapshot;
                // otherwise, throw an exception to indicate that the snapshot should be captured afresh.
                //
                if (!this.PartitionMapFromFM.SetEquals(this.PartitionMapFromNodes))
                {
                    StringBuilder exceptionMessageBuilder = new StringBuilder();

                    // Keep the untouched FM view: the first ExceptWith below mutates the original set.
                    var copyOfFmInfo = new HashSet <string>(this.PartitionMapFromFM);

                    this.PartitionMapFromFM.ExceptWith(this.PartitionMapFromNodes);

                    if (this.PartitionMapFromFM.Any())
                    {
                        exceptionMessageBuilder.AppendLine("FM has the following extra information:");

                        foreach (var pinfo in this.PartitionMapFromFM)
                        {
                            exceptionMessageBuilder.AppendLine(string.Format(CultureInfo.InvariantCulture, ReplicaViewPrintFormat, Tab, pinfo));
                        }
                    }

                    this.PartitionMapFromNodes.ExceptWith(copyOfFmInfo);

                    if (this.PartitionMapFromNodes.Any())
                    {
                        exceptionMessageBuilder.AppendLine("Nodes has the following partitions deployed, which FM does not know about:");

                        foreach (var pinfo in this.PartitionMapFromNodes)
                        {
                            exceptionMessageBuilder.AppendLine(string.Format(CultureInfo.InvariantCulture, ReplicaViewPrintFormat, Tab, pinfo));
                        }
                    }

                    string inconsistencyMessage = exceptionMessageBuilder.ToString();

                    // Pass the message as an argument so braces inside it are never read as placeholders.
                    TestabilityTrace.TraceSource.WriteWarning(TraceType, "{0}", inconsistencyMessage);

                    throw new ChaosInconsistentClusterSnapshotException(inconsistencyMessage);
                }

                return(clusterSnapshot);
            }
            /// <summary>
            /// Captures a cluster snapshot, retrying after a back-off delay until a capture succeeds
            /// or the configured number of attempts is used up, then filters and marks the snapshot
            /// and stores it in <c>action.Result</c>.
            /// </summary>
            /// <remarks>
            /// <see cref="FabricException"/> and <see cref="ChaosInconsistentClusterSnapshotException"/>
            /// raised by a capture are counted per message and trigger another attempt. The
            /// retry-limit failure is reported only when the final permitted attempt has itself failed
            /// to produce a snapshot; a capture that succeeds on that attempt is accepted.
            /// </remarks>
            /// <param name="testContext">Context forwarded to the capture routine.</param>
            /// <param name="action">Supplies retry limit, target filter and telemetry sampling probability; receives the result.</param>
            /// <param name="cancellationToken">Token observed by the back-off delay and the capture.</param>
            protected override async Task ExecuteActionAsync(FabricTestContext testContext, GetClusterStateSnapshotAction action, CancellationToken cancellationToken)
            {
                // Message -> number of times it was seen across attempts.
                var exceptionHistory = new Dictionary <string, int>();

                int retries = 0;

                GetClusterStateSnapshotAction.ServiceCount   = 0;
                GetClusterStateSnapshotAction.PartitionCount = 0;
                GetClusterStateSnapshotAction.ReplicaCount   = 0;

                Stopwatch stopWatch = Stopwatch.StartNew();

                ClusterStateSnapshot clusterSnapshot = null;

                do
                {
                    ++retries;

                    await Task.Delay(Constants.DefaultChaosSnapshotRecaptureBackoffInterval, cancellationToken).ConfigureAwait(false);

                    try
                    {
                        clusterSnapshot = await this.CaptureClusterStateSnapshotAndPopulateEntitiesAsync(
                            testContext,
                            action,
                            cancellationToken).ConfigureAwait(false);
                    }
                    catch (Exception exception) when(exception is FabricException || exception is ChaosInconsistentClusterSnapshotException)
                    {
                        string exceptionString = exception.Message;

                        // TryGetValue leaves the count at 0 for a first occurrence.
                        int occurrences;
                        exceptionHistory.TryGetValue(exceptionString, out occurrences);
                        exceptionHistory[exceptionString] = occurrences + 1;
                    }

                    // Only give up when this attempt failed AND no attempts remain.
                    bool retriesExhausted = clusterSnapshot == null && retries >= action.MaximumNumberOfRetries;

                    if (retriesExhausted)
                    {
                        string allExceptions = string.Join(ExceptionDelimeter, exceptionHistory);

                        TestabilityTrace.TraceSource.WriteWarning(TraceType, "While taking a consistent cluster snapshot, following exceptions occurred: {0}", allExceptions);

                        ChaosUtility.ThrowOrAssertIfTrue(
                            ChaosConstants.GetClusterSnapshotAction_MaximumNumberOfRetriesAchieved_TelemetryId,
                            retriesExhausted,
                            string.Format(StringResources.ChaosEngineError_GetClusterSnapshotAction_MaximumNumberOfRetriesAchieved, action.MaximumNumberOfRetries, allExceptions));
                    }
                }while (clusterSnapshot == null);

                stopWatch.Stop();

                var elapsedInGatherSnapshot = stopWatch.Elapsed;

                // Time the filtering/marking phase separately from the capture phase.
                stopWatch = Stopwatch.StartNew();

                clusterSnapshot.ApplyChaosTargetFilter(action.ChaosTargetFilter);

                clusterSnapshot.MarkAllUnsafeEntities();

                stopWatch.Stop();

                var elapsedInMarkAllUnsafe = stopWatch.Elapsed;

                // Telemetry is emitted for a random sample of snapshots only.
                if (UniformRandomNumberGenerator.NextDouble() < action.TelemetrySamplingProbability)
                {
                    FabricEvents.Events.ChaosSnapshot(
                        Guid.NewGuid().ToString(),
                        clusterSnapshot.Nodes.Count,
                        clusterSnapshot.Applications.Count,
                        GetClusterStateSnapshotAction.ServiceCount,
                        GetClusterStateSnapshotAction.PartitionCount,
                        GetClusterStateSnapshotAction.ReplicaCount,
                        elapsedInGatherSnapshot.TotalSeconds,
                        elapsedInMarkAllUnsafe.TotalSeconds,
                        retries);
                }

                TestabilityTrace.TraceSource.WriteInfo(TraceType, "For '{0}' nodes, '{1}' apps, '{2}' services, '{3}' partitions, '{4}' replicas, snapshot took '{5}', mark unsafe took '{6}', took '{7}' retries.",
                                                       clusterSnapshot.Nodes.Count,
                                                       clusterSnapshot.Applications.Count,
                                                       GetClusterStateSnapshotAction.ServiceCount,
                                                       GetClusterStateSnapshotAction.PartitionCount,
                                                       GetClusterStateSnapshotAction.ReplicaCount,
                                                       elapsedInGatherSnapshot,
                                                       elapsedInMarkAllUnsafe,
                                                       retries);

                action.Result     = clusterSnapshot;
                ResultTraceString = "GetClusterStateSnapshotAction succeeded";
            }
Esempio n. 16
0
 /// <summary>
 /// Creates an empty application list bound to the given snapshot.
 /// </summary>
 /// <param name="clusterStateSnapshot">Snapshot stored for later use by this list.</param>
 public ApplicationEntityList(ClusterStateSnapshot clusterStateSnapshot)
 {
     this.clusterStateSnapshot = clusterStateSnapshot;
     this.list = new List<ApplicationEntity>();
 }
Esempio n. 17
0
 /// <summary>
 /// Creates an empty node list bound to the given snapshot.
 /// </summary>
 /// <param name="stateSnapshot">Snapshot stored for later use by this list.</param>
 public NodeEntityList(ClusterStateSnapshot stateSnapshot)
 {
     this.stateSnapshot = stateSnapshot;
     this.list = new List<NodeEntity>();
 }
Esempio n. 18
0
 /// <summary>
 /// Returns the pending actions computed from the snapshot's workload list by the
 /// overload that accepts that list directly.
 /// </summary>
 /// <param name="stateInfo">Snapshot whose <c>WorkloadList</c> is used.</param>
 /// <param name="activityId">Identifier forwarded unchanged.</param>
 /// <returns>Whatever the workload-list overload returns.</returns>
 internal override IList <StateTransitionAction> GetPendingActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid))
 {
     var workloads = stateInfo.WorkloadList;
     return GetPendingActions(workloads, activityId);
 }
Esempio n. 19
0
        /// <summary>
        /// Returns the first node entity whose current node name matches
        /// <paramref name="nodeName"/> according to <c>ClusterStateSnapshot.MatchNodesByNameOrId</c>.
        /// </summary>
        /// <param name="nodeName">Node to look for; validated as not null or empty.</param>
        /// <returns>The matching entity, or <c>null</c> when no entity matches.</returns>
        internal NodeEntity FindMatchingNodeEntity(string nodeName)
        {
            Requires.Argument("nodeName", nodeName).NotNullOrEmpty();

            foreach (var candidate in this.list)
            {
                if (ClusterStateSnapshot.MatchNodesByNameOrId(candidate.CurrentNodeInfo.NodeName, nodeName))
                {
                    return candidate;
                }
            }

            // Scanned every entity without a match.
            return null;
        }
        /// <summary>
        /// Stores the supplied snapshot on this instance and delegates to the overload
        /// that takes only the activity id.
        /// </summary>
        /// <param name="stateInfo">Cluster state saved to the <c>stateSnapshot</c> field.</param>
        /// <param name="activityId">Identifier forwarded to the activity-id-only overload.</param>
        protected override void GenerateAndEnqueueRandomActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid))
        {
            // Must be assigned before delegating: the overload below is not given the snapshot,
            // so it presumably reads it from this field (confirm against its implementation).
            this.stateSnapshot = stateInfo;

            this.GenerateAndEnqueueRandomActions(activityId);
        }
Esempio n. 21
0
 /// <summary>
 /// Creates an entity for the given node, bound to the given snapshot.
 /// The entity's flags start out as <c>ClusterEntityFlags.Excluded</c>.
 /// </summary>
 /// <param name="nodeInfo">Node information stored in <c>CurrentNodeInfo</c>.</param>
 /// <param name="clusterSnapshot">Snapshot stored in <c>ClusterSnapshot</c>.</param>
 public NodeEntity(NodeInfo nodeInfo, ClusterStateSnapshot clusterSnapshot)
 {
     this.CurrentNodeInfo = nodeInfo;
     this.ClusterSnapshot = clusterSnapshot;

     // Initial flag value assigned to every new node entity.
     this.NodeFlags = ClusterEntityFlags.Excluded;
 }