/// <summary>
/// Generates fault actions for every category returned by <c>GenerateFaultCategories</c>,
/// delegating each category to its matching action manager.
/// </summary>
/// <param name="stateSnapshot">Cluster state snapshot forwarded to the action manager.</param>
/// <param name="activityId">Activity identifier forwarded to the action manager.</param>
/// <returns>All generated actions, appended in the order the categories were enumerated.</returns>
/// <exception cref="ArgumentException">
/// A category other than NodeFaults, ServiceFaults or SystemFaults was encountered.
/// </exception>
private IList<StateTransitionAction> GenerateRandomFaults(ClusterStateSnapshot stateSnapshot, Guid activityId = default(Guid))
{
    var faultActions = new List<StateTransitionAction>();

    foreach (var chosenCategory in this.GenerateFaultCategories())
    {
        // Pick the actions for this category first, then append them in one place.
        IEnumerable<StateTransitionAction> categoryActions;

        switch (chosenCategory)
        {
            case FaultCategory.NodeFaults:
                categoryActions = this.GenerateActionsUsingActionsManager(this.nodeFaultActionsManager, stateSnapshot, activityId);
                break;

            case FaultCategory.ServiceFaults:
                categoryActions = this.GenerateActionsUsingActionsManager(this.serviceFaultActionsManager, stateSnapshot, activityId);
                break;

            case FaultCategory.SystemFaults:
                categoryActions = this.GenerateActionsUsingActionsManager(this.systemFaultActionGenerator, stateSnapshot, activityId);
                break;

            default:
                throw new ArgumentException("Unknown category:" + chosenCategory);
        }

        faultActions.AddRange(categoryActions);
    }

    return faultActions;
}
/// <summary>
/// Creates an application entity for <paramref name="application"/> with empty service and
/// code package lists, and records the snapshot it is created for.
/// </summary>
/// <param name="application">The application this entity wraps.</param>
/// <param name="clusterSnapshot">The cluster snapshot stored in <c>ClusterSnapshot</c>.</param>
public ApplicationEntity(Application application, ClusterStateSnapshot clusterSnapshot)
{
    this.Application = application;

    // Both collections start out empty.
    this.ServiceList = new List<ServiceEntity>();
    this.CodePackages = new List<CodePackageEntity>();

    this.ClusterSnapshot = clusterSnapshot;
}
/// <summary>
/// Chooses an action category (workload or faults), generates the actions for it and enqueues them.
/// </summary>
/// <param name="stateSnapshot">Cluster state snapshot used to generate the actions.</param>
/// <param name="activityId">Activity identifier forwarded to the generators.</param>
/// <exception cref="ArgumentException">The chosen category is neither WorkLoad nor Faults.</exception>
protected override void GenerateAndEnqueueRandomActions(ClusterStateSnapshot stateSnapshot, Guid activityId = default(Guid))
{
    // While the initialSetup flag is still set and workload scripts are configured,
    // force a workload round and clear the flag; otherwise let the chooser roll.
    bool startWithWorkload = this.initialSetup && this.testParameters.WorkloadParameters.WorkloadScripts.Count > 0;

    ActionCategory chosenCategory;
    if (startWithWorkload)
    {
        chosenCategory = ActionCategory.WorkLoad;
        this.initialSetup = false;
    }
    else
    {
        chosenCategory = (ActionCategory)this.actionCategoryChooser.NextRoll();
    }

    var actionsToEnqueue = new List<StateTransitionAction>();

    switch (chosenCategory)
    {
        case ActionCategory.WorkLoad:
            actionsToEnqueue.AddRange(this.GenerateActionsUsingActionsManager(this.workloadActionsManager, stateSnapshot, activityId));
            break;

        case ActionCategory.Faults:
            actionsToEnqueue.AddRange(this.GenerateRandomFaults(stateSnapshot, activityId));
            break;

        default:
            throw new ArgumentException("Unknown category:" + chosenCategory);
    }

    this.EnqueueActions(actionsToEnqueue);
}
/// <summary>
/// Generates and enqueues actions for the given cluster state, then returns the dequeued actions.
/// </summary>
/// <param name="stateInfo">Cluster state snapshot passed to the generator.</param>
/// <param name="activityId">Activity identifier passed to the generator.</param>
/// <returns>The list returned by <c>DequeueActions</c>.</returns>
public IList<StateTransitionAction> GetNextActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid))
{
    // Fill the queue first, then hand back whatever DequeueActions yields.
    this.GenerateAndEnqueueRandomActions(stateInfo, activityId);

    return this.DequeueActions();
}
/// <summary>
/// Collects the pending actions of the node-fault, service-fault and workload action managers
/// into a single list, in that order.
/// </summary>
/// <param name="stateInfo">Cluster state snapshot forwarded to each manager.</param>
/// <param name="activityId">Activity identifier forwarded to each manager.</param>
/// <returns>A new list holding the pending actions of all three managers.</returns>
internal override IList<StateTransitionAction> GetPendingActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid))
{
    // NOTE(review): systemFaultActionGenerator is not consulted here -- confirm this is intentional.
    List<StateTransitionAction> pending = new List<StateTransitionAction>();

    // Node faults first...
    pending.AddRange(this.nodeFaultActionsManager.GetPendingActions(stateInfo, activityId));

    // ...then service faults...
    pending.AddRange(this.serviceFaultActionsManager.GetPendingActions(stateInfo, activityId));

    // ...and finally workload actions.
    pending.AddRange(this.workloadActionsManager.GetPendingActions(stateInfo, activityId));

    return pending;
}
/// <summary>
/// Asks the given action manager for its next actions, logs how many were produced and returns them.
/// </summary>
/// <param name="actionManager">The generator whose <c>GetNextActions</c> is invoked.</param>
/// <param name="stateSnapshot">Cluster state snapshot forwarded to the generator.</param>
/// <param name="activityId">Activity identifier forwarded to the generator and written to the log.</param>
/// <returns>The actions returned by <paramref name="actionManager"/>.</returns>
private IList<StateTransitionAction> GenerateActionsUsingActionsManager(
    ActionGeneratorBase actionManager,
    ClusterStateSnapshot stateSnapshot,
    Guid activityId = default(Guid))
{
    IList<StateTransitionAction> nextActions = actionManager.GetNextActions(stateSnapshot, activityId);

    // Trace the number of actions being handed back for this activity.
    int actionCount = nextActions.Count;
    this.Log.WriteInfo(this.TraceType, "{0}: Returning {1} actions.", activityId, actionCount);

    return nextActions;
}
/// <summary>
/// Determines whether replicas of the FailoverManager, ClusterManager and Naming services are
/// among the system service replicas deployed on <paramref name="node"/>.
/// </summary>
/// <param name="node">Node whose deployed system service replicas are inspected.</param>
/// <param name="application">Application entity; only system applications are inspected.</param>
/// <param name="cancellationToken">Token forwarded to the replica list query.</param>
/// <returns>
/// A tuple of (FailoverManager present, ClusterManager present, Naming present); all
/// <c>false</c> when <paramref name="application"/> is not the system application.
/// </returns>
private async Task<Tuple<bool, bool, bool>> GetPresenceOfCodePackagelessSystemServiceAsync(
    NodeInfo node,
    ApplicationEntity application,
    CancellationToken cancellationToken)
{
    // Nothing to look up for non-system applications.
    if (!ClusterStateSnapshot.IsSystemApplication(application))
    {
        return Tuple.Create(false, false, false);
    }

    var systemReplicas = await this.GetDeployedSystemServiceReplicaListAsync(node, cancellationToken).ConfigureAwait(false);

    bool fmFound = false;
    bool cmFound = false;
    bool nsFound = false;

    foreach (var replica in systemReplicas)
    {
        // Each replica flips at most one flag; a flag that is already set is not re-tested.
        if (!fmFound
            && replica.ServiceName.OriginalString.Equals(Constants.FailoverManagerServiceName, StringComparison.OrdinalIgnoreCase))
        {
            fmFound = true;
        }
        else if (!cmFound
                 && replica.ServiceName.OriginalString.Equals(Constants.ClusterManagerServiceName, StringComparison.OrdinalIgnoreCase))
        {
            cmFound = true;
        }
        else if (!nsFound
                 && replica.ServiceName.OriginalString.Equals(Constants.NamingServiceName, StringComparison.OrdinalIgnoreCase))
        {
            nsFound = true;
        }
    }

    return new Tuple<bool, bool, bool>(fmFound, cmFound, nsFound);
}
/// <summary>
/// Stores the supplied snapshot on this instance and then creates and enqueues service fault actions.
/// </summary>
/// <param name="stateSnapshot">Cluster state snapshot saved into the <c>stateSnapshot</c> field.</param>
/// <param name="activityId">Activity identifier forwarded to <c>CreateAndEnqueueServiceFaultActions</c>.</param>
protected override void GenerateAndEnqueueRandomActions(ClusterStateSnapshot stateSnapshot, Guid activityId = default(Guid))
{
    // The snapshot has to be recorded before the fault actions are created.
    this.stateSnapshot = stateSnapshot;

    // Enqueue actions.
    this.CreateAndEnqueueServiceFaultActions(activityId);
}
/// <summary>
/// Extension point implemented by derived classes. Judging by the member name, an
/// implementation is expected to generate actions from the supplied cluster state and enqueue
/// them -- confirm against the concrete overrides.
/// </summary>
/// <param name="stateInfo">Cluster state snapshot made available to the implementation.</param>
/// <param name="activityId">Activity identifier; <c>default(Guid)</c> when omitted.</param>
protected abstract void GenerateAndEnqueueRandomActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid));
/// <summary>
/// Default implementation: reports no pending actions. Neither argument is used here.
/// </summary>
/// <param name="stateInfo">Cluster state snapshot (unused by this implementation).</param>
/// <param name="activityId">Activity identifier (unused by this implementation).</param>
/// <returns>A new, empty list.</returns>
internal virtual IList<StateTransitionAction> GetPendingActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid))
{
    IList<StateTransitionAction> noPendingActions = new List<StateTransitionAction>();
    return noPendingActions;
}
/// <summary>
/// Unpacks the node list, unsafe-mode flag and snapshot id from the snapshot and forwards them
/// to the overload that takes those values directly.
/// </summary>
/// <param name="stateInfo">Cluster state snapshot supplying the current state information.</param>
/// <param name="activityId">Activity identifier forwarded unchanged.</param>
protected override void GenerateAndEnqueueRandomActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid))
{
    // Use current state information.
    var nodes = stateInfo.Nodes;
    var unsafeModeEnabled = stateInfo.UnsafeModeEnabled;
    var snapshotId = stateInfo.ClusterSnapshotId;

    this.GenerateAndEnqueueRandomActions(nodes, unsafeModeEnabled, snapshotId, activityId);
}
/// <summary>
/// Forwards the snapshot's workload list to the overload that works on that list.
/// </summary>
/// <param name="stateSnapShot">Cluster state snapshot whose <c>WorkloadList</c> is used.</param>
/// <param name="activityId">Activity identifier forwarded unchanged.</param>
protected override void GenerateAndEnqueueRandomActions(ClusterStateSnapshot stateSnapShot, Guid activityId = default(Guid))
{
    var workloads = stateSnapShot.WorkloadList;

    this.GenerateAndEnqueueRandomActions(workloads, activityId);
}
/// <summary>
/// Finds the first replica in <c>ReplicaList</c> whose node matches <paramref name="nodeName"/>
/// according to <c>ClusterStateSnapshot.MatchNodesByNameOrId</c>.
/// </summary>
/// <param name="nodeName">Node name to match; validated as not null or empty.</param>
/// <returns>The first matching replica entity, or the default value when none matches.</returns>
internal ReplicaEntity FindReplicaEntityGivenNodeName(string nodeName)
{
    Requires.Argument("nodeName", nodeName).NotNullOrEmpty();

    var replicaOnNode = this.ReplicaList.FirstOrDefault(
        candidate => ClusterStateSnapshot.MatchNodesByNameOrId(candidate.Replica.NodeName, nodeName));

    return replicaOnNode;
}
/// <summary>
/// Queries the cluster for its nodes, applications and per-node deployed applications and
/// assembles the results into a <see cref="ClusterStateSnapshot"/>.
/// </summary>
/// <param name="testContext">Test context whose FabricCluster and FabricClient are queried.</param>
/// <param name="action">Supplies RequestTimeout, ActionTimeout and ShouldFaultSystem for this capture.</param>
/// <param name="cancellationToken">Token forwarded to every query.</param>
/// <returns>
/// The populated snapshot, or <c>null</c> when
/// <c>TryAssociateDeployedReplicaWithDeployedCodepackageAsync</c> reports failure for a deployed application.
/// </returns>
/// <exception cref="ChaosInconsistentClusterSnapshotException">
/// The partition set recorded in <c>PartitionMapFromFM</c> differs from the one in <c>PartitionMapFromNodes</c>.
/// </exception>
private async Task<ClusterStateSnapshot> CaptureClusterStateSnapshotAndPopulateEntitiesAsync(
    FabricTestContext testContext,
    GetClusterStateSnapshotAction action,
    CancellationToken cancellationToken)
{
    // Reset the per-capture state kept on this instance.
    this.PartitionMapFromFM = new HashSet<string>(StringComparer.InvariantCulture);
    this.PartitionMapFromNodes = new HashSet<string>(StringComparer.InvariantCulture);

    this.requestTimeOut = action.RequestTimeout;
    this.timer = new TimeoutHelper(action.ActionTimeout);

    this.testContext = testContext;
    this.deployedSystemReplicaMap = new Dictionary<NodeInfo, DeployedServiceReplicaList>();

    var nodes = await this.testContext.FabricCluster.GetLatestNodeInfoAsync(this.requestTimeOut, this.timer.GetRemainingTime(), cancellationToken).ConfigureAwait(false);

    var clusterSnapshot = new ClusterStateSnapshot(false, action.ShouldFaultSystem);
    var nodeInfos = nodes as IList<NodeInfo> ?? nodes.ToList();
    clusterSnapshot.Nodes.AddNodes(nodeInfos);
    clusterSnapshot.PopulateNodeMaps(nodes);

    // Get all current active applications
    var appListResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
        () => this.testContext.FabricClient.QueryManager.GetApplicationListAsync(
            null,
            string.Empty,
            this.requestTimeOut,
            cancellationToken),
        this.timer.GetRemainingTime(),
        cancellationToken).ConfigureAwait(false);

    if (appListResult != null)
    {
        foreach (var appResultItem in appListResult)
        {
            var applicationEntity = clusterSnapshot.Applications.AddApplication(appResultItem);
            await this.PopulateApplicationEntityAsync(applicationEntity, cancellationToken).ConfigureAwait(false);
        }

        var systemApplicationEntity = clusterSnapshot.Applications.AddApplication(SystemApplication);
        await this.PopulateApplicationEntityAsync(systemApplicationEntity, cancellationToken).ConfigureAwait(false);
    }

    foreach (var node in nodeInfos)
    {
        var node1 = node.Clone();

        if (node1.IsNodeUp)
        {
            var retryableErrorsForGetDeployedApplicationList = new FabricClientRetryErrors();
            retryableErrorsForGetDeployedApplicationList.RetryableFabricErrorCodes.Add(FabricErrorCode.InvalidAddress);

            var deployedApplicationList = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                () => this.testContext.FabricClient.QueryManager.GetDeployedApplicationListAsync(
                    node1.NodeName,
                    null,
                    this.requestTimeOut,
                    cancellationToken),
                retryableErrorsForGetDeployedApplicationList,
                this.timer.GetRemainingTime(),
                cancellationToken).ConfigureAwait(false);

            // The query result may be null. Normalize it to an empty list up front so that the
            // loops below are safe even when no system application entry is added.
            if (deployedApplicationList == null)
            {
                deployedApplicationList = new DeployedApplicationList();
            }

            // Add system app entity in the deployed application list
            // so that we get the deployed replica list for the node
            if (await this.HasDeployedSystemServiceAsync(node1, cancellationToken).ConfigureAwait(false))
            {
                deployedApplicationList.Add(DeployedSystemApplication);
            }

            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Node: {0} has the following apps deployed...", node1);

            foreach (var app in deployedApplicationList)
            {
                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Deployed app = {0}", app.ApplicationName.OriginalString);
            }

            foreach (var app in deployedApplicationList)
            {
                var application = app;
                var applicationEntity = clusterSnapshot.Applications.FirstOrDefault(a => a.Application.ApplicationName == application.ApplicationName);
                if (applicationEntity != null)
                {
                    // A failed association abandons this capture; null is returned to the caller.
                    if (!await this.TryAssociateDeployedReplicaWithDeployedCodepackageAsync(
                            node1,
                            applicationEntity,
                            cancellationToken).ConfigureAwait(false))
                    {
                        return null;
                    }
                }
            } // iterate through the deployed apps
        } // if a node is up
    } // iterate through the nodes

    // Information acquired through queries could go stale due to the cluster dynamism.
    // This happened while the cluster snapshot was being taken -- making the snapshot internally inconsistent.
    // The fix is to ignore the inconsistent snapshot and capture it again.
    //
    // If FailoverManager's point of view coincides with that of the Nodes, return the snapshot;
    // otherwise, throw ChaosInconsistentClusterSnapshotException to indicate that the snapshot
    // should be captured afresh.
    //
    if (!this.PartitionMapFromFM.SetEquals(this.PartitionMapFromNodes))
    {
        StringBuilder exceptionMessageBuilder = new StringBuilder();

        // ExceptWith mutates the set in place, so keep the full FM view for the second difference.
        var copyOfFmInfo = new HashSet<string>(this.PartitionMapFromFM);

        this.PartitionMapFromFM.ExceptWith(this.PartitionMapFromNodes);
        if (this.PartitionMapFromFM.Any())
        {
            exceptionMessageBuilder.AppendLine(string.Format(CultureInfo.InvariantCulture, "FM has the following extra information:"));

            foreach (var pinfo in this.PartitionMapFromFM)
            {
                exceptionMessageBuilder.AppendLine(string.Format(CultureInfo.InvariantCulture, ReplicaViewPrintFormat, Tab, pinfo));
            }
        }

        this.PartitionMapFromNodes.ExceptWith(copyOfFmInfo);
        if (this.PartitionMapFromNodes.Any())
        {
            exceptionMessageBuilder.AppendLine(string.Format(CultureInfo.InvariantCulture, "Nodes has the following partitions deployed, which FM does not know about:"));

            foreach (var pinfo in this.PartitionMapFromNodes)
            {
                exceptionMessageBuilder.AppendLine(string.Format(CultureInfo.InvariantCulture, ReplicaViewPrintFormat, Tab, pinfo));
            }
        }

        TestabilityTrace.TraceSource.WriteWarning(TraceType, string.Format(CultureInfo.InvariantCulture, "{0}", exceptionMessageBuilder.ToString()));

        throw new ChaosInconsistentClusterSnapshotException(exceptionMessageBuilder.ToString());
    }

    return clusterSnapshot;
}
/// <summary>
/// Captures a cluster state snapshot, retrying until a capture yields a snapshot, then applies
/// the chaos target filter, marks unsafe entities, emits telemetry/trace output and stores the
/// snapshot in <c>action.Result</c>.
/// </summary>
/// <param name="testContext">Test context forwarded to the capture routine.</param>
/// <param name="action">
/// Supplies MaximumNumberOfRetries, ChaosTargetFilter and TelemetrySamplingProbability, and receives the result.
/// </param>
/// <param name="cancellationToken">Token forwarded to the back-off delay and to the capture routine.</param>
/// <remarks>
/// FabricException and ChaosInconsistentClusterSnapshotException raised by a capture attempt are
/// recorded and the capture is retried. Once MaximumNumberOfRetries attempts have been made
/// without obtaining a snapshot, <c>ChaosUtility.ThrowOrAssertIfTrue</c> is invoked with a true condition.
/// </remarks>
protected override async Task ExecuteActionAsync(FabricTestContext testContext, GetClusterStateSnapshotAction action, CancellationToken cancellationToken)
{
    // Distinct exception messages seen while retrying, with their occurrence counts.
    var exceptionHistory = new Dictionary<string, int>();

    int retries = 0;

    // NOTE(review): these static counters are reset once, not per attempt. If the capture
    // routine increments them, retried captures would be counted cumulatively -- confirm.
    GetClusterStateSnapshotAction.ServiceCount = 0;
    GetClusterStateSnapshotAction.PartitionCount = 0;
    GetClusterStateSnapshotAction.ReplicaCount = 0;

    Stopwatch stopWatch = Stopwatch.StartNew();

    ClusterStateSnapshot clusterSnapshot = null;

    do
    {
        ++retries;

        await Task.Delay(Constants.DefaultChaosSnapshotRecaptureBackoffInterval, cancellationToken).ConfigureAwait(false);

        try
        {
            clusterSnapshot = await this.CaptureClusterStateSnapshotAndPopulateEntitiesAsync(
                testContext,
                action,
                cancellationToken).ConfigureAwait(false);
        }
        catch (Exception exception) when (exception is FabricException || exception is ChaosInconsistentClusterSnapshotException)
        {
            string exceptionString = exception.Message;

            // A missing key leaves occurrences at 0, so both the first and repeated sightings
            // are handled with a single lookup.
            int occurrences;
            exceptionHistory.TryGetValue(exceptionString, out occurrences);
            exceptionHistory[exceptionString] = occurrences + 1;
        }

        string allExceptions = string.Join(ExceptionDelimeter, exceptionHistory);

        // Only an attempt that produced no snapshot counts against the retry budget: a snapshot
        // captured on the last permitted attempt is a success and must not be reported as a failure.
        bool retriesExhausted = clusterSnapshot == null && retries >= action.MaximumNumberOfRetries;

        if (retriesExhausted)
        {
            TestabilityTrace.TraceSource.WriteWarning(TraceType, "While taking a consistent cluster snapshot, following exceptions occurred: {0}", allExceptions);
        }

        ChaosUtility.ThrowOrAssertIfTrue(
            ChaosConstants.GetClusterSnapshotAction_MaximumNumberOfRetriesAchieved_TelemetryId,
            retriesExhausted,
            string.Format(StringResources.ChaosEngineError_GetClusterSnapshotAction_MaximumNumberOfRetriesAchieved, action.MaximumNumberOfRetries, allExceptions));
    }
    while (clusterSnapshot == null);

    stopWatch.Stop();
    var elapsedInGatherSnapshot = stopWatch.Elapsed;

    // Time the filtering/marking phase separately from the capture phase.
    stopWatch = Stopwatch.StartNew();

    clusterSnapshot.ApplyChaosTargetFilter(action.ChaosTargetFilter);
    clusterSnapshot.MarkAllUnsafeEntities();

    stopWatch.Stop();
    var elapsedInMarkAllUnsafe = stopWatch.Elapsed;

    // Telemetry is emitted only for a random sample of snapshots.
    if (UniformRandomNumberGenerator.NextDouble() < action.TelemetrySamplingProbability)
    {
        FabricEvents.Events.ChaosSnapshot(
            Guid.NewGuid().ToString(),
            clusterSnapshot.Nodes.Count,
            clusterSnapshot.Applications.Count,
            GetClusterStateSnapshotAction.ServiceCount,
            GetClusterStateSnapshotAction.PartitionCount,
            GetClusterStateSnapshotAction.ReplicaCount,
            elapsedInGatherSnapshot.TotalSeconds,
            elapsedInMarkAllUnsafe.TotalSeconds,
            retries);
    }

    TestabilityTrace.TraceSource.WriteInfo(
        TraceType,
        "For '{0}' nodes, '{1}' apps, '{2}' services, '{3}' partitions, '{4}' replicas, snapshot took '{5}', mark unsafe took '{6}', took '{7}' retries.",
        clusterSnapshot.Nodes.Count,
        clusterSnapshot.Applications.Count,
        GetClusterStateSnapshotAction.ServiceCount,
        GetClusterStateSnapshotAction.PartitionCount,
        GetClusterStateSnapshotAction.ReplicaCount,
        elapsedInGatherSnapshot,
        elapsedInMarkAllUnsafe,
        retries);

    action.Result = clusterSnapshot;

    ResultTraceString = "GetClusterStateSnapshotAction succeeded";
}
/// <summary>
/// Creates an empty application entity list bound to the given cluster state snapshot.
/// </summary>
/// <param name="clusterStateSnapshot">Snapshot stored in the <c>clusterStateSnapshot</c> field.</param>
public ApplicationEntityList(ClusterStateSnapshot clusterStateSnapshot)
{
    // Backing storage starts out empty.
    this.list = new List<ApplicationEntity>();

    this.clusterStateSnapshot = clusterStateSnapshot;
}
/// <summary>
/// Creates an empty node entity list bound to the given cluster state snapshot.
/// </summary>
/// <param name="stateSnapshot">Snapshot stored in the <c>stateSnapshot</c> field.</param>
public NodeEntityList(ClusterStateSnapshot stateSnapshot)
{
    // Backing storage starts out empty.
    this.list = new List<NodeEntity>();

    this.stateSnapshot = stateSnapshot;
}
/// <summary>
/// Returns the pending actions for the snapshot's workload list by delegating to the overload
/// that takes that list.
/// </summary>
/// <param name="stateInfo">Cluster state snapshot whose <c>WorkloadList</c> is used.</param>
/// <param name="activityId">Activity identifier forwarded unchanged.</param>
/// <returns>Whatever the workload-list overload returns.</returns>
internal override IList<StateTransitionAction> GetPendingActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid))
{
    var workloads = stateInfo.WorkloadList;

    return GetPendingActions(workloads, activityId);
}
/// <summary>
/// Finds the first node entity whose current node name matches <paramref name="nodeName"/>
/// according to <c>ClusterStateSnapshot.MatchNodesByNameOrId</c>.
/// </summary>
/// <param name="nodeName">Node name to match; validated as not null or empty.</param>
/// <returns>The first matching node entity, or the default value when none matches.</returns>
internal NodeEntity FindMatchingNodeEntity(string nodeName)
{
    Requires.Argument("nodeName", nodeName).NotNullOrEmpty();

    var matchingNode = this.list.FirstOrDefault(
        candidate => ClusterStateSnapshot.MatchNodesByNameOrId(candidate.CurrentNodeInfo.NodeName, nodeName));

    return matchingNode;
}
/// <summary>
/// Stores the supplied snapshot on this instance and delegates to the overload that takes only
/// the activity identifier.
/// </summary>
/// <param name="stateInfo">Cluster state snapshot saved into the <c>stateSnapshot</c> field.</param>
/// <param name="activityId">Activity identifier forwarded unchanged.</param>
protected override void GenerateAndEnqueueRandomActions(ClusterStateSnapshot stateInfo, Guid activityId = default(Guid))
{
    // The snapshot is recorded before the activity-only overload runs.
    this.stateSnapshot = stateInfo;

    this.GenerateAndEnqueueRandomActions(activityId);
}
/// <summary>
/// Creates a node entity for the given node info and snapshot; the entity's flags start as
/// <c>ClusterEntityFlags.Excluded</c>.
/// </summary>
/// <param name="nodeInfo">Node information stored in <c>CurrentNodeInfo</c>.</param>
/// <param name="clusterSnapshot">Snapshot stored in <c>ClusterSnapshot</c>.</param>
public NodeEntity(NodeInfo nodeInfo, ClusterStateSnapshot clusterSnapshot)
{
    this.CurrentNodeInfo = nodeInfo;
    this.ClusterSnapshot = clusterSnapshot;

    // Every new node entity begins in the Excluded state.
    this.NodeFlags = ClusterEntityFlags.Excluded;
}