/// <inheritdoc/>
protected override void ProcessRecordInternal()
{
    // Build the nested request objects before the parameters object that aggregates them.
    var healthPolicy = new ClusterHealthPolicy(
        considerWarningAsError: this.ConsiderWarningAsError,
        maxPercentUnhealthyNodes: this.MaxPercentUnhealthyNodes,
        maxPercentUnhealthyApplications: this.MaxPercentUnhealthyApplications,
        applicationTypeHealthPolicyMap: this.ApplicationTypeHealthPolicyMap);

    // Map is optional: when it is null, a null dictionary is handed to the context.
    var context = new ChaosContext(
        map: this.Map?.ToDictionary<string, string>());

    var targetFilter = new ChaosTargetFilter(
        nodeTypeInclusionList: this.NodeTypeInclusionList,
        applicationInclusionList: this.ApplicationInclusionList);

    var parameters = new ChaosParameters(
        timeToRunInSeconds: this.TimeToRunInSeconds,
        maxClusterStabilizationTimeoutInSeconds: this.MaxClusterStabilizationTimeoutInSeconds,
        maxConcurrentFaults: this.MaxConcurrentFaults,
        enableMoveReplicaFaults: this.EnableMoveReplicaFaults,
        waitTimeBetweenFaultsInSeconds: this.WaitTimeBetweenFaultsInSeconds,
        waitTimeBetweenIterationsInSeconds: this.WaitTimeBetweenIterationsInSeconds,
        clusterHealthPolicy: healthPolicy,
        context: context,
        chaosTargetFilter: targetFilter);

    // Issue the start request and block until it completes; GetAwaiter().GetResult()
    // rethrows the original exception instead of wrapping it in an AggregateException.
    var chaosClient = this.ServiceFabricClient.ChaosClient;
    var startRequest = chaosClient.StartChaosAsync(
        chaosParameters: parameters,
        serverTimeout: this.ServerTimeout,
        cancellationToken: this.CancellationToken);
    startRequest.GetAwaiter().GetResult();

    Console.WriteLine("Success!");
}
/// <summary>
/// Emits the "Chaos started" trace event describing the supplied parameters.
/// </summary>
/// <param name="chaosParameters">Parameters of the Chaos run being started; must not be null.</param>
public static void WriteChaosStartedTraceEvent(ChaosParameters chaosParameters)
{
    Requires.Argument<ChaosParameters>("chaosParameters", chaosParameters).NotNull();

    // Inclusion lists are flattened to delimited strings; a missing filter or list yields an empty string.
    string nodeTypesText = string.Empty;
    string applicationsText = string.Empty;
    ChaosTargetFilter filter = chaosParameters.ChaosTargetFilter;
    if (filter != null)
    {
        IList<string> includedNodeTypes = filter.NodeTypeInclusionList;
        if (includedNodeTypes != null)
        {
            nodeTypesText = string.Join(ListEntryDelimeter, includedNodeTypes);
        }

        IList<string> includedApplications = filter.ApplicationInclusionList;
        if (includedApplications != null)
        {
            applicationsText = string.Join(ListEntryDelimeter, includedApplications);
        }
    }

    var healthPolicy = chaosParameters.ClusterHealthPolicy;
    string healthPolicyText = healthPolicy == null ? string.Empty : healthPolicy.ToString();

    // The context entries are joined using each entry's own ToString() representation.
    var runContext = chaosParameters.Context;
    string contextText = runContext == null ? string.Empty : string.Join(ListEntryDelimeter, runContext);

    // Durations are reported in seconds; every event gets a fresh instance id.
    FabricEvents.Events.ChaosStartedEvent(
        Guid.NewGuid(),
        chaosParameters.MaxConcurrentFaults,
        chaosParameters.TimeToRun.TotalSeconds,
        chaosParameters.MaxClusterStabilizationTimeout.TotalSeconds,
        chaosParameters.WaitTimeBetweenIterations.TotalSeconds,
        chaosParameters.WaitTimeBetweenFaults.TotalSeconds,
        chaosParameters.EnableMoveReplicaFaults,
        nodeTypesText,
        applicationsText,
        healthPolicyText,
        contextText);
}
/// <summary>
/// Records that a target filter was supplied and applies its inclusion lists.
/// Does nothing when <paramref name="chaosTargetFilter"/> is null.
/// </summary>
/// <param name="chaosTargetFilter">The filter to apply, or null for no filtering.</param>
internal void ApplyChaosTargetFilter(ChaosTargetFilter chaosTargetFilter)
{
    if (chaosTargetFilter == null)
    {
        // No filter supplied: leave ChaosTargetFilterPresent untouched.
        return;
    }

    this.ChaosTargetFilterPresent = true;
    this.ApplyChaosTargetInclusionFilters(chaosTargetFilter);
}
/// <summary>
/// Initializes a new <see cref="GetClusterStateSnapshotAction"/> from the supplied settings.
/// </summary>
/// <param name="chaosSnapshotTelemetrySamplingProbability">Value stored in <c>TelemetrySamplingProbability</c>.</param>
/// <param name="shouldFaultSystem">Value stored in <c>ShouldFaultSystem</c>.</param>
/// <param name="maximumNumberOfRetries">Value stored in <c>MaximumNumberOfRetries</c>; must be zero or greater.</param>
/// <param name="chaosTargetFilter">Value stored in <c>ChaosTargetFilter</c>; not validated here.</param>
internal GetClusterStateSnapshotAction(
    double chaosSnapshotTelemetrySamplingProbability,
    bool shouldFaultSystem,
    int maximumNumberOfRetries,
    ChaosTargetFilter chaosTargetFilter)
{
    this.TelemetrySamplingProbability = chaosSnapshotTelemetrySamplingProbability;
    this.ShouldFaultSystem = shouldFaultSystem;

    // A negative retry count is rejected; the failure is raised by ThrowIf.IsTrue.
    bool retryCountIsNegative = maximumNumberOfRetries < 0;
    ThrowIf.IsTrue(retryCountIsNegative, "Value must be non-negative.");
    this.MaximumNumberOfRetries = maximumNumberOfRetries;

    this.ChaosTargetFilter = chaosTargetFilter;
}
/// <summary>
/// Writes a <see cref="ChaosTargetFilter"/> to the given writer as a JSON object.
/// </summary>
/// <param name="writer">The <see cref="T:Newtonsoft.Json.JsonWriter" /> to write to.</param>
/// <param name="obj">The object to serialize to JSON.</param>
internal static void Serialize(JsonWriter writer, ChaosTargetFilter obj)
{
    writer.WriteStartObject();

    // Both inclusion lists are optional: each property is emitted only when its list is not null.
    var nodeTypes = obj.NodeTypeInclusionList;
    if (nodeTypes != null)
    {
        writer.WriteEnumerableProperty(
            nodeTypes,
            "NodeTypeInclusionList",
            (w, v) => writer.WriteStringValue(v));
    }

    var applications = obj.ApplicationInclusionList;
    if (applications != null)
    {
        writer.WriteEnumerableProperty(
            applications,
            "ApplicationInclusionList",
            (w, v) => writer.WriteStringValue(v));
    }

    writer.WriteEndObject();
}
/// <summary>
/// Round-trips a fully populated <see cref="ChaosParameters"/> instance, including its
/// <see cref="ChaosTargetFilter"/>, through the serializer under test.
/// </summary>
public void ChaosParametersSerializationTest()
{
    TimeSpan maxClusterStabilizationTimeout = TimeSpan.FromSeconds(997);
    long maxConcurrentFaults = 7;
    TimeSpan waitTimeBetweenIterations = TimeSpan.FromSeconds(131);
    TimeSpan waitTimeBetweenFaults = TimeSpan.FromSeconds(19);
    TimeSpan timeToRun = TimeSpan.FromSeconds(104729);
    bool enableMoveReplicaFaults = true;

    var healthPolicy = new ClusterHealthPolicy
    {
        ConsiderWarningAsError = false,
        MaxPercentUnhealthyNodes = 10,
        MaxPercentUnhealthyApplications = 15
    };
    healthPolicy.ApplicationTypeHealthPolicyMap["TestApplicationTypePolicy"] = 11;

    var context = new Dictionary<string, string>
    {
        { "key1", "value1" },
        { "key2", "value2" }
    };

    var nodeTypeInclusionList = new List<string> { "NodeType1", "NodeType2" };
    var applicationInclusionList = new List<string> { "fabric:/app1", "fabric:/app2" };
    var chaosTargetFilter = new ChaosTargetFilter
    {
        NodeTypeInclusionList = nodeTypeInclusionList,
        ApplicationInclusionList = applicationInclusionList
    };

    // The filter must be attached to the parameters; otherwise it is dead code and the
    // round-trip never exercises the ChaosTargetFilter serialization path.
    var chaosParameters = new ChaosParameters(
        maxClusterStabilizationTimeout,
        maxConcurrentFaults,
        enableMoveReplicaFaults,
        timeToRun,
        context,
        waitTimeBetweenIterations,
        waitTimeBetweenFaults,
        healthPolicy)
    {
        ChaosTargetFilter = chaosTargetFilter
    };

    this.TestUsingSerializer(this.Serializer, chaosParameters);
}
static void Main(string[] args)
{
    // README:
    //
    // Please ensure your cluster certificate is installed in
    // the 'CurrentUser' certificate store.
    //
    // REQUIRED STEPS:
    // - Paste your Service Fabric certificate's thumbprint into clientCertThumb and serverCertThumb
    // - Update clusterDomainName to match your SF cluster
    // - Add your cluster node types to includedNodeTypes
    string clientCertThumb = "D6426E96E0169B60ED030E53FCD05EAC12AAA1E0";
    string serverCertThumb = "D6426E96E0169B60ED030E53FCD05EAC12AAA1E0";
    string clusterDomainName = "dotjson.westeurope.cloudapp.azure.com";

    string commonName = $"www.{clusterDomainName}";
    string clusterEndpoint = $"{clusterDomainName}:19000";
    var credentials = GetCredentials(clientCertThumb, serverCertThumb, commonName);

    Console.WriteLine($"Connecting to cluster {clusterEndpoint} using certificate '{clientCertThumb}'.");

    using (var client = new FabricClient(credentials, clusterEndpoint))
    {
        // Captured before Chaos is started so the report filter below covers the whole run.
        var runStartUtc = DateTime.UtcNow;

        // The maximum amount of time to wait for all cluster entities to become stable and healthy.
        // Chaos executes in iterations and at the start of each iteration it validates the health of
        // cluster entities. If an entity is not stable and healthy within this timeout, Chaos
        // generates a validation failed event.
        var stabilizationTimeout = TimeSpan.FromSeconds(30.0);

        var runDuration = TimeSpan.FromMinutes(60.0);

        // The maximum number of concurrent faults induced per iteration.
        // Chaos executes in iterations and two consecutive iterations are separated by a validation phase.
        // The higher the concurrency, the more aggressive the injection of faults -- inducing more
        // complex series of states to uncover bugs.
        // The recommendation is to start with a value of 2 or 3 and to exercise caution while moving up.
        var maxConcurrentFaults = 3;

        // A map of (string, string) key-value pairs that can be used to record information about
        // the Chaos run. There cannot be more than 100 such pairs and each string (key or value) can
        // be at most 4095 characters long. It is set by the starter of the Chaos run to optionally
        // store context about the specific run.
        var runContext = new Dictionary<string, string>
        {
            { "ReasonForStart", "Testing" }
        };

        // Time-separation between two consecutive iterations of Chaos.
        // The larger the value, the lower the fault injection rate.
        var iterationGap = TimeSpan.FromSeconds(10);

        // Wait time between consecutive faults within a single iteration.
        // The larger the value, the lower the overlapping between faults and the simpler the sequence
        // of state transitions that the cluster goes through.
        // The recommendation is to start with a value between 1 and 5 seconds and exercise caution
        // while moving up.
        var faultGap = TimeSpan.Zero;

        // Passed-in cluster health policy is used to validate health of the cluster in between Chaos iterations.
        var healthPolicy = new ClusterHealthPolicy
        {
            ConsiderWarningAsError = false,
            MaxPercentUnhealthyApplications = 100,
            MaxPercentUnhealthyNodes = 100
        };

        // All types of faults (restart node, restart code package, restart replica, move primary
        // replica, and move secondary replica) will happen for nodes of the types listed here.
        var includedNodeTypes = new List<string> { "nodetype0" };

        // In addition to the faults included by includedNodeTypes, restart code package, restart
        // replica, move primary replica and move secondary replica faults will happen for the
        // applications listed here, even if a replica or code package of such an application resides
        // on a node whose type is not in includedNodeTypes.
        var includedApplications = new List<string> { "fabric:/Exchange" };

        // List of cluster entities to target for Chaos faults.
        var targetFilter = new ChaosTargetFilter
        {
            NodeTypeInclusionList = includedNodeTypes,
            ApplicationInclusionList = includedApplications
        };

        var parameters = new ChaosParameters(
            stabilizationTimeout,
            maxConcurrentFaults,
            true, /* EnableMoveReplicaFault */
            runDuration,
            runContext,
            iterationGap,
            faultGap,
            healthPolicy)
        {
            ChaosTargetFilter = targetFilter
        };

        try
        {
            client.TestManager.StartChaosAsync(parameters).GetAwaiter().GetResult();

            // Allow enough time for Chaos engine to start
            System.Threading.Thread.Sleep(TimeSpan.FromSeconds(30));
        }
        catch (FabricChaosAlreadyRunningException)
        {
            // Not fatal: fall through and report on the run that is already in progress.
            Console.WriteLine("An instance of Chaos is already running in the cluster");
        }

        var reportFilter = new ChaosReportFilter(runStartUtc, DateTime.MaxValue);

        // Events already printed; guards against printing an event twice across polls.
        var seenEvents = new HashSet<ChaosEvent>(new ChaosEventComparer());
        var pollInterval = TimeSpan.FromSeconds(1.0);
        string continuationToken = null;

        while (true)
        {
            ChaosReport report;
            try
            {
                // Without a continuation token, query by filter; otherwise resume from the token.
                var pendingReport = string.IsNullOrEmpty(continuationToken)
                    ? client.TestManager.GetChaosReportAsync(reportFilter)
                    : client.TestManager.GetChaosReportAsync(continuationToken);
                report = pendingReport.GetAwaiter().GetResult();
            }
            catch (FabricTransientException e)
            {
                Console.WriteLine("A transient exception happened: '{0}'", e);
                Task.Delay(pollInterval).GetAwaiter().GetResult();
                continue;
            }
            catch (TimeoutException e)
            {
                Console.WriteLine("A timeout exception happened: '{0}'", e);
                Task.Delay(pollInterval).GetAwaiter().GetResult();
                continue;
            }

            continuationToken = report.ContinuationToken;

            foreach (var chaosEvent in report.History)
            {
                if (seenEvents.Add(chaosEvent))
                {
                    Console.WriteLine(chaosEvent);
                }
            }

            // When Chaos stops, a StoppedEvent is created.
            // If a StoppedEvent is found, exit the loop.
            if (report.History.LastOrDefault() is StoppedEvent)
            {
                break;
            }

            Task.Delay(pollInterval).GetAwaiter().GetResult();
        }
    }
}
/// <summary>
/// Applies the node-type inclusion list and then the application inclusion list of the given filter.
/// </summary>
/// <param name="chaosTargetFilter">The filter whose inclusion lists are applied; dereferenced without a null check.</param>
private void ApplyChaosTargetInclusionFilters(ChaosTargetFilter chaosTargetFilter)
{
    var includedNodeTypes = chaosTargetFilter.NodeTypeInclusionList;
    this.ApplyNodeTypeInclusionFilter(includedNodeTypes);

    var includedApplications = chaosTargetFilter.ApplicationInclusionList;
    this.ApplyApplicationInclusionFilter(includedApplications);
}
/// <inheritdoc />
public override object ReadJson(
    JsonReader reader,
    Type objectType,
    object existingValue,
    JsonSerializer serializer)
{
    ThrowIf.Null(reader, "reader");

    // A JSON null maps to a null ChaosParameters.
    if (reader.TokenType == JsonToken.Null)
    {
        return null;
    }

    var json = JObject.Load(reader);

    // Every setting starts from its default and is overridden only when the property is present.
    TimeSpan stabilizationTimeout = ChaosConstants.DefaultClusterStabilizationTimeout;
    this.ReadTimePeriod(
        json,
        JsonSerializerImplConstants.MaxClusterStabilizationTimeoutInSeconds,
        ref stabilizationTimeout);

    JToken concurrentFaultsToken = json[JsonSerializerImplConstants.MaxConcurrentFaults];
    long concurrentFaults = concurrentFaultsToken != null
        ? concurrentFaultsToken.Value<long>()
        : ChaosConstants.MaxConcurrentFaultsDefault;

    TimeSpan iterationWait = ChaosConstants.WaitTimeBetweenIterationsDefault;
    this.ReadTimePeriod(
        json,
        JsonSerializerImplConstants.WaitTimeBetweenIterationsInSeconds,
        ref iterationWait);

    TimeSpan faultWait = ChaosConstants.WaitTimeBetweenFaultsDefault;
    this.ReadTimePeriod(
        json,
        JsonSerializerImplConstants.WaitTimeBetweenFaultsInSeconds,
        ref faultWait);

    // When no run time is given, fall back to uint.MaxValue seconds.
    TimeSpan runTime = TimeSpan.FromSeconds(uint.MaxValue);
    this.ReadTimePeriod(
        json,
        JsonSerializerImplConstants.TimeToRunInSeconds,
        ref runTime);

    JToken moveReplicaToken = json[JsonSerializerImplConstants.EnableMoveReplicaFaults];
    bool moveReplicaFaults = moveReplicaToken != null ? moveReplicaToken.Value<bool>() : false;

    ClusterHealthPolicy policy = new ClusterHealthPolicy();
    JToken policyToken = json[JsonSerializerImplConstants.ClusterHealthPolicy];
    if (policyToken != null)
    {
        policy = policyToken.ToObject<ClusterHealthPolicy>(serializer);
    }

    // The context dictionary is nested one level down, under the context object's map property.
    Dictionary<string, string> runContext = null;
    JToken contextToken = json[JsonSerializerImplConstants.Context];
    JToken mapToken = contextToken != null ? contextToken[JsonSerializerImplConstants.Map] : null;
    if (mapToken != null)
    {
        runContext = mapToken.ToObject<Dictionary<string, string>>(serializer);
    }

    // The target filter is read with its own serializer that ignores null values,
    // not with the serializer passed to this converter.
    ChaosTargetFilter targetFilter = null;
    JToken filterToken = json[JsonSerializerImplConstants.ChaosTargetFilter];
    if (filterToken != null)
    {
        var filterSerializer = new JsonSerializer { NullValueHandling = NullValueHandling.Ignore };
        targetFilter = filterToken.ToObject<ChaosTargetFilter>(filterSerializer);
    }

    var result = new ChaosParameters(
        stabilizationTimeout,
        concurrentFaults,
        moveReplicaFaults,
        runTime,
        runContext,
        iterationWait,
        faultWait,
        policy);
    result.ChaosTargetFilter = targetFilter;
    return result;
}
/// <summary>
/// Starts a Chaos run against the configured cluster and polls the Chaos report
/// until the last reported event is a <see cref="StoppedEvent"/>.
/// </summary>
/// <param name="minsTorun">How long Chaos should run, in minutes.</param>
/// <returns>The report whose final history entry is the <see cref="StoppedEvent"/>.</returns>
/// <exception cref="Exception">
/// Chaos was already running in the cluster; a stop of that run is requested before throwing.
/// </exception>
public static async Task<ChaosReport> RunTest(int minsTorun)
{
    string clientCertThumb = "87b906f84a251c015d44ea188e2eff322d1c16f8";
    string serverCertThumb = "87b906f84a251c015d44ea188e2eff322d1c16f8";
    string commonName = "memoryleak";
    string connection = "sf-memoryleak.eastus.cloudapp.azure.com:19000";

    var xc = GetCredentials(clientCertThumb, serverCertThumb, commonName);

    using (var client = new FabricClient(xc, connection))
    {
        // Captured before Chaos is started so the report filter below covers the whole run.
        var startTimeUtc = DateTime.UtcNow;
        var maxClusterStabilizationTimeout = TimeSpan.FromSeconds(30.0);
        var timeToRun = TimeSpan.FromMinutes(minsTorun);

        // The recommendation is to start with a value of 2 or 3 and to exercise caution while moving up.
        var maxConcurrentFaults = 3;

        var startContext = new Dictionary<string, string> { { "ReasonForStart", "Testing" } };

        // Time-separation between two consecutive iterations of Chaos. The larger the value, the
        // lower the fault injection rate.
        var waitTimeBetweenIterations = TimeSpan.FromSeconds(1);

        // Wait time between consecutive faults within a single iteration.
        // The larger the value, the lower the overlapping between faults and the simpler the sequence of
        // state transitions that the cluster goes through.
        var waitTimeBetweenFaults = TimeSpan.FromSeconds(1);

        // Passed-in cluster health policy is used to validate health of the cluster in between Chaos iterations.
        var clusterHealthPolicy = new ClusterHealthPolicy
        {
            ConsiderWarningAsError = false,
            MaxPercentUnhealthyApplications = 100,
            MaxPercentUnhealthyNodes = 100
        };

        var nodetypeInclusionList = new List<string> { "nt2vm", "nt3vm" };
        var applicationInclusionList = new List<string> { "fabric:/RequestHandling" };

        // List of cluster entities to target for Chaos faults.
        // The application inclusion list is currently left out of the filter on purpose.
        var chaosTargetFilter = new ChaosTargetFilter
        {
            NodeTypeInclusionList = nodetypeInclusionList,
            //ApplicationInclusionList = applicationInclusionList,
        };

        var parameters = new ChaosParameters(
            maxClusterStabilizationTimeout,
            maxConcurrentFaults,
            true, /* EnableMoveReplicaFault */
            timeToRun,
            startContext,
            waitTimeBetweenIterations,
            waitTimeBetweenFaults,
            clusterHealthPolicy)
        {
            ChaosTargetFilter = chaosTargetFilter
        };

        try
        {
            await client.TestManager.StartChaosAsync(parameters);
        }
        catch (FabricChaosAlreadyRunningException)
        {
            Console.WriteLine("An instance of Chaos is already running in the cluster.");
            await client.TestManager.StopChaosAsync();
            throw new Exception("Chaos test already running");
        }

        var filter = new ChaosReportFilter(startTimeUtc, DateTime.MaxValue);

        // Distinct events observed so far; collected but not otherwise consumed by this method.
        var eventSet = new HashSet<ChaosEvent>(new ChaosEventComparer());
        var pollInterval = TimeSpan.FromSeconds(1.0);
        string continuationToken = null;

        while (true)
        {
            ChaosReport report;
            try
            {
                // Without a continuation token, query by filter; otherwise resume from the token.
                report = string.IsNullOrEmpty(continuationToken)
                    ? await client.TestManager.GetChaosReportAsync(filter)
                    : await client.TestManager.GetChaosReportAsync(continuationToken);
            }
            catch (FabricTransientException e)
            {
                Console.WriteLine("A transient exception happened: '{0}'", e);

                // Await the back-off instead of blocking a thread inside an async method.
                await Task.Delay(pollInterval);
                continue;
            }
            catch (TimeoutException e)
            {
                Console.WriteLine("A timeout exception happened: '{0}'", e);
                await Task.Delay(pollInterval);
                continue;
            }

            continuationToken = report.ContinuationToken;

            foreach (var chaosEvent in report.History)
            {
                eventSet.Add(chaosEvent);
            }

            // When Chaos stops, a StoppedEvent is created.
            // If a StoppedEvent is found, exit the loop.
            var lastEvent = report.History.LastOrDefault();
            if (lastEvent is StoppedEvent)
            {
                return report;
            }

            await Task.Delay(pollInterval);
        }
    }
}