/// <inheritdoc/>
        protected override void ProcessRecordInternal()
        {
            // Assemble the three nested inputs first, each from this instance's properties.
            var healthPolicy = new ClusterHealthPolicy(
                considerWarningAsError: this.ConsiderWarningAsError,
                maxPercentUnhealthyNodes: this.MaxPercentUnhealthyNodes,
                maxPercentUnhealthyApplications: this.MaxPercentUnhealthyApplications,
                applicationTypeHealthPolicyMap: this.ApplicationTypeHealthPolicyMap);

            // Map is optional: a null Map is passed through as a null dictionary.
            var runContext = new ChaosContext(map: this.Map?.ToDictionary <string, string>());

            var targetFilter = new ChaosTargetFilter(
                nodeTypeInclusionList: this.NodeTypeInclusionList,
                applicationInclusionList: this.ApplicationInclusionList);

            // Combine the scalar settings with the objects built above.
            var parameters = new ChaosParameters(
                timeToRunInSeconds: this.TimeToRunInSeconds,
                maxClusterStabilizationTimeoutInSeconds: this.MaxClusterStabilizationTimeoutInSeconds,
                maxConcurrentFaults: this.MaxConcurrentFaults,
                enableMoveReplicaFaults: this.EnableMoveReplicaFaults,
                waitTimeBetweenFaultsInSeconds: this.WaitTimeBetweenFaultsInSeconds,
                waitTimeBetweenIterationsInSeconds: this.WaitTimeBetweenIterationsInSeconds,
                clusterHealthPolicy: healthPolicy,
                context: runContext,
                chaosTargetFilter: targetFilter);

            // This override is synchronous, so the asynchronous start call is waited on here.
            var startChaos = this.ServiceFabricClient.ChaosClient.StartChaosAsync(
                chaosParameters: parameters,
                serverTimeout: this.ServerTimeout,
                cancellationToken: this.CancellationToken);

            startChaos.GetAwaiter().GetResult();

            Console.WriteLine("Success!");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Emits a ChaosStarted trace event describing the supplied parameters.
        /// </summary>
        /// <param name="chaosParameters">The parameters to trace; validated as non-null.</param>
        /// <remarks>
        /// List-like values (node types, applications, context entries) are flattened with
        /// <c>ListEntryDelimeter</c>; anything absent is traced as an empty string.
        /// </remarks>
        public static void WriteChaosStartedTraceEvent(ChaosParameters chaosParameters)
        {
            Requires.Argument <ChaosParameters>("chaosParameters", chaosParameters).NotNull();

            Guid instanceId = Guid.NewGuid();

            // Scalar settings; durations are traced as seconds.
            long   concurrentFaults       = chaosParameters.MaxConcurrentFaults;
            double runSeconds             = chaosParameters.TimeToRun.TotalSeconds;
            double stabilizationSeconds   = chaosParameters.MaxClusterStabilizationTimeout.TotalSeconds;
            double iterationWaitSeconds   = chaosParameters.WaitTimeBetweenIterations.TotalSeconds;
            double faultWaitSeconds       = chaosParameters.WaitTimeBetweenFaults.TotalSeconds;
            bool   moveReplicaFaultsOn    = chaosParameters.EnableMoveReplicaFaults;

            // The target filter and either of its lists may be missing.
            ChaosTargetFilter filter       = chaosParameters.ChaosTargetFilter;
            IList <string>    nodeTypes    = filter == null ? null : filter.NodeTypeInclusionList;
            IList <string>    applications = filter == null ? null : filter.ApplicationInclusionList;

            string nodeTypesText = nodeTypes == null
                ? string.Empty
                : string.Join(ListEntryDelimeter, nodeTypes);

            string applicationsText = applications == null
                ? string.Empty
                : string.Join(ListEntryDelimeter, applications);

            var healthPolicy = chaosParameters.ClusterHealthPolicy;
            string healthPolicyText = healthPolicy == null
                ? string.Empty
                : healthPolicy.ToString();

            var context = chaosParameters.Context;
            string contextText = context == null
                ? string.Empty
                : string.Join(ListEntryDelimeter, context);

            FabricEvents.Events.ChaosStartedEvent(
                instanceId,
                concurrentFaults,
                runSeconds,
                stabilizationSeconds,
                iterationWaitSeconds,
                faultWaitSeconds,
                moveReplicaFaultsOn,
                nodeTypesText,
                applicationsText,
                healthPolicyText,
                contextText);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Records that a target filter was supplied and applies its inclusion lists.
 /// A null filter leaves this instance untouched.
 /// </summary>
 /// <param name="chaosTargetFilter">The filter to apply; may be null.</param>
 internal void ApplyChaosTargetFilter(ChaosTargetFilter chaosTargetFilter)
 {
     // Nothing to do when no filter was provided.
     if (chaosTargetFilter == null)
     {
         return;
     }

     this.ChaosTargetFilterPresent = true;
     this.ApplyChaosTargetInclusionFilters(chaosTargetFilter);
 }
        /// <summary>
        /// Creates the action and stores the supplied settings on it.
        /// </summary>
        /// <param name="chaosSnapshotTelemetrySamplingProbability">Stored as <c>TelemetrySamplingProbability</c>.</param>
        /// <param name="shouldFaultSystem">Stored as <c>ShouldFaultSystem</c>.</param>
        /// <param name="maximumNumberOfRetries">Stored as <c>MaximumNumberOfRetries</c>; must not be negative.</param>
        /// <param name="chaosTargetFilter">Stored as <c>ChaosTargetFilter</c>; not validated here.</param>
        internal GetClusterStateSnapshotAction(
            double chaosSnapshotTelemetrySamplingProbability,
            bool shouldFaultSystem,
            int maximumNumberOfRetries,
            ChaosTargetFilter chaosTargetFilter)
        {
            this.TelemetrySamplingProbability = chaosSnapshotTelemetrySamplingProbability;
            this.ShouldFaultSystem = shouldFaultSystem;

            // Reject a negative retry count before it is stored.
            bool retryCountIsNegative = maximumNumberOfRetries < 0;
            ThrowIf.IsTrue(retryCountIsNegative, "Value must be non-negative.");
            this.MaximumNumberOfRetries = maximumNumberOfRetries;

            this.ChaosTargetFilter = chaosTargetFilter;
        }
        /// <summary>
        /// Writes a <c>ChaosTargetFilter</c> as a JSON object.
        /// </summary>
        /// <param name="writer">The <see cref="Newtonsoft.Json.JsonWriter" /> that receives the output.</param>
        /// <param name="obj">The filter to write; its inclusion lists are emitted only when non-null.</param>
        internal static void Serialize(JsonWriter writer, ChaosTargetFilter obj)
        {
            // Required properties are always serialized, optional properties are serialized when not null.
            writer.WriteStartObject();

            var nodeTypes = obj.NodeTypeInclusionList;
            if (nodeTypes != null)
            {
                writer.WriteEnumerableProperty(
                    nodeTypes,
                    "NodeTypeInclusionList",
                    (w, v) => writer.WriteStringValue(v));
            }

            var applications = obj.ApplicationInclusionList;
            if (applications != null)
            {
                writer.WriteEnumerableProperty(
                    applications,
                    "ApplicationInclusionList",
                    (w, v) => writer.WriteStringValue(v));
            }

            writer.WriteEndObject();
        }
        public void ChaosParametersSerializationTest()
        {
            // Exercises the serializer with a fully populated ChaosParameters instance:
            // timing settings, health policy, context map and target filter.
            TimeSpan maxClusterStabilizationTimeout = TimeSpan.FromSeconds(997);
            long     maxConcurrentFaults            = 7;
            TimeSpan waitTimeBetweenIterations      = TimeSpan.FromSeconds(131);
            TimeSpan waitTimeBetweenFaults          = TimeSpan.FromSeconds(19);
            TimeSpan timeToRun = TimeSpan.FromSeconds(104729);
            bool     enableMoveReplicaFaults = true;

            var healthPolicy = new ClusterHealthPolicy
            {
                ConsiderWarningAsError          = false,
                MaxPercentUnhealthyNodes        = 10,
                MaxPercentUnhealthyApplications = 15
            };

            healthPolicy.ApplicationTypeHealthPolicyMap["TestApplicationTypePolicy"] = 11;

            var context = new Dictionary <string, string> {
                { "key1", "value1" }, { "key2", "value2" }
            };

            var nodeTypeInclusionList = new List <string> {
                "NodeType1", "NodeType2"
            };
            var applicationInclusionList = new List <string> {
                "fabric:/app1", "fabric:/app2"
            };

            var chaosTargetFilter = new ChaosTargetFilter
            {
                ApplicationInclusionList = applicationInclusionList,
                NodeTypeInclusionList    = nodeTypeInclusionList
            };

            // The filter was previously built but never attached, so it was never
            // serialized. Attach it so the round trip actually covers it.
            var chaosParameters = new ChaosParameters(
                maxClusterStabilizationTimeout,
                maxConcurrentFaults,
                enableMoveReplicaFaults,
                timeToRun,
                context,
                waitTimeBetweenIterations,
                waitTimeBetweenFaults,
                healthPolicy)
            {
                ChaosTargetFilter = chaosTargetFilter
            };

            this.TestUsingSerializer(this.Serializer, chaosParameters);
        }
Exemplo n.º 7
0
        static void Main(string[] args)
        {
            // README:
            //
            // The cluster certificate must be installed in the 'CurrentUser' certificate store.
            //
            // REQUIRED STEPS:
            // - Set clientCertThumb / serverCertThumb to your Service Fabric certificate's thumbprint.
            // - Set clusterDomainName to the domain name of your Service Fabric cluster.
            // - Put your cluster's node types into nodeTypes (and your applications into applications).

            string clientCertThumb   = "D6426E96E0169B60ED030E53FCD05EAC12AAA1E0";
            string serverCertThumb   = "D6426E96E0169B60ED030E53FCD05EAC12AAA1E0";
            string clusterDomainName = "dotjson.westeurope.cloudapp.azure.com";

            string commonName      = $"www.{clusterDomainName}";
            string clusterEndpoint = $"{clusterDomainName}:19000";

            var creds = GetCredentials(clientCertThumb, serverCertThumb, commonName);

            Console.WriteLine($"Connecting to cluster {clusterEndpoint} using certificate '{clientCertThumb}'.");
            using (var client = new FabricClient(creds, clusterEndpoint))
            {
                // Lower bound of the report window requested further down.
                var startTimeUtc = DateTime.UtcNow;

                // Node types whose nodes are eligible for every fault kind (restart node,
                // restart code package, restart replica, move primary/secondary replica).
                var nodeTypes = new List<string> { "nodetype0" };

                // Applications that additionally receive code-package and replica faults,
                // even when they are hosted on nodes whose type is not listed in nodeTypes.
                var applications = new List<string> { "fabric:/Exchange" };

                // Health policy used to validate the cluster between Chaos iterations.
                var clusterHealthPolicy = new ClusterHealthPolicy
                {
                    ConsiderWarningAsError          = false,
                    MaxPercentUnhealthyApplications = 100,
                    MaxPercentUnhealthyNodes        = 100
                };

                // Free-form (string, string) pairs describing this run; at most 100 pairs,
                // each key or value at most 4095 characters long.
                var startContext = new Dictionary<string, string>
                {
                    { "ReasonForStart", "Testing" }
                };

                var parameters = new ChaosParameters(
                    // Maximum time to wait for all cluster entities to become stable and healthy.
                    // Health is validated at the start of each iteration; entities that do not
                    // settle within this time produce a validation failed event.
                    TimeSpan.FromSeconds(30.0),

                    // Maximum number of concurrent faults per iteration. Higher values inject
                    // faults more aggressively; start with 2 or 3 and raise with caution.
                    3,

                    true, /* EnableMoveReplicaFault */

                    // Total time Chaos is asked to run.
                    TimeSpan.FromMinutes(60.0),

                    startContext,

                    // Pause between two consecutive iterations; larger means a lower fault rate.
                    TimeSpan.FromSeconds(10),

                    // Pause between consecutive faults within one iteration; larger means less
                    // overlap between faults. A starting value between 1 and 5 is recommended.
                    TimeSpan.Zero,

                    clusterHealthPolicy);

                // Restrict the faults to the cluster entities selected above.
                parameters.ChaosTargetFilter = new ChaosTargetFilter
                {
                    NodeTypeInclusionList    = nodeTypes,
                    ApplicationInclusionList = applications
                };

                try
                {
                    client.TestManager.StartChaosAsync(parameters).GetAwaiter().GetResult();
                    System.Threading.Thread.Sleep(TimeSpan.FromSeconds(30)); // Allow enough time for Chaos engine to start
                }
                catch (FabricChaosAlreadyRunningException)
                {
                    Console.WriteLine("An instance of Chaos is already running in the cluster");
                }

                var filter = new ChaosReportFilter(startTimeUtc, DateTime.MaxValue);

                // Events already printed; the comparer decides what counts as a duplicate.
                var seenEvents = new HashSet<ChaosEvent>(new ChaosEventComparer());

                var pollInterval = TimeSpan.FromSeconds(1.0);
                string continuationToken = null;

                while (true)
                {
                    ChaosReport report;
                    try
                    {
                        // Without a continuation token, query by time window; otherwise
                        // fetch the next page of the previous query.
                        if (string.IsNullOrEmpty(continuationToken))
                        {
                            report = client.TestManager.GetChaosReportAsync(filter).GetAwaiter().GetResult();
                        }
                        else
                        {
                            report = client.TestManager.GetChaosReportAsync(continuationToken).GetAwaiter().GetResult();
                        }
                    }
                    catch (Exception e) when (e is FabricTransientException || e is TimeoutException)
                    {
                        // Transient failures and timeouts are logged and retried after a short
                        // pause; any other exception propagates to the caller.
                        string message = e is FabricTransientException
                            ? "A transient exception happened: '{0}'"
                            : "A timeout exception happened: '{0}'";

                        Console.WriteLine(message, e);

                        Task.Delay(pollInterval).GetAwaiter().GetResult();
                        continue;
                    }

                    continuationToken = report.ContinuationToken;

                    foreach (var chaosEvent in report.History)
                    {
                        if (!seenEvents.Add(chaosEvent))
                        {
                            continue;
                        }

                        Console.WriteLine(chaosEvent);
                    }

                    // Chaos emits a StoppedEvent when it stops; once that is the last event
                    // of a report, polling is finished.
                    if (report.History.LastOrDefault() is StoppedEvent)
                    {
                        break;
                    }

                    Task.Delay(pollInterval).GetAwaiter().GetResult();
                }
            }
        }
Exemplo n.º 8
0
 /// <summary>
 /// Hands the filter's node type list and application list to their respective
 /// inclusion-filter helpers, in that order.
 /// </summary>
 /// <param name="chaosTargetFilter">The filter to read the lists from; not null-checked here.</param>
 private void ApplyChaosTargetInclusionFilters(ChaosTargetFilter chaosTargetFilter)
 {
     var nodeTypes = chaosTargetFilter.NodeTypeInclusionList;
     this.ApplyNodeTypeInclusionFilter(nodeTypes);

     var applications = chaosTargetFilter.ApplicationInclusionList;
     this.ApplyApplicationInclusionFilter(applications);
 }
Exemplo n.º 9
0
        /// <inheritdoc />
        public override object ReadJson(
            JsonReader reader,
            Type objectType,
            object existingValue,
            JsonSerializer serializer)
        {
            ThrowIf.Null(reader, "reader");

            // A JSON null token maps to a null result.
            if (reader.TokenType == JsonToken.Null)
            {
                return null;
            }

            // Defaults used for any property that is missing from the JSON object.
            TimeSpan stabilizationTimeout = ChaosConstants.DefaultClusterStabilizationTimeout;
            long     concurrentFaults     = ChaosConstants.MaxConcurrentFaultsDefault;
            TimeSpan iterationWait        = ChaosConstants.WaitTimeBetweenIterationsDefault;
            TimeSpan faultWait            = ChaosConstants.WaitTimeBetweenFaultsDefault;
            TimeSpan runDuration          = TimeSpan.FromSeconds(uint.MaxValue);
            bool     moveReplicaFaults    = false;
            ClusterHealthPolicy clusterHealthPolicy = new ClusterHealthPolicy();
            Dictionary <string, string> contextEntries = null;
            ChaosTargetFilter targetFilter = null;

            var json = JObject.Load(reader);

            // Time periods are read by the shared helper, which receives the current default by ref.
            this.ReadTimePeriod(
                json,
                JsonSerializerImplConstants.MaxClusterStabilizationTimeoutInSeconds,
                ref stabilizationTimeout);

            JToken concurrentFaultsToken = json[JsonSerializerImplConstants.MaxConcurrentFaults];
            if (concurrentFaultsToken != null)
            {
                concurrentFaults = concurrentFaultsToken.Value <long>();
            }

            this.ReadTimePeriod(json, JsonSerializerImplConstants.WaitTimeBetweenIterationsInSeconds, ref iterationWait);
            this.ReadTimePeriod(json, JsonSerializerImplConstants.WaitTimeBetweenFaultsInSeconds, ref faultWait);
            this.ReadTimePeriod(json, JsonSerializerImplConstants.TimeToRunInSeconds, ref runDuration);

            JToken moveReplicaFaultsToken = json[JsonSerializerImplConstants.EnableMoveReplicaFaults];
            if (moveReplicaFaultsToken != null)
            {
                moveReplicaFaults = moveReplicaFaultsToken.Value <bool>();
            }

            JToken healthPolicyToken = json[JsonSerializerImplConstants.ClusterHealthPolicy];
            if (healthPolicyToken != null)
            {
                clusterHealthPolicy = healthPolicyToken.ToObject <ClusterHealthPolicy>(serializer);
            }

            // The context dictionary is nested one level down, under the context object's map property.
            JToken contextToken = json[JsonSerializerImplConstants.Context];
            JToken mapToken = contextToken != null
                ? contextToken[JsonSerializerImplConstants.Map]
                : null;
            if (mapToken != null)
            {
                contextEntries = mapToken.ToObject <Dictionary <string, string> >(serializer);
            }

            // The target filter is read with its own serializer (null values ignored),
            // not with the serializer passed to this method.
            JToken targetFilterToken = json[JsonSerializerImplConstants.ChaosTargetFilter];
            if (targetFilterToken != null)
            {
                var filterSerializer = new JsonSerializer
                {
                    NullValueHandling = NullValueHandling.Ignore
                };

                targetFilter = targetFilterToken.ToObject <ChaosTargetFilter>(filterSerializer);
            }

            var result = new ChaosParameters(
                stabilizationTimeout,
                concurrentFaults,
                moveReplicaFaults,
                runDuration,
                contextEntries,
                iterationWait,
                faultWait,
                clusterHealthPolicy);

            result.ChaosTargetFilter = targetFilter;

            return result;
        }
Exemplo n.º 10
0
        /// <summary>
        /// Starts a Chaos run against the configured cluster and polls the Chaos report
        /// until a report's last event is a <c>StoppedEvent</c>.
        /// </summary>
        /// <param name="minsTorun">Number of minutes Chaos is asked to run.</param>
        /// <returns>
        /// The report whose last history entry is the <c>StoppedEvent</c>. Only that report
        /// is returned, not an aggregate of every polled report.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when Chaos is already running in the cluster; the running instance is
        /// asked to stop before the exception is raised.
        /// </exception>
        public static async Task <ChaosReport> RunTest(int minsTorun)
        {
            string clientCertThumb = "87b906f84a251c015d44ea188e2eff322d1c16f8";
            string serverCertThumb = "87b906f84a251c015d44ea188e2eff322d1c16f8";
            string CommonName      = "memoryleak";
            string connection      = "sf-memoryleak.eastus.cloudapp.azure.com:19000";

            var xc = GetCredentials(clientCertThumb, serverCertThumb, CommonName);

            using (var client = new FabricClient(xc, connection))
            {
                // Lower bound of the report window requested further down.
                var startTimeUtc = DateTime.UtcNow;
                var maxClusterStabilizationTimeout = TimeSpan.FromSeconds(30.0);
                var timeToRun = TimeSpan.FromMinutes(minsTorun);

                // The recommendation is to start with a value of 2 or 3 and to exercise caution while moving up.
                var maxConcurrentFaults = 3;

                var startContext = new Dictionary <string, string> {
                    { "ReasonForStart", "Testing" }
                };

                // Time-separation (in seconds) between two consecutive iterations of Chaos. The larger the value, the
                // lower the fault injection rate.
                var waitTimeBetweenIterations = TimeSpan.FromSeconds(1);

                // Wait time (in seconds) between consecutive faults within a single iteration.
                // The larger the value, the lower the overlapping between faults and the simpler the sequence of
                // state transitions that the cluster goes through.
                var waitTimeBetweenFaults = TimeSpan.FromSeconds(1);

                // Passed-in cluster health policy is used to validate health of the cluster in between Chaos iterations.
                var clusterHealthPolicy = new ClusterHealthPolicy
                {
                    ConsiderWarningAsError          = false,
                    MaxPercentUnhealthyApplications = 100,
                    MaxPercentUnhealthyNodes        = 100
                };

                var nodetypeInclusionList = new List <string> {
                    "nt2vm", "nt3vm"
                };

                // Only node types are targeted. Set ApplicationInclusionList on the filter
                // as well to additionally target specific applications.
                var chaosTargetFilter = new ChaosTargetFilter
                {
                    NodeTypeInclusionList = nodetypeInclusionList
                };

                var parameters = new ChaosParameters(
                    maxClusterStabilizationTimeout,
                    maxConcurrentFaults,
                    true, /* EnableMoveReplicaFault */
                    timeToRun,
                    startContext,
                    waitTimeBetweenIterations,
                    waitTimeBetweenFaults,
                    clusterHealthPolicy)
                {
                    ChaosTargetFilter = chaosTargetFilter
                };

                try
                {
                    await client.TestManager.StartChaosAsync(parameters);
                }
                catch (FabricChaosAlreadyRunningException)
                {
                    Console.WriteLine("An instance of Chaos is already running in the cluster.");
                    await client.TestManager.StopChaosAsync();

                    throw new Exception("Chaos test already running");
                }

                var filter = new ChaosReportFilter(startTimeUtc, DateTime.MaxValue);
                var pollInterval = TimeSpan.FromSeconds(1.0);

                string continuationToken = null;

                while (true)
                {
                    ChaosReport report;
                    try
                    {
                        // Without a continuation token, query by time window; otherwise
                        // fetch the next page of the previous query.
                        report = string.IsNullOrEmpty(continuationToken)
                            ? await client.TestManager.GetChaosReportAsync(filter)
                            : await client.TestManager.GetChaosReportAsync(continuationToken);
                    }
                    catch (Exception e) when (e is FabricTransientException || e is TimeoutException)
                    {
                        // Transient failures and timeouts are logged and retried; any other
                        // exception propagates to the caller.
                        Console.WriteLine(
                            e is FabricTransientException
                                ? "A transient exception happened: '{0}'"
                                : "A timeout exception happened: '{0}'",
                            e);

                        // Await the delay instead of blocking the thread inside an async method.
                        await Task.Delay(pollInterval);
                        continue;
                    }

                    continuationToken = report.ContinuationToken;

                    // When Chaos stops, a StoppedEvent is created.
                    // If a StoppedEvent is found, return the report that contains it.
                    if (report.History.LastOrDefault() is StoppedEvent)
                    {
                        return report;
                    }

                    await Task.Delay(pollInterval);
                }
            }
        }