Пример #1
0
        /// <summary>
        /// Verifies the supplied schedule, stores it (in memory and in the reliable store), and moves the
        /// scheduler state machine with <c>Command.SetSchedule</c>.
        /// </summary>
        /// <remarks>
        /// Must only be called when inside the semaphore.
        /// If the peeked state is <c>SchedulerState.ChaosScheduleActive</c>, a warning is traced and
        /// <c>ChaosUtil.ThrowAlreadyRunning()</c> is called instead. Any other peeked state is a no-op.
        /// </remarks>
        /// <param name="scheduleDescription">Description whose <c>Schedule</c> is verified and then persisted.</param>
        /// <param name="cancellationToken">Token forwarded to every asynchronous call made here.</param>
        /// <returns>A task that completes when the schedule has been stored and the state move attempted.</returns>
        private async Task SetScheduleInternalAsync(ChaosScheduleDescription scheduleDescription, CancellationToken cancellationToken)
        {
            TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "Enter SetScheduleInternalAsync");

            // Peek at the state machine for the SetSchedule command before doing any work.
            var peekedState = this.PeakMoveState(Command.SetSchedule);

            if (peekedState.Equals(SchedulerState.NoChaosSchedulePending))
            {
                // ConfigureAwait(false) matches the other awaits in this method; nothing below
                // needs to resume on a captured synchronization context.
                await ChaosSchedulerUtil.VerifyChaosScheduleAsync(scheduleDescription.Schedule, this.fabricClient, cancellationToken).ConfigureAwait(false);

                TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "schedule verified");

                // NOTE(review): the in-memory field is updated before the reliable-store write; if the
                // write throws, the field keeps the unpersisted schedule — confirm this is acceptable.
                this.scheduleDescription = scheduleDescription;
                await this.WriteScheduleToReliableStoreAsync(scheduleDescription, cancellationToken).ConfigureAwait(false);

                // Rebuild the event-instance enumerator for the new schedule, anchored at the current
                // UTC time, and advance it once so it is positioned on its first element.
                this.eventInstancesEnumerator = new ChaosScheduleEventInstancesEnumerator(this.scheduleDescription.Schedule, DateTime.UtcNow);
                this.eventInstancesEnumerator.MoveNext();

                await this.TryMoveStateAsync(Command.SetSchedule, cancellationToken).ConfigureAwait(false);

                // Presumably throws if the scheduler did not end up in NoChaosSchedulePending — verify
                // against CheckStateAndThrowOnError.
                this.CheckStateAndThrowOnError(SchedulerState.NoChaosSchedulePending);
            }
            else if (peekedState.Equals(SchedulerState.ChaosScheduleActive))
            {
                TestabilityTrace.TraceSource.WriteWarning(TraceComponent, "Attempting to set schedule when chaos was running");

                ChaosUtil.ThrowAlreadyRunning();
            }
        }
Пример #2
0
        /// <summary>
        /// Validates a chaos schedule, throwing on the first problem encountered.
        /// </summary>
        /// <remarks>
        /// Checks run in this order: the schedule itself, its start/expiry dates, the chaos parameters
        /// dictionary (each entry's value is passed to <c>ChaosUtil.ValidateChaosTargetFilterAsync</c>),
        /// the jobs collection (each job is passed to <c>VerifyChaosScheduleJob</c>), and finally the
        /// cross-item checks <c>FindMissingChaosParameterReferences</c> and <c>FindScheduleConflicts</c>.
        /// </remarks>
        /// <param name="schedule">Schedule to validate.</param>
        /// <param name="fabricClient">Client handed to the target-filter validation.</param>
        /// <param name="cancellationToken">Token handed to the target-filter validation.</param>
        /// <returns>A task that completes once every check has passed.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// The schedule, its start date, expiry date, parameters dictionary, or jobs is null.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// A date precedes <c>ChaosConstants.FileTimeMinDateTime</c>, or the expiry date precedes the start date.
        /// </exception>
        public static async Task VerifyChaosScheduleAsync(
            ChaosSchedule schedule,
            FabricClient fabricClient,
            CancellationToken cancellationToken)
        {
            if (schedule == null)
            {
                throw new System.ArgumentNullException("Schedule", StringResources.ChaosScheduler_ScheduleIsNull);
            }

            ThrowIfScheduleDatesAreInvalid(schedule);

            var parametersByName = schedule.ChaosParametersDictionary;
            if (parametersByName == null)
            {
                throw new System.ArgumentNullException("ChaosParametersDictionary", StringResources.ChaosScheduler_ScheduleParametersDictionaryIsNull);
            }

            // The same one-minute span is supplied for both TimeSpan arguments of the validator.
            var oneMinute = new TimeSpan(0, 1, 0);

            foreach (var namedParameters in parametersByName)
            {
                await ChaosUtil.ValidateChaosTargetFilterAsync(
                    fabricClient,
                    namedParameters.Value,
                    oneMinute,
                    oneMinute,
                    cancellationToken).ConfigureAwait(false);
            }

            var jobs = schedule.Jobs;
            if (jobs == null)
            {
                throw new System.ArgumentNullException("Jobs", StringResources.ChaosScheduler_ScheduleJobsIsNull);
            }

            // Each job is checked on its own first; the combination of items is checked afterwards.
            foreach (var scheduledJob in jobs)
            {
                ChaosSchedulerUtil.VerifyChaosScheduleJob(scheduledJob);
            }

            ChaosSchedulerUtil.FindMissingChaosParameterReferences(schedule);
            ChaosSchedulerUtil.FindScheduleConflicts(schedule);
        }

        /// <summary>
        /// Checks that the start and expiry dates of a non-null schedule are present, not earlier than
        /// <c>ChaosConstants.FileTimeMinDateTime</c>, and that the expiry date is not before the start date.
        /// </summary>
        private static void ThrowIfScheduleDatesAreInvalid(ChaosSchedule schedule)
        {
            var startDate = schedule.StartDate;
            if (startDate == null)
            {
                throw new System.ArgumentNullException("StartDate", StringResources.ChaosScheduler_ScheduleStartDateIsNull);
            }

            var expiryDate = schedule.ExpiryDate;
            if (expiryDate == null)
            {
                throw new System.ArgumentNullException("ExpiryDate", StringResources.ChaosScheduler_ScheduleExpiryDateIsNull);
            }

            if (startDate < ChaosConstants.FileTimeMinDateTime)
            {
                throw new System.ArgumentException(string.Format(StringResources.ChaosScheduler_ScheduleStartDateBeforeFileTimeEpoch, startDate), "StartDate");
            }

            if (expiryDate < ChaosConstants.FileTimeMinDateTime)
            {
                throw new System.ArgumentException(string.Format(StringResources.ChaosScheduler_ScheduleExpiryDateBeforeFileTimeEpoch, expiryDate), "ExpiryDate");
            }

            if (expiryDate < startDate)
            {
                throw new System.ArgumentException(string.Format(StringResources.ChaosScheduler_ScheduleExpiryDateBeforeStartDate, expiryDate, startDate), "ExpiryDate");
            }
        }
Пример #3
0
        /// <summary>
        /// Runs Chaos iterations in a loop, pausing between them, for as long as
        /// <c>this.cancellationToken</c> has not been signalled.
        /// </summary>
        /// <remarks>
        /// Each pass of the loop:
        /// 1. waits out any time remaining until the persisted next-iteration timestamp;
        /// 2. if <c>IsClusterReadyForFaultsAsync</c> returns true, validates cluster health and either
        ///    executes a fault iteration (validation passed) or records a <c>ValidationFailedEvent</c>
        ///    and backs off (validation failed);
        /// 3. records a <c>TestErrorEvent</c> for any caught exception that
        ///    <c>ChaosUtil.IsExpectedException</c> does not accept;
        /// 4. registers a <c>WaitingEvent</c>, delays for the current wait time, and increments
        ///    <c>IterationsCompleted</c>.
        /// The delays and state-manager calls are given the cancellation token, so cancellation while
        /// one of them is pending presumably surfaces as an exception from this method rather than a
        /// normal loop exit — confirm against the callers.
        /// </remarks>
        /// <returns>A task that completes when the loop exits.</returns>
        internal async Task ExecuteIterationsWithPauseAsync()
        {
            // NOTE(review): this trace passes the literal "StartTrek" where the rest of the method
            // passes TraceType — confirm this is intentional.
            TestabilityTrace.TraceSource.WriteNoise("StartTrek", "Enter ExecuteIterationsWithPauseAsync, datetimeutc={0}", DateTime.UtcNow);

            // Fresh stopwatch for this session.
            this.stopwatch = new Stopwatch();
            this.stopwatch.Start();

            // Holds an exception caught in the try block below so it can be processed after the catch.
            Exception capturedException = null;

            // Current pause between iterations; starts at the configured value and grows on failures.
            TimeSpan waitTime = this.ChaosParameters.WaitTimeBetweenIterations;

            while (!this.cancellationToken.IsCancellationRequested)
            {
                // If this is not the beginning of a fresh Chaos run, before starting a new iteration,
                // consult the NextIterationTimeStampRD to find out if there is some
                // residual wait time from the previous iteration, if there is then wait that amount
                var nextIterationTimeStampUtc = await this.StateManager.GetUtcTimeStampAsync(
                    FASConstants.NextItearationTimeStampRDName,
                    FASConstants.NextItearationTimeStampKey,
                    this.partition,
                    this.cancellationToken).ConfigureAwait(false);

                var residualWaitTime = nextIterationTimeStampUtc.Subtract(DateTime.UtcNow);

                if (residualWaitTime > TimeSpan.Zero)
                {
                    // Record that we are waiting out the leftover time, then wait it out.
                    await this.StateManager.RegisterChaosEventAndUpdateChaosStatusAsync(
                        new WaitingEvent(DateTime.UtcNow, StringHelper.Format(StringResources.ChaosInfo_ResidualWaitingFromPreviousIteration, residualWaitTime)),
                        ChaosStatus.Running,
                        this.partition,
                        this.cancellationToken,
                        () =>
                    {
                        TestabilityTrace.TraceSource.WriteInfo(TraceType, "Registering WaitingEvent for waiting '{0}' left over from the previous iteration.", residualWaitTime);
                    }).ConfigureAwait(false);

                    await Task.Delay(residualWaitTime, this.cancellationToken).ConfigureAwait(false);
                }

                try
                {
                    // When the cluster is not ready for faults nothing happens in this try block and
                    // control falls through to the regular between-iterations wait below.
                    if (await this.IsClusterReadyForFaultsAsync(this.cancellationToken).ConfigureAwait(false))
                    {
                        // One timeout helper, built from MaxClusterStabilizationTimeout, supplies the
                        // timeout argument for both validation calls below.
                        System.Fabric.Common.TimeoutHelper timer = new System.Fabric.Common.TimeoutHelper(this.ChaosParameters.MaxClusterStabilizationTimeout);

                        StringBuilder validationReport = new StringBuilder();

                        var clusterReport =
                            await
                            this.validationHelper.ValidateClusterHealthAsync(
                                timer.GetRemainingTime(),
                                this.cancellationToken).ConfigureAwait(false);

                        if (clusterReport.ValidationFailed)
                        {
                            // Back off by doubling the wait (not quadratic growth). Growth stops once waitTime
                            // has reached DefaultMaximumBackoffForChaosIterations, so the value can
                            // overshoot that bound by up to (just under) a factor of two.
                            waitTime += (waitTime >= FASConstants.DefaultMaximumBackoffForChaosIterations) ? TimeSpan.Zero : waitTime;

                            var serviceReport =
                                await
                                this.validationHelper.ValidateAllServicesAsync(
                                    timer.GetRemainingTime(),
                                    this.cancellationToken).ConfigureAwait(false);

                            if (serviceReport.ValidationFailed)
                            {
                                // NOTE(review): this message says faults will be induced because the cluster
                                // is healthy, but this branch runs when cluster validation FAILED and no
                                // fault iteration is executed here — the message looks misplaced.
                                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Even though some services are unhealthy or unstable, going to induce faults, because the cluster is healthy.");

                                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Failure reason: \n'{0}'", serviceReport.FailureReason);

                                validationReport.Append(serviceReport.FailureReason);
                            }

                            // Final report layout: waiting notice, cluster failure reason, then (if any)
                            // the service failure reason appended above.
                            StringBuilder reportBuilder = new StringBuilder();
                            reportBuilder.Append(StringHelper.Format(StringResources.ChaosInfo_WaitingNotice, waitTime));
                            reportBuilder.AppendLine();
                            reportBuilder.AppendLine(clusterReport.FailureReason);

                            validationReport.Insert(0, reportBuilder.ToString());

                            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Cluster validation failed for '{0}'.", clusterReport.FailureReason);

                            var validationFailedEvent = new ValidationFailedEvent(DateTime.UtcNow, validationReport.ToString());

                            // record validation failed event
                            await this.StateManager.RegisterChaosEventAndUpdateChaosStatusAsync(
                                validationFailedEvent,
                                ChaosStatus.Running,
                                this.partition,
                                this.cancellationToken,
                                () =>
                            {
                                FabricEvents.Events.ChaosValidationFailed(
                                    Guid.NewGuid().ToString(),
                                    validationFailedEvent.TimeStampUtc.Ticks,
                                    validationFailedEvent.Reason);
                            }).ConfigureAwait(false);

                            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Pausing for '{0}' before performing next check.", waitTime);
                        }
                        else
                        {
                            // Cluster validation passed: drop any accumulated backoff.
                            waitTime = this.ChaosParameters.WaitTimeBetweenIterations;

                            await this.StateManager.RegisterCurrentStatusAsync(
                                ChaosStatus.Running,
                                this.partition,
                                this.cancellationToken).ConfigureAwait(false);

                            // Persist when the next iteration is due BEFORE executing faults; the residual
                            // wait at the top of the loop reads this value back.
                            var timestampOfNextIteration = DateTime.UtcNow.Add(waitTime);

                            await this.StateManager.SetUtcTimeStampAsync(
                                FASConstants.NextItearationTimeStampRDName,
                                FASConstants.NextItearationTimeStampKey,
                                timestampOfNextIteration,
                                this.partition,
                                this.cancellationToken).ConfigureAwait(false);

                            await this.ExecuteFaultIterationAsync(this.cancellationToken).ConfigureAwait(false);

                            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Pausing for '{0}' before executing next iteration.", waitTime);
                        }
                    }
                }
                catch (Exception exception)
                {
                    // Remember the exception; it is classified and reported after this catch block.
                    capturedException = exception;

                    // Back off by doubling the wait (not quadratic growth). Growth stops once waitTime
                    // has reached DefaultMaximumBackoffForChaosIterations, so the value can
                    // overshoot that bound by up to (just under) a factor of two.
                    waitTime += (waitTime >= FASConstants.DefaultMaximumBackoffForChaosIterations) ? TimeSpan.Zero : waitTime;

                    var timestampOfNextIteration = DateTime.UtcNow.Add(waitTime);

                    // An exception thrown by this await escapes the catch block and the method.
                    await this.StateManager.SetUtcTimeStampAsync(
                        FASConstants.NextItearationTimeStampRDName,
                        FASConstants.NextItearationTimeStampKey,
                        timestampOfNextIteration,
                        this.partition,
                        this.cancellationToken).ConfigureAwait(false);

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Exception occurred in the ChaosTestScenario loop: {0}, cancellationreq: {1}", capturedException, this.cancellationToken.IsCancellationRequested);
                }

                if (capturedException != null)
                {
                    // Unwrap an AggregateException to the inner exception of its flattened form.
                    var ae = capturedException as AggregateException;
                    if (ae != null)
                    {
                        capturedException = ae.Flatten().InnerException;
                    }

                    if (!ChaosUtil.IsExpectedException(capturedException))
                    {
                        string testErrorEventMessage = capturedException.Message;

                        // For FabricChaosEngineException the event carries a fixed resource string
                        // instead of the exception's own message.
                        if (capturedException is FabricChaosEngineException)
                        {
                            testErrorEventMessage = StringResources.ChaosError_UnexpectedInChaosEngine;
                        }

                        await this.StateManager.RegisterChaosEventAndUpdateChaosStatusAsync(
                            new TestErrorEvent(DateTime.UtcNow, testErrorEventMessage),
                            ChaosStatus.Running,
                            this.partition,
                            this.cancellationToken).ConfigureAwait(false);

                        TestabilityTrace.TraceSource.WriteInfo(TraceType, "Unexpected exception '{0}' was turned into TestErrorEvent.", capturedException);
                    }

                    // handled the exception, now clear it
                    capturedException = null;
                }

                // Test-only hook: report a transient fault on the partition when the Chaos context
                // contains FailoverAfterChaosFaultsKey.
                if (this.testMode && (this.ChaosParameters.Context != null && this.ChaosParameters.Context.ContainsKey(ChaosConstants.FailoverAfterChaosFaultsKey)))
                {
                    this.partition.ReportFault(FaultType.Transient);
                }

                // Record the between-iterations wait, then perform it.
                await this.StateManager.RegisterChaosEventAndUpdateChaosStatusAsync(
                    new WaitingEvent(DateTime.UtcNow, StringHelper.Format(StringResources.ChaosInfo_WaitingBetweenIterations, waitTime)),
                    ChaosStatus.Running,
                    this.partition,
                    this.cancellationToken,
                    () =>
                {
                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Registering WaitingEvent for waiting '{0}' between iterations.", waitTime);
                }).ConfigureAwait(false);

                await Task.Delay(waitTime, this.cancellationToken).ConfigureAwait(false);

                // Counted on every pass of the loop, including passes where no faults were executed
                // (cluster not ready, validation failed, or an exception was caught).
                this.IterationsCompleted++;
            }

            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Session has completed. \nTotal iterations: {0}. Total elapsed time: {1}", this.IterationsCompleted, this.GetElapsedTime());
        }