/// <summary>
/// Verifies the supplied schedule, persists it, and advances the scheduler state machine
/// with <c>Command.SetSchedule</c>.
/// </summary>
/// <remarks>
/// Must only be called when inside the semaphore.
/// The branch taken depends on the value returned by <c>PeakMoveState(Command.SetSchedule)</c>:
/// <list type="bullet">
/// <item><description><c>SchedulerState.NoChaosSchedulePending</c>: the schedule is verified, stored in
/// <c>this.scheduleDescription</c>, written to the reliable store, a fresh event-instance enumerator is
/// created and advanced once, and the state move is performed.</description></item>
/// <item><description><c>SchedulerState.ChaosScheduleActive</c>: a warning is traced and
/// <c>ChaosUtil.ThrowAlreadyRunning()</c> is invoked.</description></item>
/// <item><description>Any other value: nothing is done.</description></item>
/// </list>
/// </remarks>
/// <param name="scheduleDescription">Description whose <c>Schedule</c> is verified and installed.</param>
/// <param name="cancellationToken">Token flowed to verification, persistence and the state move.</param>
private async Task SetScheduleInternalAsync(ChaosScheduleDescription scheduleDescription, CancellationToken cancellationToken)
{
    // Must only be called when inside the semaphore
    TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "Enter SetScheduleInternalAsync");

    var peak = this.PeakMoveState(Command.SetSchedule);

    if (peak.Equals(SchedulerState.NoChaosSchedulePending))
    {
        // ConfigureAwait(false) keeps this await consistent with every other await in this method.
        await ChaosSchedulerUtil.VerifyChaosScheduleAsync(scheduleDescription.Schedule, this.fabricClient, cancellationToken).ConfigureAwait(false);
        TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "schedule verified");

        this.scheduleDescription = scheduleDescription;
        await this.WriteScheduleToReliableStoreAsync(scheduleDescription, cancellationToken).ConfigureAwait(false);

        // Rebuild the enumerator from the newly stored schedule, anchored at the current UTC time,
        // and advance it once so it is positioned on its first element.
        this.eventInstancesEnumerator = new ChaosScheduleEventInstancesEnumerator(this.scheduleDescription.Schedule, DateTime.UtcNow);
        this.eventInstancesEnumerator.MoveNext();

        await this.TryMoveStateAsync(Command.SetSchedule, cancellationToken).ConfigureAwait(false);

        // NOTE(review): the exact contract of CheckStateAndThrowOnError is not visible here;
        // it is invoked with the state that was observed before the move - confirm intent.
        this.CheckStateAndThrowOnError(SchedulerState.NoChaosSchedulePending);
    }
    else if (peak.Equals(SchedulerState.ChaosScheduleActive))
    {
        TestabilityTrace.TraceSource.WriteWarning(TraceComponent, "Attempting to set schedule when chaos was running");

        ChaosUtil.ThrowAlreadyRunning();
    }
}
/// <summary>
/// Validates a Chaos schedule, throwing on the first problem found.
/// </summary>
/// <remarks>
/// Checks run in this order: the schedule itself, its start and expiry dates (presence, lower bound
/// against <c>ChaosConstants.FileTimeMinDateTime</c>, and relative ordering), the parameters dictionary
/// (each entry is passed to <c>ChaosUtil.ValidateChaosTargetFilterAsync</c>), the jobs collection
/// (each job individually), and finally the cross-item checks for missing parameter references
/// and schedule conflicts.
/// </remarks>
/// <param name="schedule">The schedule to validate.</param>
/// <param name="fabricClient">Client handed to the target-filter validation.</param>
/// <param name="cancellationToken">Token handed to the target-filter validation.</param>
/// <exception cref="System.ArgumentNullException">
/// The schedule, its start date, its expiry date, its parameters dictionary, or its jobs collection is null.
/// </exception>
/// <exception cref="System.ArgumentException">
/// A date precedes <c>ChaosConstants.FileTimeMinDateTime</c>, or the expiry date precedes the start date.
/// </exception>
public static async Task VerifyChaosScheduleAsync(
    ChaosSchedule schedule,
    FabricClient fabricClient,
    CancellationToken cancellationToken)
{
    // ---- Presence checks ----
    if (schedule == null)
    {
        throw new System.ArgumentNullException("Schedule", StringResources.ChaosScheduler_ScheduleIsNull);
    }

    if (schedule.StartDate == null)
    {
        throw new System.ArgumentNullException("StartDate", StringResources.ChaosScheduler_ScheduleStartDateIsNull);
    }

    if (schedule.ExpiryDate == null)
    {
        throw new System.ArgumentNullException("ExpiryDate", StringResources.ChaosScheduler_ScheduleExpiryDateIsNull);
    }

    // ---- Date range checks ----
    if (schedule.StartDate < ChaosConstants.FileTimeMinDateTime)
    {
        var startTooEarlyMessage = string.Format(StringResources.ChaosScheduler_ScheduleStartDateBeforeFileTimeEpoch, schedule.StartDate);
        throw new System.ArgumentException(startTooEarlyMessage, "StartDate");
    }

    if (schedule.ExpiryDate < ChaosConstants.FileTimeMinDateTime)
    {
        var expiryTooEarlyMessage = string.Format(StringResources.ChaosScheduler_ScheduleExpiryDateBeforeFileTimeEpoch, schedule.ExpiryDate);
        throw new System.ArgumentException(expiryTooEarlyMessage, "ExpiryDate");
    }

    if (schedule.ExpiryDate < schedule.StartDate)
    {
        var expiryBeforeStartMessage = string.Format(StringResources.ChaosScheduler_ScheduleExpiryDateBeforeStartDate, schedule.ExpiryDate, schedule.StartDate);
        throw new System.ArgumentException(expiryBeforeStartMessage, "ExpiryDate");
    }

    // ---- Chaos parameters ----
    if (schedule.ChaosParametersDictionary == null)
    {
        throw new System.ArgumentNullException("ChaosParametersDictionary", StringResources.ChaosScheduler_ScheduleParametersDictionaryIsNull);
    }

    // The same one-minute value is supplied for both TimeSpan arguments of the filter validation.
    TimeSpan oneMinute = TimeSpan.FromMinutes(1);

    foreach (var parametersEntry in schedule.ChaosParametersDictionary)
    {
        await ChaosUtil.ValidateChaosTargetFilterAsync(
            fabricClient,
            parametersEntry.Value,
            oneMinute,
            oneMinute,
            cancellationToken).ConfigureAwait(false);
    }

    // ---- Jobs ----
    if (schedule.Jobs == null)
    {
        throw new System.ArgumentNullException("Jobs", StringResources.ChaosScheduler_ScheduleJobsIsNull);
    }

    // Each job is validated on its own before the jobs are validated in combination.
    foreach (var scheduledJob in schedule.Jobs)
    {
        ChaosSchedulerUtil.VerifyChaosScheduleJob(scheduledJob);
    }

    // ---- Cross-item validation ----
    ChaosSchedulerUtil.FindMissingChaosParameterReferences(schedule);
    ChaosSchedulerUtil.FindScheduleConflicts(schedule);
}
/// <summary>
/// Runs Chaos iterations in a loop, pausing between them, until <c>this.cancellationToken</c>
/// is signalled.
/// </summary>
/// <remarks>
/// Each pass through the loop:
/// <list type="number">
/// <item><description>Reads the persisted next-iteration timestamp and, if it lies in the future,
/// registers a <c>WaitingEvent</c> and waits out the remainder.</description></item>
/// <item><description>If <c>IsClusterReadyForFaultsAsync</c> returns true, validates cluster health.
/// On validation failure the wait time is backed off and a <c>ValidationFailedEvent</c> is registered;
/// otherwise the wait time is reset to <c>ChaosParameters.WaitTimeBetweenIterations</c>, the next-iteration
/// timestamp is persisted, and <c>ExecuteFaultIterationAsync</c> runs.</description></item>
/// <item><description>Any exception thrown by step 2 is captured, the wait time is backed off, and the
/// next-iteration timestamp is persisted. Exceptions for which <c>ChaosUtil.IsExpectedException</c>
/// returns false are recorded as a <c>TestErrorEvent</c>.</description></item>
/// <item><description>Registers a <c>WaitingEvent</c>, waits for the current wait time, and increments
/// <c>IterationsCompleted</c> (this happens on every pass, whatever the outcome of step 2).</description></item>
/// </list>
/// Awaits outside the try block (state-manager calls and <c>Task.Delay</c>) are not guarded, so their
/// exceptions propagate to the caller.
/// </remarks>
internal async Task ExecuteIterationsWithPauseAsync()
{
    TestabilityTrace.TraceSource.WriteNoise("StartTrek", "Enter ExecuteIterationsWithPauseAsync, datetimeutc={0}", DateTime.UtcNow);

    this.stopwatch = new Stopwatch();
    this.stopwatch.Start();

    Exception capturedException = null;

    TimeSpan waitTime = this.ChaosParameters.WaitTimeBetweenIterations;

    while (!this.cancellationToken.IsCancellationRequested)
    {
        // If this is not the beginning of a fresh Chaos run, before starting a new iteration,
        // consult the NextIterationTimeStampRD to find out if there is some
        // residual wait time from the previous iteration, if there is then wait that amount
        var nextIterationTimeStampUtc = await this.StateManager.GetUtcTimeStampAsync(
            FASConstants.NextItearationTimeStampRDName,
            FASConstants.NextItearationTimeStampKey,
            this.partition,
            this.cancellationToken).ConfigureAwait(false);

        var residualWaitTime = nextIterationTimeStampUtc.Subtract(DateTime.UtcNow);

        if (residualWaitTime > TimeSpan.Zero)
        {
            await this.StateManager.RegisterChaosEventAndUpdateChaosStatusAsync(
                new WaitingEvent(DateTime.UtcNow, StringHelper.Format(StringResources.ChaosInfo_ResidualWaitingFromPreviousIteration, residualWaitTime)),
                ChaosStatus.Running,
                this.partition,
                this.cancellationToken,
                () =>
                {
                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Registering WaitingEvent for waiting '{0}' left over from the previous iteration.", residualWaitTime);
                }).ConfigureAwait(false);

            await Task.Delay(residualWaitTime, this.cancellationToken).ConfigureAwait(false);
        }

        try
        {
            if (await this.IsClusterReadyForFaultsAsync(this.cancellationToken).ConfigureAwait(false))
            {
                System.Fabric.Common.TimeoutHelper timer = new System.Fabric.Common.TimeoutHelper(this.ChaosParameters.MaxClusterStabilizationTimeout);

                StringBuilder validationReport = new StringBuilder();

                var clusterReport = await this.validationHelper.ValidateClusterHealthAsync(
                    timer.GetRemainingTime(),
                    this.cancellationToken).ConfigureAwait(false);

                if (clusterReport.ValidationFailed)
                {
                    // exponential (doubling) back-off, capped at DefaultMaximumBackoffForChaosIterations
                    waitTime = GetNextBackoffWaitTime(waitTime, FASConstants.DefaultMaximumBackoffForChaosIterations);

                    var serviceReport = await this.validationHelper.ValidateAllServicesAsync(
                        timer.GetRemainingTime(),
                        this.cancellationToken).ConfigureAwait(false);

                    if (serviceReport.ValidationFailed)
                    {
                        TestabilityTrace.TraceSource.WriteInfo(TraceType, "Even though some services are unhealthy or unstable, going to induce faults, because the cluster is healthy.");
                        TestabilityTrace.TraceSource.WriteInfo(TraceType, "Failure reason: \n'{0}'", serviceReport.FailureReason);

                        validationReport.Append(serviceReport.FailureReason);
                    }

                    // The waiting notice and the cluster failure reason are placed ahead of any
                    // service failure reason already appended to the report.
                    StringBuilder reportBuilder = new StringBuilder();
                    reportBuilder.Append(StringHelper.Format(StringResources.ChaosInfo_WaitingNotice, waitTime));
                    reportBuilder.AppendLine();
                    reportBuilder.AppendLine(clusterReport.FailureReason);

                    validationReport.Insert(0, reportBuilder.ToString());

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Cluster validation failed for '{0}'.", clusterReport.FailureReason);

                    var validationFailedEvent = new ValidationFailedEvent(DateTime.UtcNow, validationReport.ToString());

                    // record validation failed event
                    await this.StateManager.RegisterChaosEventAndUpdateChaosStatusAsync(
                        validationFailedEvent,
                        ChaosStatus.Running,
                        this.partition,
                        this.cancellationToken,
                        () =>
                        {
                            FabricEvents.Events.ChaosValidationFailed(
                                Guid.NewGuid().ToString(),
                                validationFailedEvent.TimeStampUtc.Ticks,
                                validationFailedEvent.Reason);
                        }).ConfigureAwait(false);

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Pausing for '{0}' before performing next check.", waitTime);
                }
                else
                {
                    // Healthy cluster: drop any accumulated back-off.
                    waitTime = this.ChaosParameters.WaitTimeBetweenIterations;

                    await this.StateManager.RegisterCurrentStatusAsync(
                        ChaosStatus.Running,
                        this.partition,
                        this.cancellationToken).ConfigureAwait(false);

                    // Persist the next-iteration timestamp before the iteration runs.
                    var timestampOfNextIteration = DateTime.UtcNow.Add(waitTime);

                    await this.StateManager.SetUtcTimeStampAsync(
                        FASConstants.NextItearationTimeStampRDName,
                        FASConstants.NextItearationTimeStampKey,
                        timestampOfNextIteration,
                        this.partition,
                        this.cancellationToken).ConfigureAwait(false);

                    await this.ExecuteFaultIterationAsync(this.cancellationToken).ConfigureAwait(false);

                    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Pausing for '{0}' before executing next iteration.", waitTime);
                }
            }
        }
        catch (Exception exception)
        {
            capturedException = exception;

            // exponential (doubling) back-off, capped at DefaultMaximumBackoffForChaosIterations
            waitTime = GetNextBackoffWaitTime(waitTime, FASConstants.DefaultMaximumBackoffForChaosIterations);

            var timestampOfNextIteration = DateTime.UtcNow.Add(waitTime);

            await this.StateManager.SetUtcTimeStampAsync(
                FASConstants.NextItearationTimeStampRDName,
                FASConstants.NextItearationTimeStampKey,
                timestampOfNextIteration,
                this.partition,
                this.cancellationToken).ConfigureAwait(false);

            TestabilityTrace.TraceSource.WriteInfo(TraceType, "Exception occurred in the ChaosTestScenario loop: {0}, cancellationreq: {1}", capturedException, this.cancellationToken.IsCancellationRequested);
        }

        if (capturedException != null)
        {
            // Unwrap an AggregateException so the checks below look at the underlying exception.
            var ae = capturedException as AggregateException;
            if (ae != null)
            {
                capturedException = ae.Flatten().InnerException;
            }

            if (!ChaosUtil.IsExpectedException(capturedException))
            {
                string testErrorEventMessage = capturedException.Message;

                // For FabricChaosEngineException the resource string is recorded instead of the raw message.
                if (capturedException is FabricChaosEngineException)
                {
                    testErrorEventMessage = StringResources.ChaosError_UnexpectedInChaosEngine;
                }

                await this.StateManager.RegisterChaosEventAndUpdateChaosStatusAsync(
                    new TestErrorEvent(DateTime.UtcNow, testErrorEventMessage),
                    ChaosStatus.Running,
                    this.partition,
                    this.cancellationToken).ConfigureAwait(false);

                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Unexpected exception '{0}' was turned into TestErrorEvent.", capturedException);
            }

            // handled the exception, now clear it
            capturedException = null;
        }

        if (this.testMode && (this.ChaosParameters.Context != null && this.ChaosParameters.Context.ContainsKey(ChaosConstants.FailoverAfterChaosFaultsKey)))
        {
            this.partition.ReportFault(FaultType.Transient);
        }

        await this.StateManager.RegisterChaosEventAndUpdateChaosStatusAsync(
            new WaitingEvent(DateTime.UtcNow, StringHelper.Format(StringResources.ChaosInfo_WaitingBetweenIterations, waitTime)),
            ChaosStatus.Running,
            this.partition,
            this.cancellationToken,
            () =>
            {
                TestabilityTrace.TraceSource.WriteInfo(TraceType, "Registering WaitingEvent for waiting '{0}' between iterations.", waitTime);
            }).ConfigureAwait(false);

        await Task.Delay(waitTime, this.cancellationToken).ConfigureAwait(false);

        this.IterationsCompleted++;
    }

    TestabilityTrace.TraceSource.WriteInfo(TraceType, "Session has completed. \nTotal iterations: {0}. Total elapsed time: {1}", this.IterationsCompleted, this.GetElapsedTime());
}

/// <summary>
/// Computes the next back-off wait time: double the current one, but never beyond the cap.
/// </summary>
/// <param name="currentWaitTime">The wait time currently in effect.</param>
/// <param name="maximumBackoff">The cap that back-off must not push the wait time past.</param>
/// <returns>
/// <paramref name="currentWaitTime"/> unchanged when it is already at or above
/// <paramref name="maximumBackoff"/>; otherwise twice <paramref name="currentWaitTime"/>,
/// clamped to <paramref name="maximumBackoff"/>.
/// </returns>
private static TimeSpan GetNextBackoffWaitTime(TimeSpan currentWaitTime, TimeSpan maximumBackoff)
{
    if (currentWaitTime >= maximumBackoff)
    {
        // Already at the cap (or configured above it): back-off never shortens the wait.
        return currentWaitTime;
    }

    TimeSpan doubled = currentWaitTime + currentWaitTime;

    // Clamp so a single doubling step cannot overshoot the cap.
    return doubled > maximumBackoff ? maximumBackoff : doubled;
}