/// <summary>
/// Verifies that the scheduler's current state equals <paramref name="expectedState"/>.
/// </summary>
/// <param name="expectedState">The state the scheduler is required to be in.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the current state is not equal to <paramref name="expectedState"/>.
/// </exception>
private void CheckStateAndThrowOnError(SchedulerState expectedState)
{
    // Nothing to report when the states match.
    if (this.state.Equals(expectedState))
    {
        return;
    }

    string message = string.Format(StringResources.ChaosScheduler_InconsistentState, this.state, expectedState);
    throw new InvalidOperationException(message);
}
/// <summary>
/// Compares this state with another object. Two states are considered equal when
/// both their ScheduleStatus and their ChaosStatus match.
/// </summary>
/// <param name="obj">The object to compare against.</param>
/// <returns>
/// True when <paramref name="obj"/> is a <see cref="SchedulerState"/> carrying the same
/// ScheduleStatus and ChaosStatus as this instance; otherwise false.
/// </returns>
public override bool Equals(object obj)
{
    SchedulerState candidate = obj as SchedulerState;

    if (candidate != null)
    {
        return this.ScheduleStatus == candidate.ScheduleStatus
            && this.ChaosStatus == candidate.ChaosStatus;
    }

    // Null, or not a SchedulerState at all.
    return false;
}
/// <summary>
/// Creates a scheduler bound to the given reliable state manager and partition.
/// The scheduler starts from a copy of <see cref="SchedulerState.NoChaosScheduleStopped"/>
/// and a default-constructed <see cref="ChaosScheduleDescription"/>.
/// </summary>
/// <param name="stateManager">State manager used to reach the reliable store.</param>
/// <param name="partition">Partition this scheduler is associated with.</param>
internal ChaosScheduler(IReliableStateManager stateManager, IStatefulServicePartition partition)
{
    // Collaborators supplied by the caller.
    this.partition = partition;
    this.StateManager = stateManager;

    // Initial in-memory state and schedule.
    this.scheduleDescription = new ChaosScheduleDescription();
    this.state = new SchedulerState(SchedulerState.NoChaosScheduleStopped);

    // Semaphore with a single permit (initial count 1, maximum count 1).
    this.stateSemaphore = new SemaphoreSlim(1, 1);
}
/// <summary>
/// Serializes <paramref name="schedulerState"/> and stores it in the status dictionary
/// under <c>FASConstants.ChaosSchedulerStatusDictionaryStateKey</c>, then commits the transaction.
/// </summary>
/// <param name="schedulerState">The scheduler state to persist.</param>
/// <param name="cancellationToken">Token forwarded to the retrying write helper.</param>
private async Task WriteStateToReliableStoreAsync(SchedulerState schedulerState, CancellationToken cancellationToken)
{
    TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "Enter WriteStateToReliableStoreAsync");

    // Resolve the status dictionary on first use; later calls reuse the cached instance.
    if (this.StatusDictionary == null)
    {
        this.StatusDictionary = await this.StateManager
            .GetOrAddAsync<IReliableDictionary<string, byte[]>>(FASConstants.ChaosSchedulerStateName)
            .ConfigureAwait(false);
    }

    using (ITransaction transaction = this.StateManager.CreateTransaction())
    {
        await FaultAnalysisServiceUtility.RunAndReportFaultOnRepeatedFailure(
            Guid.Empty,
            () => this.StatusDictionary.SetAsync(transaction, FASConstants.ChaosSchedulerStatusDictionaryStateKey, schedulerState.ToBytes()),
            this.partition,
            "WriteStateToReliableStoreAsync",
            FASConstants.MaxRetriesForReliableDictionary,
            cancellationToken).ConfigureAwait(false);

        await transaction.CommitAsync().ConfigureAwait(false);
    }
}
/// <summary>
/// Recover by setting schedule to empty and status to stopped. No Chaos will be running.
/// </summary>
/// <param name="cancellationToken">Token forwarded to schedule verification and to the reliable store writes.</param>
private async Task RecoverFromDefault(CancellationToken cancellationToken)
{
    // Can't restart Chaos if it was running, make sure a stop event is registered.
    await this.chaosMessageProcessor.RegisterStoppedEventForRestartFailureAsync().ConfigureAwait(false);

    SchedulerState schedulerState = new SchedulerState(SchedulerState.NoChaosScheduleStopped);
    ChaosScheduleDescription chaosScheduleDescription = new ChaosScheduleDescription();

    // ConfigureAwait(false) here keeps this await consistent with every other await in the scheduler.
    await ChaosSchedulerUtil.VerifyChaosScheduleAsync(chaosScheduleDescription.Schedule, this.fabricClient, cancellationToken).ConfigureAwait(false);

    // Normally, the schedule and status should not be set directly but this is an exception as
    // this is a setup step and we are only setting the state to the initial entry state
    this.scheduleDescription = chaosScheduleDescription;
    this.state = schedulerState;

    // Persist the schedule first, then the state.
    await this.WriteScheduleToReliableStoreAsync(this.scheduleDescription, cancellationToken).ConfigureAwait(false);
    await this.WriteStateToReliableStoreAsync(this.state, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Looks up the transition for the current state and <paramref name="command"/> in the
/// transition table. When one exists, the new state is persisted and then becomes the
/// current in-memory state; otherwise a warning is traced and the state is left untouched.
/// </summary>
/// <param name="command">The command that triggers the transition.</param>
/// <param name="cancellationToken">Token forwarded to the reliable store write.</param>
private async Task TryMoveStateAsync(Command command, CancellationToken cancellationToken)
{
    // Must only be called when inside the semaphore
    TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "Enter TryMoveStateAsync");

    SchedulerState targetState;
    bool transitionDefined = ChaosScheduler.TransitionTable.TryGetValue(new StateTransition(this.state, command), out targetState);

    if (!transitionDefined)
    {
        TestabilityTrace.TraceSource.WriteWarning(TraceComponent, "Transition with command {0} from state {1} is not valid", command, this.state);
        return;
    }

    // Persist first; the in-memory state only changes once the write has completed.
    await this.WriteStateToReliableStoreAsync(targetState, cancellationToken).ConfigureAwait(false);

    // Trace before assigning so the message still shows the previous state.
    TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "State changed from {0} to {1}. Reason {2}.", this.state, targetState, command);
    this.state = targetState;
}
/// <summary>
/// Copy constructor: creates a state carrying the same ScheduleStatus and ChaosStatus
/// as <paramref name="other"/>.
/// </summary>
/// <param name="other">The state to copy from.</param>
public SchedulerState(SchedulerState other)
{
    this.ChaosStatus = other.ChaosStatus;
    this.ScheduleStatus = other.ScheduleStatus;
}
/// <summary>
/// Creates a transition from the state described by the two status values, triggered by
/// <paramref name="command"/>.
/// </summary>
/// <param name="scheduleStatus">Schedule status of the source state.</param>
/// <param name="chaosStatus">Chaos status of the source state.</param>
/// <param name="command">The command that triggers the transition.</param>
public StateTransition(ChaosScheduleStatus scheduleStatus, ChaosStatus chaosStatus, Command command)
{
    // Combine the two status values into a single source state.
    SchedulerState sourceState = new SchedulerState(scheduleStatus, chaosStatus);

    this.state = sourceState;
    this.command = command;
}
/// <summary>
/// Creates a transition from <paramref name="state"/>, triggered by <paramref name="command"/>.
/// The given state instance is stored as-is (no copy is made).
/// </summary>
/// <param name="state">The source state of the transition.</param>
/// <param name="command">The command that triggers the transition.</param>
public StateTransition(SchedulerState state, Command command)
{
    this.command = command;
    this.state = state;
}
/// <summary>
/// Attempt to recover the Chaos schedule and the Chaos scheduler status from the RD. Chaos will be running if it should be running.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the reliable store reads and to the schedule restore calls.</param>
/// <returns>
/// False when either the stored schedule or the stored scheduler state does not exist; true otherwise.
/// Note that true is also returned after an <see cref="System.ArgumentException"/> has been traced and handed to
/// ChaosUtility.ThrowOrAssertIfTrue, provided that call returns.
/// </returns>
private async Task<bool> TryRecoveryFromSchedule(CancellationToken cancellationToken)
{
    TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "TryRecoveryFromSchedule entered.");

    // Start from defaults; both objects are overwritten from the stored bytes below.
    SchedulerState schedulerState = new SchedulerState(SchedulerState.NoChaosScheduleStopped);
    ChaosScheduleDescription chaosScheduleDescription = new ChaosScheduleDescription();

    this.StatusDictionary = this.StatusDictionary ?? await this.StateManager.GetOrAddAsync<IReliableDictionary<string, byte[]>>(FASConstants.ChaosSchedulerStateName).ConfigureAwait(false);

    using (ITransaction tx = this.StateManager.CreateTransaction())
    {
        // Read the persisted schedule.
        var schedulerResult = await FaultAnalysisServiceUtility.RunAndReportFaultOnRepeatedFailure<ConditionalValue<byte[]>>(
            Guid.Empty,
            () => this.StatusDictionary.TryGetValueAsync(tx, FASConstants.ChaosSchedulerStatusDictionaryScheduleKey),
            this.partition,
            "RestartRecoveryAsync",
            FASConstants.MaxRetriesForReliableDictionary,
            cancellationToken).ConfigureAwait(false);

        // Read the persisted scheduler state within the same transaction.
        var schedulerStateResult = await FaultAnalysisServiceUtility.RunAndReportFaultOnRepeatedFailure<ConditionalValue<byte[]>>(
            Guid.Empty,
            () => this.StatusDictionary.TryGetValueAsync(tx, FASConstants.ChaosSchedulerStatusDictionaryStateKey),
            this.partition,
            "RestartRecoveryAsync",
            FASConstants.MaxRetriesForReliableDictionary,
            cancellationToken).ConfigureAwait(false);

        if (!schedulerResult.HasValue || !schedulerStateResult.HasValue)
        {
            // Kept as a single-line literal: a regular C# string cannot contain a raw line break.
            TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "TryRecoveryFromSchedule failed to recover. Scheduler state or schedule was non existent.");
            return false;
        }

        chaosScheduleDescription.FromBytes(schedulerResult.Value);
        schedulerState.FromBytes(schedulerStateResult.Value);

        await tx.CommitAsync().ConfigureAwait(false);
    }

    try
    {
        if (schedulerState.ScheduleStatus.Equals(ChaosScheduleStatus.Pending))
        {
            TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "TryRecoveryFromSchedule scheduler state is pending");
            await this.SetScheduleInternalAsync(chaosScheduleDescription, cancellationToken).ConfigureAwait(false);
        }
        else if (schedulerState.ScheduleStatus.Equals(ChaosScheduleStatus.Active))
        {
            TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "TryRecoveryFromSchedule scheduler state is active");
            await this.SetScheduleAndTryResumeAsync(chaosScheduleDescription, cancellationToken).ConfigureAwait(false);
        }

        // expire and stopped ChaosScheduleStatus will result in no action being taken
        // the schedule and status is still correct in the RD because that is where the values were read from
    }
    catch (System.ArgumentException ex)
    {
        string exceptionMessage = string.Format("RestartRecoveryAsync - failed to recover chaos schedule. Reason {0}", ex.Message);
        TestabilityTrace.TraceSource.WriteError(TraceComponent, exceptionMessage);

        ChaosUtility.ThrowOrAssertIfTrue("ChaosScheduler::RestartRecoveryAsync", true, exceptionMessage);
    }

    return true;
}
/// <summary>
/// Persists the scheduler's initial state and then runs the scheduler work loop until
/// <paramref name="cancellationToken"/> is cancelled.
/// </summary>
/// <remarks>
/// Each loop iteration acquires the state semaphore, acts on the current state, releases the
/// semaphore and then waits for <c>ChaosConstants.SchedulerCycleWaitIntervalMilliseconds</c>:
/// NoChaosSchedulePending tries to mature the schedule; NoChaosScheduleActive tries to expire the
/// schedule and then to start Chaos; ChaosScheduleActive tries to expire the schedule and then to
/// finish Chaos. An <see cref="InvalidOperationException"/> raised while doing so resets the
/// scheduler to the stopped state, both in the reliable store and in memory.
/// The semaphore is acquired with the awaitable WaitAsync so that no thread is blocked while waiting.
/// </remarks>
/// <param name="cancellationToken">Token that ends the loop and is forwarded to all awaited operations.</param>
private async Task StartAndRunAsync(CancellationToken cancellationToken)
{
    if (!await this.stateSemaphore.WaitAsync(ChaosConstants.SchedulerLockWaitMilliseconds, cancellationToken).ConfigureAwait(false))
    {
        TestabilityTrace.TraceSource.WriteWarning(TraceComponent, "StartAndRunAsync - initial setup - did not acquire lock in provisioned time. Not progressing.");
        return;
    }

    try
    {
        TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "stateSemaphore Acquired by scheduler in StartAndRunAsync initialization");
        await this.WriteStateToReliableStoreAsync(this.state, cancellationToken).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        string exceptionMessage = string.Format("StartAndRunAsync - failed to set initial state. Exception:{0}", ex.Message);
        TestabilityTrace.TraceSource.WriteError(TraceComponent, exceptionMessage);
        ChaosUtility.ThrowOrAssertIfTrue("ChaosScheduler::StartAndRunAsync", true, exceptionMessage);

        // The initial state could not be persisted; do not enter the work loop.
        return;
    }
    finally
    {
        this.stateSemaphore.Release();
        TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "stateSemaphore Released by scheduler in StartAndRunAsync initialization");
    }

    while (!cancellationToken.IsCancellationRequested)
    {
        if (!await this.stateSemaphore.WaitAsync(ChaosConstants.SchedulerLockWaitMilliseconds, cancellationToken).ConfigureAwait(false))
        {
            TestabilityTrace.TraceSource.WriteWarning(TraceComponent, "StartAndRunAsync - scheduler loop - did not acquire lock in provisioned time.");

            // Retry the acquisition right away; the timed wait above already paced this iteration.
            continue;
        }

        try
        {
            TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "stateSemaphore Acquired by scheduler in StartAndRunAsync work loop");

            if (this.state.Equals(SchedulerState.NoChaosSchedulePending))
            {
                await this.TryMatureScheduleAsync(cancellationToken).ConfigureAwait(false);
            }
            else if (this.state.Equals(SchedulerState.NoChaosScheduleActive))
            {
                await this.TryExpireSchedule(cancellationToken).ConfigureAwait(false);
                await this.TryStartChaosAsync(cancellationToken).ConfigureAwait(false);
            }
            else if (this.state.Equals(SchedulerState.ChaosScheduleActive))
            {
                await this.TryExpireSchedule(cancellationToken).ConfigureAwait(false);
                await this.TryFinishChaosAsync(cancellationToken).ConfigureAwait(false);
            }

            //// Scheduler takes no action on all other states.
        }
        catch (InvalidOperationException ex)
        {
            TestabilityTrace.TraceSource.WriteError(TraceComponent, "Scheduler encountered a bad state transaction {0}", ex.Message);

            // Fall back to the stopped state: persist it, stop Chaos, then update the in-memory state.
            await this.WriteStateToReliableStoreAsync(SchedulerState.NoChaosScheduleStopped, cancellationToken).ConfigureAwait(false);
            await this.chaosMessageProcessor.ProcessStopChaosOldAsync(true).ConfigureAwait(false);
            this.state = new SchedulerState(SchedulerState.NoChaosScheduleStopped);
        }
        finally
        {
            this.stateSemaphore.Release();
            TestabilityTrace.TraceSource.WriteInfo(TraceComponent, "stateSemaphore Released by scheduler in StartAndRunAsync work loop");
        }

        await Task.Delay(TimeSpan.FromMilliseconds(ChaosConstants.SchedulerCycleWaitIntervalMilliseconds), cancellationToken).ConfigureAwait(false);
    }
}