/// <summary>
/// Refreshes both instances and, if their states allow it, attempts to scale <paramref name="primary"/>.
/// </summary>
/// <param name="context">Rule context; supplies the cancellation token and is forwarded to <c>Emit</c> and to the scale attempt.</param>
/// <param name="primary">The instance that is the candidate for scaling.</param>
/// <param name="secondary">The other instance; it is only inspected here, never scaled.</param>
/// <returns>
/// <see cref="ValidationOutcome.PrimaryFailed"/> or <see cref="ValidationOutcome.PrimaryUndergoingAutoscale"/> when
/// <paramref name="primary"/> is not ready to scale; <see cref="ValidationOutcome.SecondaryUndergoingAutoscale"/> when
/// <paramref name="secondary"/> is neither ready to scale nor failed; otherwise <see cref="ValidationOutcome.Success"/>
/// once the scale attempt has been awaited.
/// </returns>
private async Task<ValidationOutcome> ValidateAndScaleAsync(RuleContext context, IRedisInstance primary, IRedisInstance secondary)
{
    // What we know about the instances can be arbitrarily stale: the rule may not have run for a long time,
    // or an earlier autoscale may have changed things. Refresh both (concurrently) before deciding anything.
    // NOTE(review): ThrowIfFailureAsync presumably throws when either refresh reports a failure - confirm.
    var primaryRefresh = primary.RefreshAsync(context.CancellationToken);
    var secondaryRefresh = secondary.RefreshAsync(context.CancellationToken);
    await Task.WhenAll(primaryRefresh, secondaryRefresh).ThrowIfFailureAsync();

    // Emits the warning shared by both "not ready" exits below.
    void WarnNotReady(IRedisInstance instance) =>
        Emit(context, "Autoscale", Severity.Warning, $"Instance `{instance.Name}` is undergoing maintenance or autoscaling operation. State=[{instance.State}]");

    // Scaling proceeds only when:
    //   1. the instance to scale is itself ready to scale, and
    //   2. the other instance is not in the middle of being scaled (a failed one does not block us).
    if (!primary.IsReadyToScale)
    {
        WarnNotReady(primary);
        return primary.IsFailed
            ? ValidationOutcome.PrimaryFailed
            : ValidationOutcome.PrimaryUndergoingAutoscale;
    }

    // Equivalent to "not ready AND not failed": the secondary is busy with something other than being failed.
    if (!(secondary.IsReadyToScale || secondary.IsFailed))
    {
        WarnNotReady(secondary);
        return ValidationOutcome.SecondaryUndergoingAutoscale;
    }

    await AttemptToScaleAsync(context, primary, context.CancellationToken);
    return ValidationOutcome.Success;
}
/// <summary>
/// Refreshes both instances, optionally reports instances that are not ready to scale, raises an incident when
/// both instances are failed, and, if their states allow it, attempts to scale <paramref name="primary"/>.
/// </summary>
/// <param name="context">Rule context; supplies the cancellation token and is forwarded to <c>Emit</c> and to the scale attempt.</param>
/// <param name="primary">The instance that is the candidate for scaling.</param>
/// <param name="secondary">The other instance; it is only inspected here, never scaled.</param>
/// <param name="allowFailedStateReporting">
/// When <c>true</c>, every instance that is not ready to scale gets a warning emitted and is passed to
/// <c>CreateIcmForFailedStateIfNeededAsync</c>. When <c>false</c>, that per-instance reporting is skipped.
/// </param>
/// <returns>
/// <see cref="ValidationOutcome.PrimaryFailed"/> or <see cref="ValidationOutcome.PrimaryUndergoingAutoscale"/> when
/// <paramref name="primary"/> is not ready to scale; <see cref="ValidationOutcome.SecondaryUndergoingAutoscale"/> when
/// <paramref name="secondary"/> is neither ready to scale nor failed; otherwise <see cref="ValidationOutcome.Success"/>
/// once the scale attempt has been awaited.
/// </returns>
private async Task<ValidationOutcome> ValidateAndScaleAsync(RuleContext context, IRedisInstance primary, IRedisInstance secondary, bool allowFailedStateReporting)
{
    // What we know about the instances can be arbitrarily stale: the rule may not have run for a long time,
    // or an earlier autoscale may have changed things. Refresh both (concurrently) before deciding anything.
    // NOTE(review): ThrowIfFailureAsync presumably throws when either refresh reports a failure - confirm.
    var primaryRefresh = primary.RefreshAsync(context.CancellationToken);
    var secondaryRefresh = secondary.RefreshAsync(context.CancellationToken);
    await Task.WhenAll(primaryRefresh, secondaryRefresh).ThrowIfFailureAsync();

    // Per-instance reporting: warn and then let the ICM helper decide whether an incident is needed.
    async Task ReportIfNotReadyAsync(IRedisInstance instance)
    {
        if (instance.IsReadyToScale)
        {
            return;
        }

        Emit(context, "Autoscale", Severity.Warning, $"Instance `{instance.Name}` is undergoing maintenance or autoscaling operation. State=[{instance.State}]");
        await CreateIcmForFailedStateIfNeededAsync(instance);
    }

    if (allowFailedStateReporting)
    {
        // Sequential on purpose: primary is fully reported before secondary is looked at.
        await ReportIfNotReadyAsync(primary);
        await ReportIfNotReadyAsync(secondary);
    }

    // With both instances failed, open an incident against our own rotation (Sev 2 in production, Sev 3
    // elsewhere) so that they get fixed as quickly as possible.
    bool bothFailed = primary.IsFailed && secondary.IsFailed;
    if (bothFailed)
    {
        try
        {
            // NOTE(review): the title is built from the _primaryRedisInstance/_secondaryRedisInstance fields,
            // while the log message below uses the method parameters - presumably to keep the title stable
            // regardless of argument order; confirm this is intentional.
            await EmitIcmAsync(
                severity: _configuration.Environment.IsProduction() ? 2 : 3,
                title: $"Redis instances {_primaryRedisInstance.Name} and {_secondaryRedisInstance.Name} are in a failed state",
                description: TwoFailedInstancesDescription,
                machines: null,
                correlationIds: null,
                cacheTimeToLive: _configuration.IcmIncidentCacheTtl);
        }
        catch (Exception e)
        {
            // Best effort: failing to raise the incident is logged and must not abort validation.
            _configuration.Logger.Error($"Failed to emit IcM for failed instances {primary.Name} and {secondary.Name}: {e}");
        }
    }

    // Scaling proceeds only when:
    //   1. the instance to scale is itself ready to scale, and
    //   2. the other instance is not in the middle of being scaled (a failed one does not block us).
    if (!primary.IsReadyToScale)
    {
        return primary.IsFailed
            ? ValidationOutcome.PrimaryFailed
            : ValidationOutcome.PrimaryUndergoingAutoscale;
    }

    // Equivalent to "not ready AND not failed": the secondary is busy with something other than being failed.
    if (!(secondary.IsReadyToScale || secondary.IsFailed))
    {
        return ValidationOutcome.SecondaryUndergoingAutoscale;
    }

    await AttemptToScaleAsync(context, primary);
    return ValidationOutcome.Success;
}