/// <summary>
/// Creates a repair task through <c>repairManager</c> and records the outcome
/// (success or failure) in both the trace and the activity log.
/// </summary>
/// <param name="activityId">Activity id passed through to the activity logger.</param>
/// <param name="repairTask">The repair task to create; validated before use.</param>
/// <returns>The value returned by <c>repairManager.CreateRepairTaskAsync</c>.</returns>
/// <remarks>
/// Any exception raised inside the create-and-log sequence is traced, logged as a
/// failed operation and then rethrown unchanged.
/// </remarks>
public async Task<long> CreateRepairTaskAsync(Guid activityId, IRepairTask repairTask)
{
    repairTask.Validate("repairTask");

    DateTimeOffset operationStartedAt = DateTimeOffset.UtcNow;

    try
    {
        // TODO: the overload without timeout and cancellation token is used for now because some
        // maximum timeout limit is enforced somewhere; when the supplied timeout is larger than
        // that limit, repair task creation fails.
        var unwrappedTask = Unwrap(repairTask);
        long creationResult = await repairManager.CreateRepairTaskAsync(unwrappedTask).ConfigureAwait(false);

        traceType.WriteInfo(
            "Created repair task. Result: {0}, repair task: {1}",
            creationResult,
            repairTask.ToJson());

        // The state transition is logged as Invalid -> current state of the task.
        activityLogger.LogChangeState(
            activityId,
            repairTask,
            RepairTaskState.Invalid,
            repairTask.State,
            TimeSpan.Zero,
            repairTask.ToJson(),
            repairTask.TaskId);

        activityLogger.LogOperation(
            activityId,
            operationStartedAt,
            OperationResult.Success,
            null,
            repairTask.ToJson());

        return creationResult;
    }
    catch (Exception failure)
    {
        traceType.WriteError("Unable to create repair task. Errors: {0}", failure.GetMessage());

        activityLogger.LogOperation(
            activityId,
            operationStartedAt,
            OperationResult.Failure,
            failure,
            repairTask.ToJson());

        // Bare rethrow keeps the original stack trace.
        throw;
    }
}
/// <summary>
/// Determines whether the given tenant job is blocked under the supplied job blocking policy.
/// </summary>
/// <param name="policy">The job blocking policy to evaluate.</param>
/// <param name="mappedTenantJob">The mapped tenant job whose type and impacted node count are inspected.</param>
/// <returns>
/// <c>true</c> when the job is blocked by the policy. An unrecognized policy value is
/// reported as an error and also yields <c>true</c>.
/// </returns>
private bool IsBlocked(JobBlockingPolicy policy, IMappedTenantJob mappedTenantJob)
{
    var tenantJob = mappedTenantJob.TenantJob;

    switch (policy)
    {
        case JobBlockingPolicy.BlockNone:
            return false;

        case JobBlockingPolicy.BlockAllJobs:
        case JobBlockingPolicy.BlockAllNewJobs:
            return true;

        case JobBlockingPolicy.BlockNewMaintenanceJob:
            return tenantJob.IsRepairJobType();

        case JobBlockingPolicy.BlockNewUpdateJob:
            return tenantJob.IsUpdateJobType();

        // The "impactful" policies block only when the job type matches AND at least one node is impacted.
        case JobBlockingPolicy.BlockNewImpactfulTenantUpdateJobs:
            return tenantJob.IsTenantUpdateJobType() && mappedTenantJob.ImpactedNodeCount > 0;

        case JobBlockingPolicy.BlockNewImpactfulPlatformUpdateJobs:
            return tenantJob.IsPlatformUpdateJobType() && mappedTenantJob.ImpactedNodeCount > 0;

        case JobBlockingPolicy.BlockNewImpactfulUpdateJobs:
            return tenantJob.IsUpdateJobType() && mappedTenantJob.ImpactedNodeCount > 0;

        default:
            // Unknown policy value: report it and block the job.
            traceType.WriteError("Invalid value for job blocking policy. Not approving any job till this is resolved. Policy: {0}", policy);
            return true;
    }
}
/// <summary>
/// Posts <paramref name="payload"/> as the request body to <c>zeroSdkUri</c>.
/// </summary>
/// <param name="payload">Raw bytes to send as the request body.</param>
/// <exception cref="TimeoutException">
/// The response status code is <see cref="HttpStatusCode.GatewayTimeout"/> or
/// <see cref="HttpStatusCode.RequestTimeout"/>.
/// </exception>
/// <exception cref="ManagementException">The response has any other non-success status code.</exception>
/// <remarks>
/// NOTE(review): a new <see cref="HttpClient"/> is created for every call. Sharing a single
/// instance is generally recommended, but that needs a field outside this method, so it is
/// left unchanged here.
/// </remarks>
private async Task PostDocumentToWireServerAsync(byte[] payload)
{
    using (var client = new HttpClient())
    // Dispose the content explicitly; not every runtime's HttpClient disposes request content
    // after sending. HttpContent.Dispose is safe to call more than once.
    using (HttpContent content = new ByteArrayContent(payload))
    {
        // Per the original author's note: plain Add throws an exception for these headers,
        // whereas TryAddWithoutValidation works.
        bool added1 = content.Headers.TryAddWithoutValidation(WebContentTypeKey, WebContentTypeValue);

        // Header values are machine-readable, so format the length with the invariant culture.
        bool added2 = content.Headers.TryAddWithoutValidation(
            WebContentLengthKey,
            payload.Length.ToString(System.Globalization.CultureInfo.InvariantCulture));

        client.DefaultRequestHeaders
            .Accept
            .Add(new MediaTypeWithQualityHeaderValue("application/octet-stream"));

        traceType.WriteNoise(
            "Added headers before post document, url: {0}{1}" +
            "Content headers: {2}, TryAddWithoutValidation status: {3}, {4}{5}" +
            "Request headers: {6}",
            zeroSdkUri.OriginalString,
            Environment.NewLine,
            content.Headers.ToJson(),
            added1,
            added2,
            Environment.NewLine,
            client.DefaultRequestHeaders.ToJson());

        using (HttpResponseMessage responseMessage = await client.PostAsync(zeroSdkUri, content).ConfigureAwait(false))
        {
            if (responseMessage.IsSuccessStatusCode)
            {
                traceType.WriteInfo("Post document returned HTTP status code: {0}", responseMessage.StatusCode);
                return;
            }

            string message = "Post document failed with HTTP status code {0}{1}Response message: {2}".ToString(
                responseMessage.StatusCode,
                Environment.NewLine,
                responseMessage.ToJson());

            traceType.WriteError("{0}", message);

            // Timeout-style status codes are surfaced as TimeoutException; everything else as ManagementException.
            if (responseMessage.StatusCode == HttpStatusCode.GatewayTimeout ||
                responseMessage.StatusCode == HttpStatusCode.RequestTimeout)
            {
                throw new TimeoutException(message);
            }

            throw new ManagementException(message);
        }
    }
}
/// <summary>
/// Decides whether the coordinator should keep retrying after <paramref name="ex"/> was caught,
/// reporting a health warning once failures have lasted longer than the warning threshold.
/// </summary>
/// <param name="ex">The exception that was caught.</param>
/// <param name="lastSuccessfulRunAt">Time of the last successful run; used to compute how long failures have lasted.</param>
/// <returns>
/// <c>false</c> when <paramref name="ex"/> is a <c>ManagementChannelTerminallyUnhealthyException</c> or when
/// the time since <paramref name="lastSuccessfulRunAt"/> exceeds the configured retry duration;
/// otherwise <c>true</c>.
/// </returns>
/// <remarks>
/// A bool is returned (rather than throwing from here) so that the exception's stack
/// information is preserved.
/// </remarks>
private bool HandleCoordinatorException(Exception ex, DateTimeOffset lastSuccessfulRunAt)
{
    if (ex is ManagementChannelTerminallyUnhealthyException)
    {
        return false;
    }

    // Both thresholds are configured in seconds; the second argument is the default value.
    var warningSeconds = configSection.ReadConfigValue(Constants.ConfigKeys.CoordinatorFailureWarningThreshold, 120);
    TimeSpan warnAfter = TimeSpan.FromSeconds(warningSeconds);

    var retrySeconds = configSection.ReadConfigValue(Constants.ConfigKeys.CoordinatorFailureRetryDuration, 900);
    TimeSpan retryWindow = TimeSpan.FromSeconds(retrySeconds);

    TimeSpan sinceLastSuccess = DateTimeOffset.UtcNow - lastSuccessfulRunAt;

    if (sinceLastSuccess > retryWindow)
    {
        string giveUpReason = "Not retrying further since 'CoordinatorFailureRetryDuration' of {0} has been exceeded".ToString(retryWindow);
        traceType.WriteError("Error while processing policy agent document. {0}. Exception: {1}", giveUpReason, ex);
        return false;
    }

    // Stays null (formatted as empty text) unless the warning threshold has been crossed.
    string healthNote = null;

    if (sinceLastSuccess > warnAfter)
    {
        string healthDescription =
            "Azure coordinator encountered errors. Last successful run was at: {0:O}. Last error: {1}"
                .ToString(lastSuccessfulRunAt, ex);

        UpdateCoordinatorHealthStatus(HealthState.Warning, healthDescription);

        healthNote = "Health warning reported since 'CoordinatorFailureWarningThreshold' of {0} has exceeded. ".ToString(
            warnAfter);
    }

    traceType.WriteWarning(
        "Error while processing policy agent document. Last successful run was at: {0:O}. {1}Retrying until 'CoordinatorFailureRetryDuration' of {2} is exceeded. Exception: {3}",
        lastSuccessfulRunAt,
        healthNote,
        retryWindow,
        ex);

    return true;
}
/// <summary>
/// Creates the factory: validates the arguments, builds the config section and coordinator
/// environment, and initializes <c>APRuntime</c> if it has not been initialized yet.
/// </summary>
/// <param name="serviceName">Service name; validated and stored. Its absolute URI is passed to the coordinator environment.</param>
/// <param name="configStore">Config store backing the config section; validated.</param>
/// <param name="configSectionName">Name of the config section to read; validated.</param>
/// <param name="partitionId">Partition id; stored for later use.</param>
/// <param name="replicaId">Replica id; stored for later use.</param>
/// <remarks>
/// Any exception raised while checking or initializing <c>APRuntime</c> is traced and rethrown.
/// </remarks>
public AutopilotInfrastructureCoordinatorFactory(
    Uri serviceName,
    IConfigStore configStore,
    string configSectionName,
    Guid partitionId,
    long replicaId)
{
    this.serviceName = serviceName.Validate("serviceName");
    configStore.Validate("configStore");
    configSectionName.Validate("configSectionName");

    var configTraceType = new TraceType(Constants.TraceTypeName + "Config");
    this.configSection = new ConfigSection(configTraceType, configStore, configSectionName);

    this.partitionId = partitionId;
    this.replicaId = replicaId;

    this.env = new CoordinatorEnvironment(this.serviceName.AbsoluteUri, this.configSection, string.Empty);

    TraceType trace = this.env.CreateTraceType("Factory");

    string appRoot = Environment.GetEnvironmentVariable("AppRoot");
    trace.WriteInfo("Autopilot coordinator factory created; AppRoot = '{0}'", appRoot);

    try
    {
        if (!APRuntime.IsInitialized)
        {
            trace.WriteInfo("Initializing APRuntime");
            APRuntime.Initialize();
            trace.WriteInfo("Initialized APRuntime successfully");
        }
        else
        {
            trace.WriteInfo("APRuntime is already initialized");
        }
    }
    catch (Exception initFailure)
    {
        trace.WriteError("Failed to initialize APRuntime: {0}", initFailure);
        throw;
    }
}
/// <summary>
/// Runs <c>ProcessAskModeCoreAsync</c> and handles whatever it throws: a cancellation that was
/// actually requested is traced as informational; any other exception is traced as an error and
/// the process exit event is signalled.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the core routine and used to recognize a requested cancellation.</param>
/// <returns>A task that completes once the core routine has finished or its exception has been handled.</returns>
private async Task ProcessAskModeAsync(CancellationToken cancellationToken)
{
    try
    {
        await ProcessAskModeCoreAsync(cancellationToken).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        // Only treat it as a cancellation when the token was really signalled.
        bool cancelledOnRequest = cancellationToken.IsCancellationRequested && ex is OperationCanceledException;

        if (cancelledOnRequest)
        {
            traceType.WriteInfo("ProcessAskModeAsync cancelled");
            return;
        }

        traceType.WriteError("Error in ProcessAskModeAsync. Exiting process. Exception: {0}", ex);

        // The Program's Main method is waiting on this global event; once it is set, Main exits
        // the process. This is a workaround for Environment.Exit, which isn't supported on
        // CoreCLR. Please see this method's comments for more.
        ProcessCloser.ExitEvent.Set();
    }
}