private async Task <bool> TryPopulateNewJobFields() { // Build query based on new search params // Find supported, but not yet searchable params var possibleNotYetIndexedParams = _supportedSearchParameterDefinitionManager.GetSearchParametersRequiringReindexing(); var notYetIndexedParams = new List <SearchParameterInfo>(); var resourceList = new HashSet <string>(); // filter list of SearchParameters by the target resource types if (_reindexJobRecord.TargetResourceTypes.Any()) { foreach (var searchParam in possibleNotYetIndexedParams) { var searchParamResourceTypes = GetDerivedResourceTypes(searchParam.BaseResourceTypes); var matchingResourceTypes = searchParamResourceTypes.Intersect(_reindexJobRecord.TargetResourceTypes); if (matchingResourceTypes.Any()) { notYetIndexedParams.Add(searchParam); // add matching resource types to the set of resource types which we will reindex resourceList.UnionWith(matchingResourceTypes); } else { _logger.LogInformation("Search parameter {url} is not being reindexed as it does not match the target types of reindex job {reindexid}.", searchParam.Url, _reindexJobRecord.Id); } } } else { notYetIndexedParams.AddRange(possibleNotYetIndexedParams); // From the param list, get the list of necessary resources which should be // included in our query foreach (var param in notYetIndexedParams) { var searchParamResourceTypes = GetDerivedResourceTypes(param.BaseResourceTypes); resourceList.UnionWith(searchParamResourceTypes); } } // if there are not any parameters which are supported but not yet indexed, then we have nothing to do if (!notYetIndexedParams.Any()) { _reindexJobRecord.Error.Add(new OperationOutcomeIssue( OperationOutcomeConstants.IssueSeverity.Information, OperationOutcomeConstants.IssueType.Informational, Core.Resources.NoSearchParametersNeededToBeIndexed)); _reindexJobRecord.CanceledTime = Clock.UtcNow; await MoveToFinalStatusAsync(OperationStatus.Canceled); return(false); } // Save the list of resource types in the reindexjob document foreach (var resource in resourceList) { _reindexJobRecord.Resources.Add(resource); } // save the list of search parameters to the reindexjob document foreach (var searchParams in notYetIndexedParams.Select(p => p.Url.OriginalString)) { _reindexJobRecord.SearchParams.Add(searchParams); } await CalculateAndSetTotalAndResourceCounts(); if (_reindexJobRecord.Count == 0) { _reindexJobRecord.Error.Add(new OperationOutcomeIssue( OperationOutcomeConstants.IssueSeverity.Information, OperationOutcomeConstants.IssueType.Informational, Core.Resources.NoResourcesNeedToBeReindexed)); await UpdateParametersAndCompleteJob(); return(false); } // Generate separate queries for each resource type and add them to query list. foreach (string resourceType in _reindexJobRecord.Resources) { // Checking resource specific counts is a performance improvement, // so if an entry for this resource failed to get added to the count dictionary, run a query anyways if (!_reindexJobRecord.ResourceCounts.ContainsKey(resourceType) || _reindexJobRecord.ResourceCounts[resourceType] > 0) { var query = new ReindexJobQueryStatus(resourceType, continuationToken: null) { LastModified = Clock.UtcNow, Status = OperationStatus.Queued, }; _reindexJobRecord.QueryList.TryAdd(query, 1); } } await UpdateJobAsync(); _throttleController.UpdateDatastoreUsage(); return(true); }
/// <inheritdoc /> public async Task ExecuteAsync(ReindexJobRecord reindexJobRecord, WeakETag weakETag, CancellationToken cancellationToken) { EnsureArg.IsNotNull(reindexJobRecord, nameof(reindexJobRecord)); EnsureArg.IsNotNull(weakETag, nameof(weakETag)); _reindexJobRecord = reindexJobRecord; _weakETag = weakETag; var jobSemaphore = new SemaphoreSlim(1, 1); var existingFhirRequestContext = _contextAccessor.FhirRequestContext; try { // Add a request context so Datastore consumption can be added var fhirRequestContext = new FhirRequestContext( method: OperationsConstants.Reindex, uriString: "$reindex", baseUriString: "$reindex", correlationId: _reindexJobRecord.Id, requestHeaders: new Dictionary <string, StringValues>(), responseHeaders: new Dictionary <string, StringValues>()) { IsBackgroundTask = true, AuditEventType = OperationsConstants.Reindex, }; _contextAccessor.FhirRequestContext = fhirRequestContext; using (IScoped <IFhirDataStore> store = _fhirDataStoreFactory()) { var provisionedCapacity = await store.Value.GetProvisionedDataStoreCapacityAsync(cancellationToken); _throttleController.Initialize(_reindexJobRecord, provisionedCapacity); } if (_reindexJobRecord.Status != OperationStatus.Running || _reindexJobRecord.StartTime == null) { // update job record to running _reindexJobRecord.Status = OperationStatus.Running; _reindexJobRecord.StartTime = Clock.UtcNow; await UpdateJobAsync(cancellationToken); } // If we are resuming a job, we can detect that by checking the progress info from the job record. // If no queries have been added to the progress then this is a new job if (_reindexJobRecord.QueryList?.Count == 0) { // Build query based on new search params // Find supported, but not yet searchable params var notYetIndexedParams = _supportedSearchParameterDefinitionManager.GetSearchParametersRequiringReindexing(); // if there are not any parameters which are supported but not yet indexed, then we have nothing to do if (!notYetIndexedParams.Any()) { _reindexJobRecord.Error.Add(new OperationOutcomeIssue( OperationOutcomeConstants.IssueSeverity.Information, OperationOutcomeConstants.IssueType.Informational, Resources.NoSearchParametersNeededToBeIndexed)); _reindexJobRecord.CanceledTime = DateTimeOffset.UtcNow; await CompleteJobAsync(OperationStatus.Canceled, cancellationToken); return; } // From the param list, get the list of necessary resources which should be // included in our query var resourceList = new HashSet <string>(); foreach (var param in notYetIndexedParams) { foreach (var baseResourceType in param.BaseResourceTypes) { if (baseResourceType == KnownResourceTypes.Resource) { resourceList.UnionWith(_modelInfoProvider.GetResourceTypeNames().ToHashSet()); // We added all possible resource types, so no need to continue break; } if (baseResourceType == KnownResourceTypes.DomainResource) { var domainResourceChildResourceTypes = _modelInfoProvider.GetResourceTypeNames().ToHashSet(); // Remove types that inherit from Resource directly domainResourceChildResourceTypes.Remove(KnownResourceTypes.Binary); domainResourceChildResourceTypes.Remove(KnownResourceTypes.Bundle); domainResourceChildResourceTypes.Remove(KnownResourceTypes.Parameters); resourceList.UnionWith(domainResourceChildResourceTypes); } else { resourceList.UnionWith(new[] { baseResourceType }); } } } _reindexJobRecord.Resources.AddRange(resourceList); _reindexJobRecord.SearchParams.AddRange(notYetIndexedParams.Select(p => p.Url.ToString())); await CalculateTotalAndResourceCounts(cancellationToken); if (_reindexJobRecord.Count == 0) { _reindexJobRecord.Error.Add(new OperationOutcomeIssue( OperationOutcomeConstants.IssueSeverity.Information, OperationOutcomeConstants.IssueType.Informational, Resources.NoResourcesNeedToBeReindexed)); await UpdateParametersAndCompleteJob(cancellationToken); return; } // Generate separate queries for each resource type and add them to query list. foreach (string resourceType in _reindexJobRecord.Resources) { // Checking resource specific counts is a performance improvement, // so if an entry for this resource failed to get added to the count dictionary, run a query anyways if (!_reindexJobRecord.ResourceCounts.ContainsKey(resourceType) || _reindexJobRecord.ResourceCounts[resourceType] > 0) { var query = new ReindexJobQueryStatus(resourceType, continuationToken: null) { LastModified = Clock.UtcNow, Status = OperationStatus.Queued, }; _reindexJobRecord.QueryList.TryAdd(query, 1); } } await UpdateJobAsync(cancellationToken); _throttleController.UpdateDatastoreUsage(); } var queryTasks = new List <Task <ReindexJobQueryStatus> >(); var queryCancellationTokens = new Dictionary <ReindexJobQueryStatus, CancellationTokenSource>(); // while not all queries are finished while (_reindexJobRecord.QueryList.Keys.Where(q => q.Status == OperationStatus.Queued || q.Status == OperationStatus.Running).Any()) { if (_reindexJobRecord.QueryList.Keys.Where(q => q.Status == OperationStatus.Queued).Any()) { // grab the next query from the list which is labeled as queued and run it var query = _reindexJobRecord.QueryList.Keys.Where(q => q.Status == OperationStatus.Queued).OrderBy(q => q.LastModified).FirstOrDefault(); CancellationTokenSource queryTokensSource = new CancellationTokenSource(); queryCancellationTokens.TryAdd(query, queryTokensSource); #pragma warning disable CS4014 // Suppressed as we want to continue execution and begin processing the next query while this continues to run queryTasks.Add(ProcessQueryAsync(query, jobSemaphore, queryTokensSource.Token)); #pragma warning restore CS4014 // Because this call is not awaited, execution of the current method continues before the call is completed _logger.LogInformation($"Reindex job task created {queryTasks.Count} Tasks"); } // reset stale queries to pending var staleQueries = _reindexJobRecord.QueryList.Keys.Where( q => q.Status == OperationStatus.Running && q.LastModified < Clock.UtcNow - _reindexJobConfiguration.JobHeartbeatTimeoutThreshold); foreach (var staleQuery in staleQueries) { await jobSemaphore.WaitAsync(); try { // if this query has a created task, cancel it if (queryCancellationTokens.TryGetValue(staleQuery, out var tokenSource)) { try { tokenSource.Cancel(false); } catch { // may throw exception if the task is disposed } } staleQuery.Status = OperationStatus.Queued; await UpdateJobAsync(cancellationToken); } finally { jobSemaphore.Release(); } } var averageDbConsumption = _throttleController.UpdateDatastoreUsage(); _logger.LogInformation($"Reindex avaerage DB consumption: {averageDbConsumption}"); var throttleDelayTime = _throttleController.GetThrottleBasedDelay(); _logger.LogInformation($"Reindex throttle delay: {throttleDelayTime}"); await Task.Delay(_reindexJobRecord.QueryDelayIntervalInMilliseconds + throttleDelayTime); // Remove all finished tasks from the collections of tasks // and cancellationTokens if (queryTasks.Count >= reindexJobRecord.MaximumConcurrency) { var taskArray = queryTasks.ToArray(); Task.WaitAny(taskArray); var finishedTasks = queryTasks.Where(t => t.IsCompleted).ToArray(); foreach (var finishedTask in finishedTasks) { queryTasks.Remove(finishedTask); queryCancellationTokens.Remove(await finishedTask); } } // if our received CancellationToken is cancelled we should // pass that cancellation request onto all the cancellationTokens // for the currently executing threads if (cancellationToken.IsCancellationRequested) { foreach (var tokenSource in queryCancellationTokens.Values) { tokenSource.Cancel(false); } } } Task.WaitAll(queryTasks.ToArray()); await jobSemaphore.WaitAsync(); try { await CheckJobCompletionStatus(cancellationToken); } finally { jobSemaphore.Release(); } } catch (JobConflictException) { // The reindex job was updated externally. _logger.LogInformation("The job was updated by another process."); } catch (Exception ex) { await jobSemaphore.WaitAsync(); try { _reindexJobRecord.Error.Add(new OperationOutcomeIssue( OperationOutcomeConstants.IssueSeverity.Error, OperationOutcomeConstants.IssueType.Exception, ex.Message)); _reindexJobRecord.FailureCount++; _logger.LogError(ex, "Encountered an unhandled exception. The job failure count increased to {failureCount}.", _reindexJobRecord.FailureCount); await UpdateJobAsync(cancellationToken); if (_reindexJobRecord.FailureCount >= _reindexJobConfiguration.ConsecutiveFailuresThreshold) { await CompleteJobAsync(OperationStatus.Failed, cancellationToken); } else { _reindexJobRecord.Status = OperationStatus.Queued; await UpdateJobAsync(cancellationToken); } } finally { jobSemaphore.Release(); } } finally { jobSemaphore.Dispose(); _contextAccessor.FhirRequestContext = existingFhirRequestContext; } }
private async Task <bool> TryPopulateNewJobFields() { // Build query based on new search params // Find supported, but not yet searchable params var notYetIndexedParams = _supportedSearchParameterDefinitionManager.GetSearchParametersRequiringReindexing(); // if there are not any parameters which are supported but not yet indexed, then we have nothing to do if (!notYetIndexedParams.Any()) { _reindexJobRecord.Error.Add(new OperationOutcomeIssue( OperationOutcomeConstants.IssueSeverity.Information, OperationOutcomeConstants.IssueType.Informational, Resources.NoSearchParametersNeededToBeIndexed)); _reindexJobRecord.CanceledTime = Clock.UtcNow; await MoveToFinalStatusAsync(OperationStatus.Canceled); return(false); } // From the param list, get the list of necessary resources which should be // included in our query var resourceList = new HashSet <string>(); foreach (var param in notYetIndexedParams) { foreach (var baseResourceType in param.BaseResourceTypes) { if (baseResourceType == KnownResourceTypes.Resource) { resourceList.UnionWith(_modelInfoProvider.GetResourceTypeNames().ToHashSet()); // We added all possible resource types, so no need to continue break; } if (baseResourceType == KnownResourceTypes.DomainResource) { var domainResourceChildResourceTypes = _modelInfoProvider.GetResourceTypeNames().ToHashSet(); // Remove types that inherit from Resource directly domainResourceChildResourceTypes.Remove(KnownResourceTypes.Binary); domainResourceChildResourceTypes.Remove(KnownResourceTypes.Bundle); domainResourceChildResourceTypes.Remove(KnownResourceTypes.Parameters); resourceList.UnionWith(domainResourceChildResourceTypes); } else { resourceList.UnionWith(new[] { baseResourceType }); } } } foreach (var resource in resourceList) { _reindexJobRecord.Resources.Add(resource); } foreach (var searchParams in notYetIndexedParams.Select(p => p.Url.ToString())) { _reindexJobRecord.SearchParams.Add(searchParams); } await CalculateTotalAndResourceCounts(); if (_reindexJobRecord.Count == 0) { _reindexJobRecord.Error.Add(new OperationOutcomeIssue( OperationOutcomeConstants.IssueSeverity.Information, OperationOutcomeConstants.IssueType.Informational, Resources.NoResourcesNeedToBeReindexed)); await UpdateParametersAndCompleteJob(); return(false); } // Generate separate queries for each resource type and add them to query list. foreach (string resourceType in _reindexJobRecord.Resources) { // Checking resource specific counts is a performance improvement, // so if an entry for this resource failed to get added to the count dictionary, run a query anyways if (!_reindexJobRecord.ResourceCounts.ContainsKey(resourceType) || _reindexJobRecord.ResourceCounts[resourceType] > 0) { var query = new ReindexJobQueryStatus(resourceType, continuationToken: null) { LastModified = Clock.UtcNow, Status = OperationStatus.Queued, }; _reindexJobRecord.QueryList.TryAdd(query, 1); } } await UpdateJobAsync(); _throttleController.UpdateDatastoreUsage(); return(true); }