/// <summary>
/// Creates the <see cref="BatchIndexingOptions"/> for an indexing run by copying the caller's
/// options, attaching the document builders from the configuration and computing the total count.
/// </summary>
/// <param name="configuration">Supplies the primary document builder and the optional related sources.</param>
/// <param name="options">Supplies the document type, explicit document IDs, date range and batch size.</param>
/// <returns>
/// Batch options whose <c>TotalCount</c> is the number of explicit document IDs when IDs were given,
/// otherwise the sum of the total counts reported by the changes providers.
/// </returns>
private async Task<BatchIndexingOptions> GetBatchOptionsAsync(IndexDocumentConfiguration configuration, IndexingOptions options)
{
    var batchOptions = new BatchIndexingOptions();

    batchOptions.DocumentType = options.DocumentType;
    batchOptions.DocumentIds = options.DocumentIds;
    batchOptions.StartDate = options.StartDate;
    batchOptions.EndDate = options.EndDate;
    batchOptions.BatchSize = options.BatchSize;
    batchOptions.PrimaryDocumentBuilder = configuration.DocumentSource.DocumentBuilder;

    // Only related sources that have a builder contribute one; the list stays null when RelatedSources is null.
    batchOptions.SecondaryDocumentBuilders = configuration.RelatedSources
        ?.Where(source => source.DocumentBuilder != null)
        .Select(source => source.DocumentBuilder)
        .ToList();

    // An explicit ID list defines the total on its own; the changes providers are not queried.
    if (options.DocumentIds != null)
    {
        batchOptions.TotalCount = options.DocumentIds.Count;
        return batchOptions;
    }

    batchOptions.ChangesProvidersAndTotalCounts = await GetChangesProvidersAndTotalCountsAsync(configuration, options.StartDate, options.EndDate);
    batchOptions.TotalCount = batchOptions.ChangesProvidersAndTotalCounts.Sum(provider => provider.TotalCount);

    return batchOptions;
}
/// <summary>
/// Loads one batch of changes from every changes provider that still has data beyond the current skip offset.
/// </summary>
/// <param name="batchOptions">Supplies the providers with their total counts, the date range, the skip offset and the batch size.</param>
/// <param name="cancellationToken">Checked once before any provider is queried.</param>
/// <returns>The changes of all queried providers combined into one list; null provider results are ignored.</returns>
protected virtual async Task<IList<IndexDocumentChange>> GetChangesAsync(BatchIndexingOptions batchOptions, CancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    // Providers whose reported total count does not exceed the skip value have nothing left to return.
    var pendingRequests =
        from entry in batchOptions.ChangesProvidersAndTotalCounts
        where entry.TotalCount > batchOptions.Skip
        select entry.Provider.GetChangesAsync(batchOptions.StartDate, batchOptions.EndDate, batchOptions.Skip, batchOptions.BatchSize);

    var providerBatches = await Task.WhenAll(pendingRequests);

    var allChanges = new List<IndexDocumentChange>();
    foreach (var providerBatch in providerBatches)
    {
        if (providerBatch != null)
        {
            allChanges.AddRange(providerBatch);
        }
    }

    return allChanges;
}
/// <summary>
/// Processes a single batch: either the next slice of the explicit document IDs or the next batch of provider changes.
/// </summary>
/// <param name="batchOptions">Supplies the explicit document IDs (if any), the skip offset and the batch size.</param>
/// <param name="cancellationToken">Passed through to the methods that do the actual work.</param>
/// <returns>The indexing result of the batch together with the number of items that were handed to processing.</returns>
protected virtual async Task<BatchIndexingResult> ProcessBatchAsync(BatchIndexingOptions batchOptions, CancellationToken cancellationToken)
{
    var batchResult = new BatchIndexingResult();

    if (batchOptions.DocumentIds == null)
    {
        // No explicit IDs: ask the changes providers what to process.
        var changes = await GetChangesAsync(batchOptions, cancellationToken);
        batchResult.IndexingResult = await ProcessChangesAsync(changes, batchOptions, cancellationToken);
        batchResult.ProcessedCount += changes.Count;
        return batchResult;
    }

    // Explicit IDs are always processed as modifications, one page at a time.
    var pageOfIds = batchOptions.DocumentIds
        .Skip((int)batchOptions.Skip)
        .Take(batchOptions.BatchSize)
        .ToList();

    batchResult.IndexingResult = await ProcessDocumentsAsync(IndexDocumentChangeType.Modified, pageOfIds, batchOptions, cancellationToken);
    batchResult.ProcessedCount += pageOfIds.Count;

    return batchResult;
}
/// <summary>
/// Applies one kind of change to a set of documents: deletes them for <c>Deleted</c>, indexes them for <c>Modified</c>.
/// </summary>
/// <param name="changeType">The kind of change to apply.</param>
/// <param name="documentIds">IDs of the documents to delete or index.</param>
/// <param name="batchOptions">Supplies the document type and the primary and secondary document builders.</param>
/// <param name="cancellationToken">Checked on entry and passed on to the indexing call.</param>
/// <returns>The result of the delete or index operation, or <c>null</c> for any other change type.</returns>
protected virtual async Task<IndexingResult> ProcessDocumentsAsync(IndexDocumentChangeType changeType, IList<string> documentIds, BatchIndexingOptions batchOptions, ICancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    switch (changeType)
    {
        case IndexDocumentChangeType.Deleted:
            return await DeleteDocumentsAsync(batchOptions.DocumentType, documentIds.ToArray());

        case IndexDocumentChangeType.Modified:
            return await IndexDocumentsAsync(
                batchOptions.DocumentType,
                documentIds,
                batchOptions.PrimaryDocumentBuilder,
                batchOptions.SecondaryDocumentBuilders,
                cancellationToken);

        default:
            // Change types other than Deleted/Modified are not processed and yield no result.
            return null;
    }
}
/// <summary>
/// Groups the given changes by change type and processes each group, merging the per-group result items into one result.
/// </summary>
/// <param name="changes">The raw changes to process.</param>
/// <param name="batchOptions">Passed through to the per-group processing.</param>
/// <param name="cancellationToken">Checked on entry and passed through to the per-group processing.</param>
/// <returns>
/// A single <see cref="IndexingResult"/>; its <c>Items</c> list is only created once a group reports items.
/// </returns>
protected virtual async Task<IndexingResult> ProcessChangesAsync(IEnumerable<IndexDocumentChange> changes, BatchIndexingOptions batchOptions, ICancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    var combinedResult = new IndexingResult();

    foreach (var changeGroup in GetLatestChangesForEachDocumentGroupedByChangeType(changes))
    {
        var groupResult = await ProcessDocumentsAsync(changeGroup.Key, changeGroup.Value, batchOptions, cancellationToken);

        // Groups without a result or without items contribute nothing.
        if (groupResult?.Items == null)
        {
            continue;
        }

        if (combinedResult.Items == null)
        {
            combinedResult.Items = new List<IndexingResultItem>();
        }

        combinedResult.Items.AddRange(groupResult.Items);
    }

    return combinedResult;
}
/// <summary>
/// Runs indexing for one document configuration: reads changes from the configuration's change feeds
/// batch by batch, indexes each batch (directly, or by handing the document IDs to the background worker
/// when one is present) and reports progress after every batch.
/// </summary>
/// <param name="configuration">
/// The document configuration; it must have a document type, a document source, and that source must have
/// both a changes provider and a document builder.
/// </param>
/// <param name="options">Indexing options; <c>DocumentType</c> is used for the batch options and the progress messages.</param>
/// <param name="progressCallback">Optional callback invoked at the start, after every batch and at the end.</param>
/// <param name="cancellationToken">Checked before the work starts and at the beginning of every batch.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="configuration"/> or one of its required members is missing, or <paramref name="options"/> is <c>null</c>.
/// </exception>
protected virtual async Task ProcessConfigurationAsync(IndexDocumentConfiguration configuration, IndexingOptions options, Action<IndexingProgress> progressCallback, ICancellationToken cancellationToken)
{
    if (configuration == null)
    {
        throw new ArgumentNullException(nameof(configuration));
    }

    if (string.IsNullOrEmpty(configuration.DocumentType))
    {
        throw new ArgumentNullException($"{nameof(configuration)}.{nameof(configuration.DocumentType)}");
    }

    if (configuration.DocumentSource == null)
    {
        throw new ArgumentNullException($"{nameof(configuration)}.{nameof(configuration.DocumentSource)}");
    }

    if (configuration.DocumentSource.ChangesProvider == null)
    {
        throw new ArgumentNullException($"{nameof(configuration)}.{nameof(configuration.DocumentSource)}.{nameof(configuration.DocumentSource.ChangesProvider)}");
    }

    if (configuration.DocumentSource.DocumentBuilder == null)
    {
        throw new ArgumentNullException($"{nameof(configuration)}.{nameof(configuration.DocumentSource)}.{nameof(configuration.DocumentSource.DocumentBuilder)}");
    }

    // options is dereferenced right below; fail with a descriptive exception instead of a NullReferenceException.
    if (options == null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    cancellationToken.ThrowIfCancellationRequested();

    // NOTE(review): progress and batch options use options.DocumentType while the background worker
    // below receives configuration.DocumentType — presumably these always match; confirm against callers.
    var documentType = options.DocumentType;

    progressCallback?.Invoke(new IndexingProgress($"{documentType}: calculating total count", documentType));

    var batchOptions = new BatchIndexingOptions
    {
        DocumentType = options.DocumentType,
        PrimaryDocumentBuilder = configuration.DocumentSource.DocumentBuilder,
        SecondaryDocumentBuilders = configuration.RelatedSources
            ?.Where(s => s.DocumentBuilder != null)
            .Select(s => s.DocumentBuilder)
            .ToList(),
    };

    var feeds = await GetChangeFeeds(configuration, options);

    // Try to get total count to indicate progress. Some feeds don't have a total count.
    var totalCount = feeds.Any(x => x.TotalCount == null)
        ? (long?)null
        : feeds.Sum(x => x.TotalCount ?? 0);

    long processedCount = 0;

    var changes = await GetNextChangesAsync(feeds);
    while (changes.Any())
    {
        // Observe cancellation on every batch: the background-worker branch below never
        // looks at the token, so without this check a long run could not be stopped.
        cancellationToken.ThrowIfCancellationRequested();

        IList<string> errors = null;

        if (_backgroundWorker == null)
        {
            var indexingResult = await ProcessChangesAsync(changes, batchOptions, cancellationToken);
            errors = GetIndexingErrors(indexingResult);
        }
        else
        {
            // We're executing a job to index all documents or the changes since a specific time.
            // Priority for this indexation work should be quite low.
            var documentIds = changes
                .Select(x => x.DocumentId)
                .Distinct()
                .ToArray();

            _backgroundWorker.IndexDocuments(configuration.DocumentType, documentIds, IndexingPriority.Background);
        }

        processedCount += changes.Count;

        var description = totalCount != null
            ? $"{documentType}: {processedCount} of {totalCount} have been indexed"
            : $"{documentType}: {processedCount} have been indexed";

        progressCallback?.Invoke(new IndexingProgress(description, documentType, totalCount, processedCount, errors));

        changes = await GetNextChangesAsync(feeds);
    }

    // When no total was available, report the processed count as the total so progress reads as complete.
    progressCallback?.Invoke(new IndexingProgress($"{documentType}: indexation finished", documentType, totalCount ?? processedCount, processedCount));
}