/// <summary>
/// Indexes the site addressed by <paramref name="request"/>: the requested URL first, then the
/// same-site links listed in <c>/sitemap.xml</c>, and any further URLs that appear in the shared
/// work set while pages are being indexed.
/// </summary>
/// <remarks>
/// <para>At most 32 <c>IndexPage</c> calls run concurrently.</para>
/// <para>
/// The method stops early and returns an error result when the page limit is reached (before or
/// during indexing) or when a completed page reports a failure. On a page failure it returns
/// without waiting for pages that are still in flight.
/// </para>
/// </remarks>
/// <param name="request">The in-progress request; its page counters are updated as work proceeds.</param>
/// <returns>
/// The value of <c>request.SetError(...)</c> when the page limit is exceeded or a page fails;
/// otherwise the value of <c>request.SetIndexed(DateTime.UtcNow)</c>.
/// </returns>
private async Task<IndexRequest> Index(InProgressIndexRequest request)
{
    // Upper bound on simultaneously running IndexPage calls.
    const int maxConcurrentPages = 32;
    // A garbage collection is forced each time the handed-out page count crosses a multiple of this.
    const int pagesPerForcedCollection = 200;

    var siteHost = GetHost(request.Url);
    var siteHostWithDotBefore = '.' + siteHost;

    var siteMapUrl = new Uri(request.Url, "/sitemap.xml");
    var siteMap = await siteMapGetter.GetSiteMap(siteMapUrl);
    if (pagesPerSiteLimiter.IsLimitReached(siteMap.Links.Length))
    {
        return request.SetError(GetTooManyPagesErrorMessage(
            request.Url,
            siteMap.Links.Length,
            pagesPerSiteLimiter.PagesPerSiteLimit));
    }

    // Work set of URLs that still have to be handed to IndexPage. It is also passed to IndexPage,
    // which presumably adds newly discovered links to it - confirm against IndexPage.
    var urlsToIndex = new ConcurrentDictionary<Uri, byte>();
    urlsToIndex.TryAdd(request.Url, default);

    // Keep only links on the requested host or one of its sub-domains. Host names are compared
    // ordinally, consistently with the '==' check. TryAdd already ignores duplicates, so no
    // separate de-duplication pass is needed.
    var sameSiteLinks = siteMap.Links.Where(uri =>
        (uri.Host == siteHost || uri.Host.EndsWith(siteHostWithDotBefore, StringComparison.Ordinal))
        && uri != request.Url);
    foreach (var link in sameSiteLinks)
    {
        urlsToIndex.TryAdd(link, default);
    }

    request.UpdatePagesCounts(0, urlsToIndex.Count);
    indexRequestsQueue.Update(request);

    Result<string> indexingResult;
    var indexedUrls = new ConcurrentDictionary<Uri, byte>();
    var isUrlFromRequestIndexed = false;
    var indexedUrlsRoughCount = 0;

    // Not disposed on purpose: the early return on a page failure does not wait for in-flight
    // pages, and their continuations still call Release() afterwards.
    var semaphore = new SemaphoreSlim(maxConcurrentPages);

    // Continuation tasks of pages that may still be running. Only this method's own flow touches
    // the list (never the continuations), so a plain list is sufficient. The type is fully
    // qualified so that the method does not rely on an additional using directive.
    var inFlightTasks = new System.Collections.Generic.List<Task>();
    var completedIndexingTasks = new ConcurrentStack<Task<Result<string>>>();

    while (!urlsToIndex.IsEmpty)
    {
        Uri currentUrl;
        if (!isUrlFromRequestIndexed)
        {
            // The requested URL is always handed out first.
            currentUrl = request.Url;
            isUrlFromRequestIndexed = true;
        }
        else
        {
            // Enumerate lazily instead of using Keys, which copies every key on each call.
            currentUrl = urlsToIndex.First().Key;
        }

        urlsToIndex.TryRemove(currentUrl, out _);
        indexedUrls.TryAdd(currentUrl, default);

        if (semaphore.CurrentCount == 0)
        {
            // All slots are busy, so we are about to wait anyway: publish progress meanwhile.
            var indexedPagesCount = indexedUrls.Count;
            var foundPagesCount = indexedPagesCount + urlsToIndex.Count;
            request.UpdatePagesCounts(indexedPagesCount, foundPagesCount);
            // Fire-and-forget: the progress update is intentionally not awaited.
#pragma warning disable 4014
            indexRequestsQueue.UpdateAsync(request);
#pragma warning restore 4014
        }

        await semaphore.WaitAsync();

        // Forget finished continuations; their page results are already on completedIndexingTasks
        // because a continuation completes only after it has pushed its page task.
        inFlightTasks.RemoveAll(trackedTask => trackedTask.IsCompleted);
        inFlightTasks.Add(
            IndexPage(currentUrl, request, siteHost, siteHostWithDotBefore, urlsToIndex, indexedUrls)
                .ContinueWith(pageTask =>
                {
                    semaphore.Release();
                    completedIndexingTasks.Push(pageTask);
                }));

        indexingResult = CheckResultOfCompletedTasks();
        if (indexingResult.IsFailure)
        {
            return request.SetError(indexingResult.Error);
        }

        // Nothing left to hand out: wait for running pages one at a time, until either the work
        // set is non-empty again or no page is running any more.
        while (urlsToIndex.IsEmpty && inFlightTasks.Count > 0)
        {
            var finishedTask = await Task.WhenAny(inFlightTasks);
            inFlightTasks.Remove(finishedTask);
        }

        // NOTE(review): forcing a collection is unusual in production code; it is kept here to
        // preserve the existing memory behaviour.
        if (indexedUrls.Count / pagesPerForcedCollection > indexedUrlsRoughCount / pagesPerForcedCollection)
        {
            GC.Collect();
            indexedUrlsRoughCount = indexedUrls.Count;
        }

        // ReSharper disable once InvertIf
        if (pagesPerSiteLimiter.IsLimitReached(indexedUrls.Count))
        {
            // Let running pages finish before deleting, then remove everything handed out so far
            // except the requested URL itself.
            await Task.WhenAll(inFlightTasks);
            _client.DeleteMany(
                indexedUrls.Keys
                    .Where(uri => uri != request.Url)
                    .Select(uri => uri.ToString()),
                _options.DocumentsIndexName);
            return request.SetError(GetTooManyPagesErrorMessage(
                request.Url,
                indexedUrls.Count,
                pagesPerSiteLimiter.PagesPerSiteLimit));
        }
    }

    await Task.WhenAll(inFlightTasks);
    indexingResult = CheckResultOfCompletedTasks();
    if (indexingResult.IsFailure)
    {
        return request.SetError(indexingResult.Error);
    }

    Debug.Assert(urlsToIndex.Count == 0, "После завершения индексации остались непроиндексированные страницы");
    request.UpdatePagesCounts(indexedUrls.Count, indexedUrls.Count);
    return request.SetIndexed(DateTime.UtcNow);

    // Drains the stack of finished page tasks and returns the first failure found, or success
    // when every drained task succeeded. The tasks are already completed, so Result does not block.
    Result<string> CheckResultOfCompletedTasks()
    {
        while (completedIndexingTasks.TryPop(out var completedTask))
        {
            var result = completedTask.Result;
            if (result.IsFailure)
            {
                return result;
            }
        }

        return Result<string>.Success();
    }
}