/// <summary>
/// Looks up the <see cref="UriSources"/> entry stored for <paramref name="uri"/>,
/// creating and registering a fresh one when no entry exists yet.
/// </summary>
/// <param name="uri">The standardized URI used as the lookup key.</param>
/// <returns>
/// The entry already stored under <paramref name="uri"/>, or the newly created
/// entry that has just been stored under that key.
/// </returns>
public UriSources GetOrCreateSources(StandardizedUri uri)
{
    // Fast path: the URI has been seen before, hand back the stored entry.
    if (_uris.TryGetValue(uri, out var existing))
    {
        return existing;
    }

    // First sighting: build the entry and remember it for later lookups.
    var created = new UriSources(uri);
    _uris[uri] = created;
    return created;
}
/// <summary>
/// Downloads the page behind <paramref name="uri"/>, records the outcome on its
/// <see cref="UriSources"/> entry and, when link following is enabled, hands every
/// link parsed from the page to <c>FoundLink</c>.
/// </summary>
/// <param name="uri">The standardized URI to download and process.</param>
/// <returns>A task that completes once the page has been processed.</returns>
/// <remarks>
/// Only <see cref="HttpRequestException"/> is handled here; any other exception
/// raised while downloading propagates to the caller.
/// </remarks>
private async Task Visit(StandardizedUri uri)
{
    UriSources pageSources;
    string pageHtml = string.Empty;

    try
    {
        (Uri resolvedUri, string body) = await DownloadPageAsync(uri.Standardized);
        pageHtml = body;

        // NOTE(review): presumably merges the requested and final URIs when the
        // download was redirected - confirm against the link tracker.
        pageSources = _linkTracker.CombineIfRedirection(uri, resolvedUri);
    }
    catch (HttpRequestException ex)
    {
        // The download failed: keep the failure on the entry for this URI.
        pageSources = _linkTracker.GetOrCreateSources(uri);
        pageSources.Status = SpiderPageStatus.Error;
        pageSources.Error = ex.Message;
        Debug.WriteLine($"Error downloading page ({uri}): {ex.Message}");
    }

    // Anything that is no longer Unvisited (including the error case above)
    // needs no further processing.
    if (pageSources.Status != SpiderPageStatus.Unvisited)
    {
        return;
    }

    // Content that IsHtml rejects is excluded instead of being parsed.
    if (!IsHtml(pageHtml))
    {
        pageSources.Status = SpiderPageStatus.Excluded;
        return;
    }

    pageSources.Status = SpiderPageStatus.Visited;

    if (!_options.FollowLinks)
    {
        return;
    }

    // Links are parsed relative to the URI held by the sources entry,
    // not the URI originally passed in.
    var discoveredLinks = ParseLinks(pageSources.Uri.Uri, pageHtml);
    discoveredLinks.ForEach(FoundLink);
}