/// <summary>
/// Reads the HTML body of <paramref name="response"/>, collects the same-domain links
/// that have not been seen yet, and records a result entry for the page itself.
/// </summary>
/// <param name="response">The HTTP response of the page that was just fetched.</param>
/// <returns>
/// The URLs discovered on this page that were newly added to <c>_crawlBag</c>.
/// An empty list is returned when the body is blank, cannot be parsed into a document
/// node, or contains no anchor elements; in those cases no entry is added to
/// <c>_results</c>.
/// </returns>
private async Task<List<string>> ProcessResponse(HttpResponseMessage response)
{
    var html = await response.Content.ReadAsStringAsync();
    if (string.IsNullOrWhiteSpace(html))
    {
        return new List<string>();
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(html);
    if (doc.DocumentNode == null)
    {
        return new List<string>();
    }

    var anchors = doc.DocumentNode.SelectNodes("//a");
    if (anchors == null || anchors.Count == 0)
    {
        return new List<string>();
    }

    var mainDomain = _baseUrl.UrlGetDomain();
    var nextBatch = new List<string>();

    foreach (var anchor in anchors)
    {
        var url = anchor.GetAttributeValue("href", string.Empty);
        if (url.IsNullOrEmpty())
        {
            continue;
        }

        if (url.StartsWith("//", StringComparison.Ordinal))
        {
            // Protocol-relative link ("//host/path"): prepend the base scheme.
            // Uri.Scheme carries no trailing ':', so it has to be added explicitly;
            // without it the result would be e.g. "http//host/path".
            // NOTE(review): assumes ToUri() yields a System.Uri - confirm.
            url = _baseUrl.ToUri().Scheme + ":" + url;
        }
        else if (url.StartsWith("/", StringComparison.Ordinal))
        {
            // Root-relative link ("/path"): resolve against the base URL.
            // A former dedicated "/www." branch was unreachable because such links
            // always matched this case first; they keep being handled here.
            url = (_baseUrl + url).UrlFixUrl();
        }

        // Only links that stay on the crawled domain are followed.
        var domain = url.UrlGetDomain();
        if (domain != mainDomain)
        {
            continue;
        }

        // Skip pages that already have a recorded result.
        if (_results.Any(d => d.Url.Equals(url, StringComparison.InvariantCultureIgnoreCase)))
        {
            continue;
        }

        // Skip URLs that are already in the crawl bag (this also de-duplicates
        // repeated links within the current page, since new ones are added below).
        if (_crawlBag.Any(d => d.Equals(url, StringComparison.InvariantCultureIgnoreCase)))
        {
            continue;
        }

        nextBatch.Add(url);
        _crawlBag.Add(url);
    }

    // Record the page itself: status code plus an MD5 of its extracted content.
    var contentParser = new PageContentParser();
    var pageContent = contentParser.GetPageContnent(doc);
    var md5 = pageContent.Md5Get();

    var pageResult = new CrawlingPageResult(response.RequestMessage.RequestUri.OriginalString)
    {
        HttpStatusCode = response.StatusCode,
        MD5 = md5
    };
    _results.Add(pageResult);

    return nextBatch;
}