/// <summary>
/// Recursively collects page URLs reachable from <paramref name="url"/>,
/// accumulating them in the shared <c>_urls</c> list.
/// </summary>
/// <param name="url">
/// URL of the page to visit. It is lower-cased before being compared
/// against, and stored in, <c>_urls</c>.
/// </param>
/// <returns>
/// The <c>_urls</c> list itself (not a copy), containing every URL visited so far.
/// </returns>
/// <remarks>
/// A <see cref="WebException"/> raised while processing a page is passed to
/// <c>_log.LogFail</c> and does not propagate; the URL remains in the list.
/// </remarks>
public List<string> GetPages(string url)
{
    // Normalise with the invariant culture: the culture-sensitive ToLower()
    // corrupts URLs under cultures such as tr-TR (where "I" becomes dotless "ı").
    // NOTE(review): lower-casing the whole URL also folds the path, which may be
    // case-sensitive on some servers — confirm this is acceptable.
    url = url.ToLowerInvariant();

    // Already visited: stop here so that link cycles do not recurse forever.
    if (_urls.Contains(url))
    {
        return _urls;
    }

    // Record the URL before descending so that self-references are caught above.
    _urls.Add(url);

    try
    {
        var html = _client.DownloadString(url);
        var anchors = _document.GetAnchors(html);

        foreach (var anchor in anchors)
        {
            var childUrl = anchor.GetHref();

            // Skip links that are mailto:, external, or not HTML pages
            // (as classified by the _website helper).
            if (_website.IsMailToUrl(childUrl))
            {
                continue;
            }

            if (_website.IsExternalUrl(childUrl))
            {
                continue;
            }

            if (!_website.IsHtmlUrl(childUrl))
            {
                continue;
            }

            // Resolve relative links to absolute URLs before recursing.
            if (_website.IsSiteRelativeUrl(childUrl))
            {
                childUrl = _website.GetAbsoluteFromSiteRelativeUrl(childUrl);
            }

            if (_website.IsPageRelativeUrl(childUrl))
            {
                childUrl = _website.GetAbsoluteFromPageRelativeUrl(url, childUrl);
            }

            // The return value is ignored: the recursion appends to the same _urls list.
            GetPages(childUrl);
        }
    }
    catch (WebException ex)
    {
        _log.LogFail(url, ex);
    }

    return _urls;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and checks every anchor found
/// in it, logging a pass or a fail for each link.
/// </summary>
/// <param name="url">URL of the page whose links are checked.</param>
/// <remarks>
/// The download of the page itself is not guarded, so an exception raised
/// there propagates to the caller. Only the per-link download catches
/// <see cref="WebException"/>.
/// </remarks>
public void Check(string url)
{
    _logger.LogPage(url);

    foreach (var link in _document.GetAnchors(_client.DownloadString(url)))
    {
        var target = link.GetHref();

        // mailto: links are skipped entirely, with no pass/fail logged.
        if (!_website.IsMailToUrl(target))
        {
            // Turn relative links into absolute URLs before looking them up.
            if (_website.IsSiteRelativeUrl(target))
            {
                target = _website.GetAbsoluteFromSiteRelativeUrl(target);
            }

            if (_website.IsPageRelativeUrl(target))
            {
                target = _website.GetAbsoluteFromPageRelativeUrl(url, target);
            }

            if (_dictionary.HasLink(target))
            {
                // Already in the dictionary: log a pass without downloading again.
                _logger.LogPass(target);
            }
            else
            {
                try
                {
                    // The content is discarded; the download only proves the link resolves.
                    _client.DownloadString(target);
                    _dictionary.AddLink(target);
                    _logger.LogPass(target);
                }
                catch (WebException failure)
                {
                    _logger.LogFail(target, failure);
                }
            }
        }
    }
}