/// <summary>
/// Fetches the page at <paramref name="pageUrl"/>, runs the outdated-content,
/// problematic-word and bad-link checks against it, and returns the collected result.
/// The whole operation runs while holding a slot from <c>_throttler</c>.
/// </summary>
/// <param name="pageUrl">URL of the page to fetch and check.</param>
/// <returns>
/// The populated <see cref="PageCheckResult"/>. If fetching or any check throws, a result
/// constructed with an empty title and a fixed error message is returned instead of
/// propagating the exception.
/// </returns>
public async Task<PageCheckResult> GetPageResult(string pageUrl)
{
    // Acquire before the try so the finally only releases a slot that was actually taken.
    await _throttler.WaitAsync();
    try
    {
        string rawHtml = await _client.GetRawResultOfBasicGetRequestAsync(pageUrl);
        var page = new Page(rawHtml, pageUrl);
        var pageResult = new PageCheckResult(pageUrl, page.Title);

        CheckForOutdated(page, pageResult);
        CheckForProblematicWords(page, pageResult);
        await CheckForBadLinks(page.InPostExternalLinks, pageResult);

        return pageResult;
    }
    catch (Exception)
    {
        // Deliberate best-effort: one failing page is reported as a result rather than
        // thrown to the caller. The exception details are not recorded here.
        return new PageCheckResult(pageUrl, string.Empty, "This page generated an exception on parsing.");
    }
    finally
    {
        _throttler.Release();
    }
}
/// <summary>
/// Adds an "Is possibly outdated" issue to <paramref name="pageResult"/> when the page title
/// or any of its H1 headings satisfies <c>ContainsNonCurrentYear</c>.
/// </summary>
/// <param name="page">Page whose title and H1s are inspected.</param>
/// <param name="pageResult">Result object that receives the issue.</param>
private void CheckForOutdated(Page page, PageCheckResult pageResult)
{
    // Title is tested first; headings are only examined when the title did not match.
    if (ContainsNonCurrentYear(page.Title))
    {
        pageResult.AddIssue("Is possibly outdated");
        return;
    }

    if (page.H1s.Any(heading => ContainsNonCurrentYear(heading)))
    {
        pageResult.AddIssue("Is possibly outdated");
    }
}
/// <summary>
/// Fetches the page at <paramref name="url"/> and runs only the outdated-content check on it.
/// </summary>
/// <param name="url">URL of the page to fetch and check.</param>
/// <returns>A <see cref="PageCheckResult"/> built from the URL and the page title.</returns>
public async Task<PageCheckResult> GetPageResultAsync(string url)
{
    string html = await _client.GetRawResultOfBasicGetRequestAsync(url);
    Page fetchedPage = new Page(html, url);

    PageCheckResult result = new PageCheckResult(url, fetchedPage.Title);
    CheckForOutdated(fetchedPage, result);

    return result;
}
/// <summary>
/// Records an issue for every link whose URL starts with <c>http://</c>, then starts
/// <c>GetIssuesWithLink</c> for every link, awaits them all together, and records each
/// non-empty problem string they return.
/// </summary>
/// <param name="inPostExternalLinks">Links to check.</param>
/// <param name="pageResult">Result object that receives the issues.</param>
private async Task CheckForBadLinks(IEnumerable<Link> inPostExternalLinks, PageCheckResult pageResult)
{
    // Materialize once: the sequence is walked twice below and may be lazily evaluated.
    List<Link> links = inPostExternalLinks.ToList();

    // Ordinal comparison: a URL scheme prefix is not linguistic text, so culture rules must not apply.
    var nonSslIssues = links
        .Where(link => link.Url.StartsWith("http://", StringComparison.Ordinal))
        .Select(link => $"Non-ssl link {link.Url} with anchor text {link.AnchorText}.");
    pageResult.AddIssues(nonSslIssues);

    // Task.WhenAll hands back the results directly, so there is no need to read Task.Result.
    string[] linkCheckResults = await Task.WhenAll(links.Select(link => GetIssuesWithLink(link)));
    pageResult.AddIssues(linkCheckResults.Where(problem => !string.IsNullOrEmpty(problem)));
}
/// <summary>
/// Adds one "Contains term" issue to <paramref name="pageResult"/> for each item returned by
/// <c>GetProblematicWords</c> for <paramref name="page"/>.
/// </summary>
/// <param name="page">Page to scan.</param>
/// <param name="pageResult">Result object that receives the issues.</param>
private void CheckForProblematicWords(Page page, PageCheckResult pageResult)
{
    // Evaluated eagerly so the scan completes before any issue is handed over.
    var flaggedTerms = GetProblematicWords(page).ToList();
    var termIssues = flaggedTerms.Select(term => $"Contains term \"{term}\"");

    pageResult.AddIssues(termIssues);
}
/// <summary>
/// Adds <paramref name="result"/> to the <c>_pageCheckResults</c> collection.
/// </summary>
/// <param name="result">Page check result to store.</param>
public void AddPageCheckResult(PageCheckResult result) => _pageCheckResults.Add(result);