Пример #1
0
        /// <summary>
        /// Fetches the page at <paramref name="pageUrl"/> and runs the outdated,
        /// problematic-word and bad-link checks against it.
        /// </summary>
        /// <param name="pageUrl">Address of the page to download and inspect.</param>
        /// <returns>
        /// The populated check result; if anything inside the try block throws, a result
        /// built with an empty title and a fixed error message is returned instead.
        /// </returns>
        public async Task <PageCheckResult> GetPageResult(string pageUrl)
        {
            // Wait on the throttler before doing any work; the finally block releases it again.
            await _throttler.WaitAsync();

            try
            {
                string html = await _client.GetRawResultOfBasicGetRequestAsync(pageUrl);
                var parsedPage = new Page(html, pageUrl);
                var result = new PageCheckResult(pageUrl, parsedPage.Title);

                CheckForOutdated(parsedPage, result);
                CheckForProblematicWords(parsedPage, result);
                await CheckForBadLinks(parsedPage.InPostExternalLinks, result);

                return result;
            }
            catch (Exception)
            {
                // Any failure (download, parsing, or a check) is reported as a failed page result.
                return new PageCheckResult(pageUrl, string.Empty, "This page generated an exception on parsing.");
            }
            finally
            {
                _throttler.Release();
            }
        }
Пример #2
0
 /// <summary>
 /// Adds an "Is possibly outdated" issue to <paramref name="pageResult"/> when the page title
 /// or any of its H1 entries satisfies <c>ContainsNonCurrentYear</c>.
 /// </summary>
 /// <param name="page">Page whose title and H1s are inspected.</param>
 /// <param name="pageResult">Result object that receives the issue.</param>
 private void CheckForOutdated(Page page, PageCheckResult pageResult)
 {
     // The title is tested first; the H1s are only scanned when the title does not match.
     bool looksOutdated = ContainsNonCurrentYear(page.Title)
                          || page.H1s.Any(heading => ContainsNonCurrentYear(heading));

     if (!looksOutdated)
     {
         return;
     }

     pageResult.AddIssue("Is possibly outdated");
 }
Пример #3
0
        /// <summary>
        /// Fetches the page at <paramref name="url"/> and runs only the outdated check against it.
        /// </summary>
        /// <param name="url">Address of the page to download and inspect.</param>
        /// <returns>The check result created from the URL and the page title.</returns>
        public async Task <PageCheckResult> GetPageResultAsync(string url)
        {
            var html = await _client.GetRawResultOfBasicGetRequestAsync(url);
            var parsedPage = new Page(html, url);

            var result = new PageCheckResult(url, parsedPage.Title);
            CheckForOutdated(parsedPage, result);

            return result;
        }
Пример #4
0
        /// <summary>
        /// Records link-related issues on <paramref name="pageResult"/>: one issue per link whose
        /// URL starts with the plain "http://" scheme, followed by every non-empty issue string
        /// returned by <c>GetIssuesWithLink</c> for each link.
        /// </summary>
        /// <param name="inPostExternalLinks">Links to inspect; enumerated exactly once.</param>
        /// <param name="pageResult">Result object that receives the issues.</param>
        /// <returns>A task that completes once every link check has finished.</returns>
        private async Task CheckForBadLinks(IEnumerable <Link> inPostExternalLinks, PageCheckResult pageResult)
        {
            // Materialize once: the links are walked twice below, and a lazily evaluated
            // source would otherwise be re-run for the second pass.
            var links = inPostExternalLinks.ToList();

            // URI schemes are case-insensitive, so compare ordinally and ignore case
            // rather than relying on the current culture.
            var nonSslIssues = links
                .Where(link => link.Url.StartsWith("http://", StringComparison.OrdinalIgnoreCase))
                .Select(link => $"Non-ssl link {link.Url} with anchor text {link.AnchorText}.");

            pageResult.AddIssues(nonSslIssues);

            // Start every link check, then await them together; WhenAll hands back the
            // results in the same order as the links, so no Task.Result access is needed.
            string[] linkCheckResults = await Task.WhenAll<string>(links.Select(link => GetIssuesWithLink(link)));

            pageResult.AddIssues(linkCheckResults.Where(issue => !string.IsNullOrEmpty(issue)));
        }
Пример #5
0
        /// <summary>
        /// Adds one "Contains term" issue to <paramref name="pageResult"/> for every word
        /// returned by <c>GetProblematicWords</c> for the page.
        /// </summary>
        /// <param name="page">Page passed to <c>GetProblematicWords</c>.</param>
        /// <param name="pageResult">Result object that receives the issues.</param>
        private void CheckForProblematicWords(Page page, PageCheckResult pageResult)
        {
            // Snapshot the words into a list before the issue messages are built.
            var flaggedTerms = GetProblematicWords(page).ToList();
            var issues = flaggedTerms.Select(term => $"Contains term \"{term}\"");

            pageResult.AddIssues(issues);
        }
Пример #6
0
 /// <summary>
 /// Adds <paramref name="result"/> to the internal collection of page check results.
 /// </summary>
 /// <param name="result">The page check result to store.</param>
 public void AddPageCheckResult(PageCheckResult result) => _pageCheckResults.Add(result);