/// <summary>
/// Looks up the poll link inside <c>div.voting</c>, downloads the script it points to and
/// injects the HTML carried by that script into the element the script targets.
/// </summary>
/// <param name="doc">Document that is searched and modified in place.</param>
/// <returns>
/// <c>true</c> when the target element's inner HTML was replaced; <c>false</c> when a required
/// element or attribute is missing or the downloaded response does not have the expected shape.
/// </returns>
protected async Task<bool> FixVoting(IHtmlDocument doc)
        {
            var votingDiv = doc.QuerySelector("div.voting");

            if (votingDiv == null)
            {
                return false;
            }

            var linkElement = votingDiv.QuerySelector("a[id*='poll']");

            if (linkElement == null)
            {
                return false;
            }

            // The signature input is optional markup: bail out instead of dereferencing null.
            var signatureElement = votingDiv.QuerySelector("input[name='signature']");

            if (signatureElement == null)
            {
                return false;
            }

            var signature = signatureElement.GetAttribute("value");
            var url       = linkElement.GetAttribute("href");

            // Without an href there is nothing to download; appending the query to null
            // would only produce a meaningless URL.
            if (string.IsNullOrEmpty(url))
            {
                return false;
            }

            url += "&js&signature=" + signature;
            var resource = new DownloadResource()
            {
                Url = url
            };
            var res = await _dataDownloader.Download(resource);

            // The response is expected to be a script of the form
            //   get('<id>').innerHTML = '<html>'
            // group 1 is the target element id, group 2 the replacement markup.
            var resString = res.DownloadedData.AsAnsiString();
            var match     = Regex.Match(resString, @"get\('(\w+)'\)\.innerHTML\s+=\s+'([^']*)'");

            if (!match.Success)
            {
                return false;
            }

            var divId      = match.Groups[1].Value;
            // Turn the script-escaped \" sequences back into plain double quotes.
            var newHtml    = match.Groups[2].Value.Replace(@"\""", @"""");
            var replaceDiv = doc.QuerySelector($"#{divId}");

            if (replaceDiv == null)
            {
                return false;
            }

            replaceDiv.InnerHtml = newHtml;
            // Drop the poll action span, if present, once the content has been injected.
            votingDiv.QuerySelector("span[id*='spanpollaction']")?.Remove();

            return true;
        }
Esempio n. 2
0
        /// <summary>
        /// Downloads each date page, extracts the post URLs found in its HTML and downloads
        /// every matched post, recording progress after each page.
        /// </summary>
        /// <param name="datePages">Date pages to scan, processed sequentially in enumeration order.</param>
        /// <param name="cancellationToken">Checked before each page and before each post download.</param>
        /// <returns><c>true</c> once all pages have been processed.</returns>
        /// <exception cref="OperationCanceledException">Cancellation was requested via <paramref name="cancellationToken"/>.</exception>
        private async Task<bool> ScanDateUrlsAsync(IEnumerable<DiaryDatePage> datePages, CancellationToken cancellationToken)
        {
            // Escape the diary name so any regex metacharacters in it are matched literally.
            // A null name falls back to an empty string, as the original interpolation did.
            var diaryName = Regex.Escape(_options.DiaryName ?? string.Empty);

            // Group 1 captures "<diary>.diary.ru/p<id>.htm" when it is followed by "URL"
            // within at most 30 characters. Built once, since it is the same for every page.
            var postUrlRegex = new Regex($@"({diaryName}\.diary\.ru\/p\w+\.htm).{{0,30}}URL", RegexOptions.IgnoreCase);

            foreach (var datePage in datePages)
            {
                cancellationToken.ThrowIfCancellationRequested();
                var downloadResult = await _downloader.Download(datePage, false, _options.RequestDelay);

                var html = downloadResult.DownloadedData.AsAnsiString();
                foreach (Match m in postUrlRegex.Matches(html))
                {
                    cancellationToken.ThrowIfCancellationRequested();
                    await DownloadPostAsync("http://" + m.Groups[1].Value, datePage.PostDate);
                }

                Progress.IncrementInt(ScrapeProgressNames.DatePagesProcessed, 1);
                await _downloadExistingChecker.AddProcessedDataAsync(datePage);
            }

            return true;
        }