Ejemplo n.º 1
0
        /// <summary>
        /// Handles the main page: builds the <see cref="TermOfOffice"/> entry for
        /// <paramref name="root"/>, optionally reports it, and then runs the archive and
        /// deputies steps that <paramref name="options"/> enable.
        /// </summary>
        /// <param name="root">Address of the main page; also stored as the term's <c>Url</c>.</param>
        /// <param name="document">HTML document of the main page.</param>
        /// <param name="options">
        /// Switches read here: <c>SearchTermOfServices</c>, <c>SkipArchiwum</c> and <c>SearchDeputies</c>.
        /// </param>
        /// <returns>A task that completes once every enabled step has finished.</returns>
        private async Task CrawlMainPageAsync(Uri root, IHtmlDocument document, SejmCrawlerOptions options)
        {
            var termName = GetNameFromPage(root, document);
            var term = new TermOfOffice { Name = termName, Url = root };

            if (options.SearchTermOfServices)
            {
                NewTermOfOfficeFound(term);
            }

            // One CrawlData instance is handed to both processing steps below.
            var context = new CrawlData(this, root, options);

            if (options.SkipArchiwum)
            {
                logger.LogInformation("Skipping archiwum.");
            }
            else
            {
                await ProcessArchiwumAsync(root, document, context).ConfigureAwait(false);
            }

            // Deputies are the last step, so a disabled search can simply log and leave.
            if (!options.SearchDeputies)
            {
                logger.LogInformation("Skipping current deputies.");
                return;
            }

            await ProcessDeputies(document, term, context).ConfigureAwait(false);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates the crawl data from a notifier, a root address and the crawler options.
        /// Every argument is passed through <c>Require.NotNull</c> before anything is stored.
        /// </summary>
        /// <param name="dataNotifier">Stored in <c>DataNotifier</c>.</param>
        /// <param name="rootUrl">Stored in <c>RootUrl</c>.</param>
        /// <param name="options">Stored in <c>Options</c>.</param>
        public CrawlData(IDataNotifier dataNotifier, Uri rootUrl, SejmCrawlerOptions options)
        {
            // Validate all three arguments up front so no property is set when a check fails.
            // NOTE(review): Require.NotNull presumably throws on null - confirm the exception type.
            Require.NotNull(dataNotifier, nameof(dataNotifier));
            Require.NotNull(rootUrl, nameof(rootUrl));
            Require.NotNull(options, nameof(options));

            this.DataNotifier = dataNotifier;
            this.RootUrl = rootUrl;
            this.Options = options;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Requests the page at <paramref name="root"/>, hands its HTML document to
        /// <c>CrawlMainPageAsync</c>, and logs the start of the crawl and the elapsed time at the end.
        /// </summary>
        /// <param name="root">Address of the page to request; checked with <c>Require.NotNull</c>.</param>
        /// <param name="options">Crawler options forwarded unchanged; checked with <c>Require.NotNull</c>.</param>
        /// <returns>A task that completes when the main page has been crawled.</returns>
        public async Task CrawlAsync(Uri root, SejmCrawlerOptions options)
        {
            Require.NotNull(root, nameof(root));
            Require.NotNull(options, nameof(options));

            logger.LogInformation("Start crawling {Url}.", root);

            // Started after the first log entry; covers the request and the crawl.
            var timer = Stopwatch.StartNew();

            var response = await pageRequester.MakeRequestAsync(root).ConfigureAwait(false);

            await CrawlMainPageAsync(root, response.AngleSharpHtmlDocument, options).ConfigureAwait(false);

            logger.LogInformation("Crawling {Url} finished. Took {Elapsed}", root, timer.Elapsed);
        }