Example #1
        public List<JobUrl> ExtractPageUrls(int pageLimit = 20)
        {
            int pageCounter     = 0;
            var continueParsing = true;
            var results         = new List<JobUrl>();


            while (continueParsing && pageCounter < pageLimit)
            {
                string html;
                try
                {
                    // Throttle requests with a short delay between page fetches
                    Thread.Sleep(_sleepTime);

                    pageCounter += 1;
                    Log.Information("Scraping page {pageIndex}", pageCounter);

                    // TODO: make this async instead of blocking on .Result
                    html = _httpClient.GetStringAsync(_httpClient.BaseAddress.ToString() + pageCounter).Result;
                }
                catch (Exception ex)
                {
                    // Request failed - log it and treat the page as empty so parsing stops
                    Log.Warning(ex, "Failed to fetch page {pageIndex}", pageCounter);
                    html = "";
                }

                var pageResults = _scraper.ExtractPageUrls(html);

                // No more URLs found on this page - stop paging
                if (!pageResults.Any())
                {
                    Log.Information("Finished scraping page urls");
                    continueParsing = false;
                }

                results.AddRange(pageResults);
            }

            return results;
        }
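
The TODO above flags the blocking .Result call. A minimal sketch of an awaitable variant is shown below; it assumes the same _httpClient, _scraper, and _sleepTime fields and the Serilog Log class used in the original, and that _sleepTime is a delay Task.Delay can accept (e.g. milliseconds). It is an illustration, not the author's implementation.

        // Sketch: async variant of ExtractPageUrls (requires System.Threading.Tasks)
        public async Task<List<JobUrl>> ExtractPageUrlsAsync(int pageLimit = 20)
        {
            int pageCounter = 0;
            var results     = new List<JobUrl>();

            while (pageCounter < pageLimit)
            {
                // Await the delay instead of blocking the thread with Thread.Sleep
                await Task.Delay(_sleepTime);

                pageCounter += 1;
                Log.Information("Scraping page {pageIndex}", pageCounter);

                string html;
                try
                {
                    // Await the request instead of calling .Result
                    html = await _httpClient.GetStringAsync(_httpClient.BaseAddress.ToString() + pageCounter);
                }
                catch (Exception ex)
                {
                    // Treat a failed request as an empty page so the loop can stop
                    Log.Warning(ex, "Failed to fetch page {pageIndex}", pageCounter);
                    html = "";
                }

                var pageResults = _scraper.ExtractPageUrls(html);

                // No more URLs found - stop paging
                if (!pageResults.Any())
                {
                    Log.Information("Finished scraping page urls");
                    break;
                }

                results.AddRange(pageResults);
            }

            return results;
        }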