/// <summary>
/// Scrapes one page of search results for the current keywords, appends the parsed
/// entries to <c>urls</c>, rebinds the grid and starts the worker timer.
/// </summary>
/// <param name="start">
/// Passed to <c>BuildUrl</c> together with the keywords text
/// (presumably the result offset; confirm against <c>BuildUrl</c>).
/// </param>
/// <remarks>
/// Every failure, including malformed or inverted sleep-interval input, is reported
/// through <c>UpdateStatus</c>; the current state is then saved when an autosave path
/// is configured.
/// </remarks>
private void ScrapeUrl(int start)
{
    UpdateStatus("Scraping urls.", true);

    try
    {
        // Parsed inside the try: int.Parse throws on non-numeric text and
        // Random.Next throws when the lower bound exceeds the upper bound. Both
        // should surface as a status message instead of escaping this method.
        int intervalFrom = int.Parse(txtScrapUrlsFrom.Text);
        int intervalTo = int.Parse(txtScrapUrlsTo.Text);
        urlWorkerSleepsFor = random.Next(intervalFrom, intervalTo);

        ScrapeWorker worker = new ScrapeWorker();

        // NOTE(review): the result of ScrapeUrlsAsync is used without await; its
        // return type is not visible from this method. Confirm this is intended.
        googleResults = worker.ScrapeUrlsAsync(BuildUrl(keywords.Text, start));
        HtmlParser.ParseUrls(googleResults).ForEach(x => urls.Add(x));

        BindDataGrid();
        UpdateStatus("Scraping done.");

        StartTimer();
        workerTimer.Start();
        UpdateWorkerStatus(null, new EventArgs());
    }
    catch (Exception ex)
    {
        UpdateStatus(ex.Message);

        // Look the path up once and reuse it for both the check and the save.
        string autosavePath = Config.GetAutosavePath();
        if (!string.IsNullOrEmpty(autosavePath))
        {
            Save(autosavePath);
        }
    }
}
/// <summary>
/// Downloads the page for <c>urls[index].Root</c>, parses e-mail, phone number and
/// company name out of the HTML, stores them on the current entry and refreshes the grid.
/// </summary>
/// <param name="skipChecking">
/// When <c>true</c>, all three fields are parsed regardless of the checkbox states and
/// <c>UpdateDetailsScraperWorkerStatus</c> is not invoked. When <c>false</c>, only the
/// fields whose checkbox is checked are parsed.
/// </param>
/// <remarks>
/// The method is <c>async void</c>, so an exception that escapes it cannot be observed
/// by the caller. All fallible work, including parsing the sleep interval, therefore
/// runs inside the <c>try</c> and is reported through <c>UpdateStatus</c>.
/// <c>StartTimer</c> is called afterwards whether or not the scrape succeeded.
/// </remarks>
private async void ScrapeDetails(bool skipChecking = false)
{
    UpdateStatus("Scraping details.", true);

    try
    {
        // int.Parse throws on non-numeric text and Random.Next throws when the lower
        // bound exceeds the upper bound; keep both inside the try.
        int emailScraperSleepsFrom = int.Parse(txtScrapEmailsFrom.Text);
        int emailScraperSleepsTo = int.Parse(txtScrapEmailsTo.Text);
        emailWorkerSleepsFor = random.Next(emailScraperSleepsFrom, emailScraperSleepsTo);

        ScrapeWorker worker = new ScrapeWorker();
        string rawHtmlText = await worker.ScrapeDetailsAsync(urls[index].Root);

        // Keyword tokens that contain '@' are handed to the e-mail parser.
        var emails = keywords.Text.Split(' ').Where(x => x.Contains("@"));

        // skipChecking short-circuits the checkbox reads, exactly as the separate
        // branches did before.
        if (skipChecking || cbhEmail.Checked)
        {
            urls[index].Email = HtmlParser.ParseEmail(rawHtmlText, emails);
        }

        if (skipChecking || cbhPhoneNumber.Checked)
        {
            urls[index].PhoneNumber = HtmlParser.ParsePhone(rawHtmlText);
        }

        if (skipChecking || cbhCompanyName.Checked)
        {
            urls[index].CompanyName = HtmlParser.ParseCompanyName(rawHtmlText);
        }

        dataGrid.Rows[index].Selected = true;
        BindDataGrid();

        if (!skipChecking)
        {
            UpdateDetailsScraperWorkerStatus(null, new EventArgs());
        }
    }
    catch (Exception ex)
    {
        UpdateStatus(ex.Message);
    }

    StartTimer();
}
/// <summary>
/// Starts the scrape worker threads and then builds the application configuration.
/// </summary>
/// <param name="env">
/// Hosting environment; supplies the content root path and the environment name used
/// to locate the settings files.
/// </param>
public Startup(IHostingEnvironment env)
{
    // Start _maxThreads scrape workers, one thread each. Per the original note,
    // these could instead run on their own separate worker boxes.
    for (int i = 0; i < _maxThreads; i++)
    {
        var scraper = new ScrapeWorker();
        ThreadStart entryPoint = new ThreadStart(scraper.Run);

        var workerThread = new Thread(entryPoint);
        workerThread.Name = $"alpha_{i}";
        workerThread.Start();
    }

    // appsettings.json is required; the environment-specific file is optional;
    // environment variables are added as the last source.
    Configuration = new ConfigurationBuilder()
        .SetBasePath(env.ContentRootPath)
        .AddJsonFile("appsettings.json", optional: false, reloadOnChange: true)
        .AddJsonFile($"appsettings.{env.EnvironmentName}.json", optional: true)
        .AddEnvironmentVariables()
        .Build();
}