Example #1
0
        /// <summary>
        /// Scrapes one page of results for the current keyword text, appends the parsed
        /// URLs to <c>urls</c>, refreshes the grid and starts the timers.
        /// </summary>
        /// <param name="start">
        /// Forwarded to <c>BuildUrl</c> together with the keyword text
        /// (presumably the result offset; confirm against <c>BuildUrl</c>).
        /// </param>
        /// <remarks>
        /// An invalid sleep interval is reported through <c>UpdateStatus</c> and the method
        /// returns without scraping. Any exception raised while scraping is reported the
        /// same way and, when an autosave path is configured, <c>Save</c> is called with it.
        /// </remarks>
        private void ScrapeUrl(int start)
        {
            UpdateStatus("Scraping urls.", true);

            // The interval bounds are parsed from text, so validate them up front:
            // int.Parse would throw on non-numeric text and Random.Next throws when the
            // lower bound exceeds the upper bound. Rejecting here (rather than inside the
            // try block) also avoids triggering an autosave for a mere input error.
            int intervalFrom;
            int intervalTo;
            bool intervalIsValid = int.TryParse(txtScrapUrlsFrom.Text, out intervalFrom)
                                && int.TryParse(txtScrapUrlsTo.Text, out intervalTo)
                                && intervalFrom <= intervalTo;

            if (!intervalIsValid)
            {
                UpdateStatus("Invalid scrape interval: enter whole numbers with 'from' not greater than 'to'.");
                return;
            }

            urlWorkerSleepsFor = random.Next(intervalFrom, intervalTo);

            try
            {
                ScrapeWorker worker = new ScrapeWorker();

                googleResults = worker.ScrapeUrlsAsync(BuildUrl(keywords.Text, start));
                HtmlParser.ParseUrls(googleResults).ForEach(x => urls.Add(x));

                BindDataGrid();

                UpdateStatus("Scraping done.");
                StartTimer();

                workerTimer.Start();
                UpdateWorkerStatus(null, new EventArgs());
            }
            catch (Exception ex)
            {
                UpdateStatus(ex.Message);

                // Look the path up once and reuse it for both the check and the save.
                string autosavePath = Config.GetAutosavePath();

                if (!string.IsNullOrEmpty(autosavePath))
                {
                    Save(autosavePath);
                }
            }
        }
Example #2
0
        /// <summary>
        /// Downloads the page for the entry at <c>urls[index]</c> and fills in its e-mail,
        /// phone number and company name from the returned HTML, then refreshes the grid.
        /// </summary>
        /// <param name="skipChecking">
        /// When <c>true</c>, all three fields are parsed without consulting the check boxes
        /// and <c>UpdateDetailsScraperWorkerStatus</c> is not called. When <c>false</c>,
        /// only the fields whose check box is checked are parsed.
        /// </param>
        /// <remarks>
        /// Every failure, including an invalid sleep interval, is reported through
        /// <c>UpdateStatus</c>. <c>StartTimer</c> is called whether or not the scrape succeeded.
        /// </remarks>
        private async void ScrapeDetails(bool skipChecking = false)
        {
            UpdateStatus("Scraping details.", true);

            try
            {
                // Parsed inside the try block on purpose: this method is async void, so an
                // exception escaping it cannot be caught by the caller. Non-numeric text
                // (int.Parse) or a lower bound above the upper bound (Random.Next) is now
                // reported through the status line like any other scraping failure.
                int emailScraperSleepsFrom = int.Parse(txtScrapEmailsFrom.Text);
                int emailScraperSleepsTo   = int.Parse(txtScrapEmailsTo.Text);

                emailWorkerSleepsFor = random.Next(emailScraperSleepsFrom, emailScraperSleepsTo);

                ScrapeWorker worker = new ScrapeWorker();

                string rawHtmlText = await worker.ScrapeDetailsAsync(urls[index].Root);

                // Space-separated keyword tokens that contain '@' are handed to ParseEmail.
                var emails = keywords.Text.Split(' ').Where(x => x.Contains("@"));

                // skipChecking forces every field; otherwise each check box decides.
                if (skipChecking || cbhEmail.Checked)
                {
                    urls[index].Email = HtmlParser.ParseEmail(rawHtmlText, emails);
                }
                if (skipChecking || cbhPhoneNumber.Checked)
                {
                    urls[index].PhoneNumber = HtmlParser.ParsePhone(rawHtmlText);
                }
                if (skipChecking || cbhCompanyName.Checked)
                {
                    urls[index].CompanyName = HtmlParser.ParseCompanyName(rawHtmlText);
                }

                dataGrid.Rows[index].Selected = true;

                BindDataGrid();

                if (!skipChecking)
                {
                    UpdateDetailsScraperWorkerStatus(null, new EventArgs());
                }
            }
            catch (Exception ex)
            {
                UpdateStatus(ex.Message);
            }

            StartTimer();
        }
Example #3
0
        /// <summary>
        /// Starts <c>_maxThreads</c> scrape worker threads and builds the application
        /// configuration.
        /// </summary>
        /// <param name="env">
        /// Hosting environment; supplies the content root used as the configuration base
        /// path and the environment name used for the optional per-environment file.
        /// </param>
        public Startup(IHostingEnvironment env)
        {
            // Launch the scrape workers, one dedicated thread each, named alpha_0, alpha_1, ...
            // (These could live on their own separate worker machines instead.)
            // NOTE(review): IsBackground is never set, so these are foreground threads;
            // confirm that is what "background" work is meant to be here.
            int workerIndex = 0;
            while (workerIndex < _maxThreads)
            {
                ScrapeWorker scraper = new ScrapeWorker();

                Thread workerThread = new Thread(new ThreadStart(scraper.Run));
                workerThread.Name = $"alpha_{workerIndex}";
                workerThread.Start();

                workerIndex++;
            }

            // Sources in registration order: the mandatory appsettings.json (reloaded on
            // change), an optional per-environment file, then environment variables.
            Configuration = new ConfigurationBuilder()
                .SetBasePath(env.ContentRootPath)
                .AddJsonFile("appsettings.json", optional: false, reloadOnChange: true)
                .AddJsonFile($"appsettings.{env.EnvironmentName}.json", optional: true)
                .AddEnvironmentVariables()
                .Build();
        }