public async Task<IActionResult> Calc(string BREF, ScraperModel scraper)
{
    // HTTP GET request - the raw HTML of the results page is stored in data
    var data = await HttpClientFactory.Create().GetStringAsync(_Idata.GoogleURL());

    // Split the HTML into an array; the delimiter marks one organic Google search result
    var split_data = data.Split(_Idata.GoogleResultSplit());

    // Split the HTML into an array; the delimiter marks one paid ad result
    var paid_ads = data.Split(_Idata.PaidAdSplit());

    var googleP_counter = 0;
    var paid_ads_counter = 0;
    var total = 0;

    foreach (string s in split_data)
    {
        try
        {
            googleP_counter += 1;
            if (s.Contains(_Idata.InfotrackURL())) // look for a string match in the first 100 organic search results
            {
                total = googleP_counter;
                ViewBag.google = total;
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message); // TODO: map the exception to a dedicated error view
        }
    }

    // Count the paid ad segments produced by the split
    foreach (string p_ads in paid_ads)
    {
        paid_ads_counter += 1;
    }

    ViewBag.ads = paid_ads_counter;
    ViewBag.total = total - paid_ads_counter; // adjust the matched position by the number of paid ads counted

    var new_model = new ScraperModel
    {
        input = "infotrack",
        position = total,
        paid_ads = paid_ads_counter,
        date = DateTime.Now
    };
    _context.Add(new_model);
    await _context.SaveChangesAsync();

    return View();
}
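Calc depends on an injected _Idata helper that supplies the search URL and the delimiter strings, but its definition is not part of this snippet. The sketch below is a hypothetical reconstruction based only on the members the action calls; the example values in the comments are assumptions (the "num=100" guess follows the comment about checking the first 100 results).

// Hypothetical sketch of the _Idata dependency, inferred from the calls in Calc.
// Member names match the usage above; the described return values are assumptions.
public interface IData
{
    string GoogleURL();          // results URL to request, e.g. "https://www.google.com/search?q=infotrack&num=100"
    string GoogleResultSplit();  // HTML fragment that delimits one organic result
    string PaidAdSplit();        // HTML fragment that delimits one paid ad
    string InfotrackURL();       // substring identifying an InfoTrack hit, e.g. "www.infotrack.com.au"
}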
public async Task Begin() // async void swallows exceptions and cannot be awaited; async Task is the safer signature
{
    Console.WriteLine("Scraper Started");

    while (true)
    {
        using (var dbContext = new ScraperContext())
        {
            // Pick the next unscraped URI that has not already failed three times
            ScrapedUri nextTarget = await dbContext.ScrapeUri
                .Where(s => s.Scraped == false && s.ScrapeAttempts < 3)
                .FirstOrDefaultAsync();

            if (nextTarget != default)
            {
                WebPage webpage = await TryScrapeWebPage(nextTarget.AbsoluteUri);
                if (webpage != null)
                {
                    // Collect every anchor tag on the page and queue its target for scraping
                    HtmlNode[] linkNodes = webpage.Html.CssSelect("a").ToArray();
                    for (int x = 0; x < linkNodes.Length; x++)
                    {
                        string link = linkNodes[x].GetAttributeValue("href");
                        if (!string.IsNullOrEmpty(link))
                        {
                            Uri uri = LinkValidation.Validate(link, nextTarget.AbsoluteUri);
                            if (uri != null)
                            {
                                ScrapedUri scrapeUri = new ScrapedUri
                                {
                                    AbsoluteUri = uri.AbsoluteUri,
                                    Scheme = uri.Scheme,
                                    Host = uri.Host,
                                    QueryParams = uri.Query,
                                    FileType = GetFileType(uri.Segments[uri.Segments.Length - 1]),
                                    ScrapeDataTime = DateTime.UtcNow
                                };
                                dbContext.Add(scrapeUri);
                                Console.WriteLine("Adding:" + scrapeUri.AbsoluteUri);
                            }
                        }
                    }
                    nextTarget.Scraped = true;
                }
                nextTarget.ScrapeAttempts++;
                await dbContext.SaveChangesAsync();
            }
            else
            {
                Console.WriteLine("No more Uris to scrape");
                break;
            }
        }
    }

    Console.WriteLine("Scraper Finished");
}
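Begin relies on two helpers the snippet does not define: LinkValidation.Validate and GetFileType. The following is a minimal hypothetical sketch of both, assuming Validate resolves relative hrefs against the page they came from and rejects non-HTTP schemes, and GetFileType reads the extension off the final URI segment. Neither body is from the original code.

using System;
using System.IO;

public static class LinkValidation
{
    // Hypothetical sketch: resolve a raw href against its source page and
    // accept only absolute http/https results; return null for anything else.
    public static Uri Validate(string link, string baseUri)
    {
        if (!Uri.TryCreate(new Uri(baseUri), link, out Uri result))
            return null;

        if (result.Scheme != Uri.UriSchemeHttp && result.Scheme != Uri.UriSchemeHttps)
            return null;

        return result;
    }
}

// Hypothetical sketch of GetFileType (assumed to live in the scraper class):
// take the last URI segment ("index.html", "logo.png", "/") and return its
// extension without the dot, or an empty string if there is none.
private static string GetFileType(string lastSegment)
{
    string extension = Path.GetExtension(lastSegment);
    return string.IsNullOrEmpty(extension) ? "" : extension.TrimStart('.');
}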
public async Task<IActionResult> Create([Bind("ID,input,position,paid_ads,date")] ScraperModel scraperModel)
{
    if (ModelState.IsValid)
    {
        _context.Add(scraperModel);
        await _context.SaveChangesAsync();
        return RedirectToAction(nameof(Index));
    }
    return View(scraperModel);
}
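The Bind attribute whitelists five properties of ScraperModel against overposting. The model class itself is not shown; below is a minimal sketch consistent with the Bind list and the object initializer in Calc, where the property types are inferred from how each member is assigned.

// Hypothetical sketch of ScraperModel, inferred from the Bind list and
// the initializer in Calc (position = total, paid_ads = counter, date = DateTime.Now).
public class ScraperModel
{
    public int ID { get; set; }
    public string input { get; set; }  // search term, e.g. "infotrack"
    public int position { get; set; }  // rank of the match in the organic results
    public int paid_ads { get; set; }  // number of paid ads counted on the page
    public DateTime date { get; set; } // when the measurement was taken
}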
public Scraper()
{
    using (var dbContext = new ScraperContext())
    {
        // Seed the queue with a starting URI if the table is empty
        if (dbContext.ScrapeUri.Count() == 0)
        {
            Console.WriteLine("Adding start point");
            ScrapedUri scrapeUri = new ScrapedUri
            {
                AbsoluteUri = "http://demo.com",
                Scheme = "http",
                Host = "demo.com",
                QueryParams = ""
            };
            dbContext.Add(scrapeUri);
            dbContext.SaveChanges(); // without SaveChanges the seed row is never persisted
        }
    }
}
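Between this constructor and Begin, the code touches eight properties of the ScrapedUri entity. A minimal sketch of the entity class follows; the key property and the default values noted in the comments are assumptions, everything else is taken from how Begin and the constructor use the type.

// Hypothetical sketch of the ScrapedUri entity, inferred from the
// initializers in Begin and the Scraper constructor.
public class ScrapedUri
{
    public int Id { get; set; }              // assumed EF Core primary key
    public string AbsoluteUri { get; set; }
    public string Scheme { get; set; }
    public string Host { get; set; }
    public string QueryParams { get; set; }
    public string FileType { get; set; }
    public DateTime ScrapeDataTime { get; set; }
    public bool Scraped { get; set; }        // defaults to false: not yet visited
    public int ScrapeAttempts { get; set; }  // defaults to 0; capped at 3 in Begin
}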