示例#1
0
        /// <summary>
        /// Downloads the page at <c>_Idata.GoogleURL()</c>, finds the position of the segment
        /// containing <c>_Idata.InfotrackURL()</c>, counts the paid-ad segments, exposes the
        /// figures through the ViewBag and saves them as a new <see cref="ScraperModel"/>.
        /// </summary>
        /// <param name="BREF">Not read by this action.</param>
        /// <param name="scraper">Not read by this action; a fresh model is built and saved instead.</param>
        /// <returns>The default view for this action.</returns>
        public async Task <IActionResult> Calc(string BREF, ScraperModel scraper)
        {
            string data;

            // Dispose the client once the HTML has been downloaded so its handler and
            // connection are released promptly instead of waiting for finalization.
            // NOTE(review): a single shared client would be better still if the class can hold one.
            using (var client = HttpClientFactory.Create())
            {
                data = await client.GetStringAsync(_Idata.GoogleURL());
            }

            var split_data = data.Split(_Idata.GoogleResultSplit()); // segments in organic-result formatting
            var paid_ads   = data.Split(_Idata.PaidAdSplit());       // segments in paid-ad formatting

            // Split yields one more segment than there are delimiters, and this figure is the
            // segment count (the same value the previous counting loop produced).
            var paid_ads_counter = paid_ads.Length;

            var total = 0;
            for (var index = 0; index < split_data.Length; index++)
            {
                try
                {
                    if (split_data[index].Contains(_Idata.InfotrackURL()))
                    {
                        // Positions are 1-based and include the text before the first delimiter.
                        // A later match overwrites an earlier one, so the last match wins.
                        // NOTE(review): confirm whether the first match was intended instead.
                        total          = index + 1;
                        ViewBag.google = total;
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.Message); // at a later date - map exception to a different view
                }
            }

            ViewBag.ads   = paid_ads_counter;
            ViewBag.total = total - paid_ads_counter;

            var new_model = new ScraperModel
            {
                input    = "infotrack",
                position = total,
                paid_ads = paid_ads_counter,
                date     = DateTime.Now
            };
            _context.Add(new_model);
            await _context.SaveChangesAsync();

            return View();
        }
示例#2
0
        /// <summary>
        /// Repeatedly picks a stored URI that is not yet scraped and has fewer than three
        /// attempts, scrapes it, records every validated link found in its anchor elements as
        /// a new <c>ScrapedUri</c>, and stops when no such URI remains.
        /// </summary>
        /// <remarks>
        /// Declared <c>async void</c>: callers cannot await completion or observe exceptions.
        /// </remarks>
        public async void Begin()
        {
            Console.WriteLine("Scraper Started");

            while (true)
            {
                // A fresh context per target; it is disposed on every exit path, including break.
                using (var dbContext = new ScraperContext())
                {
                    ScrapedUri target = await dbContext.ScrapeUri
                        .Where(candidate => candidate.Scraped == false && candidate.ScrapeAttempts < 3)
                        .FirstOrDefaultAsync();

                    if (target == null)
                    {
                        Console.WriteLine("No more Uris to scrape");
                        break;
                    }

                    WebPage page = await TryScrapeWebPage(target.AbsoluteUri);

                    if (page != null)
                    {
                        foreach (HtmlNode anchor in page.Html.CssSelect("a").ToArray())
                        {
                            string href = anchor.GetAttributeValue("href");
                            if (string.IsNullOrEmpty(href))
                            {
                                continue;
                            }

                            Uri resolved = LinkValidation.Validate(href, target.AbsoluteUri);
                            if (resolved == null)
                            {
                                continue;
                            }

                            var discovered = new ScrapedUri
                            {
                                AbsoluteUri    = resolved.AbsoluteUri,
                                Scheme         = resolved.Scheme,
                                Host           = resolved.Host,
                                QueryParams    = resolved.Query,
                                // The file type is derived from the last path segment.
                                FileType       = GetFileType(resolved.Segments[resolved.Segments.Length - 1]),
                                ScrapeDataTime = DateTime.UtcNow
                            };

                            dbContext.Add(discovered);
                            Console.WriteLine("Adding:" + discovered.AbsoluteUri);
                        }

                        // Only a page that was actually fetched marks the target as done.
                        target.Scraped = true;
                    }

                    // The attempt is counted whether or not the fetch succeeded.
                    target.ScrapeAttempts++;
                    await dbContext.SaveChangesAsync();
                }
            }

            Console.WriteLine("Scraper Finished");
        }
        /// <summary>
        /// Saves the bound <paramref name="scraperModel"/> when the model state is valid and
        /// redirects to <c>Index</c>; otherwise redisplays the view with the submitted model.
        /// </summary>
        /// <param name="scraperModel">Model bound from the ID, input, position, paid_ads and date fields.</param>
        /// <returns>A redirect to <c>Index</c> after saving, or the view populated with the model.</returns>
        public async Task <IActionResult> Create([Bind("ID,input,position,paid_ads,date")] ScraperModel scraperModel)
        {
            // Invalid input: hand the model back to the view without touching the context.
            if (!ModelState.IsValid)
            {
                return View(scraperModel);
            }

            _context.Add(scraperModel);
            await _context.SaveChangesAsync();

            return RedirectToAction(nameof(Index));
        }
示例#4
0
 /// <summary>
 /// Seeds the <c>ScrapeUri</c> set with a starting address when it holds no entries.
 /// </summary>
 public Scraper()
 {
     using (var dbContext = new ScraperContext())
     {
         // Any() asks only whether at least one entry exists, so nothing has to be counted.
         if (!dbContext.ScrapeUri.Any())
         {
             Console.WriteLine("Adding start point");
             ScrapedUri scrapeUri = new ScrapedUri
             {
                 AbsoluteUri = "http://demo.com",
                 Scheme      = "http",
                 Host        = "demo.com",
                 QueryParams = ""
             };
             dbContext.Add(scrapeUri);

             // Add only tracks the entity. Without this save, the start point would be
             // discarded when the context is disposed and never reach the store.
             dbContext.SaveChanges();
         }
     }
 }