/// <summary>
/// Returns every scan page provided by the scan page service, serialized as JSON.
/// </summary>
/// <returns>
/// A JSON result with the scan pages, or a bad-request result carrying the
/// exception message when retrieving or serializing them throws.
/// </returns>
public async Task<IActionResult> Get()
{
    try
    {
        // Both the service call and the JSON wrapping stay inside the try block,
        // so a failure in either one is reported through the catch below.
        return Json(await _scanPageService.GetAllAsync());
    }
    catch (Exception exception)
    {
        return BadRequest(exception.Message);
    }
}
/// <summary>
/// Scrapes every active scan page and stores the ads that are not yet known to the ad repository.
/// </summary>
/// <remarks>
/// For each active scan page a scraper type is picked by name: the type name, with the
/// "Scraper" part removed, must contain the page's <c>Host</c> (case-insensitive).
/// The page is fetched through <c>ScrapExtensions.ScrapUrl</c>, parsed by the scraper, and
/// each parsed ad whose <c>IdAds</c> is not found in the ads loaded at the start of the run
/// gets its details page fetched and parsed before it is added to the repository.
/// </remarks>
/// <returns>A task that completes once all active scan pages have been processed.</returns>
/// <exception cref="Exception">
/// Thrown when no scraper type matches a scan page, when the matched type cannot be used as
/// an <c>IScraper</c>, or when fetching the scan page yields no document.
/// </exception>
public async Task ScrapAsync()
{
    Logger.Information("Start ScrapAsync");

    // Materialized once because the sequence is searched again for every scan page.
    List<Type> scraperTypes = ScrapExtensions.GetScraperTypes().ToList();

    // Awaited rather than blocked on with .Result: blocking inside an async method risks
    // deadlocks/thread-pool starvation and wraps failures in AggregateException.
    IEnumerable<ScanPageDto> scanPages = (await _scanPageService.GetAllAsync())
        .Where(x => x.Active)
        .ToList();

    IEnumerable<Ad> adsDb = await _adRepository.GetAllAsync();

    foreach (ScanPageDto scanPage in scanPages)
    {
        Logger.Information($"Start scrap page, url = '{scanPage.UrlAddress}'");

        // Remove "Scraper" from the original-cased name first and only then compare
        // case-insensitively; removing it after lower-casing would never match the
        // capitalised "Scraper". Ordinal comparison keeps the match culture-independent.
        Type scrapClass = scraperTypes.FirstOrDefault(x => x.Name
            .Replace("Scraper", "")
            .IndexOf(scanPage.Host, StringComparison.OrdinalIgnoreCase) >= 0);

        if (scrapClass == null)
        {
            throw new Exception(
                $"Invalid scan page, UrlAddress='{scanPage.UrlAddress}', Page='{scanPage.Host}'.");
        }

        // The 'as' cast yields null when the matched type is not an IScraper; fail with a
        // clear message here instead of a NullReferenceException further down.
        scraperInstance = Activator.CreateInstance(scrapClass) as IScraper;
        if (scraperInstance == null)
        {
            throw new Exception(
                $"Scraper type '{scrapClass.Name}' could not be created as an IScraper.");
        }

        HtmlDocument scrapedDoc = ScrapExtensions.ScrapUrl(scanPage.UrlAddress);
        if (scrapedDoc == null)
        {
            throw new Exception(
                $"Problem with scrap page = '{scanPage.UrlAddress}', scrapClass='{scrapClass.Name}'.");
        }

        List<Ad> ads = scraperInstance.ParseHomePage(scrapedDoc, scanPage);

        foreach (Ad ad in ads)
        {
            bool isInDb = adsDb.Any(x => x.IdAds == ad.IdAds);
            if (isInDb)
            {
                continue;
            }

            // Only ads that are not stored yet pay for the extra details-page request.
            HtmlDocument scrapedSubPage = ScrapExtensions.ScrapUrl(ad.Url);
            ad.AdDetails = scraperInstance.ParseDetailsPage(scrapedSubPage, ad);
            await _adRepository.AddAsync(ad);
        }

        Logger.Information($"Complited page='{scanPage.UrlAddress}', scraped '{ads.Count}' pages.");
    }

    Logger.Information("End ScrapAsync");
}
/// <summary>
/// Seeds initial data in three steps: users, scan pages, and scraped ads.
/// Each step runs only when the corresponding service currently returns no entries.
/// </summary>
/// <returns>A task that completes when all three seeding steps have finished.</returns>
public async Task SeedAsync()
{
    // Step 1: users (indices 0..10) and admins (indices 0..3).
    var existingUsers = await _userService.GetAllAsync();
    if (existingUsers.Any())
    {
        Logger.Debug("Users was already initialized.");
    }
    else
    {
        Logger.Debug("Initializing users..");

        for (int userIndex = 0; userIndex < 11; userIndex++)
        {
            Logger.Debug("Adding user: '******'.");
            await _authService.RegisterAsync(new CreateUserDto
            {
                Email = $"user{userIndex}@test.com",
                Password = "******",
                Username = $"user{userIndex}",
                Role = "user"
            });
        }

        for (int adminIndex = 0; adminIndex < 4; adminIndex++)
        {
            string adminName = $"admin{adminIndex}";
            Logger.Debug($"Adding admin: '{adminName}'.");
            await _authService.RegisterAsync(new CreateUserDto
            {
                Email = $"admin{adminIndex}@test.com",
                Password = "******",
                Username = adminName,
                Role = "admin"
            });
        }
    }

    // Step 2: scan pages, added one after another in the order listed.
    var existingPages = await _scanPageService.GetAllAsync();
    if (existingPages.Any())
    {
        Logger.Debug("Scan pages was already initialized.");
    }
    else
    {
        Logger.Debug("Initializing scan pages..");

        ScanPageDto[] seedPages =
        {
            new ScanPageDto
            {
                Active = true,
                Host = "Gumtree",
                HostUrl = "https://www.gumtree.pl",
                UrlAddress = "https://www.gumtree.pl/s-mieszkania-i-domy-sprzedam-i-kupie/warszawa/v1c9073l3200008p1"
            },
            new ScanPageDto
            {
                Active = true,
                Host = "Olx",
                HostUrl = "https://www.olx.pl",
                UrlAddress = "https://www.olx.pl/nieruchomosci/mieszkania/sprzedaz/warszawa/"
            },
            // Seeded as inactive.
            new ScanPageDto
            {
                Active = false,
                Host = "Otodom",
                HostUrl = "https://www.otodom.pl",
                UrlAddress = "https://www.otodom.pl/sprzedaz/mieszkanie/warszawa/"
            }
        };

        foreach (ScanPageDto seedPage in seedPages)
        {
            await _scanPageService.AddAsync(seedPage);
        }
    }

    // Step 3: run the scraper only when no ads are returned yet.
    var existingAds = await _adService.GetAllAsync();
    if (existingAds.Any())
    {
        Logger.Debug("Scraper was already initialized.");
    }
    else
    {
        Logger.Debug("Scraping...");
        await _scraperService.ScrapAsync();
    }

    Logger.Debug("Data was initialized.");
}