예제 #1
0
        /// <summary>
        /// Fetches all scan pages from the scan page service and returns them as JSON.
        /// </summary>
        /// <returns>
        /// The JSON result on success; a bad-request result carrying the exception
        /// message when fetching or serializing throws.
        /// </returns>
        public async Task<IActionResult> Get()
        {
            IActionResult response;

            try
            {
                response = Json(await _scanPageService.GetAllAsync());
            }
            catch (Exception error)
            {
                // Any failure is reported to the caller with the raw exception message.
                response = BadRequest(error.Message);
            }

            return response;
        }
예제 #2
0
        /// <summary>
        /// Scrapes every active scan page and stores the ads that are not yet known
        /// to the ad repository.
        /// </summary>
        /// <remarks>
        /// For each active page a scraper type is selected by matching the page's host
        /// (case-insensitively) against the scraper type names with the "Scraper" part
        /// removed. The home page is parsed into ads; for every ad whose <c>IdAds</c> is
        /// not in the repository snapshot taken at the start, the details page is
        /// downloaded, parsed and the ad is added to the repository.
        /// </remarks>
        /// <exception cref="Exception">
        /// No scraper type matches the page's host (including a missing host), or the
        /// page's home URL could not be scraped.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The matched scraper type could not be used as an <c>IScraper</c>.
        /// </exception>
        public async Task ScrapAsync()
        {
            Logger.Information("Start ScrapAsync");
            IEnumerable<Type> scraperTypes = ScrapExtensions.GetScraperTypes();

            // Await the service call instead of blocking on .Result, which can deadlock
            // under a synchronization context and ties up a thread-pool thread.
            IEnumerable<ScanPageDto> scanPages = (await _scanPageService.GetAllAsync())
                                                 .Where(x => x.Active)
                                                 .ToList();

            // Snapshot of stored ads; ads added during this run are not reflected in it.
            IEnumerable<Ad> adsDb = await _adRepository.GetAllAsync();

            foreach (ScanPageDto scanPage in scanPages)
            {
                Logger.Information($"Start scrap page, url = '{scanPage.UrlAddress}'");

                // Strip the "Scraper" part BEFORE ignoring case: lower-casing first (as the
                // previous code did) meant the capital-S replacement never matched.
                // A missing host cannot identify a scraper, so it is treated as "no match"
                // rather than matching the first scraper or failing with a null reference.
                string host = scanPage.Host;
                Type scrapClass = string.IsNullOrWhiteSpace(host)
                    ? null
                    : scraperTypes.FirstOrDefault(
                        x => x.Name.Replace("Scraper", "")
                              .IndexOf(host, StringComparison.OrdinalIgnoreCase) >= 0);
                if (scrapClass == null)
                {
                    throw new Exception(
                              $"Invalid scan page, UrlAddress='{scanPage.UrlAddress}', Page='{scanPage.Host}'.");
                }

                scraperInstance = Activator.CreateInstance(scrapClass) as IScraper;
                if (scraperInstance == null)
                {
                    // Fail with a clear message instead of a NullReferenceException later on.
                    throw new InvalidOperationException(
                              $"Scraper type '{scrapClass.FullName}' could not be created as an IScraper.");
                }

                HtmlDocument scrapedDoc = ScrapExtensions.ScrapUrl(scanPage.UrlAddress);
                if (scrapedDoc == null)
                {
                    throw new Exception(
                              $"Problem with scrap page = '{scanPage.UrlAddress}', scrapClass='{scrapClass.Name}'.");
                }

                List<Ad> ads = scraperInstance.ParseHomePage(scrapedDoc, scanPage);

                foreach (Ad ad in ads)
                {
                    bool isInDb = adsDb.Any(x => x.IdAds == ad.IdAds);
                    if (isInDb)
                    {
                        continue;
                    }

                    // NOTE(review): ScrapUrl can return null (see the home page check above);
                    // the details page is passed on unchecked - confirm ParseDetailsPage
                    // tolerates a null document.
                    HtmlDocument scrapedSubPage = ScrapExtensions.ScrapUrl(ad.Url);
                    ad.AdDetails = scraperInstance.ParseDetailsPage(scrapedSubPage, ad);

                    await _adRepository.AddAsync(ad);
                }
                Logger.Information($"Complited page='{scanPage.UrlAddress}', scraped '{ads.Count}' pages.");
            }
            Logger.Information("End ScrapAsync");
        }
예제 #3
0
        /// <summary>
        /// Seeds initial data in three independent steps: users, scan pages and ads.
        /// Each step runs only when the corresponding service currently returns no items;
        /// otherwise it just logs that the data already exists.
        /// </summary>
        public async Task SeedAsync()
        {
            // Step 1: users (user0..user10) and admins (admin0..admin3).
            var existingUsers = await _userService.GetAllAsync();

            if (existingUsers.Any())
            {
                Logger.Debug("Users was already initialized.");
            }
            else
            {
                Logger.Debug("Initializing users..");

                for (int userNo = 0; userNo <= 10; userNo++)
                {
                    string username = $"user{userNo}";
                    Logger.Debug($"Adding user: '******'.");

                    var userDto = new CreateUserDto
                    {
                        Email    = $"user{userNo}@test.com",
                        Password = "******",
                        Username = username,
                        Role     = "user"
                    };
                    await _authService.RegisterAsync(userDto);
                }

                for (int adminNo = 0; adminNo <= 3; adminNo++)
                {
                    string username = $"admin{adminNo}";
                    Logger.Debug($"Adding admin: '{username}'.");

                    var adminDto = new CreateUserDto
                    {
                        Email    = $"admin{adminNo}@test.com",
                        Password = "******",
                        Username = username,
                        Role     = "admin"
                    };
                    await _authService.RegisterAsync(adminDto);
                }
            }

            // Step 2: scan pages (Gumtree and Olx active, Otodom inactive).
            var existingPages = await _scanPageService.GetAllAsync();

            if (existingPages.Any())
            {
                Logger.Debug("Scan pages was already initialized.");
            }
            else
            {
                Logger.Debug("Initializing scan pages..");

                var seedPages = new[]
                {
                    new ScanPageDto
                    {
                        Active     = true,
                        Host       = "Gumtree",
                        HostUrl    = "https://www.gumtree.pl",
                        UrlAddress =
                            "https://www.gumtree.pl/s-mieszkania-i-domy-sprzedam-i-kupie/warszawa/v1c9073l3200008p1"
                    },
                    new ScanPageDto
                    {
                        Active     = true,
                        Host       = "Olx",
                        HostUrl    = "https://www.olx.pl",
                        UrlAddress = "https://www.olx.pl/nieruchomosci/mieszkania/sprzedaz/warszawa/"
                    },
                    new ScanPageDto
                    {
                        Active     = false,
                        Host       = "Otodom",
                        HostUrl    = "https://www.otodom.pl",
                        UrlAddress = "https://www.otodom.pl/sprzedaz/mieszkanie/warszawa/"
                    }
                };

                // Added one at a time, in the order listed above.
                foreach (ScanPageDto seedPage in seedPages)
                {
                    await _scanPageService.AddAsync(seedPage);
                }
            }

            // Step 3: ads - run the scraper only when no ads exist yet.
            var existingAds = await _adService.GetAllAsync();

            if (existingAds.Any())
            {
                Logger.Debug("Scraper was already initialized.");
            }
            else
            {
                Logger.Debug($"Scraping...");
                await _scraperService.ScrapAsync();
            }

            Logger.Debug("Data was initialized.");
        }