Пример #1
0
        /// <summary>
        /// Builds the list of review-page URLs to crawl for a product.
        /// </summary>
        /// <param name="baseUrl">
        /// Either an absolute URL of the review listing, or a bare product id
        /// (e.g. <c>B082XY23D5</c>), which is expanded to
        /// <c>https://www.amazon.com/product-reviews/{id}</c>.
        /// </param>
        /// <returns>
        /// One URL per page of comments, numbered from 1; an empty list when the
        /// scrapper reports no comments.
        /// </returns>
        public List <String> GetUrlsToCrawl(string baseUrl)
        {
            // Assumption carried over from the original code: a review page shows 10 comments.
            // TODO : check by scrapping that it really is 10 comments per page
            const int    commentsPerPage = 10;
            const string patternPage     = "?pageNumber=";

            if (!Uri.IsWellFormedUriString(baseUrl, UriKind.Absolute))
            {
                //TODO : use regex to check format of product id
                //B082XY23D5
                //https://www.amazon.com/product-reviews/B082XY23D5

                baseUrl = $"https://www.amazon.com/product-reviews/{baseUrl}";
            }

            //TODO : use dependency injection to obtain the scrapper
            Scrapper scrapper   = new Scrapper();
            int      nbComments = scrapper.GetNbComments(this.GetHtmlContent(baseUrl));

            // Number of pages = ceil(nbComments / commentsPerPage).
            // (The previous "nbComments % 10" computed the remainder, not the page count.)
            // Written as (n - 1) / size + 1 to avoid overflow near int.MaxValue.
            int nbPageOfComment = nbComments > 0
                ? ((nbComments - 1) / commentsPerPage) + 1
                : 0;

            List <string> urlToCrawl = new List <string>(nbPageOfComment);

            // NOTE(review): pages are emitted 1-based on the understanding that the
            // pageNumber query parameter starts at 1 - confirm against the live site.
            for (int page = 1; page <= nbPageOfComment; page++)
            {
                urlToCrawl.Add($"{baseUrl}/{patternPage}{page}");
            }

            return urlToCrawl;
        }
Пример #2
0
 /// <summary>
 /// Creates a crawler and stores the supplied scrapper in the
 /// <c>scrapper</c> field for later use.
 /// </summary>
 /// <param name="scrapper">The scrapper instance this crawler keeps a reference to.</param>
 public Crawler(Scrapper scrapper) => this.scrapper = scrapper;