/// <summary>
/// Builds the list of review-page URLs to crawl for a product.
/// </summary>
/// <param name="baseUrl">
/// Either an absolute URL of the reviews page, or a bare product id
/// (e.g. <c>B082XY23D5</c>), which is expanded to
/// <c>https://www.amazon.com/product-reviews/{id}</c>.
/// </param>
/// <returns>
/// One URL per page of comments, numbered from 1; an empty list when the
/// comment count read from the first page is zero or negative.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="baseUrl"/> is null, empty or whitespace.
/// </exception>
public List<string> GetUrlsToCrawl(string baseUrl)
{
    // Assumed page size of the reviews listing.
    // TODO: confirm by scraping that a page really holds only 10 comments.
    const int CommentsPerPage = 10;
    const string PatternPage = "?pageNumber=";

    if (string.IsNullOrWhiteSpace(baseUrl))
    {
        throw new ArgumentException("A product id or an absolute URL is required.", nameof(baseUrl));
    }

    if (!Uri.IsWellFormedUriString(baseUrl, UriKind.Absolute))
    {
        // TODO: use a regex to check the format of the product id.
        // B082XY23D5
        // https://www.amazon.com/product-reviews/B082XY23D5
        baseUrl = $"https://www.amazon.com/product-reviews/{baseUrl}";
    }

    // TODO: use dependency injection to obtain the scrapper.
    Scrapper scrapper = new Scrapper();
    int nbComments = scrapper.GetNbComments(this.GetHtmlContent(baseUrl));

    // Ceiling division: a partially filled last page still counts as a page.
    // (The previous "nbComments % 10" yielded the remainder, not the page count.)
    int nbPageOfComment = 0;
    if (nbComments > 0)
    {
        nbPageOfComment = nbComments / CommentsPerPage
            + (nbComments % CommentsPerPage > 0 ? 1 : 0);
    }

    // Avoid a double slash when the caller's URL already ends with '/'.
    string pageRoot = baseUrl.TrimEnd('/');

    List<string> urlToCrawl = new List<string>(nbPageOfComment);

    // Review pages are numbered from 1, so iterate 1..nbPageOfComment inclusive.
    for (int page = 1; page <= nbPageOfComment; page++)
    {
        urlToCrawl.Add($"{pageRoot}/{PatternPage}{page}");
    }

    return urlToCrawl;
}
/// <summary>
/// Creates a crawler and stores the supplied scrapper in the
/// <c>scrapper</c> field.
/// </summary>
/// <param name="scrapper">The scrapper instance to keep on this crawler.</param>
public Crawler(Scrapper scrapper)
{
    this.scrapper = scrapper;
}