/// <summary>
/// Collects the follower profile URLs, splits them into batches and scrapes
/// each batch on its own task using the supplied strategy.
/// </summary>
/// <param name="usersToScrape">Not used by the current implementation.</param>
/// <param name="scrapingStrategy">Strategy forwarded to <c>GetResultsFromScraping</c> for every batch.</param>
/// <returns>The scraped users of all batches, concatenated in batch order; empty when no follower URLs were found.</returns>
/// <exception cref="AggregateException">Thrown by <c>Task.WaitAll</c> when at least one batch task fails.</exception>
public ICollection<ScrapedUser> ScrapeUsersByStrategy(int usersToScrape, IScrapingMethod scrapingStrategy)
{
    var listOfUrls = GetFollowersURLsFromUserProfile().ToList();

    try
    {
        // Nothing to distribute: avoid computing a batch size of zero.
        if (listOfUrls.Count == 0)
        {
            return new List<ScrapedUser>();
        }

        // Leave one core free, but always keep at least one worker so a
        // single-core host does not end up dividing by zero.
        int maxThreads = Math.Max(1, Convert.ToInt32(GetNumberOfCores()) - 1);

        // Integer ceiling division: every URL lands in a batch and the number
        // of batches never exceeds maxThreads.
        int batchSize = (listOfUrls.Count + maxThreads - 1) / maxThreads;

        var urlBatches = listOfUrls
            .Batch(batchSize)
            .Select(batch => batch.ToList())
            .ToList();

        // ToArray() forces enumeration, so every task is started before waiting.
        var scrapingTasks = urlBatches
            .Select(batch => Task.Run<ICollection<ScrapedUser>>(
                () => GetResultsFromScraping(scrapingStrategy, batch)))
            .ToArray();

        Task.WaitAll(scrapingTasks);

        return scrapingTasks.SelectMany(task => task.Result).ToList();
    }
    finally
    {
        // Run the process cleanup even when a batch task failed.
        DriverExtensions.KillProcesses();
    }
}
/// <summary>
/// Clicks the followers link on the current page, scrolls the list and
/// collects the <c>href</c> attribute of every matching user link.
/// The driver is quit and driver processes are killed before returning,
/// whether or not the collection succeeded.
/// </summary>
/// <returns>The <c>href</c> values of the user links found after scrolling.</returns>
public ICollection<string> GetFollowersURLsFromUserProfile()
{
    try
    {
        IWebElement followersElement = InstaDriver.FindElementByXPath("//a[contains(@href,'followers')]");
        followersElement.Click();

        // The returned element is not used; the lookup itself is kept because
        // the original code performed it before scrolling.
        // NOTE(review): presumably a check that the followers list has opened - confirm.
        InstaDriver.FindElementByXPath("//*[contains(@class,'isgrP')]");
        ScrollDown(30);

        var listOfURLs = InstaDriver
            .FindElementsByXPath("//a[contains(@class,'_2dbep qNELH kIKUG')]")
            .Select(userElement => userElement.GetAttribute("href"))
            .ToList();

        LogHelper.Log($"Started to scrape users of {_userProfileURL}");
        return listOfURLs;
    }
    finally
    {
        // Release the browser session even when a lookup or click throws.
        InstaDriver.Quit();
        DriverExtensions.KillProcesses();
    }
}