예제 #1
0
        /// <summary>
        /// Collects the follower profile URLs, splits them into batches, scrapes every batch
        /// on its own task with the supplied strategy and merges the per-batch results.
        /// </summary>
        /// <param name="usersToScrape">
        /// NOTE(review): this value is not read anywhere in the method body. Confirm whether
        /// it was meant to cap the number of URLs that get processed.
        /// </param>
        /// <param name="scrapingStrategy">Strategy forwarded to <c>GetResultsFromScraping</c> for every batch.</param>
        /// <returns>The users returned by all batch tasks, concatenated in batch order.</returns>
        /// <exception cref="AggregateException">Propagated from <c>Task.WaitAll</c> when a batch task fails.</exception>
        public ICollection <ScrapedUser> ScrapeUsersByStrategy(int usersToScrape, IScrapingMethod scrapingStrategy)
        {
            var listOfUrls = GetFollowersURLsFromUserProfile().ToList();

            // Keep one core free, but never go below one worker: on a single-core machine
            // "cores - 1" is 0 and would make the batch-size division blow up.
            var maxThreads = Math.Max(1, GetNumberOfCores() - 1);

            // Integer ceiling division so that at most maxThreads batches are produced.
            // Clamped to 1 so an empty or very short URL list never yields a batch size of 0.
            var batchSize = Math.Max(1, (listOfUrls.Count + maxThreads - 1) / maxThreads);

            var listOfTasks = new List <Task <ICollection <ScrapedUser> > >();

            foreach (var batch in listOfUrls.Batch(batchSize))
            {
                // Materialize the batch before the task starts so the lambda captures a stable list.
                var list = batch.ToList();
                var task = Task <ICollection <ScrapedUser> > .Factory.StartNew(() => GetResultsFromScraping(scrapingStrategy, list));

                listOfTasks.Add(task);
            }

            try
            {
                // WaitAll returns only after every task has finished; failures surface as AggregateException.
                Task.WaitAll(listOfTasks.ToArray());

                var results = new List <ScrapedUser>();

                foreach (var task in listOfTasks)
                {
                    results.AddRange(task.Result);
                }

                return(results);
            }
            finally
            {
                // Run the process cleanup even when a batch failed, not only on the success path.
                DriverExtensions.KillProcesses();
            }
        }
예제 #2
0
        /// <summary>
        /// Opens the followers link on the current page, scrolls, and collects the
        /// <c>href</c> attribute of every matching follower anchor. The driver is quit and
        /// <c>DriverExtensions.KillProcesses()</c> is called before the method exits, whether it
        /// returns normally or throws.
        /// </summary>
        /// <returns>The <c>href</c> values of the matched anchors, in the order the driver returned them.</returns>
        public ICollection <string> GetFollowersURLsFromUserProfile()
        {
            try
            {
                IWebElement followersElement = InstaDriver.FindElementByXPath("//a[contains(@href,'followers')]");

                followersElement.Click();

                // The returned element is not used. The lookup is kept because it presumably
                // confirms the followers container is present before scrolling (TODO confirm).
                InstaDriver.FindElementByXPath("//*[contains(@class,'isgrP')]");

                ScrollDown(30);

                var listOfURLs = new List <string>();

                foreach (var userElement in InstaDriver.FindElementsByXPath("//a[contains(@class,'_2dbep qNELH kIKUG')]"))
                {
                    listOfURLs.Add(userElement.GetAttribute("href"));
                }

                LogHelper.Log($"Started to scrape users of {_userProfileURL}");

                return(listOfURLs);
            }
            finally
            {
                // Always release the browser session, including when an element lookup above throws;
                // previously a failure skipped this cleanup and left the driver running.
                InstaDriver.Quit();
                DriverExtensions.KillProcesses();
            }
        }