/// <summary>
/// Collects post links from a fixed range of front pages, logs the collected
/// list, hands it to the multithreaded scraper and then persists the data store.
/// </summary>
/// <remarks>
/// Uses the data store named "pollitikaNew.db". The front pages requested are
/// <c>block + offset</c> for block = 0, 100, ..., 500 and offset = 80..99.
/// The meaning of the two <c>true</c> flags passed to
/// <c>AnalyzeListOfPosts_Multithreaded</c> is not visible from this method.
/// </remarks>
public static void StandardDownloadFromFrontpage()
{
    ModelRepository repository = new ModelRepository();
    List<string> collectedLinks = new List<string>();

    string dataStoreName = "pollitikaNew.db";
    Logger.Info("Opening data store: " + dataStoreName);
    repository.OpenDataStore(dataStoreName);

    Logger.Info("\nFETCHING POSTS FROM FRONTPAGE:");
    for (int pageBlock = 0; pageBlock <= 500; pageBlock += 100)
    {
        // Original note: "we got up to 60" (a progress marker left by the author;
        // the offset currently starts at 80).
        for (int pageOffset = 80; pageOffset < 100; pageOffset++)
        {
            int frontPageNumber = pageBlock + pageOffset;
            Logger.InfoFormat(" DOING FRONT PAGE - {0}", frontPageNumber);
            collectedLinks.AddRange(FrontPageAnalyzer.GetPostLinksFromFrontPage(frontPageNumber));
        }
    }

    Logger.Info("\nLIST OF POSTS TO ANALYZE:");
    int ordinal = 1;
    foreach (string postLink in collectedLinks)
    {
        // One numbered line per collected link, numbering starts at 1.
        Logger.Info(ordinal.ToString() + ". " + postLink);
        ordinal++;
    }

    ContinuousMultiThreadedScrapper.AnalyzeListOfPosts_Multithreaded(collectedLinks, repository, true, true);

    PrintStatistics(repository);
    repository.UpdateDataStore();
}
/// <summary>
/// Processes every <c>*.txt</c> post-list file found in
/// <paramref name="inDirWithLists"/>: loads the post list from the file, drops
/// the posts that <paramref name="repo"/> already reports as existing, sends the
/// rest to the multithreaded scraper as one batch, and then moves the file to
/// the "Done" directory.
/// </summary>
/// <param name="inDirWithLists">Directory that is searched for <c>*.txt</c> list files.</param>
/// <param name="repo">
/// Repository used for the "already exists" check, passed on to the scraper,
/// and flushed via <c>UpdateDataStore</c>.
/// </param>
/// <remarks>
/// The data store is updated whenever more than 100 posts have been handed to
/// the scraper since the last update, and once more after the last file if any
/// such posts are still pending. <c>File.Move</c> throws if a file with the
/// same name already exists in the destination directory.
/// </remarks>
public static void AnalyzeUsersPosts(string inDirWithLists, ModelRepository repo)
{
    const int MaxConcurrentBrowsers = 8;
    const int UnsavedPostsFlushThreshold = 100;
    const string DoneDirectory = "../../../Data/UsersLists/Done/";

    // Read the txt files from the directory one by one.
    DirectoryInfo listsDirectory = new DirectoryInfo(inDirWithLists);
    FileInfo[] listFiles = listsDirectory.GetFiles("*.txt");

    // The same set of logged-in browsers is passed to the scraper for every file.
    List<ScrapingBrowser> listLoggedBrowsers = new List<ScrapingBrowser>(MaxConcurrentBrowsers);
    Logger.Info("Logging in browsers");
    for (int i = 0; i < MaxConcurrentBrowsers; i++)
    {
        listLoggedBrowsers.Add(Utility.GetLoggedBrowser());
    }

    // Number of posts handed to the scraper since the last data store update.
    int notSavedPosts = 0;

    foreach (FileInfo file in listFiles)
    {
        Logger.Info("WORKING FILE: " + file.FullName);

        // Load the list of all posts of the user.
        List<string> listPosts = LoadListOfPostsFromFile(file.FullName);

        // Leave out the posts that are already in the database.
        List<string> postsToAdd = listPosts.FindAll(post => repo.PostAlreadyExists(post) == false);
        notSavedPosts += postsToAdd.Count;

        // Analyze all remaining posts and add them to the database.
        // NOTE(review): the meaning of the trailing (false, true) flags is not visible here.
        ContinuousMultiThreadedScrapper.AnalyzeListOfPosts_Multithreaded_OneBatch(postsToAdd, repo, listLoggedBrowsers, false, true);

        // Move the user's post-list file to Done.
        File.Move(file.FullName, DoneDirectory + file.Name);

        if (notSavedPosts > UnsavedPostsFlushThreshold)
        {
            Logger.Info("Updating store");
            repo.UpdateDataStore();
            notSavedPosts = 0;
        }
    }

    // Flush the remainder that never crossed the threshold; its list files have
    // already been moved to Done, so it would not be picked up by a later run.
    if (notSavedPosts > 0)
    {
        Logger.Info("Updating store");
        repo.UpdateDataStore();
    }
}