示例#1
0
        /// <summary>
        /// Opens the "pollitikaNew.db" data store, gathers post links from a fixed set of
        /// front pages, logs the gathered links, passes them to the multithreaded analyzer,
        /// prints statistics and finally updates the data store.
        /// </summary>
        public static void StandardDownloadFromFrontpage()
        {
            string storeName = "pollitikaNew.db";
            ModelRepository repository = new ModelRepository();

            Logger.Info("Opening data store: " + storeName);
            repository.OpenDataStore(storeName);

            List<string> postLinks = new List<string>();

            Logger.Info("\nFETCHING POSTS FROM FRONTPAGE:");
            // Visits pages 80-99 inside every block of one hundred, for blocks starting at 0, 100, ..., 500.
            for (int blockStart = 0; blockStart <= 500; blockStart += 100)
            {
                int pageNumber = blockStart + 80;       // original author's progress note: "we got up to 60"
                int blockLimit = blockStart + 100;

                while (pageNumber < blockLimit)
                {
                    Logger.InfoFormat("  DOING FRONT PAGE - {0}", pageNumber);
                    postLinks.AddRange(FrontPageAnalyzer.GetPostLinksFromFrontPage(pageNumber));
                    pageNumber++;
                }
            }

            Logger.Info("\nLIST OF POSTS TO ANALYZE:");
            int ordinal = 0;
            foreach (string link in postLinks)
            {
                // One-based position followed by the link itself.
                ordinal++;
                Logger.Info(ordinal.ToString() + ". " + link);
            }

            ContinuousMultiThreadedScrapper.AnalyzeListOfPosts_Multithreaded(postLinks, repository, true, true);

            PrintStatistics(repository);
            repository.UpdateDataStore();
        }
示例#2
0
        /// <summary>
        /// Processes every <c>*.txt</c> post list found in <paramref name="inDirWithLists"/>:
        /// loads the posts listed in the file, drops those that <paramref name="repo"/> reports
        /// as already existing, hands the rest to the multithreaded analyzer, and moves the
        /// file into the "Done" directory. The data store is updated whenever more than 100
        /// posts have accumulated since the last update, and once more at the end if any
        /// posts are still pending.
        /// </summary>
        /// <param name="inDirWithLists">Directory that is searched for <c>*.txt</c> files.</param>
        /// <param name="repo">
        /// Repository used for the "already exists" check, passed to the analyzer, and
        /// updated via <c>UpdateDataStore</c>.
        /// </param>
        public static void AnalyzeUsersPosts(string inDirWithLists, ModelRepository repo)
        {
            const int MaxConcurrentBrowsers = 8;
            const int UpdateStoreThreshold  = 100;
            const string DoneDirectory      = "../../../Data/UsersLists/Done/";

            // Load the txt files from the directory one by one.
            FileInfo[] files = new DirectoryInfo(inDirWithLists).GetFiles("*.txt");

            if (files.Length == 0)
            {
                // Nothing to process, so do not log in any browsers.
                return;
            }

            // File.Move does not create the destination directory; make sure it exists up
            // front so a missing directory cannot fail the run after the analysis is done.
            Directory.CreateDirectory(DoneDirectory);

            Logger.Info("Logging in browsers");
            List<ScrapingBrowser> listLoggedBrowsers = new List<ScrapingBrowser>(MaxConcurrentBrowsers);

            for (int i = 0; i < MaxConcurrentBrowsers; i++)
            {
                listLoggedBrowsers.Add(Utility.GetLoggedBrowser());
            }

            // Number of posts handed to the analyzer since the last store update.
            int notSavedPosts = 0;

            foreach (FileInfo file in files)
            {
                Logger.Info("WORKING FILE: " + file.FullName);

                // Load the list of all posts of the user.
                List<string> listPosts = LoadListOfPostsFromFile(file.FullName);

                // Drop the posts that are already in the database.
                List<string> postsToAdd = listPosts.FindAll(post => repo.PostAlreadyExists(post) == false);

                notSavedPosts += postsToAdd.Count;

                // Analyze all posts and add them to the database.
                ContinuousMultiThreadedScrapper.AnalyzeListOfPosts_Multithreaded_OneBatch(postsToAdd, repo, listLoggedBrowsers, false, true);

                // Move the file with the user's posts to Done.
                File.Move(file.FullName, DoneDirectory + file.Name);

                if (notSavedPosts > UpdateStoreThreshold)
                {
                    Logger.Info("Updating store");

                    repo.UpdateDataStore();

                    notSavedPosts = 0;
                }
            }

            // Flush the remainder: without this, posts from the last files (already moved to
            // Done) stay unsaved unless the caller updates the store itself.
            if (notSavedPosts > 0)
            {
                Logger.Info("Updating store");

                repo.UpdateDataStore();
            }
        }