Пример #1
0
        /// <summary>
        /// Runs a crawl starting at <paramref name="urlString"/> (when <c>shouldCrawl</c> is set),
        /// optionally prints a sentence generated from the collected text (when
        /// <c>printMarkov</c> is set), and finally saves the scan state.
        /// </summary>
        /// <param name="urlString">URL passed to every <c>CrawlWorker</c> that is created.</param>
        /// <param name="data">
        /// Crawl settings. This method reads <c>overwrite</c>, <c>outputFolder</c> and
        /// <c>iterative</c>, and hands the whole value on to each <c>CrawlWorker</c>.
        /// </param>
        private static void Crawl(string urlString, CrawlStruct data)
        {
            // Clear old crawl content when an overwrite was requested.
            if (data.overwrite)
            {
                // Check for the folder up front rather than relying on an exception to
                // detect a first run; that way a genuine deletion failure is reported as such.
                if (Directory.Exists(data.outputFolder))
                {
                    try
                    {
                        Directory.Delete(data.outputFolder, true);
                        Console.WriteLine("Deleted old scan files.");
                    }
                    catch (IOException exception)
                    {
                        // Still best-effort (the crawl continues), but say what actually happened.
                        Console.WriteLine("Could not delete old scan files: " + exception.Message);
                    }
                }
                else
                {
                    Console.WriteLine("First scan- Not files to delete.");
                }
            }

            // CreateDirectory does nothing when the folder already exists, so a single
            // call covers every path above.
            Directory.CreateDirectory(data.outputFolder);

            if (shouldCrawl)
            {
                // Spawn the crawler(s). Iterative mode starts two workers that differ only
                // in the boolean constructor argument.
                if (data.iterative)
                {
                    CrawlWorker evenCrawl = new CrawlWorker(0, urlString, 0, false, data);
                    CrawlWorker oddCrawl = new CrawlWorker(0, urlString, 0, true, data);
                    SpawnCrawler(evenCrawl);
                    SpawnCrawler(oddCrawl);
                }
                else
                {
                    CrawlWorker crawlWorker = new CrawlWorker(0, urlString, 0, false, data);
                    SpawnCrawler(crawlWorker);
                }

                // Automatic saving: persist the scan state every SAVERATE minutes.
                // Background thread, so it never keeps the process alive on its own.
                Thread saveThread = new Thread(() =>
                {
                    while (true)
                    {
                        Thread.Sleep(TimeSpan.FromMinutes(SAVERATE));
                        saveScanState();
                    }
                });
                saveThread.IsBackground = true;
                saveThread.Start();

                // Progress display: refresh the console title once per second.
                Thread titleThread = new Thread(() =>
                {
                    while (true)
                    {
                        Console.Title = ("Scanned " + pagesCrawled + " pages(infrequently updates), saved " + timesSaved + " backups of pages visited");
                        Thread.Sleep(TimeSpan.FromSeconds(1));
                    }
                });
                titleThread.IsBackground = true;
                titleThread.Start();

                // NOTE(review): this pause presumably gives the spawned crawlers time to
                // register their tasks in crawlTasks before the drain below - confirm.
                Thread.Sleep(1000);

                // Wait for the crawl tasks: Result blocks until each task has finished
                // and yields the number of pages that task reports.
                while (crawlTasks.Count > 0)
                {
                    Task<int> finishedTask = crawlTasks.Dequeue();
                    pagesCrawled += finishedTask.Result;
                }
                SaveQueue.killService();

                Console.WriteLine("Done! Scanned " + pagesCrawled + " pages.");
            }

            if (printMarkov)
            {
                // Feed the text digest from the output folder into the chain, then generate.
                Console.WriteLine("Printing sentence from generated chain... ");
                markovChain.addWords(File.ReadAllText(data.outputFolder + "textDigest" + ".txt"));
                markovChain.generateSentence(markovSentences);
            }

            // Save scan results
            saveScanState();
        }
 /// <summary>
 /// Creates a download manager configured from the given crawl settings.
 /// </summary>
 /// <param name="data">
 /// Crawl settings. Stored in the static <c>data</c> member; its <c>outputFolder</c>
 /// is passed to the <c>WebStringUtils</c> constructor.
 /// </param>
 public DownloadManager(CrawlStruct data)
 {
     // Qualified with the type name because the parameter hides the member of the same name.
     DownloadManager.data = data;

     var outputFolder = data.outputFolder;
     webStringUtils = new WebStringUtils(outputFolder);
     saveQueue = new SaveQueue();
 }