/// <summary>
/// Runs one crawl session: prepares the output folder, optionally spawns the
/// crawl workers and waits for their tasks, optionally prints a generated
/// Markov sentence, and finally saves the scan state.
/// </summary>
/// <param name="urlString">URL handed to every root <c>CrawlWorker</c>.</param>
/// <param name="data">
/// Crawl settings. This method reads <c>overwrite</c>, <c>outputFolder</c> and
/// <c>iterative</c>; the whole value is also passed on to each worker.
/// </param>
private static void Crawl(string urlString, CrawlStruct data) {
    PrepareOutputFolder(data);

    if (shouldCrawl) {
        // Spawn the root crawl workers. In iterative mode two workers are
        // started that differ only in the fourth constructor argument
        // (false for the "even" worker, true for the "odd" one).
        // Both workers are constructed before either is spawned.
        if (data.iterative) {
            CrawlWorker evenCrawl = new CrawlWorker(0, urlString, 0, false, data);
            CrawlWorker oddCrawl = new CrawlWorker(0, urlString, 0, true, data);
            SpawnCrawler(evenCrawl);
            SpawnCrawler(oddCrawl);
        } else {
            CrawlWorker crawlWorker = new CrawlWorker(0, urlString, 0, false, data);
            SpawnCrawler(crawlWorker);
        }

        StartAutoSaveThread();
        StartTitleThread();

        // NOTE(review): presumably this pause gives the spawned crawlers time
        // to register their tasks in crawlTasks before it is drained - confirm.
        Thread.Sleep(1000);

        // Drain the task queue. Reading Result blocks until the task has
        // finished, so this loop doubles as the "wait for completion" step.
        while (crawlTasks.Count > 0) {
            Task<int> finishedTask = crawlTasks.Dequeue();
            pagesCrawled += finishedTask.Result;
        }
        SaveQueue.killService();

        Console.WriteLine("Done! Scanned " + pagesCrawled + " pages.");
    }

    if (printMarkov) {
        // Feed the text digest from the output folder into the chain.
        // The path is built by plain concatenation, exactly as before.
        Console.WriteLine("Printing sentence from generated chain... ");
        markovChain.addWords(File.ReadAllText(data.outputFolder + "textDigest" + ".txt"));
        markovChain.generateSentence(markovSentences);
    }

    // Save scan results
    saveScanState();
}

/// <summary>
/// Ensures the output folder exists, first removing the previous scan's files
/// when <c>data.overwrite</c> is set.
/// </summary>
private static void PrepareOutputFolder(CrawlStruct data) {
    if (data.overwrite) {
        if (Directory.Exists(data.outputFolder)) {
            try {
                Directory.Delete(data.outputFolder, true);
                Console.WriteLine("Deleted old scan files.");
            } catch (IOException exception) {
                // Still best-effort (the crawl continues), but report the real
                // cause instead of claiming there was nothing to delete.
                Console.WriteLine("Could not delete old scan files: " + exception.Message);
            }
        } else {
            Console.WriteLine("First scan- Not files to delete.");
        }
    }

    // CreateDirectory does nothing when the folder already exists, so a single
    // call covers every path above.
    Directory.CreateDirectory(data.outputFolder);
}

/// <summary>
/// Starts a background thread that calls <c>saveScanState</c> every
/// <c>SAVERATE</c> minutes.
/// </summary>
private static void StartAutoSaveThread() {
    Thread saveThread = new Thread(() => {
        while (true) {
            Thread.Sleep(TimeSpan.FromMinutes(SAVERATE));
            saveScanState();
        }
    });
    // Background thread: it must not keep the process alive on its own.
    saveThread.IsBackground = true;
    saveThread.Start();
}

/// <summary>
/// Starts a background thread that refreshes the console title once per
/// second with the current <c>pagesCrawled</c> and <c>timesSaved</c> values.
/// </summary>
private static void StartTitleThread() {
    Thread titleThread = new Thread(() => {
        while (true) {
            Console.Title = ("Scanned " + pagesCrawled + " pages(infrequently updates), saved " + timesSaved + " backups of pages visited");
            Thread.Sleep(TimeSpan.FromSeconds(1));
        }
    });
    titleThread.IsBackground = true;
    titleThread.Start();
}
/// <summary>
/// Creates a download manager for the given crawl settings.
/// </summary>
/// <param name="data">
/// Crawl settings; stored in the static <c>DownloadManager.data</c> member, and
/// its <c>outputFolder</c> is passed to the new <c>WebStringUtils</c>.
/// </param>
public DownloadManager(CrawlStruct data) {
    // Stored on the type itself, so the most recently constructed manager
    // determines the settings seen through DownloadManager.data.
    DownloadManager.data = data;

    var outputFolder = data.outputFolder;
    webStringUtils = new WebStringUtils(outputFolder);

    saveQueue = new SaveQueue();
}