예제 #1
0
        /// <summary>
        /// Starts the given worker's <c>Run</c> method through <c>crawlThreadFactory</c>
        /// and enqueues the resulting task on <c>crawlTasks</c>.
        /// </summary>
        /// <param name="crawlWorker">Worker whose <c>Run</c> method is started as a task.</param>
        public static void SpawnCrawler(CrawlWorker crawlWorker)
        {
            // TODO: threading code
            crawlTasks.Enqueue(crawlThreadFactory.StartNew(crawlWorker.Run));
        }
예제 #2
0
        /// <summary>
        /// Creates a second worker from this one's state (next id, same URL, depth and
        /// data, inverted sidedness flag) and passes it to <c>WebCrawler.SpawnCrawler</c>.
        /// </summary>
        public void createHelper()
        {
            WebCrawler.SpawnCrawler(
                new CrawlWorker(id + 1, workingURL, workingDepth, !sidedness, data));
        }
예제 #3
0
        /// <summary>
        /// Runs one crawl session: prepares the output folder, optionally starts the
        /// crawl workers and waits for their tasks, optionally prints sentences from
        /// the Markov chain, and finally saves the scan state.
        /// </summary>
        /// <param name="urlString">URL handed to the initial crawl worker(s).</param>
        /// <param name="data">
        /// Crawl settings. This method reads <c>overwrite</c>, <c>outputFolder</c> and
        /// <c>iterative</c>, and passes the whole value on to every worker it creates.
        /// </param>
        private static void Crawl(string urlString, CrawlStruct data)
        {
            // Clears old crawl content.
            if (data.overwrite)
            {
                if (Directory.Exists(data.outputFolder))
                {
                    try
                    {
                        Directory.Delete(data.outputFolder, true);
                        Console.WriteLine("Deleted old scan files.");
                    }
                    catch (IOException exception)
                    {
                        // The folder exists but could not be removed (e.g. a file is
                        // in use). Keep going best-effort, but report the real cause
                        // instead of claiming this is a first scan.
                        Console.WriteLine("Could not delete old scan files: " + exception.Message);
                    }
                }
                else
                {
                    Console.WriteLine("First scan- Not files to delete.");
                }
            }

            // Always make sure the output folder exists before anything writes to it.
            Directory.CreateDirectory(data.outputFolder);

            if (shouldCrawl)
            {
                // Spawns the crawl workers. The fourth constructor argument is the
                // worker's sidedness flag (presumably false = left side -- confirm).
                if (data.iterative)
                {
                    CrawlWorker evenCrawl = new CrawlWorker(0, urlString, 0, false, data);
                    CrawlWorker oddCrawl = new CrawlWorker(0, urlString, 0, true, data);
                    SpawnCrawler(evenCrawl);
                    SpawnCrawler(oddCrawl);
                }
                else
                {
                    CrawlWorker crawlWorker = new CrawlWorker(0, urlString, 0, false, data);
                    SpawnCrawler(crawlWorker);
                }

                // Automatic saving: a background thread saves the scan state every
                // SAVERATE minutes for as long as the process lives.
                Thread saveThread = new Thread(() =>
                {
                    while (true)
                    {
                        Thread.Sleep((int)TimeSpan.FromMinutes(SAVERATE).TotalMilliseconds);
                        saveScanState();
                    }
                });
                saveThread.IsBackground = true;
                saveThread.Start();

                // Progress display: a background thread refreshes the console title
                // once per second.
                Thread titleThread = new Thread(() =>
                {
                    while (true)
                    {
                        Console.Title = ("Scanned " + pagesCrawled + " pages(infrequently updates), saved " + timesSaved + " backups of pages visited");
                        Thread.Sleep((int)TimeSpan.FromSeconds(1).TotalMilliseconds);
                    }
                });
                titleThread.IsBackground = true;
                titleThread.Start();

                // Pause for a second before draining the task queue (presumably so
                // the initial workers have time to get going -- confirm).
                Thread.Sleep(1000);

                // Waits for the crawl tasks to complete. Reading Result blocks until
                // the task has finished; tasks enqueued while we wait are picked up
                // because Count is re-checked on every iteration.
                // NOTE(review): SpawnCrawler also enqueues onto crawlTasks; if it can
                // be called from worker threads while this loop runs, access to the
                // queue needs synchronization -- confirm.
                while (crawlTasks.Count > 0)
                {
                    Task<int> finishedTask = crawlTasks.Dequeue();
                    pagesCrawled += finishedTask.Result;
                }
                SaveQueue.killService();

                Console.WriteLine("Done! Scanned " + pagesCrawled + " pages.");
            }

            // Chain generation: feed the text digest from the output folder into the
            // Markov chain and generate sentences from it.
            if (printMarkov)
            {
                Console.WriteLine("Printing sentence from generated chain... ");
                markovChain.addWords(File.ReadAllText(data.outputFolder + "textDigest" + ".txt"));
                markovChain.generateSentence(markovSentences);
            }

            // Save scan results.
            saveScanState();
        }