static void Main(string[] args)
{
    MongodbAccess mongo = new MongodbAccess();
    HashSet<string> crawled_urls = mongo.GetCrawledURLs();
    Util.log("{0} urls have been crawled.", crawled_urls.Count);

    Random r = new Random(99);
    HashSet<string> urls = GetWaitingURLs(crawled_urls);
    foreach (string url in urls)
    {
        WebPage page = GetWebPage(url);
        if (page == null)
        {
            continue;
        }

        mongo.InsertWebPage(page);
        Util.log("crawl {0} done.", url);

        // Throttle: r.Next(100) is in [0, 99], so this sleeps 500 ms on
        // roughly 19% of iterations.
        if (r.Next(100) > 80)
        {
            System.Threading.Thread.Sleep(500);
        }
    }

    /*
     * Ad-hoc test path: print titles instead of writing to MongoDB.
     *
     * HashSet<string> urls = GetWaitingURLs(null);
     * Console.WriteLine(urls.Count);
     * foreach (string url in urls)
     * {
     *     WebPage page = GetWebPage(url);
     *     if (page == null) continue;
     *     Console.WriteLine("{0} {1}", page.title, page.published_time);
     * }
     */
}
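For reference, here is a minimal sketch of the pieces Main depends on, written against the MongoDB .NET driver (MongoDB.Driver 2.x). The type and member names (WebPage with url/title/published_time/html, MongodbAccess.GetCrawledURLs/InsertWebPage, GetWaitingURLs, GetWebPage, Util.log) are taken from the calls above, but every body below, plus the connection string, database and collection names, and the seed list, is an assumption for illustration, not the original implementation. The static helpers are assumed to live in the same class as Main.

using System;
using System.Collections.Generic;
using System.Net;
using MongoDB.Driver;

class WebPage
{
    // Lowercase member names kept to match the page.title /
    // page.published_time accesses in Main.
    public string url { get; set; }
    public string title { get; set; }
    public string published_time { get; set; }
    public string html { get; set; }
}

class MongodbAccess
{
    private readonly IMongoCollection<WebPage> pages;

    public MongodbAccess()
    {
        // Assumed connection string and database/collection names.
        var client = new MongoClient("mongodb://localhost:27017");
        pages = client.GetDatabase("crawler").GetCollection<WebPage>("pages");
    }

    public HashSet<string> GetCrawledURLs()
    {
        // Fetch only the url of every stored page and dedupe into a set.
        var urls = pages.Find(Builders<WebPage>.Filter.Empty)
                        .Project(p => p.url)
                        .ToList();
        return new HashSet<string>(urls);
    }

    public void InsertWebPage(WebPage page)
    {
        pages.InsertOne(page);
    }
}

static class Util
{
    // Assumed to be a thin wrapper over Console.WriteLine.
    public static void log(string format, params object[] args)
    {
        Console.WriteLine(format, args);
    }
}

static HashSet<string> GetWaitingURLs(HashSet<string> crawled_urls)
{
    // Assumed behavior: start from a seed list (hard-coded here) and skip
    // anything already crawled; a null argument means "no filtering",
    // matching the commented-out GetWaitingURLs(null) test call.
    string[] seeds = { "http://example.com/1", "http://example.com/2" };
    HashSet<string> waiting = new HashSet<string>();
    foreach (string url in seeds)
    {
        if (crawled_urls == null || !crawled_urls.Contains(url))
        {
            waiting.Add(url);
        }
    }
    return waiting;
}

static WebPage GetWebPage(string url)
{
    // Assumed behavior: download the page and return null on any network
    // failure, so the caller can simply skip that url. Parsing of title
    // and published_time is omitted here.
    try
    {
        using (WebClient client = new WebClient())
        {
            string html = client.DownloadString(url);
            return new WebPage { url = url, html = html };
        }
    }
    catch (WebException)
    {
        return null;
    }
}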