static void Main(string[] args) { ConfigManager.IsUseProxy = false; ConcurrentQueue<string> urlQueue = new ConcurrentQueue<string>(); ConcurrentQueue<CommentCrawlJob> comJobQueue = new ConcurrentQueue<CommentCrawlJob>(); List<WeiBoContentCrawler> contentCrawlerList = new List<WeiBoContentCrawler>(); List<WeiBoCommentCrawler> commentCrawlerList = new List<WeiBoCommentCrawler>(); string exePath = System.IO.Path.Combine(Environment.CurrentDirectory, "appconfig.xml"); XElement rootNode = XElement.Load(exePath); var crawlers = from node in rootNode.Descendants("crawler") select node; var database = rootNode.Descendants("database").First(); string sqlConnString = "Database={0};Data Source={1};User Id={2};Password={3}"; sqlConnString = String.Format(sqlConnString, database.Descendants("database").First().Value, database.Descendants("host").First().Value, database.Descendants("user").First().Value, database.Descendants("password").First().Value); MySqlHelper.ConnectionStringLocalTransaction = sqlConnString; foreach (var crawler in crawlers) { string id = crawler.Attribute("id").Value; string type = crawler.Attribute("type").Value; var cookies = from cookie in crawler.Element("cookies").Descendants() select cookie; CookieCollection cookieCol = new CookieCollection(); foreach (var cookie in cookies) { Cookie c = new Cookie(cookie.Attribute("key").Value, cookie.Attribute("value").Value); c.Domain = ".weibo.cn"; cookieCol.Add(c); } Uri proxyUri = new Uri( crawler.Element("proxy").Attribute("url").Value ); if (type == "ContentCrawler") { WeiBoContentCrawler c = new WeiBoContentCrawler(urlQueue, comJobQueue, cookieCol, proxyUri); contentCrawlerList.Add(c); } else { WeiBoCommentCrawler c = new WeiBoCommentCrawler(comJobQueue, cookieCol, proxyUri); commentCrawlerList.Add(c); } } foreach( WeiBoContentCrawler c in contentCrawlerList ){ Thread t = new Thread(c.Run); t.Start(); } foreach (WeiBoCommentCrawler c in commentCrawlerList) { Thread t = new Thread(c.Run); t.Start(); } //urlQueue.Enqueue("http://weibo.com/1635106672/zAn0lrPAZ"); string sql = "SELECT * FROM WeiBoList where com_finish = false"; MySqlDataReader reader = MySqlHelper.ExecuteReader(sql, null); while (reader.Read()) { // if (!reader.IsDBNull(4)) // continue; string url = reader.GetString(2); urlQueue.Enqueue(url); } reader.Close(); urlQueue.Enqueue("exit"); }
static void Main(string[] args) { ConfigManager.IsUseProxy = false; ConcurrentQueue<string> urlQueue = new ConcurrentQueue<string>(); ConcurrentQueue<CommentCrawlJob> comJobQueue = new ConcurrentQueue<CommentCrawlJob>(); List<WeiBoContentCrawler> contentCrawlerList = new List<WeiBoContentCrawler>(); List<WeiBoCommentCrawler> commentCrawlerList = new List<WeiBoCommentCrawler>(); string exePath = System.IO.Path.Combine(Environment.CurrentDirectory, "appconfig.xml"); XElement rootNode = XElement.Load(exePath); var crawlers = from node in rootNode.Descendants("crawler") select node; foreach (var crawler in crawlers) { string id = crawler.Attribute("id").Value; string type = crawler.Attribute("type").Value; var cookies = from cookie in crawler.Element("cookies").Descendants() select cookie; CookieCollection cookieCol = new CookieCollection(); foreach (var cookie in cookies) { Cookie c = new Cookie(cookie.Attribute("key").Value, cookie.Attribute("value").Value); c.Domain = ".weibo.cn"; cookieCol.Add(c); } Uri proxyUri = new Uri( crawler.Element("proxy").Attribute("url").Value ); if (type == "ContentCrawler") { WeiBoContentCrawler c = new WeiBoContentCrawler(urlQueue, comJobQueue, cookieCol, proxyUri); contentCrawlerList.Add(c); } else { WeiBoCommentCrawler c = new WeiBoCommentCrawler(comJobQueue, cookieCol, proxyUri); commentCrawlerList.Add(c); } } foreach (var c in contentCrawlerList) { c.CommentCrawlerList = commentCrawlerList; } WeiBoManager.Init(); Uri baseAddress = new Uri("http://localhost:6525/ContentCrawl"); ContentCrawlService service = new ContentCrawlService(contentCrawlerList,commentCrawlerList); using (ServiceHost host = new ServiceHost(service, baseAddress)) { ServiceMetadataBehavior smb = new ServiceMetadataBehavior(); smb.HttpGetEnabled = true; smb.MetadataExporter.PolicyVersion = PolicyVersion.Policy15; host.Description.Behaviors.Add(smb); var behavior = host.Description.Behaviors.Find<ServiceBehaviorAttribute>(); behavior.InstanceContextMode = InstanceContextMode.Single; host.Open(); Console.WriteLine("The ContentCrawl Service is ready at: {0}", baseAddress); Console.WriteLine("Press <Enter> to stop the service"); Console.ReadKey(); host.Close(); } }