/// <summary>
/// Worker-thread body: repeatedly takes a <c>UrlInfo</c> from the Redis work queue and
/// processes it until the queue is empty and every worker has marked itself idle.
/// </summary>
/// <remarks>
/// The actual download / parse / link-extraction steps are not implemented yet; the loop
/// currently only configures the request and prints placeholder messages.
/// </remarks>
/// <param name="threadIndex">
/// Boxed <see cref="int"/> index of this worker's slot in <c>threadStatus</c>.
/// </param>
protected void CrawlProcess(object threadIndex)
{
    var currentThreadIndex = (int)threadIndex;

    // Redis-backed work queue; each worker creates its own queue object.
    ITodoQueue<UrlInfo> workQueue = new RedisWorkQueue();

    while (true)
    {
        var info = workQueue.GetNext();

        // Nothing dequeued: flag this worker as idle, then either exit (all workers idle)
        // or sleep and poll again, since a busy worker may still enqueue new URLs.
        if (info == null)
        {
            threadStatus[currentThreadIndex] = true;
            if (threadStatus.All(idle => idle))
            {
                Console.WriteLine(currentThreadIndex + "处理完啦");
                break;
            }

            Thread.Sleep(2000);
            continue;
        }

        threadStatus[currentThreadIndex] = false;

        try
        {
            if (BloomVisitedQueue.Contains(info.Key))
            {
                Console.WriteLine(info.Key + "...完成过了");
                continue;
            }

            // TODO: check whether the page is already in the database and has been updated.
            var request = WebRequest.Create(info.UrlString) as HttpWebRequest;
            if (request == null)
            {
                // WebRequest.Create returns a non-HTTP request type for schemes such as
                // ftp:// or file://, so the cast yields null. Retrying cannot fix that,
                // so the item is dropped instead of being re-queued.
                Console.WriteLine(info.Key + "...skipped (not an HTTP URL)");
                continue;
            }

            ConfigRequest(request);

            // Placeholders for the not-yet-implemented crawl steps.
            Console.WriteLine("分析网页");
            Console.WriteLine("把超链接加入todo");
            BloomVisitedQueue.Add(info.Key);
            Console.WriteLine("把本连接加入visited");
        }
        catch (Exception ex)
        {
            // Best effort: report the failure, then put the dequeued item back so it is
            // not lost.
            // NOTE(review): there is no retry limit, so a permanently failing URL will be
            // re-queued indefinitely — confirm whether UrlInfo can carry a retry count.
            Console.WriteLine(info.Key + "...failed: " + ex.Message);
            workQueue.Add(info);
        }
    }
}