/// <summary>
/// Crawls search-result pages for the selected keywords on Baidu, Bing, Sogou,
/// Zhongsou and Haosou, and passes every non-empty page of parsed results to
/// <c>DataPersistenceControl</c>.
/// </summary>
/// <remarks>
/// Keywords come from the <c>Keywords</c> table: every row when <c>selectKID</c> is -1,
/// otherwise only the rows whose <c>kid</c> equals <c>selectKID</c>. When a specific
/// keyword name is selected, rows with a different name are skipped.
/// For each engine, paging stops at the first page that parses to no results.
/// The method blocks the calling thread between requests via <c>Thread.Sleep</c>.
/// </remarks>
private void BaiduWebWebSpider()
{
    #region Load keywords
    MySqlCmd cmd = new MySqlCmd();

    // selectKID == -1 means "all keywords"; otherwise restrict to the chosen kid.
    // selectKID is compared against -1 above, i.e. it is numeric, so the
    // concatenation below cannot carry arbitrary SQL text.
    DataTable dtkey = selectKID == -1
        ? cmd.GetTabel("select * from Keywords")
        : cmd.GetTabel("select * from Keywords where kid=" + selectKID);

    if (dtkey == null)
    {
        // Nothing to crawl if the keyword query produced no table.
        return;
    }
    #endregion

    HtmlParse.Parse parse = new HtmlParse.Parse();

    // Iterate over every keyword row.
    foreach (DataRow row in dtkey.Rows)
    {
        // When a single keyword name is selected, skip every other row.
        if (selectKID != -1
            && selectKwName != "全部"
            && row["name"].ToString().Trim() != selectKwName)
        {
            continue;
        }

        string keyword = row["KeyWord"].ToString().Trim();

        int kid = 0;
        int.TryParse(row["kid"].ToString().Trim(), out kid);

        // Encode once per keyword and reuse it for every engine. Previously only the
        // Baidu URL was encoded, so keywords containing '&', '#', '+', '=' or spaces
        // produced broken query strings on the other four engines.
        // NOTE(review): CrawlHtml.UrlEncode is assumed to emit UTF-8 percent-encoding
        // (it already feeds the Baidu URL that declares ie=utf-8) - confirm, especially
        // for Zhongsou, whose response is read with Encoding.Default.
        string encodedKeyword = CrawlHtml.UrlEncode(keyword);

        #region Baidu search
        for (int page = 0; page < 5; page++)
        {
            string url = string.Format(@"http://www.baidu.com/s?wd={0}&pn={1}&ie=utf-8", encodedKeyword, page * 10);
            string html = HtmlUtil.HttpGet(url, Encoding.UTF8);
            if (!StoreSearchResults(parse.ParseBaiduWeb(html, keyword, kid)))
            {
                break;
            }

            // Pause between requests to avoid being blacklisted.
            Thread.Sleep(Interval50s);
        }
        #endregion

        #region Bing search
        for (int page = 0; page < 10; page++)
        {
            // Build the query string; Bing's "first" parameter is a 1-based result offset.
            string url = string.Format("http://cn.bing.com/search?q={0}&first={1}&FORM=PERE", encodedKeyword, page * 10 + 1);
            string html = HtmlUtil.HttpGet(url, Encoding.UTF8);
            if (!StoreSearchResults(parse.ParseBingWeb(html, keyword, kid)))
            {
                break;
            }

            // Pause between requests to avoid being blacklisted.
            Thread.Sleep(Interval30s);
        }
        #endregion

        #region Sogou search
        for (int page = 0; page < 10; page++)
        {
            // Build the query string; page numbers are 1-based.
            string url = string.Format("http://www.sogou.com/web?query={0}&page={1}&ie=utf8", encodedKeyword, page + 1);
            string html = HtmlUtil.HttpGet(url, Encoding.UTF8);
            if (!StoreSearchResults(parse.ParseSogouWeb(html, keyword, kid)))
            {
                break;
            }

            // Pause between requests to avoid being blacklisted.
            Thread.Sleep(Interval2m);
        }
        #endregion

        #region Zhongsou search
        for (int page = 0; page < 10; page++)
        {
            // Build the query string; page numbers are 1-based.
            string url = string.Format("http://www.zhongsou.com/third?w={0}&b={1}", encodedKeyword, page + 1);
            string html = HtmlUtil.HttpGet(url, Encoding.Default);
            if (!StoreSearchResults(parse.ParseZhongsouWeb(html, keyword, kid)))
            {
                break;
            }

            // Pause between requests to avoid being blacklisted.
            Thread.Sleep(Interval2m);
        }
        #endregion

        #region Haosou search
        for (int page = 0; page < 10; page++)
        {
            // Build the query string; page numbers are 1-based.
            string url = string.Format("http://www.haosou.com/s?q={0}&pn={1}", encodedKeyword, page + 1);
            string html = HtmlUtil.HttpGet(url, Encoding.UTF8);
            if (!StoreSearchResults(parse.ParseHaosouWeb(html, keyword, kid)))
            {
                break;
            }

            // Pause between requests to avoid being blacklisted.
            Thread.Sleep(Interval50s);
        }
        #endregion
    }
}

/// <summary>
/// Persists one page of parsed search results when it contains at least one item.
/// </summary>
/// <param name="results">Parsed results for a single result page; may be null.</param>
/// <returns>
/// <c>true</c> when the page had results and they were handed to
/// <c>DataPersistenceControl</c>; <c>false</c> when the page was null or empty,
/// which the caller treats as the end of paging for that engine.
/// </returns>
private static bool StoreSearchResults(List<ModelReleaseInfo> results)
{
    if (results == null || results.Count == 0)
    {
        return false;
    }

    // Write the page of results to the persistence layer.
    DataPersistenceControl.GetInstance().Add(results);
    return true;
}