Esempio n. 1
0
        /// <summary>
        /// Crawls microblog search results for the selected keywords and hands every
        /// non-empty parse result to <c>DataPersistenceControl</c>.
        /// </summary>
        /// <remarks>
        /// Keywords are read from the <c>Keywords</c> table: every row when
        /// <c>selectKID</c> is -1, otherwise only rows whose <c>kid</c> matches it.
        /// For each keyword the first Sina Weibo search page is fetched, followed by
        /// up to ten Zhongsou result pages. The method blocks the calling thread with
        /// <c>Thread.Sleep</c> between requests.
        /// </remarks>
        private void WeiboWebSpider()
        {
            // Upper bound on Zhongsou result pages requested per keyword.
            const int ZhongsouMaxPages = 10;

            #region Load keywords
            MySqlCmd cmd = new MySqlCmd();
            DataTable dtkey;
            if (selectKID == -1)
            {
                // -1 means "all keywords".
                dtkey = cmd.GetTabel("select * from Keywords");
            }
            else
            {
                // NOTE(review): selectKID is concatenated into the SQL text; this is only
                // safe while it is a numeric field - confirm against its declaration.
                dtkey = cmd.GetTabel("select * from Keywords where kid=" + selectKID);
            }
            #endregion

            HtmlParse.Parse parse = new HtmlParse.Parse();

            // Process one keyword row at a time.
            foreach (DataRow row in dtkey.Rows)
            {
                string keyTitle = row["Name"].ToString().Trim();

                // When a specific keyword name is selected, skip every other row.
                if (selectKID != -1 && selectKwName != "全部" && keyTitle != selectKwName)
                {
                    continue;
                }

                string keyword = row["KeyWord"].ToString().Trim();

                // kid stays 0 when the column cannot be parsed as an integer.
                int kid = 0;
                int.TryParse(row["kid"].ToString().Trim(), out kid);

                #region Sina Weibo search
                string encodeKey = CrawlHtml.UrlEncode(keyword);
                string weiboUrl = "http://s.weibo.com/weibo/" + encodeKey + "?topnav=1&wvr=6&b=1&page=1";
                string weiboHtml = HtmlUtil.HttpGet(weiboUrl, Encoding.UTF8);
                List<ModelReleaseInfo> weiboItems = parse.ParseSinaWeibo(weiboHtml, keyword, kid);
                if (weiboItems != null && weiboItems.Count > 0)
                {
                    DataPersistenceControl.GetInstance().Add(weiboItems);
                }
                // Pause so Weibo does not blacklist the crawler.
                Thread.Sleep(Interval30s);
                #endregion

                #region Zhongsou search
                // Escape the keyword so reserved characters (&, #, +, =) cannot corrupt
                // the query string.
                // NOTE(review): assumes Zhongsou accepts UTF-8 percent-encoding and that
                // HtmlUtil.HttpGet does not re-encode the URL - confirm.
                string zhongsouKey = Uri.EscapeDataString(keyword);
                for (int page = 1; page <= ZhongsouMaxPages; page++)
                {
                    string zhongsouUrl = string.Format("http://t.zhongsou.com/wb?w={0}&b={1}", zhongsouKey, page);
                    string zhongsouHtml = HtmlUtil.HttpGet(zhongsouUrl, Encoding.Default);
                    List<ModelReleaseInfo> zhongsouItems = parse.ParseZhongsouWeibo(zhongsouHtml, keyword, kid);
                    if (zhongsouItems == null || zhongsouItems.Count == 0)
                    {
                        // An empty page means there are no further results for this keyword.
                        break;
                    }

                    // Persist the parsed entries.
                    DataPersistenceControl.GetInstance().Add(zhongsouItems);

                    // Pause so Zhongsou does not blacklist the crawler.
                    Thread.Sleep(Interval2m);
                }
                #endregion
            }
        }