Example #1
0
        /// <summary>
        /// Updates the ReleaseInfo row whose <c>uid</c> equals <c>obj.Uid</c> with the
        /// column values carried by <paramref name="obj"/>.
        /// </summary>
        /// <param name="obj">Record supplying the uid to match and the new column values.</param>
        /// <returns>The value returned by <c>ExecuteNonQueryInt</c> (presumably the affected-row count; confirm against MySqlCmd).</returns>
        /// <exception cref="Exception">The statement failed; the original error is available as InnerException.</exception>
        public int FixReleaseInfo(ModelReleaseInfo obj)
        {
            // MySQL expects "UPDATE <table> SET ..."; the table name must come before SET.
            string sql = @"UPDATE ReleaseInfo SET Title=@Title,Contexts=@Contexts,ReleaseDate=@ReleaseDate,
                                InfoSource=@InfoSource,KeyWords=@KeyWords,ReleaseName=@ReleaseName,
                                CollectDate=@CollectDate,Snapshot=@Snapshot
                        WHERE uid=@uid";

            // Every parameter name must match a placeholder in the statement exactly.
            List<MySqlParameter> par = new List<MySqlParameter>
            {
                new MySqlParameter("@uid", obj.Uid),
                new MySqlParameter("@Title", obj.Title),
                new MySqlParameter("@Contexts", obj.Contexts),
                new MySqlParameter("@ReleaseDate", obj.ReleaseDate),
                new MySqlParameter("@InfoSource", obj.InfoSource),
                new MySqlParameter("@KeyWords", obj.KeyWords),
                new MySqlParameter("@ReleaseName", obj.ReleaseName),
                new MySqlParameter("@CollectDate", obj.CollectDate),
                new MySqlParameter("@Snapshot", obj.Snapshot)
            };

            try
            {
                DataBaseServer.MySqlCmd dbobj = new DataBaseServer.MySqlCmd();
                return dbobj.ExecuteNonQueryInt(sql, par);
            }
            catch (Exception ex)
            {
                // Keep the original exception so the stack trace is not lost.
                throw new Exception("修改失败,位置:FixReleaseInfo.原因:" + ex.Message, ex);
            }
        }
Example #2
0
        /// <summary>
        /// Click handler: writes the folder shown in <c>tb_choosedimgpath</c> to
        /// <c>systemset.EvidenceImgSavePath</c> (row id=1) and, on success, copies it
        /// into the SystemSet object held in <c>GlobalPars.GloPars</c>.
        /// </summary>
        private void btn_saveimgpath_Click(object sender, EventArgs e)
        {
            string chosenPath = tb_choosedimgpath.Text;
            if (string.IsNullOrWhiteSpace(chosenPath))
            {
                MessageBox.Show("请先选择证据图片保存路径!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
                return;
            }

            const string updateSql = "UPDATE systemset SET EvidenceImgSavePath=@EvidenceImgSavePath WHERE id=1";
            List<MySqlParameter> arguments = new List<MySqlParameter>
            {
                new MySqlParameter("@EvidenceImgSavePath", chosenPath)
            };

            try
            {
                DataBaseServer.MySqlCmd database = new DataBaseServer.MySqlCmd();
                bool saved = database.ExecuteNonQueryInt(updateSql, arguments) > 0;
                if (!saved)
                {
                    MessageBox.Show("修改失败,请稍后重试!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Error);
                    return;
                }

                // Keep the in-memory settings in step with what was just stored.
                SystemSet cachedSettings = (SystemSet)GlobalPars.GloPars["systemset"];
                cachedSettings.EvidenceImgSavePath = chosenPath;
                MessageBox.Show("修改成功!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
            }
            catch (Exception)
            {
                // Any failure is reported to the user with a generic message only.
                MessageBox.Show("出现异常,请稍后重试或联系软件提供商!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
        }
Example #3
0
        /// <summary>
        /// Updates the ReleaseInfo row whose <c>uid</c> equals <c>obj.Uid</c> with the
        /// column values carried by <paramref name="obj"/>.
        /// </summary>
        /// <param name="obj">Record supplying the uid to match and the new column values.</param>
        /// <returns>The value returned by <c>ExecuteNonQueryInt</c> (presumably the affected-row count; confirm against MySqlCmd).</returns>
        /// <exception cref="Exception">The statement failed; the original error is available as InnerException.</exception>
        public int FixReleaseInfo(ModelReleaseInfo obj)
        {
            // MySQL expects "UPDATE <table> SET ..."; the table name must come before SET.
            string sql = @"UPDATE ReleaseInfo SET Title=@Title,Contexts=@Contexts,ReleaseDate=@ReleaseDate,
                                InfoSource=@InfoSource,KeyWords=@KeyWords,ReleaseName=@ReleaseName,
                                CollectDate=@CollectDate,Snapshot=@Snapshot
                        WHERE uid=@uid";

            List<MySqlParameter> par = new List<MySqlParameter>();

            par.Add(new MySqlParameter("@uid", obj.Uid));
            par.Add(new MySqlParameter("@Title", obj.Title));
            par.Add(new MySqlParameter("@Contexts", obj.Contexts));
            // Name must match the @ReleaseDate placeholder above, otherwise it is never bound.
            par.Add(new MySqlParameter("@ReleaseDate", obj.ReleaseDate));
            par.Add(new MySqlParameter("@InfoSource", obj.InfoSource));
            par.Add(new MySqlParameter("@KeyWords", obj.KeyWords));
            par.Add(new MySqlParameter("@ReleaseName", obj.ReleaseName));
            par.Add(new MySqlParameter("@CollectDate", obj.CollectDate));
            par.Add(new MySqlParameter("@Snapshot", obj.Snapshot));

            try
            {
                DataBaseServer.MySqlCmd dbobj = new DataBaseServer.MySqlCmd();
                return dbobj.ExecuteNonQueryInt(sql, par);
            }
            catch (Exception ex)
            {
                // Keep the original exception so the stack trace is not lost.
                throw new Exception("修改失败,位置:FixReleaseInfo.原因:" + ex.Message, ex);
            }
        }
Example #4
0
 /// <summary>
 /// Deletes the ReleaseInfo row(s) whose <c>uid</c> equals <paramref name="uid"/>.
 /// </summary>
 /// <param name="uid">Value matched against the <c>uid</c> column.</param>
 /// <returns>The value returned by <c>ExecuteNonQueryInt</c>; the original documentation states that 1 means success.</returns>
 /// <exception cref="Exception">The statement failed; the original error is available as InnerException.</exception>
 public int DelReleaseInfo(int uid)
 {
     string sql = @"DELETE FROM ReleaseInfo WHERE uid=@uid";
     List<MySqlParameter> par = new List<MySqlParameter>
     {
         new MySqlParameter("@uid", uid)
     };
     try
     {
         DataBaseServer.MySqlCmd dbobj = new MySqlCmd();
         return dbobj.ExecuteNonQueryInt(sql, par);
     }
     catch (Exception ex)
     {
         // Keep the original exception so the stack trace is not lost.
         throw new Exception("删除失败,位置:DelReleaseInfo.原因:" + ex.Message, ex);
     }
 }
Example #5
0
        /// <summary>
        /// Click handler for the change-password form. Checks the old password against
        /// the cached <c>UserInfo.Pword</c>, checks that both new-password boxes agree,
        /// then writes the new value to <c>LoginUser.PWord</c> for the current uid.
        /// </summary>
        private void btn_updatepwd_Click(object sender, EventArgs e)
        {
            UserInfo currentUser = (UserInfo)GlobalPars.GloPars["UserInfo"];

            // The typed old password is compared directly with the cached value.
            bool oldPasswordMatches = currentUser.Pword == tb_oldpwd.Text;
            if (!oldPasswordMatches)
            {
                MessageBox.Show("您输入的旧密码不正确!", "警告", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            string requested = tb_newpwd1.Text;
            string confirmation = tb_newpwd2.Text;
            if (!requested.Equals(confirmation))
            {
                MessageBox.Show("您两次输入的密码不一致!", "警告", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            const string updateSql = "UPDATE LoginUser SET PWord=@PWord WHERE uid=@Uid";
            List<MySqlParameter> arguments = new List<MySqlParameter>
            {
                new MySqlParameter("@PWord", requested),
                new MySqlParameter("@Uid", currentUser.Uid)
            };

            try
            {
                DataBaseServer.MySqlCmd database = new DataBaseServer.MySqlCmd();
                int result = database.ExecuteNonQueryInt(updateSql, arguments);
                if (result > 0)
                {
                    MessageBox.Show("修改成功!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
                    // Refresh the cached copy so the next change validates against the new value.
                    currentUser.Pword = requested;
                }
                else
                {
                    MessageBox.Show("修改失败,请稍后重试或联系软件提供商!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
                }
            }
            catch (Exception)
            {
                MessageBox.Show("修改失败,请稍后重试或联系软件提供商!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
            }
        }
Example #6
0
        /// <summary>
        /// General site crawler: loads the keyword list and the WebAddress table, then
        /// calls <c>BasicWebSpider</c> once per WebAddress row, pausing two seconds
        /// after each site.
        /// </summary>
        private void GeneralWebSpider()
        {
            DataBaseServer.MySqlCmd database = new MySqlCmd();
            // Keyword list handed to every crawl.
            DataTable keywordTable = database.GetTabel("select * from Keywords");
            // Sites to crawl.
            DataTable siteTable = database.GetTabel("Select * from WebAddress");

            foreach (DataRow site in siteTable.Rows)
            {
                string siteUrl = site["url"].ToString();
                string province = site["sheng"].ToString();
                string city = site["shi"].ToString();
                string county = site["xian"].ToString();
                string siteName = site["Name"].ToString();
                // "likeurl" column: the similar-link value passed through to the crawler.
                string similarLink = site["likeurl"].ToString();

                // Sub-site crawling is always switched off here.
                const bool crawlSubSites = false;
                BasicWebSpider(siteUrl, similarLink, keywordTable, province, city, county, siteName, crawlSubSites);

                // Pause between sites to avoid being blocked.
                Thread.Sleep(2000);
            }
        }
Example #7
0
        /// <summary>
        /// Issues one Sogou search per configured keyword, parses the result page with
        /// <c>ParseSogouBlog</c> and passes any parsed items to
        /// <c>DataPersistenceControl</c>. Sleeps <c>Interval2m</c> after each request.
        /// </summary>
        private void BlogWebSpider()
        {
            MySqlCmd cmd = new MySqlCmd();
            // selectKID == -1 selects every keyword; otherwise only rows of that kid.
            DataTable dtkey = selectKID == -1
                ? cmd.GetTabel("select * from Keywords")
                : cmd.GetTabel("select * from Keywords where kid=" + selectKID);

            HtmlParse.Parse parse = new HtmlParse.Parse();
            foreach (DataRow row in dtkey.Rows)
            {
                // When a specific event name is selected, skip keywords of other events.
                if (selectKID != -1 && selectKwName != "全部"
                    && row["name"].ToString().Trim() != selectKwName)
                {
                    continue;
                }

                string keyword = row["KeyWord"].ToString().Trim();
                int kid;
                int.TryParse(row["kid"].ToString().Trim(), out kid);

                // Percent-encode the keyword so characters such as '&', '#' or '+'
                // cannot corrupt the query string.
                string url = "http://www.sogou.com/web?interation=196647&query="
                    + System.Uri.EscapeDataString(keyword) + "&ie=utf8";
                string html = HtmlUtil.HttpGet(url, Encoding.UTF8);

                List<ModelReleaseInfo> mris = parse.ParseSogouBlog(html, keyword, kid);
                if (mris != null && mris.Count > 0)
                {
                    DataPersistenceControl.GetInstance().Add(mris);
                }

                // Pause between requests to avoid being blocked.
                Thread.Sleep(Interval2m);
            }
        }
Example #8
0
        /// <summary>
        /// For every configured keyword, queries Baidu, Bing, Sogou, Zhongsou and Haosou
        /// in that order. Each engine is paged until a page yields no parsed items (or
        /// the page limit is reached); every non-empty page is passed to
        /// <c>DataPersistenceControl</c>, followed by a pause.
        /// </summary>
        private void BaiduWebWebSpider()
        {
            MySqlCmd cmd = new MySqlCmd();
            // selectKID == -1 selects every keyword; otherwise only rows of that kid.
            DataTable dtkey = selectKID == -1
                ? cmd.GetTabel("select * from Keywords")
                : cmd.GetTabel("select * from Keywords where kid=" + selectKID);

            HtmlParse.Parse parse = new HtmlParse.Parse();
            foreach (DataRow row in dtkey.Rows)
            {
                // When a specific event name is selected, skip keywords of other events.
                if (selectKID != -1 && selectKwName != "全部"
                    && row["name"].ToString().Trim() != selectKwName)
                {
                    continue;
                }

                string keyword = row["KeyWord"].ToString().Trim();
                int kid;
                int.TryParse(row["kid"].ToString().Trim(), out kid);

                // Encode once per keyword and use the encoded form in every engine's URL;
                // the parsers still receive the raw keyword.
                string encodeKey = CrawlHtml.UrlEncode(keyword);

                #region Baidu search (up to 5 pages)
                for (int i = 0; i < 5; i++)
                {
                    string url = string.Format(@"http://www.baidu.com/s?wd={0}&pn={1}&ie=utf-8", encodeKey, i * 10);
                    string html = HtmlUtil.HttpGet(url, Encoding.UTF8);
                    List<ModelReleaseInfo> mris = parse.ParseBaiduWeb(html, keyword, kid);
                    if (mris == null || mris.Count == 0)
                    {
                        break;
                    }
                    DataPersistenceControl.GetInstance().Add(mris);
                    // Pause between requests to avoid being blocked.
                    Thread.Sleep(Interval50s);
                }
                #endregion

                #region Bing search (up to 10 pages)
                for (int i = 0; i < 10; i++)
                {
                    string url = string.Format("http://cn.bing.com/search?q={0}&first={1}&FORM=PERE", encodeKey, i * 10 + 1);
                    string html = HtmlUtil.HttpGet(url, Encoding.UTF8);
                    List<ModelReleaseInfo> mris = parse.ParseBingWeb(html, keyword, kid);
                    if (mris == null || mris.Count == 0)
                    {
                        break;
                    }
                    DataPersistenceControl.GetInstance().Add(mris);
                    // Pause between requests to avoid being blocked.
                    Thread.Sleep(Interval30s);
                }
                #endregion

                #region Sogou search (up to 10 pages)
                for (int i = 0; i < 10; i++)
                {
                    string url = string.Format("http://www.sogou.com/web?query={0}&page={1}&ie=utf8", encodeKey, i + 1);
                    string html = HtmlUtil.HttpGet(url, Encoding.UTF8);
                    List<ModelReleaseInfo> mris = parse.ParseSogouWeb(html, keyword, kid);
                    if (mris == null || mris.Count == 0)
                    {
                        break;
                    }
                    DataPersistenceControl.GetInstance().Add(mris);
                    // Pause between requests to avoid being blocked.
                    Thread.Sleep(Interval2m);
                }
                #endregion

                #region Zhongsou search (up to 10 pages)
                for (int i = 0; i < 10; i++)
                {
                    string url = string.Format("http://www.zhongsou.com/third?w={0}&b={1}", encodeKey, i + 1);
                    // This engine's response is decoded with the system default encoding.
                    string html = HtmlUtil.HttpGet(url, Encoding.Default);
                    List<ModelReleaseInfo> mris = parse.ParseZhongsouWeb(html, keyword, kid);
                    if (mris == null || mris.Count == 0)
                    {
                        break;
                    }
                    DataPersistenceControl.GetInstance().Add(mris);
                    // Pause between requests to avoid being blocked.
                    Thread.Sleep(Interval2m);
                }
                #endregion

                #region Haosou search (up to 10 pages)
                for (int i = 0; i < 10; i++)
                {
                    string url = string.Format("http://www.haosou.com/s?q={0}&pn={1}", encodeKey, i + 1);
                    string html = HtmlUtil.HttpGet(url, Encoding.UTF8);
                    List<ModelReleaseInfo> mris = parse.ParseHaosouWeb(html, keyword, kid);
                    if (mris == null || mris.Count == 0)
                    {
                        break;
                    }
                    DataPersistenceControl.GetInstance().Add(mris);
                    // Pause between requests to avoid being blocked.
                    Thread.Sleep(Interval50s);
                }
                #endregion
            }
        }
Example #9
0
        /// <summary>
        /// Returns up to 50 non-deleted ReleaseInfo rows, newest <c>collectdate</c> first,
        /// joined to their keyword's event name and limited to the uid window reported by
        /// <c>Comm.GetPageInfo()</c>.
        /// </summary>
        /// <param name="kid">Keyword-group id to filter on; -1 disables both filters.</param>
        /// <param name="eventName">Event name to filter on; ignored when null, empty, "全部", or when <paramref name="kid"/> is -1.</param>
        /// <returns>The result table, or <c>null</c> when the query throws.</returns>
        public DataTable GetLatestData(int kid, string eventName)
        {
            DataPage firstPage = Finder.util.Comm.GetPageInfo();

            string sql = @"select b.Name eventname, a.* from releaseinfo a  left join keywords b on a.keywords=b.KeyWord
                                    where b.Name is not null and  a.deleted=0 and a.uid between " + firstPage.CurrenPageStartUid + " and " + firstPage.CurrenPageEndUid;
            if (kid != -1)
            {
                sql += "    and a.kid=" + kid;
                if (!string.IsNullOrEmpty(eventName) && eventName != "全部")
                {
                    // Escape backslashes and quotes so the name cannot terminate the SQL
                    // literal (assumes MySQL's default backslash-escape mode).
                    string safeEventName = eventName.Replace("\\", "\\\\").Replace("'", "''");
                    sql += " and  b.Name='" + safeEventName + "'";
                }
            }
            sql += " order by a.collectdate desc limit 0,50";
            try
            {
                DataBaseServer.MySqlCmd dbobj = new MySqlCmd();

                return dbobj.GetTabel(sql);
            }
            catch (Exception)
            {
                // Existing contract: a failed query is reported to the caller as null.
                return null;
            }
        }
Example #10
0
 // Reloads the newest 300 rows of ReleaseInfowb into the weibo grid; the status
 // label makes this look like a live weibo search to the user.
 private void reFreshWeibo()
 {
     if (Program.ProClose)
     {
         return;
     }

     lbweibo.Visible = true;
     lbweibo.Text = "正在搜索微博数据...";
     lbweibo.ForeColor = Color.DarkBlue;

     const string latestWeiboSql = "select uid,releasename AS title,contexts,releasedate,infosource,keywords,releasename,collectdate,snapshot,webname,pid,part,reposts,comments from ReleaseInfowb order by uid desc limit 0,300";
     DataBaseServer.MySqlCmd database = new MySqlCmd();
     DataTable latestWeibo = database.GetTabel(latestWeiboSql);

     dvWBlog.DataSource = latestWeibo;
     dvWBlog.Refresh();

     lbweibo.Text = "一轮搜索完毕!";
     lbweibo.ForeColor = Color.Red;
 }
Example #11
0
 /// <summary>
 /// Click handler for the change-password form. Rejects the request when the typed
 /// old password differs from the cached <c>UserInfo.Pword</c> or when the two
 /// new-password boxes differ; otherwise updates <c>LoginUser.PWord</c> for the
 /// current uid and refreshes the cached value.
 /// </summary>
 private void btn_updatepwd_Click(object sender, EventArgs e)
 {
     UserInfo account = (UserInfo)GlobalPars.GloPars["UserInfo"];
     if (account.Pword != tb_oldpwd.Text)
     {
         MessageBox.Show("您输入的旧密码不正确!", "警告", MessageBoxButtons.OK, MessageBoxIcon.Error);
         return;
     }

     string firstEntry = tb_newpwd1.Text;
     string secondEntry = tb_newpwd2.Text;
     bool entriesAgree = firstEntry.Equals(secondEntry);
     if (!entriesAgree)
     {
         MessageBox.Show("您两次输入的密码不一致!", "警告", MessageBoxButtons.OK, MessageBoxIcon.Error);
         return;
     }

     const string statement = "UPDATE LoginUser SET PWord=@PWord WHERE uid=@Uid";
     List<MySqlParameter> bindings = new List<MySqlParameter>
     {
         new MySqlParameter("@PWord", firstEntry),
         new MySqlParameter("@Uid", account.Uid)
     };

     try
     {
         DataBaseServer.MySqlCmd executor = new DataBaseServer.MySqlCmd();
         bool changed = executor.ExecuteNonQueryInt(statement, bindings) > 0;
         if (changed)
         {
             MessageBox.Show("修改成功!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
             // Keep the cached password in step with the database.
             account.Pword = firstEntry;
         }
         else
         {
             MessageBox.Show("修改失败,请稍后重试或联系软件提供商!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
         }
     }
     catch (Exception)
     {
         MessageBox.Show("修改失败,请稍后重试或联系软件提供商!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
     }
 }
Example #12
0
        /// <summary>
        /// Counts ReleaseInfo rows with the given source URL and keywords inside the uid
        /// window reported by <c>Comm.GetPageInfo()</c>.
        /// </summary>
        /// <param name="sUrl">Value compared with the <c>InfoSource</c> column.</param>
        /// <param name="keyword">Value compared with the <c>keywords</c> column.</param>
        /// <returns>The row count, or 0 when the query fails or yields no parsable number.</returns>
        public int GetReleaseInfoCount(string sUrl, string keyword)
        {
            DataPage firstPage = Finder.util.Comm.GetPageInfo();

            // Both values originate from crawled pages. Escape backslashes and quotes so
            // they cannot terminate the SQL literal (assumes MySQL's default
            // backslash-escape mode). Null is treated as empty, as string.Format did.
            string safeUrl = (sUrl ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");
            string safeKeyword = (keyword ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");

            string sql = "SELECT count(*) FROM ReleaseInfo WHERE InfoSource ='" + safeUrl
                + "' and keywords='" + safeKeyword
                + "'  and uid between " + firstPage.CurrenPageStartUid + " and " + firstPage.CurrenPageEndUid;

            try
            {
                DataBaseServer.MySqlCmd dbobj = new MySqlCmd();
                object obj = dbobj.GetOne(sql);
                if (obj == null)
                {
                    return 0;
                }

                int val;
                int.TryParse(obj.ToString(), out val);
                return val;
            }
            catch (Exception)
            {
                // Deliberately best-effort: a failed lookup is reported as "no rows".
                return 0;
            }
        }
Example #13
0
 /// <summary>
 /// Timer callback. Unless the program is closing, it refreshes the Sina client,
 /// reloads the keyword, event and part-word tables, then fetches the public
 /// timeline (200) and the friends timeline (100).
 /// </summary>
 public void timer_Elapsed(object source, System.Timers.ElapsedEventArgs e)
 {
     if (Program.ProClose)
     {
         return;
     }

     getSinaClientAuto();

     // Reload the lookup tables on every tick.
     cmd = new MySqlCmd();
     dt_kw = cmd.GetTabel("select KeyWord from KeyWords group by KeyWord");
     dt_event = cmd.GetTabel("select name,keyword,kid from KeyWords group by name,keyword,kid");
     dt_partWord = cmd.GetTabel("select word,part from partword");

     getPublic_Timeline_sina(sina, 200);
     getFriends_Timeline_sina(sina, 100);
 }
Example #14
0
        /// <summary>
        /// Returns ReleaseInfo rows of one <c>pid</c> whose <c>CollectDate</c> lies between
        /// the two given times, ordered by <c>CollectDate</c>, inside the uid window
        /// reported by <c>Comm.GetPageInfo()</c>.
        /// </summary>
        /// <param name="t1">Start of the time range.</param>
        /// <param name="t2">End of the time range.</param>
        /// <param name="pid">Value compared with the <c>pid</c> column.</param>
        /// <returns>The matching rows.</returns>
        /// <exception cref="Exception">The query failed; the original error is available as InnerException.</exception>
        public DataTable SelReleaseInfo(string t1, string t2, string pid)
        {
            DataPage firstPage = Finder.util.Comm.GetPageInfo();

            // The statement has always been built as BETWEEN t2 AND t1, which is empty when
            // callers pass (start, end) as documented. When both bounds parse as dates,
            // order them so either calling convention works; otherwise keep the old order.
            string lower = t2;
            string upper = t1;
            DateTime first;
            DateTime second;
            if (DateTime.TryParse(t1, out first) && DateTime.TryParse(t2, out second) && first < second)
            {
                lower = t1;
                upper = t2;
            }

            // Escape backslashes and quotes so no argument can terminate its SQL literal
            // (assumes MySQL's default backslash-escape mode). pid is now passed as a
            // quoted literal; MySQL converts it when the column is numeric.
            string safeLower = (lower ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");
            string safeUpper = (upper ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");
            string safePid = (pid ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");

            string sql = "SELECT * FROM ReleaseInfo WHERE CollectDate BETWEEN '" + safeLower + "' AND '" + safeUpper
                + "' AND pid='" + safePid
                + "'  and uid between " + firstPage.CurrenPageStartUid + " and " + firstPage.CurrenPageEndUid
                + " ORDER BY CollectDate";
            try
            {
                DataBaseServer.MySqlCmd dbobj = new MySqlCmd();

                return dbobj.GetTabel(sql);
            }
            catch (Exception ex)
            {
                // Keep the original exception so the stack trace is not lost.
                throw new Exception("查询失败,位置:SelReleaseInfo.原因:" + ex.Message, ex);
            }
        }
Example #15
0
        /// <summary>
        /// Returns the ReleaseInfo rows whose <c>InfoSource</c> equals the given URL,
        /// inside the uid window reported by <c>Comm.GetPageInfo()</c>.
        /// </summary>
        /// <param name="sUrl">Value compared with the <c>InfoSource</c> column.</param>
        /// <returns>The matching rows.</returns>
        /// <exception cref="Exception">The query failed; the original error is available as InnerException.</exception>
        public DataTable SelReleaseInfo(string sUrl)
        {
            DataPage firstPage = Finder.util.Comm.GetPageInfo();

            // Escape backslashes and quotes so the URL cannot terminate the SQL literal
            // (assumes MySQL's default backslash-escape mode). Null is treated as empty.
            string safeUrl = (sUrl ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");
            string sql = "SELECT * FROM ReleaseInfo WHERE InfoSource ='" + safeUrl
                + "' and uid between " + firstPage.CurrenPageStartUid + " and " + firstPage.CurrenPageEndUid;

            try
            {
                DataBaseServer.MySqlCmd dbobj = new MySqlCmd();
                return dbobj.GetTabel(sql);
            }
            catch (Exception ex)
            {
                // Keep the original exception so the stack trace is not lost.
                throw new Exception("查询失败,位置:SelReleaseInfo.原因:" + ex.Message, ex);
            }
        }
Example #16
0
        /// <summary>
        /// Returns the ReleaseInfo row(s) whose <c>uid</c> equals <paramref name="uid"/>.
        /// </summary>
        /// <param name="uid">Value compared with the <c>uid</c> column.</param>
        /// <returns>The matching rows.</returns>
        /// <exception cref="Exception">The query failed; the original error is available as InnerException.</exception>
        public DataTable SelReleaseInfo(int uid)
        {
            // uid is an int, so concatenating it cannot inject SQL.
            string sql = "SELECT * FROM ReleaseInfo WHERE uid=" + uid;

            try
            {
                DataBaseServer.MySqlCmd dbobj = new MySqlCmd();
                return dbobj.GetTabel(sql);
            }
            catch (Exception ex)
            {
                // Keep the original exception so the stack trace is not lost.
                throw new Exception("查询失败,位置:SelReleaseInfo.原因:" + ex.Message, ex);
            }
        }
Example #17
0
        /// <summary>
        /// Inserts one row into ReleaseInfo from the values carried by <paramref name="obj"/>.
        /// </summary>
        /// <param name="obj">Record supplying the column values.</param>
        /// <returns>The value returned by <c>ExecuteNonQueryInt</c> (presumably the affected-row count; confirm against MySqlCmd).</returns>
        /// <exception cref="Exception">The statement failed; the original error is available as InnerException.</exception>
        public int InsReleaseInfo(ModelReleaseInfo obj)
        {
            string sql = @"INSERT INTO ReleaseInfo(Title,Contexts,ReleaseDate,InfoSource,KeyWords,ReleaseName,CollectDate,Snapshot)
                            VALUES(@Title,@Contexts,@ReleaseDate,@InfoSource,@KeyWords,@ReleaseName,@CollectDate,@Snapshot) ";

            // Parameter names must match the placeholders above exactly.
            List<MySqlParameter> par = new List<MySqlParameter>
            {
                new MySqlParameter("@Title", obj.Title),
                new MySqlParameter("@Contexts", obj.Contexts),
                new MySqlParameter("@ReleaseDate", obj.ReleaseDate),
                new MySqlParameter("@InfoSource", obj.InfoSource),
                new MySqlParameter("@KeyWords", obj.KeyWords),
                new MySqlParameter("@ReleaseName", obj.ReleaseName),
                new MySqlParameter("@CollectDate", obj.CollectDate),
                new MySqlParameter("@Snapshot", obj.Snapshot)
            };

            try
            {
                MySqlCmd dbobj = new MySqlCmd();
                return dbobj.ExecuteNonQueryInt(sql, par);
            }
            catch (Exception ex)
            {
                // Keep the original exception so the stack trace is not lost.
                throw new Exception("新建失败,位置:InsReleaseInfo.原因:" + ex.Message, ex);
            }
        }
Example #18
0
        /// <summary>
        /// Runs a ReleaseInfo SELECT whose <c>WHERE 1=2</c> clause matches no rows, so the
        /// returned table carries only the listed columns (presumably used as an empty
        /// template by callers; confirm).
        /// </summary>
        /// <returns>The table returned by <c>GetTabel</c> for that statement.</returns>
        /// <exception cref="Exception">The query failed; the original error is available as InnerException.</exception>
        public DataTable GetReleaseInfoFormat()
        {
            string sql = "SELECT uid,Title,Contexts,ReleaseDate,InfoSource,KeyWords,ReleaseName,CollectDate,Snapshot,webName,pid,part,comments,reposts FROM ReleaseInfo WHERE 1=2";

            try
            {
                DataBaseServer.MySqlCmd dbobj = new MySqlCmd();

                return dbobj.GetTabel(sql);
            }
            catch (Exception ex)
            {
                // Keep the original exception so the stack trace is not lost.
                throw new Exception("查询失败,位置:GetReleaseInfoFormat.原因:" + ex.Message, ex);
            }
        }
Example #19
0
        /// <summary>
        /// Runs two identical consumer loops in parallel. Each loop dequeues batches of
        /// <c>ModelReleaseInfo</c>, drops items that fail the keyword match or already
        /// exist, and writes the rest to the database in one multi-statement command.
        /// The call returns only after both loops see <c>Program.ProClose</c>.
        /// </summary>
        public void StartWrite()
        {
            Action actionG = () =>
            {
                List<ModelReleaseInfo> data;
                while (!Program.ProClose)
                {
                    if (!Queue.TryDequeue(out data))
                    {
                        // Nothing queued: wait a second before polling again.
                        System.Threading.Thread.Sleep(1000);
                        continue;
                    }

                    try
                    {
                        StringBuilder sb = new StringBuilder();
                        TbReleaseInfo tri = new TbReleaseInfo();
                        MySqlCmd cmd = new MySqlCmd();
                        foreach (var mri in data)
                        {
                            // Exact-match filter (added 2016-11-16): keep the item only if
                            // the title or the content contains every space-separated keyword.
                            string keywords = mri.KeyWords;
                            if (!string.IsNullOrEmpty(keywords))
                            {
                                // A missing title/content matches nothing instead of
                                // throwing and discarding the whole batch.
                                string title = mri.Title ?? string.Empty;
                                string context = mri.Contexts ?? string.Empty;
                                bool isFundTitle = true;
                                bool isFundContext = true;
                                foreach (string key in keywords.Split(' '))
                                {
                                    if (title.IndexOf(key) < 0)
                                    {
                                        isFundTitle = false;
                                    }
                                    if (context.IndexOf(key) < 0)
                                    {
                                        isFundContext = false;
                                    }
                                }
                                if (!isFundTitle && !isFundContext)
                                {
                                    continue;
                                }
                            }

                            // Skip items already stored for this source and keyword set.
                            if (tri.GetReleaseInfoCount(mri.InfoSource, mri.KeyWords) > 0)
                            {
                                continue;
                            }

                            string sql = tri.GetInsertStr(mri);
                            if (!sql.Trim().EndsWith(";"))
                            {
                                // Append only the terminator; the previous "sql += sql + ..."
                                // duplicated the statement and inserted every row twice.
                                sql += ";";
                            }
                            sb.Append(sql);
                        }

                        if (sb.Length > 0)
                        {
                            // Execute all collected INSERT statements in one command.
                            cmd.ExecuteNonQuery(sb.ToString());
                        }

                        // Logged only when the batch was processed without an exception.
                        log.Info("数据层写入数据库成功");
                    }
                    catch (Exception ex)
                    {
                        Comm.WriteErrorLog(ex.Message);
                        Comm.WriteErrorLog(ex.StackTrace);
                    }
                }
            };

            Parallel.Invoke(actionG, actionG);
        }
Example #20
0
        /// <summary>
        /// Returns the largest <c>uid</c> in ReleaseInfo.
        /// </summary>
        /// <returns>The maximum uid, or 0 when no value is returned or it is not a valid integer.</returns>
        /// <exception cref="Exception">The query failed; the original error is available as InnerException.</exception>
        public int GetMaxUid()
        {
            string sql = "SELECT max(uid) FROM ReleaseInfo";
            try
            {
                DataBaseServer.MySqlCmd dbobj = new MySqlCmd();
                DataTable dt = dbobj.GetTabel(sql);
                if (dt == null || dt.Rows.Count == 0 || dt.Columns.Count == 0)
                {
                    return 0;
                }

                // TryParse leaves 0 in maxUid when the cell is not a valid integer,
                // which is the fallback the old catch-all provided.
                int maxUid;
                int.TryParse(dt.Rows[0][0].ToString(), out maxUid);
                return maxUid;
            }
            catch (Exception ex)
            {
                // Keep the original exception so the stack trace is not lost.
                throw new Exception("查询失败,位置:GetMaxUid.原因:" + ex.Message, ex);
            }
        }
Example #21
0
        private void MediaWebSpider()
        {
            #region 处理关键字
            MySqlCmd cmd = new MySqlCmd();
            //得到关键字列表
            DataTable dtkey;
            DataTable dtParts;
            if (selectKID == -1)
            {
                //全部
                dtkey = cmd.GetTabel("select * from Keywords");
            }
            else
            {
                dtkey = cmd.GetTabel("select * from Keywords where kid=" + selectKID);
            }
            dtParts = cmd.GetTabel("SELECT * FROM partword");
            #endregion

            HtmlParse.Parse parse = new HtmlParse.Parse();
            //parse.ReportCatchProcess += new HtmlParse.ReportCatchProcessEventHandler(parse_ReportCatchProcess);
            List<ModelReleaseInfo> webDatas = new List<ModelReleaseInfo>();
            //按关键字循环
            for (int kw = 0; kw < dtkey.Rows.Count; kw++)
            {
                //处理关键字
                if (selectKID != -1 && selectKwName != "全部")
                {
                    if (dtkey.Rows[kw]["name"].ToString().Trim() != selectKwName) continue;
                }
                string keyword = dtkey.Rows[kw]["KeyWord"].ToString().Trim();
                //string keyTitle = dtkey.Rows[kw]["Name"].ToString().Trim();
                int kid = 0;
                int.TryParse(dtkey.Rows[kw]["kid"].ToString().Trim(), out kid);

                #region 百度检索
                //组成查询字串
                string url = "http://news.baidu.com/ns?rn=100&word=" + keyword;
                string html = HtmlUtil.HttpGet(url, Encoding.UTF8);
                List<ModelReleaseInfo> mris = parse.ParseBaiduNews(html, keyword, kid);
                if (mris != null && mris.Count() > 0)
                {
                    //写入数据库
                    DataPersistenceControl.GetInstance().Add(mris);
                }
                #endregion

                #region bing检索
                for (int i = 0; i < 10; i++)
                {
                    //组成查询字串
                    url = "";
                    html = "";
                    mris = null;
                    url = string.Format("http://cn.bing.com/news/search?q={0}&first={1}&FORM=PENR", keyword, i * 10 + 1);
                    html = HtmlUtil.HttpGet(url, Encoding.UTF8);
                    mris = parse.ParseBingNews(html, keyword, kid);
                    if (mris != null && mris.Count() > 0)
                    {
                        //写入数据库
                        DataPersistenceControl.GetInstance().Add(mris);
                    }
                    else
                    {
                        break;
                    }
                    //防止拉黑
                    Thread.Sleep(Interval50s);
                }
                #endregion

                #region 搜狗新闻
                for (int i = 0; i < 10; i++)
                {
                    //组成查询字串
                    url = "";
                    html = "";
                    mris = null;
                    url = string.Format("http://news.sogou.com/news?query={0}&page={1}", keyword, i + 1);
                    html = HtmlUtil.HttpGet(url, Encoding.Default);
                    mris = parse.ParseSogouNews(html, keyword, kid);
                    if (mris != null && mris.Count() > 0)
                    {
                        //写入数据库
                        DataPersistenceControl.GetInstance().Add(mris);
                    }
                    else
                    {
                        break;
                    }
                    //防止拉黑(2分钟)
                    Thread.Sleep(Interval2m);
                }
                #endregion

                #region 中搜新闻
                for (int i = 0; i < 10; i++)
                {
                    //组成查询字串
                    url = "";
                    html = "";
                    mris = null;
                    url = string.Format("http://zixun.zhongsou.com/n?w={0}&b={1}", keyword, i + 1);
                    html = HtmlUtil.HttpGet(url, Encoding.Default);
                    mris = parse.ParseZhongsouNews(html, keyword, kid);
                    if (mris != null && mris.Count() > 0)
                    {
                        //写入数据库
                        DataPersistenceControl.GetInstance().Add(mris);
                    }
                    else
                    {
                        break;
                    }
                    //防止拉黑(2分钟)
                    Thread.Sleep(Interval2m);
                }
                #endregion

                #region 好搜新闻
                for (int i = 0; i < 10; i++)
                {
                    //组成查询字串
                    url = "";
                    html = "";
                    mris = null;
                    url = string.Format("http://news.haosou.com/ns?q={0}&pn={1}&tn=news&rank=rank&j=0", keyword, i + 1);
                    html = HtmlUtil.HttpGet(url, Encoding.UTF8);
                    mris = parse.ParseHaosouNews(html, keyword, kid);
                    if (mris != null && mris.Count() > 0)
                    {
                        //写入数据库
                        DataPersistenceControl.GetInstance().Add(mris);
                    }
                    else
                    {
                        break;
                    }
                    //防止拉黑
                    Thread.Sleep(Interval50s);
                }
                #endregion
            }
        }
Example #22
0
        /// <summary>
        /// Searches Baidu Tieba for each configured keyword and passes the parsed posts to
        /// <c>DataPersistenceControl</c>.
        /// </summary>
        /// <remarks>
        /// Only the first result page (the request asks for 60 hits) is fetched per keyword;
        /// paging is not handled. The thread sleeps for <c>Interval50s</c> after every keyword
        /// so the crawler is not blocked by the site.
        /// </remarks>
        private void TiebaWebSpider()
        {
            #region Load keywords
            MySqlCmd cmd = new MySqlCmd();
            // selectKID == -1 means "all keywords"; otherwise only rows with that kid are loaded.
            DataTable dtkey = selectKID == -1
                ? cmd.GetTabel("select * from Keywords")
                : cmd.GetTabel("select * from Keywords where kid=" + selectKID);
            #endregion

            HtmlParse.Parse parse = new HtmlParse.Parse();

            // One search per keyword row.
            for (int kw = 0; kw < dtkey.Rows.Count; kw++)
            {
                DataRow row = dtkey.Rows[kw];

                // When a specific event name is selected, skip rows that belong to another one.
                if (selectKID != -1 && selectKwName != "全部"
                    && row["name"].ToString().Trim() != selectKwName)
                {
                    continue;
                }

                string keyword = row["KeyWord"].ToString().Trim();
                int kid;
                // Leaves kid at 0 when the column does not hold a number.
                int.TryParse(row["kid"].ToString().Trim(), out kid);

                // The keyword is percent-encoded so characters such as '&', '#', '+' or spaces
                // cannot break the query string.
                string url = "http://tieba.baidu.com/f/search/res?ie=utf-8&rn=60&qw="
                    + Uri.EscapeDataString(keyword);
                string html = HtmlUtil.HttpGet(url, Encoding.Default);

                List<ModelReleaseInfo> mris = parse.ParseBaiduTieba(html, keyword, kid);
                if (mris != null && mris.Count > 0)
                {
                    DataPersistenceControl.GetInstance().Add(mris);
                }

                // Throttle requests to avoid being blacklisted.
                Thread.Sleep(Interval50s);
            }
        }
Example #23
0
 /// <summary>
 /// Click handler that stores the evidence-image folder shown in <c>tb_choosedimgpath</c>
 /// in the <c>systemset</c> row with id 1 and, on success, mirrors the value into the
 /// cached <c>SystemSet</c> object held in <c>GlobalPars.GloPars</c>.
 /// </summary>
 /// <param name="sender">Control that raised the event (not used).</param>
 /// <param name="e">Event data (not used).</param>
 private void btn_saveimgpath_Click(object sender, EventArgs e)
 {
     string chosenPath = tb_choosedimgpath.Text;

     // Nothing to save until a folder has been chosen.
     bool hasPath = chosenPath != null && chosenPath.Trim().Length != 0;
     if (!hasPath)
     {
         MessageBox.Show("请先选择证据图片保存路径!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
         return;
     }

     string sql = "UPDATE systemset SET EvidenceImgSavePath=@EvidenceImgSavePath WHERE id=1";
     List<MySqlParameter> pars = new List<MySqlParameter>
     {
         new MySqlParameter("@EvidenceImgSavePath", chosenPath)
     };

     try
     {
         DataBaseServer.MySqlCmd dbobj = new DataBaseServer.MySqlCmd();
         int affected = dbobj.ExecuteNonQueryInt(sql, pars);

         // No row was updated: report the failure and leave the cached settings alone.
         if (!(affected > 0))
         {
             MessageBox.Show("修改失败,请稍后重试!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Error);
             return;
         }

         // Keep the in-memory settings in step with the database.
         SystemSet ss = (SystemSet)GlobalPars.GloPars["systemset"];
         ss.EvidenceImgSavePath = chosenPath;
         MessageBox.Show("修改成功!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
     }
     catch (Exception)
     {
         // Any failure is reported to the user with a generic message.
         MessageBox.Show("出现异常,请稍后重试或联系软件提供商!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Error);
     }
 }
Example #24
0
        /// <summary>
        /// Searches Sina Weibo and the Zhongsou micro-blog search for each configured keyword and
        /// passes the parsed posts to <c>DataPersistenceControl</c>.
        /// </summary>
        /// <remarks>
        /// Sina is queried once per keyword (page 1) followed by a pause of <c>Interval30s</c>.
        /// Zhongsou is queried for up to 10 pages, stopping at the first page that yields no
        /// items, with a pause of <c>Interval2m</c> after every page that did yield items.
        /// </remarks>
        private void WeiboWebSpider()
        {
            #region Load keywords
            MySqlCmd cmd = new MySqlCmd();
            // selectKID == -1 means "all keywords"; otherwise only rows with that kid are loaded.
            DataTable dtkey = selectKID == -1
                ? cmd.GetTabel("select * from Keywords")
                : cmd.GetTabel("select * from Keywords where kid=" + selectKID);
            #endregion

            HtmlParse.Parse parse = new HtmlParse.Parse();

            // One pass per keyword row.
            for (int kw = 0; kw < dtkey.Rows.Count; kw++)
            {
                DataRow row = dtkey.Rows[kw];

                // When a specific event name is selected, skip rows that belong to another one.
                if (selectKID != -1 && selectKwName != "全部"
                    && row["name"].ToString().Trim() != selectKwName)
                {
                    continue;
                }

                string keyword = row["KeyWord"].ToString().Trim();
                int kid;
                // Leaves kid at 0 when the column does not hold a number.
                int.TryParse(row["kid"].ToString().Trim(), out kid);

                #region Sina Weibo search
                string sinaUrl = "http://s.weibo.com/weibo/" + CrawlHtml.UrlEncode(keyword)
                    + "?topnav=1&wvr=6&b=1&page=1";
                string sinaHtml = HtmlUtil.HttpGet(sinaUrl, Encoding.UTF8);
                List<ModelReleaseInfo> sinaItems = parse.ParseSinaWeibo(sinaHtml, keyword, kid);
                if (sinaItems != null && sinaItems.Count > 0)
                {
                    DataPersistenceControl.GetInstance().Add(sinaItems);
                }
                // Throttle requests so Weibo does not blacklist the crawler.
                Thread.Sleep(Interval30s);
                #endregion

                #region Zhongsou search
                // Percent-encode the keyword so characters such as '&', '#', '+' or spaces cannot
                // break the query string.
                string escapedKeyword = Uri.EscapeDataString(keyword);
                for (int page = 1; page <= 10; page++)
                {
                    string pageUrl = string.Format("http://t.zhongsou.com/wb?w={0}&b={1}", escapedKeyword, page);
                    string pageHtml = HtmlUtil.HttpGet(pageUrl, Encoding.Default);
                    List<ModelReleaseInfo> pageItems = parse.ParseZhongsouWeibo(pageHtml, keyword, kid);

                    // An empty page ends the paging for this keyword.
                    if (pageItems == null || pageItems.Count == 0)
                    {
                        break;
                    }

                    DataPersistenceControl.GetInstance().Add(pageItems);

                    // Throttle requests to avoid being blacklisted.
                    Thread.Sleep(Interval2m);
                }
                #endregion
            }
        }
Example #25
0
        /// <summary>
        /// Crawls the news sites listed in the WebAddress table and records every article that
        /// mentions at least one configured keyword.
        /// </summary>
        /// <remarks>
        /// For each WebAddress row with pid = 0 the page at <c>url</c> is downloaded and its anchor
        /// elements are extracted. Every link whose similarity to the row's <c>likeurl</c> is at
        /// least 0.60 is downloaded; its title and the text of its &lt;p&gt; elements are matched
        /// against the Keywords table. Matching articles are inserted at the top of
        /// <c>dtnewsinfo</c> and the text returned by <c>GetInsString</c> is appended to <c>sb</c>,
        /// which is executed whenever the link index is a multiple of 10 and once more at the end.
        /// Exceptions are appended to log.txt and do not stop the crawl.
        /// </remarks>
        private void GetWebNewsInfo()
        {
            lbWeb.Text = "";
            lbWeb.Visible = true;

            DataBaseServer.MySqlCmd cmd = new MySqlCmd();

            // Sites to crawl.
            DataTable dtXs = cmd.GetTabel("Select * from WebAddress WHERE pid=0");

            // Reload the class-level partword table.
            dtParts = cmd.GetTabel("SELECT * FROM partword");

            DataTable dtkey = cmd.GetTabel("select * from Keywords");

            // Pattern used to filter out links that point at a site home page.
            string strTopFormat = "http://.+/";
            // Pattern used to pull the absolute URL out of an anchor element.
            string strURLformat = "http://.[^\"]+";

            sb = new StringBuilder();

            #region Walk the site table and crawl each site
            for (int xs = 0; xs < dtXs.Rows.Count; xs++)
            {
                lbWeb.ForeColor = Color.DarkBlue;
                lbWeb.Text = "正在搜索:" + dtXs.Rows[xs]["name"].ToString();

                string siteUrl = dtXs.Rows[xs]["url"].ToString();
                // Source of the page that lists the site's articles.
                string webInfo = getHtml(siteUrl, "");
                // Sample article URL that candidate links are compared against.
                string Similar = dtXs.Rows[xs]["likeurl"].ToString();

                List<string> strList = HtmlUtil.GetElementsByTagNameList(webInfo, "a");

                TbReleaseInfo ri = new TbReleaseInfo();

                string[] strA = GetLIstDate(strList.Distinct());

                #region Examine each link
                // Set once a link is found to be already known; every later link of this site
                // is then skipped.
                bool isThere = false;

                for (int i = 0; i < strA.Length; i++)
                {
                    if (Program.ProClose == true) break;
                    Application.DoEvents();
                    try
                    {
                        // Reduce the anchor element to its URL; anchors without one are skipped.
                        strA[i] = HtmlUtil.GetListByHtml(siteUrl, strA[i], strURLformat)[0];
                        strA[i] = UrlCl(strA[i]);

                        // Cut the link at the first single quote, if it contains one.
                        if (strA[i].IndexOf("'") != -1)
                        {
                            strA[i] = GetstringByHtmlArray(strA[i], "http://.[^\']+");
                        }
                    }
                    catch (Exception)
                    {
                        continue;
                    }

                    // Only links with a similarity of at least 0.60 are treated as articles.
                    if (!(HtmlUtil.getSimilarDegree(Similar, strA[i]) >= 0.60))
                    {
                        continue;
                    }

                    // Skip links that the home-page pattern matches.
                    List<string> strTop = HtmlUtil.GetListByHtmlArray(strA[i], strTopFormat);
                    if (strTop.Count != 0)
                    {
                        continue;
                    }

                    if (isThere)
                    {
                        continue;
                    }

                    // Skip links that are already in the grid table or in the database snapshot.
                    if (UrlThereare(strA[i], this.dtnewsinfo, dtWebNewsInfo, true) != 0)
                    {
                        isThere = true;
                        continue;
                    }

                    // Source of the article itself.
                    webInfo = getHtml(strA[i], "");
                    if (webInfo.Length == 0) { continue; }

                    ModelReleaseInfo newsInfo = new ModelReleaseInfo();

                    try
                    {
                        newsInfo.Uid = this.dvAll.Rows.Count + 1;

                        // Title: pages without a <title> element are dropped.
                        string[] strT = HtmlUtil.GetElementsByTagName(webInfo, "title");
                        if (strT.Length == 0)
                        {
                            continue;
                        }
                        newsInfo.Title = HtmlUtil.NoHTML(strT[0]);

                        // Body text: concatenation of all <p> elements with markup removed.
                        string[] strContext = HtmlUtil.GetElementsByTagName(webInfo, "p");
                        StringBuilder body = new StringBuilder();
                        for (int j = 0; j < strContext.Length; j++)
                        {
                            body.Append(HtmlUtil.NoHTML(strContext[j]));
                        }
                        newsInfo.Contexts = body.ToString();

                        // Without body text no keyword can be matched, so the page is dropped.
                        if (newsInfo.Contexts.Length == 0)
                        {
                            continue;
                        }

                        newsInfo.InfoSource = strA[i].Trim();

                        // Collect every keyword that occurs in the body. A hit at position 0
                        // counts as well; empty keywords are ignored because IndexOf("") is 0.
                        List<string> matched = new List<string>();
                        for (int j = 0; j < dtkey.Rows.Count; j++)
                        {
                            Application.DoEvents();
                            string candidate = dtkey.Rows[j][1].ToString();
                            if (candidate.Length > 0 && newsInfo.Contexts.IndexOf(candidate) >= 0)
                            {
                                matched.Add(candidate);
                            }
                        }
                        if (matched.Count == 0) { continue; }
                        newsInfo.KeyWords = string.Join(",", matched);

                        newsInfo.CollectDate = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");

                        // Publisher and publish date cannot be extracted here; left empty.
                        newsInfo.ReleaseDate = "";
                        newsInfo.ReleaseName = "";

                        // Snapshots are generated on user request only; empty for now.
                        newsInfo.Snapshot = "";

                        newsInfo.WebName = dtXs.Rows[xs]["Name"].ToString();
                        newsInfo.Pid = 0;
                        newsInfo.Part = GetParts(newsInfo.Contexts);
                        newsInfo.Reposts = 0;
                        newsInfo.Comments = 0;

                        // Build the grid row; its id continues from the last row shown in dvWeb.
                        DataRow dr = dtnewsinfo.NewRow();
                        if (dvWeb.RowCount == 0)
                        {
                            dr[0] = 1;
                        }
                        else
                        {
                            dr[0] = int.Parse(dvWeb.Rows[dvWeb.RowCount - 1].Cells[0].Value.ToString()) + 1;
                        }
                        dr[1] = newsInfo.Title;
                        dr[2] = newsInfo.Contexts;
                        dr[3] = newsInfo.ReleaseDate;
                        dr[4] = newsInfo.InfoSource;
                        dr[5] = newsInfo.KeyWords;
                        dr[6] = newsInfo.ReleaseName;
                        dr[7] = newsInfo.CollectDate;
                        dr[8] = newsInfo.Snapshot;
                        dr[9] = newsInfo.WebName;
                        dr[10] = newsInfo.Pid;
                        dr[11] = newsInfo.Part;
                        dr[12] = newsInfo.Reposts;
                        dr[13] = newsInfo.Comments;

                        // Newest row goes on top.
                        dtnewsinfo.Rows.InsertAt(dr, 0);

                        // Drop the 501st row. Index 500 only exists once the table holds more
                        // than 500 rows, so the test must be '>' rather than '>='.
                        if (dtnewsinfo.Rows.Count > 500)
                        {
                            dtnewsinfo.Rows.RemoveAt(500);
                        }
                        dvWeb.Refresh();
                    }
                    catch (Exception ex)
                    {
                        // NOTE(review): after a failure here the article is still queued for
                        // insertion below, as in the original flow - confirm this is intended.
                        WriteWebNewsErrorLog(ex);
                    }

                    try
                    {
                        // Queue the statement for this article.
                        sb.Append(ri.GetInsString(newsInfo) + ";");

                        // Execute the queued statements whenever the link index is a multiple of 10.
                        if (i % 10 == 0)
                        {
                            FlushPendingWebNewsInserts(cmd);
                        }
                    }
                    catch (Exception ex)
                    {
                        WriteWebNewsErrorLog(ex);
                    }
                }
                #endregion
            }
            #endregion

            // Execute whatever is still queued.
            try
            {
                FlushPendingWebNewsInserts(cmd);
            }
            catch (Exception ex)
            {
                WriteWebNewsErrorLog(ex);
            }

            // Reload the database snapshot used for duplicate detection.
            dtWebNewsInfo = tri.SelReleaseInfo(DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"), DateTime.Now.AddDays(-15).ToString("yyyy-MM-dd HH:mm:ss"), "0 AND webName<>'百度'");
            lbWeb.Text = "一轮搜索完毕!";
            lbWeb.ForeColor = Color.Red;
        }

        /// <summary>
        /// Executes the statements queued in <c>sb</c> (after removing the blank next to square
        /// brackets) and clears the queue. Does nothing when the queue is empty. The queue is
        /// left untouched when execution throws.
        /// </summary>
        /// <param name="cmd">Database helper used to execute the statements.</param>
        private void FlushPendingWebNewsInserts(MySqlCmd cmd)
        {
            if (sb.Length == 0)
            {
                return;
            }

            string filterStr = sb.ToString().Replace("[ ", "[").Replace(" ]", "]");
            cmd.ExecuteNonQuery(filterStr);
            sb.Clear();
        }

        /// <summary>
        /// Appends the exception message and the currently queued statements to log.txt.
        /// </summary>
        /// <param name="ex">Exception to record.</param>
        private void WriteWebNewsErrorLog(Exception ex)
        {
            // 'using' closes the file even when one of the writes fails.
            using (StreamWriter sw = File.AppendText("log.txt"))
            {
                sw.WriteLine(DateTime.Now.ToLongDateString());
                sw.WriteLine("begin");
                sw.WriteLine(ex.Message);
                sw.WriteLine(sb.ToString());
                sw.WriteLine("end");
                sw.WriteLine("");
            }
        }
Example #26
0
        /// <summary>
        /// Searches Sogou's WeChat article search for each configured keyword and passes the
        /// parsed articles to <c>DataPersistenceControl</c>.
        /// </summary>
        /// <remarks>
        /// A fresh <c>CookieContainer</c> is used for every keyword and handed, together with the
        /// cookie string filled in by <c>HtmlUtil.HttpGet</c>, to the parser. The thread sleeps
        /// for <c>Interval2m</c> after every keyword so the crawler is not blocked by the site.
        /// </remarks>
        private void WeixinWebSpider()
        {
            #region Load keywords
            MySqlCmd cmd = new MySqlCmd();
            // selectKID == -1 means "all keywords"; otherwise only rows with that kid are loaded.
            DataTable dtkey = selectKID == -1
                ? cmd.GetTabel("select * from Keywords")
                : cmd.GetTabel("select * from Keywords where kid=" + selectKID);
            #endregion

            HtmlParse.Parse parse = new HtmlParse.Parse();

            // One search per keyword row.
            for (int kw = 0; kw < dtkey.Rows.Count; kw++)
            {
                DataRow row = dtkey.Rows[kw];

                // When a specific event name is selected, skip rows that belong to another one.
                if (selectKID != -1 && selectKwName != "全部"
                    && row["name"].ToString().Trim() != selectKwName)
                {
                    continue;
                }

                string keyword = row["KeyWord"].ToString().Trim();
                int kid;
                // Leaves kid at 0 when the column does not hold a number.
                int.TryParse(row["kid"].ToString().Trim(), out kid);

                #region Search by keyword
                // The keyword is percent-encoded so characters such as '&', '#', '+' or spaces
                // cannot break the query string.
                string url = "http://weixin.sogou.com/weixin?type=2&query="
                    + Uri.EscapeDataString(keyword) + "&ie=utf8";

                CookieContainer cookies = new CookieContainer();
                string strCookie = "";
                string html = HtmlUtil.HttpGet(url, Encoding.UTF8, cookies, "weixin.sogou.com", ref strCookie);
                #endregion

                List<ModelReleaseInfo> mris = parse.ParseSogouWeixin(html, keyword, kid, cookies, strCookie);
                if (mris != null && mris.Count > 0)
                {
                    DataPersistenceControl.GetInstance().Add(mris);
                }

                // Throttle requests to avoid being blacklisted.
                Thread.Sleep(Interval2m);
            }
        }
Example #27
0
        /// <summary>
        /// Runs a Baidu web search for every keyword in the Keywords table and records the
        /// results dated within the last three days.
        /// </summary>
        /// <remarks>
        /// Each result block (class "result") supplies a date, a title (&lt;h3&gt;), an abstract
        /// (class "c-abstract") and a link. New results are inserted at the top of
        /// <c>dtqueryinfo</c> and the text returned by <c>GetInsString</c> is appended to
        /// <c>sb</c>, which is executed whenever the result index is a multiple of 10 and once
        /// more at the end. Once a result title is found to be already known, the remaining
        /// results of that keyword are skipped. Exceptions are appended to log.txt and do not
        /// stop the crawl.
        /// </remarks>
        private void GetBaiduInfo()
        {
            lbAll.Text = "";
            lbAll.Visible = true;

            MySqlCmd cmd = new MySqlCmd();

            DataTable dtkey = cmd.GetTabel("select * from Keywords");

            // Reload the class-level partword table.
            dtParts = cmd.GetTabel("SELECT * FROM partword");

            // Pattern used to pull the absolute URL out of an anchor element.
            string aa = "http://.[^\"]+";

            sb = new StringBuilder();

            // One search per keyword.
            for (int kw = 0; kw < dtkey.Rows.Count; kw++)
            {
                string keys = dtkey.Rows[kw]["KeyWord"].ToString().Trim();

                lbAll.Text = "正在搜索关键字为<" + keys + ">的数据.";
                lbAll.ForeColor = Color.DarkBlue;

                // Exact-phrase search, 50 results. The quoted phrase is percent-encoded so
                // characters such as '&', '#', '+' or spaces cannot break the query string.
                string url = "http://www.baidu.com/s?wd=" + Uri.EscapeDataString("\"" + keys + "\"") + "&rn=50";

                List<string> lis = HtmlUtil.GetElementsByClassList(getHtml(url, "utf-8"), "result");

                // Nothing retrieved for this keyword: move on to the next one. Leaving the
                // method here would skip the final flush and the status update below.
                if (lis == null) continue;

                // Set once a result is found to be already known; every later result of this
                // keyword is then skipped.
                bool isThere = false;

                for (int i = 0; i < lis.Count; i++)
                {
                    if (Program.ProClose == true) break;

                    ModelReleaseInfo mri = new ModelReleaseInfo();

                    // Publish date: taken from the <span class="g"> element.
                    string[] sDate = HtmlUtil.GetElementsByTagAndClass(lis[i], "span", "g");
                    if (sDate.Length <= 0) continue;

                    string rawDate = HtmlUtil.NoHTML(sDate[0]);
                    // The date is expected in the last 10 characters; shorter text cannot hold one.
                    if (rawDate == null || rawDate.Length < 10) continue;

                    mri.ReleaseDate = rawDate.Substring(rawDate.Length - 10, 10);

                    // Baidu dates are sometimes only 9 or 8 characters long; strip the leading
                    // character(s) until the text parses.
                    DateTime ddt;
                    if (!DateTime.TryParse(mri.ReleaseDate, out ddt))
                    {
                        mri.ReleaseDate = mri.ReleaseDate.Substring(1, 9);
                        if (!DateTime.TryParse(mri.ReleaseDate, out ddt))
                        {
                            mri.ReleaseDate = mri.ReleaseDate.Substring(1, 8);
                        }
                    }

                    // Normalise the date; results whose date cannot be parsed are logged and skipped.
                    DateTime released;
                    try
                    {
                        released = DateTime.Parse(mri.ReleaseDate);
                    }
                    catch (Exception ex)
                    {
                        WriteBaiduErrorLog(ex);
                        continue;
                    }
                    mri.ReleaseDate = released.ToString("yyyy-MM-dd HH:mm:ss");

                    // Only results from the last three days are kept.
                    try
                    {
                        if (DateTime.Parse(mri.ReleaseDate) < DateTime.Now.AddDays(-3)) continue;
                    }
                    catch (Exception) { continue; }

                    try
                    {
                        mri.Title = HtmlUtil.NoHTML(HtmlUtil.GetElementsByTagName(lis[i], "h3")[0]);
                        string[] temp = HtmlUtil.GetElementsByClass(lis[i], "c-abstract");

                        // Results without an abstract are skipped.
                        if (temp.Length == 0)
                            continue;

                        mri.Contexts = HtmlUtil.NoHTML(temp[0]);
                        mri.InfoSource = HtmlUtil.GetListByHtml("", HtmlUtil.GetElementsByTagName(lis[i], "a")[0], aa)[0];

                        // Duplicate detection by title.
                        if (isThere)
                        {
                            continue;
                        }
                        if (UrlThereare(mri.Title, this.dtqueryinfo, dtWebQueryInfo, false) != 0)
                        {
                            isThere = true;
                            continue;
                        }

                        mri.KeyWords = keys;
                        mri.CollectDate = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
                        mri.Snapshot = "";
                        mri.ReleaseName = "";

                        mri.WebName = "百度";
                        mri.Pid = 0;
                        mri.Part = GetParts(mri.Contexts);
                        mri.Comments = 0;
                        mri.Reposts = 0;

                        // Build the grid row; its id continues from the last row shown in dvAll.
                        DataRow dr = dtqueryinfo.NewRow();
                        if (dvAll.RowCount == 0)
                        {
                            dr[0] = 1;
                        }
                        else
                        {
                            dr[0] = int.Parse(dvAll.Rows[dvAll.RowCount - 1].Cells[0].Value.ToString()) + 1;
                        }
                        dr[1] = mri.Title;
                        dr[2] = mri.Contexts;
                        dr[3] = mri.ReleaseDate;
                        dr[4] = mri.InfoSource;
                        dr[5] = mri.KeyWords;
                        dr[6] = mri.ReleaseName;
                        dr[7] = mri.CollectDate;
                        dr[8] = mri.Snapshot;
                        dr[9] = mri.WebName;
                        dr[10] = mri.Pid;
                        dr[11] = mri.Part;
                        dr[12] = mri.Reposts;
                        dr[13] = mri.Comments;

                        // Newest row goes on top.
                        dtqueryinfo.Rows.InsertAt(dr, 0);

                        // Drop the 501st row. Index 500 only exists once the table holds more
                        // than 500 rows, so the test must be '>' rather than '>='.
                        if (dtqueryinfo.Rows.Count > 500)
                        {
                            dtqueryinfo.Rows.RemoveAt(500);
                        }
                        dvAll.Refresh();
                    }
                    catch (Exception ex)
                    {
                        // NOTE(review): after a failure here the result is still queued for
                        // insertion below, as in the original flow - confirm this is intended.
                        WriteBaiduErrorLog(ex);
                    }

                    try
                    {
                        // A failure above can bypass the duplicate check, so test the flag again.
                        if (isThere)
                        {
                            continue;
                        }

                        // Queue the statement for this result.
                        sb.Append(tri.GetInsString(mri) + ";");

                        // Execute the queued statements whenever the result index is a multiple of 10.
                        if (i % 10 == 0)
                        {
                            FlushPendingBaiduInserts(cmd);
                        }
                    }
                    catch (Exception ex)
                    {
                        WriteBaiduErrorLog(ex);
                    }
                }
            }

            // Execute whatever is still queued.
            try
            {
                FlushPendingBaiduInserts(cmd);
            }
            catch (Exception ex)
            {
                WriteBaiduErrorLog(ex);
            }

            // Reload the database snapshot used for duplicate detection.
            dtWebQueryInfo = tri.SelReleaseInfo(DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"), DateTime.Now.AddDays(-15).ToString("yyyy-MM-dd HH:mm:ss"), "0 AND webName='百度'");
            lbAll.Text = "一轮搜索完毕!";
            lbAll.ForeColor = Color.Red;
        }

        /// <summary>
        /// Executes the statements queued in <c>sb</c> and clears the queue. Does nothing when the
        /// queue is empty. The queue is left untouched when execution throws.
        /// </summary>
        /// <param name="cmd">Database helper used to execute the statements.</param>
        private void FlushPendingBaiduInserts(MySqlCmd cmd)
        {
            if (sb.Length == 0)
            {
                return;
            }

            cmd.ExecuteNonQuery(sb.ToString());
            sb.Clear();
        }

        /// <summary>
        /// Appends the exception message and the currently queued statements to log.txt.
        /// </summary>
        /// <param name="ex">Exception to record.</param>
        private void WriteBaiduErrorLog(Exception ex)
        {
            // 'using' closes the file even when one of the writes fails.
            using (StreamWriter sw = File.AppendText("log.txt"))
            {
                sw.WriteLine(DateTime.Now.ToLongDateString());
                sw.WriteLine("begin");
                sw.WriteLine(ex.Message);
                sw.WriteLine(sb.ToString());
                sw.WriteLine("end");
                sw.WriteLine("");
            }
        }
Example #28
0
        /// <summary>
        /// Loads the newest release records for one <paramref name="pid"/> from the uid range of
        /// the page returned by <c>Finder.util.Comm.GetPageInfo()</c>.
        /// </summary>
        /// <param name="pid">Value the <c>releaseinfo.pid</c> column must equal.</param>
        /// <returns>
        /// Up to 50 non-deleted rows that join to a named keyword, ordered by collectdate
        /// descending and carrying the keyword name as <c>eventname</c>; <c>null</c> when the
        /// query throws.
        /// </returns>
        public DataTable GetLatestData(int pid)
        {
            DataPage firstPage = Finder.util.Comm.GetPageInfo();

            // {0} = pid, {1}/{2} = first and last uid of the current page.
            string template = @"select b.Name eventname, a.* from releaseinfo a  left join keywords b on a.keywords=b.KeyWord
                                    where b.Name is not null and  a.pid={0} and a.deleted=0 and a.uid between {1} and {2} order by a.collectdate desc limit 0,50";
            string sql = string.Format(template, pid, firstPage.CurrenPageStartUid, firstPage.CurrenPageEndUid);

            DataTable result;
            try
            {
                DataBaseServer.MySqlCmd dbobj = new MySqlCmd();
                result = dbobj.GetTabel(sql);
            }
            catch (Exception)
            {
                // Callers receive null instead of the exception.
                result = null;
            }
            return result;
        }