コード例 #1
0
ファイル: index.cs プロジェクト: harryluo163/spider_winform
        /// <summary>
        /// Entry point of a crawl run: writes the "entry fetch started" log line and
        /// registers the first listing URL (<c>pageno=1</c>) through
        /// <c>ClsPageUrl.AddPageUrl</c>.
        /// </summary>
        public void spiderMain()
        {
            ClsLog clsLog = new ClsLog();
            clsLog.AddLog(DateTime.Now.ToString(), "入口抓取开始");

            ClsPageUrl clsPageUrl = new ClsPageUrl();

            // NOTE(review): this is a static switch, so it turns off WinForms'
            // cross-thread access check for every control in the process. Kept
            // because other code presumably touches controls from worker threads;
            // marshalling through Control.Invoke would be the safer design.
            Control.CheckForIllegalCrossThreadCalls = false;

            // Seed URL. The positional arguments are passed through unchanged;
            // their meaning is defined by ClsPageUrl.AddPageUrl, which is not
            // visible from here.
            clsPageUrl.AddPageUrl("ProgramName", "", "", "Portal", "", "", "http://cht.cjsyw.com:8080/ShipSource/listSS.aspx?pageno=1",
                                  "GET", "", "utf-8", "", null, "", 1, 1);
        }
コード例 #2
0
ファイル: index.cs プロジェクト: harryluo163/spider_winform
        /// <summary>
        /// Click handler for the start button. Disables the button, starts the log,
        /// spider and analysis worker threads, registers the "all items analysed"
        /// handler and finally calls <see cref="spiderMain"/> to seed the crawl.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnStart_Click(object sender, EventArgs e)
        {
            btnStart.Enabled = false;

            #region Log writer thread
            ClsLog clsLog    = new ClsLog();
            Thread LogThread = new Thread(new ThreadStart(clsLog.WriteLog));
            LogThread.Start();
            #endregion

            #region Spider thread
            clsLog.AddLog(DateTime.Now.ToString(), "抓取开始");
            ClsPageUrl clsPageUrl   = new ClsPageUrl();
            Thread     SpiderThread = new Thread(new ThreadStart(clsPageUrl.SpiderData));
            SpiderThread.Start();
            #endregion

            #region Analysis thread
            ClsPageContent clsPageContent = new ClsPageContent();
            Thread         AnalyseThread  = new Thread(new ThreadStart(clsPageContent.AnalyseData));
            AnalyseThread.Start();
            #endregion

            // The database insert thread is currently disabled; Program.clsDBSignal
            // is still checked below.

            #region Event registration
            // NOTE(review): `helper` is a fresh local instance and nothing visible
            // here hands it to the worker objects. Confirm that the code raising
            // OnAllItemAnalyzeCompleted really uses this instance, otherwise the
            // handler below never runs.
            EventController helper = new EventController();

            // Fires when all items to analyse are reported complete; shuts the
            // workers down once every signal counter is back to zero.
            helper.OnAllItemAnalyzeCompleted += (senders, es) =>
            {
                if (Program.clsUrlSignal != 0 || Program.clsContentSignal != 0 || Program.clsDBSignal != 0)
                {
                    return;
                }

                // NOTE(review): Thread.Abort is only supported on .NET Framework;
                // a cooperative stop flag would be needed on newer runtimes.
                SpiderThread.Abort();
                AnalyseThread.Abort();

                // Queue the closing entry BEFORE the grace period. The original
                // added it after LogThread.Abort(), i.e. after the writer thread
                // (presumably a queue-draining loop) had already been killed.
                clsLog.AddLog(DateTime.Now.ToString(), "抓取结束");

                // Grace period before stopping the log writer.
                Thread.Sleep(20000);
                LogThread.Abort();
            };
            #endregion

            // Entry method: seed the crawl.
            spiderMain();
        }
コード例 #3
0
        /// <summary>
        /// Analyses a portal page: extracts the text between "/共有" and "页" from
        /// <c>entity.PContent</c> and converts it to the total page count.
        /// Any failure (no match, non-numeric text, helper exception) is logged and
        /// recorded through <c>UrlContorl.SaveUrl</c>; nothing is rethrown.
        /// </summary>
        /// <param name="entity">Fetched page whose content is analysed.</param>
        public void HousePortalAnalysis(PageContentEntity entity)
        {
            try
            {
                RegFunc rf = new RegFunc();

                string pageCountText = rf.GetStr(entity.PContent, "/共有", "页");
                if (pageCountText == "")
                {
                    // Routed through the catch block below so the failure is
                    // logged and saved exactly like any other analysis error.
                    throw new Exception("分析数据失败:页面没有数据");
                }

                // Parse once instead of on every loop-condition check; a
                // non-numeric value still throws and is handled below.
                int pageCount = Convert.ToInt32(pageCountText);
                for (int i = 1; i <= pageCount; i++)
                {
                    // Per-page enqueue is disabled in this version. The original
                    // call (needs a ClsPageUrl instance named clsPageUrl) was:
                    //   clsPageUrl.AddPageUrl(entity.ProgramName, entity.KeyWord, entity.PID, "Batch", entity.SiteUrl, entity.Url, "http://218.14.207.76/xxgs/xmlpzs/webissue.asp?page=" + i,
                    //       "GET", "", entity.EnCode, i.ToString(), entity.CookieContent, entity.AContent, entity.TrySpiderTimes, entity.Depth + 1);
                }
            }
            catch (Exception ex)
            {
                ClsLog clsLog = new ClsLog();
                clsLog.AddLog(DateTime.Now.ToString(), "分析数据失败" + ex.ToString());
                clsLog.AddLog(DateTime.Now.ToString(), entity.SType + ";" + entity.Url + ";");

                // Record the failed URL together with the exception text.
                UrlContorl urlContorl = new UrlContorl();
                urlContorl.SaveUrl(entity, ex.ToString());
            }
        }
コード例 #4
0
        /// <summary>
        /// Click handler for the start button. Validates that user accounts (and,
        /// when the proxy option is ticked, proxy IPs) are loaded, copies the UI
        /// settings into <c>Program.sysPara</c>, starts the log, spider and
        /// analysis threads, registers the event handlers and calls
        /// <see cref="spiderMain"/> to seed the crawl.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnStart_Click(object sender, EventArgs e)
        {
            if (Program.userList.Count <= 0)
            {
                MessageBox.Show("请导入用户账号");
                return;
            }

            // An empty list has Count == 0; the original `< 0` test could never be
            // true, so the warning was unreachable.
            if (useproxy.Checked && Program.IPList.Count <= 0)
            {
                MessageBox.Show("ip列表为空,请到ip.xml编辑");
                return;
            }

            // Disable only after validation has passed, so a failed check leaves
            // the button usable for another attempt.
            btnStart.Enabled = false;

            // Push the current UI settings into the shared configuration.
            Program.sysPara.BegSpiderIntervalTime = Convert.ToInt32(spidertime.Value * 1000);
            Program.sysPara.IsProxy = useproxy.Checked ? "true" : "false";

            #region Log writer thread
            ClsLog clsLog    = new ClsLog();
            Thread LogThread = new Thread(new ThreadStart(clsLog.WriteLog));
            LogThread.Start();
            #endregion

            #region Spider thread
            clsLog.AddLog(DateTime.Now.ToString(), "抓取开始");
            ClsPageUrl clsPageUrl   = new ClsPageUrl();
            Thread     SpiderThread = new Thread(new ThreadStart(clsPageUrl.SpiderData));
            SpiderThread.Start();
            #endregion

            #region Analysis thread
            ClsPageContent clsPageContent = new ClsPageContent();
            Thread         AnalyseThread  = new Thread(new ThreadStart(clsPageContent.AnalyseData));
            AnalyseThread.Start();
            #endregion

            // The database insert thread is currently disabled; Program.clsDBSignal
            // is still checked below.

            #region Event registration

            // Fires when all items to analyse are reported complete.
            Program.helper.OnAllItemAnalyzeCompleted += (senders, es) =>
            {
                if (Program.clsUrlSignal == 0 && Program.clsContentSignal == 0 && Program.clsDBSignal == 0)
                {
                    // NOTE(review): the three worker threads are aborted here and
                    // the next round is started right after, but nothing visible
                    // in this method restarts them. Confirm that the follow-up
                    // round is actually processed and that its log lines are
                    // still written.
                    SpiderThread.Abort();
                    AnalyseThread.Abort();
                    Thread.Sleep(20000);
                    LogThread.Abort();
                    clsLog.AddLog(DateTime.Now.ToString(), "第" + Program.CurrSpiderTimes + "次抓取结束");
                    Program.CurrSpiderTimes++;
                    clsLog.AddLog(DateTime.Now.ToString(), "第" + Program.CurrSpiderTimes + "次开始");

                    // Entry method: seed the next round.
                    spiderMain();
                }
                Application.DoEvents();
            };

            // Appends progress messages to the on-screen text view.
            Program.helper.OntxtviewCompleted += (senders, es) =>
            {
                // `as` yields null for any other argument type; skip instead of
                // dereferencing null.
                EventControllerArgs args = es as EventControllerArgs;
                if (args != null)
                {
                    txtview.AppendText(args.Msg + Environment.NewLine);
                }

                Application.DoEvents();
            };

            #endregion

            // Entry method: seed the first round.
            spiderMain();
        }
コード例 #5
0
        /// <summary>
        /// Entry point of a crawl round: logs the start, logs every account in
        /// <c>Program.userList</c> in (storing the resulting cookies on the
        /// account object) and then queues the seed URLs selected in
        /// <c>url_comb</c>.
        /// </summary>
        public void spiderMain()
        {
            ClsLog clsLog = new ClsLog();

            clsLog.AddLog(DateTime.Now.ToString(), "入口抓取开始");
            ReportProgress("入口抓取开始");

            ClsPageUrl clsPageUrl = new ClsPageUrl();

            ReportProgress("开始登陆");
            foreach (user item in Program.userList)
            {
                CookieContainer cookie     = new CookieContainer();
                HttpClient      httpClient = new HttpClient("", 0, false, cookie);
                ReportProgress(item.userName + "登陆");

                // Percent-encode the values so characters such as '&', '=', '+'
                // or '%' in a password cannot corrupt the form body.
                // NOTE(review): the login URL is plain http, so the credentials
                // travel unencrypted.
                string postData = "pwd=" + Uri.EscapeDataString(item.psw ?? "")
                                  + "&userid=" + Uri.EscapeDataString(item.userName ?? "");

                // The response body is not needed; only the cookies are kept.
                httpClient.GetResponse("", "http://t.cjcyw.com:8081/login", "Post", postData);
                item.cookie          = httpClient.Cookie;
                item.cookieContainer = httpClient.cookieContainer;
            }

            // NOTE(review): static switch that disables WinForms' cross-thread
            // access check for the whole process; kept as in the original.
            Control.CheckForIllegalCrossThreadCalls = false;

            string selection = url_comb.Text;
            bool   queueAll  = selection == "全部";

            // Ship sources.
            if (queueAll || selection == "船源")
            {
                QueuePortalPage(clsPageUrl, "cyPortal", "http://t.cjcyw.com:8081/ship/list");
            }

            // Cargo sources.
            if (queueAll || selection == "货源")
            {
                QueuePortalPage(clsPageUrl, "hyPortal", "http://t.cjcyw.com:8081/goods/list");
            }

            // Ship archive pages.
            if (queueAll || selection == "船舶档案")
            {
                QueueBoatArchivePages(clsPageUrl);
            }
        }

        /// <summary>
        /// Invokes <c>Program.helper.OntxtviewCompleted</c> with a success message,
        /// doing nothing when no handler is attached.
        /// </summary>
        /// <param name="message">Text carried in <c>EventControllerArgs.Msg</c>.</param>
        private void ReportProgress(string message)
        {
            var handler = Program.helper.OntxtviewCompleted;
            if (handler != null)
            {
                handler(this, new EventControllerArgs()
                {
                    IsSuccess = true, Msg = message
                });
            }
        }

        /// <summary>
        /// Queues one GET seed URL with the fixed argument set used by every seed
        /// in <see cref="spiderMain"/>.
        /// </summary>
        /// <param name="clsPageUrl">Queue object the URL is added to.</param>
        /// <param name="pageType">Type tag passed as the fourth AddPageUrl argument.</param>
        /// <param name="url">Absolute URL to fetch.</param>
        private static void QueuePortalPage(ClsPageUrl clsPageUrl, string pageType, string url)
        {
            clsPageUrl.AddPageUrl("ProgramName", "", "", pageType, "", "", url,
                                  "GET", "", "utf-8", "", null, "", 1, 1);
        }

        /// <summary>
        /// Queues ship-archive list pages 1 through the value currently shown in
        /// <c>nmccda</c>.
        /// </summary>
        /// <param name="clsPageUrl">Queue object the URLs are added to.</param>
        private void QueueBoatArchivePages(ClsPageUrl clsPageUrl)
        {
            // Read the control once rather than on every loop-condition check.
            var lastPage = nmccda.Value;
            for (int i = 1; i <= lastPage; i++)
            {
                QueuePortalPage(clsPageUrl, "cydaPortal", "http://t.cjcyw.com:8081/Boat/BoatList.aspx?pageno=" + i + "&&");
            }
        }