/// <summary>
/// Crawl entry point: writes the "entry crawl started" log line and registers the
/// first ship-source listing page through <c>ClsPageUrl.AddPageUrl</c>.
/// </summary>
public void spiderMain()
{
    ClsLog clsLog = new ClsLog();
    clsLog.AddLog(DateTime.Now.ToString(), "入口抓取开始");

    ClsPageUrl clsPageUrl = new ClsPageUrl();

    // NOTE(review): this switches off WinForms' cross-thread access check for every
    // control; presumably the worker threads touch the UI directly - confirm.
    Control.CheckForIllegalCrossThreadCalls = false;

    // Seed URL: page 1 of the listing, tagged "Portal", fetched with GET as utf-8.
    clsPageUrl.AddPageUrl("ProgramName", "", "", "Portal", "", "", "http://cht.cjsyw.com:8080/ShipSource/listSS.aspx?pageno=1", "GET", "", "utf-8", "", null, "", 1, 1);
}
/// <summary>
/// Start-button handler: disables the button, starts the log, crawl and analysis
/// threads, registers the "all items analysed" handler and then calls
/// <see cref="spiderMain"/> to seed the crawl.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnStart_Click(object sender, EventArgs e)
{
    btnStart.Enabled = false;

    #region Log file writer
    ClsLog clsLog = new ClsLog();
    Thread LogThread = new Thread(new ThreadStart(clsLog.WriteLog));
    LogThread.Start();
    #endregion

    #region Crawl thread
    clsLog.AddLog(DateTime.Now.ToString(), "抓取开始");
    ClsPageUrl clsPageUrl = new ClsPageUrl();
    Thread SpiderThread = new Thread(new ThreadStart(clsPageUrl.SpiderData));
    SpiderThread.Start();
    #endregion

    #region Analysis thread
    ClsPageContent clsPageContent = new ClsPageContent();
    Thread AnalyseThread = new Thread(new ThreadStart(clsPageContent.AnalyseData));
    AnalyseThread.Start();
    #endregion

    #region Database insert thread (disabled)
    //ClsDB clsDB = new ClsDB();
    //Thread dbThread = new Thread(new ThreadStart(clsDB.ExecPageDBData));
    //dbThread.Start();
    #endregion

    #region Event registration
    // NOTE(review): the handler is attached to a controller instance that is local to
    // this method; nothing visible here hands it to the worker threads, so confirm
    // that something can actually raise the event on this instance.
    EventController helper = new EventController();

    // Raised when every item that needed analysis has been processed.
    helper.OnAllItemAnalyzeCompleted += (senders, es) =>
    {
        // Only shut down once the URL, content and DB signals are all zero.
        if (Program.clsUrlSignal == 0 && Program.clsContentSignal == 0 && Program.clsDBSignal == 0)
        {
            SpiderThread.Abort();
            AnalyseThread.Abort();
            //dbThread.Abort();

            // Add the final entry BEFORE the wait and before the log thread is
            // aborted; it used to be added after LogThread.Abort(), where the writer
            // thread (presumably the consumer of AddLog entries) could no longer
            // flush it.
            clsLog.AddLog(DateTime.Now.ToString(), "抓取结束");

            // Give the log thread time to write pending entries, then stop it.
            Thread.Sleep(20000);
            LogThread.Abort();
        }
    };
    #endregion

    // Entry method: seed the crawl.
    spiderMain();
}
/// <summary>
/// Analyses a fetched portal page: extracts the total page count (the text between
/// "/共有" and "页") and walks the page numbers. Enqueueing of the per-page URLs is
/// currently disabled. Any failure is logged and the entity is handed to
/// <c>UrlContorl.SaveUrl</c> together with the exception text.
/// </summary>
/// <param name="entity">Fetched page; its <c>PContent</c> holds the page body.</param>
public void HousePortalAnalysis(PageContentEntity entity)
{
    try
    {
        RegFunc rf = new RegFunc();

        string pageCountText = rf.GetStr(entity.PContent, "/共有", "页");
        if (pageCountText == "")
        {
            // No page-count marker found: treated as an analysis failure and handled
            // by the catch block below (logged and saved).
            throw new Exception("分析数据失败:页面没有数据");
        }

        // Parse once; a non-numeric value throws here and is logged by the catch below.
        int pageCount = Convert.ToInt32(pageCountText);
        for (int i = 1; i <= pageCount; i++)
        {
            // Disabled: enqueue one "Batch" URL per result page.
            // new ClsPageUrl().AddPageUrl(entity.ProgramName, entity.KeyWord, entity.PID, "Batch", entity.SiteUrl, entity.Url, "http://218.14.207.76/xxgs/xmlpzs/webissue.asp?page=" + i,
            //     "GET", "", entity.EnCode, i.ToString(), entity.CookieContent, entity.AContent, entity.TrySpiderTimes, entity.Depth + 1);
        }
    }
    catch (Exception ex)
    {
        ClsLog clsLog = new ClsLog();
        clsLog.AddLog(DateTime.Now.ToString(), "分析数据失败" + ex.ToString());
        clsLog.AddLog(DateTime.Now.ToString(), entity.SType + ";" + entity.Url + ";");

        // Hand the failed entity and the error text to UrlContorl.SaveUrl.
        UrlContorl urlContorl = new UrlContorl();
        urlContorl.SaveUrl(entity, ex.ToString());
    }
}
/// <summary>
/// Start-button handler: validates that accounts (and, when the proxy option is
/// ticked, proxy IPs) are loaded, stores the crawl settings, starts the log, crawl
/// and analysis threads, registers the event handlers and calls
/// <see cref="spiderMain"/> to seed the first round.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnStart_Click(object sender, EventArgs e)
{
    if (Program.userList.Count <= 0)
    {
        MessageBox.Show("请导入用户账号");
        return;
    }

    // Count is never negative, so the former "< 0" test could not fire; "<= 0" makes
    // the empty-proxy-list check effective.
    if (useproxy.Checked && Program.IPList.Count <= 0)
    {
        MessageBox.Show("ip列表为空,请到ip.xml编辑");
        return;
    }

    // Disable the button only after validation has passed, so a rejected start
    // leaves it clickable for another attempt.
    btnStart.Enabled = false;

    // Apply the settings from the form: the spidertime value is multiplied by 1000
    // before being stored as the crawl interval.
    Program.sysPara.BegSpiderIntervalTime = Convert.ToInt32(spidertime.Value * 1000);
    Program.sysPara.IsProxy = useproxy.Checked ? "true" : "false";

    #region Log file writer
    ClsLog clsLog = new ClsLog();
    Thread LogThread = new Thread(new ThreadStart(clsLog.WriteLog));
    LogThread.Start();
    #endregion

    #region Crawl thread
    clsLog.AddLog(DateTime.Now.ToString(), "抓取开始");
    ClsPageUrl clsPageUrl = new ClsPageUrl();
    Thread SpiderThread = new Thread(new ThreadStart(clsPageUrl.SpiderData));
    SpiderThread.Start();
    #endregion

    #region Analysis thread
    ClsPageContent clsPageContent = new ClsPageContent();
    Thread AnalyseThread = new Thread(new ThreadStart(clsPageContent.AnalyseData));
    AnalyseThread.Start();
    #endregion

    #region Database insert thread (disabled)
    //ClsDB clsDB = new ClsDB();
    //Thread dbThread = new Thread(new ThreadStart(clsDB.ExecPageDBData));
    //dbThread.Start();
    #endregion

    #region Event registration
    // Raised when every item that needed analysis has been processed.
    Program.helper.OnAllItemAnalyzeCompleted += (senders, es) =>
    {
        if (Program.clsUrlSignal == 0 && Program.clsContentSignal == 0 && Program.clsDBSignal == 0)
        {
            // NOTE(review): the worker and log threads are aborted here and never
            // restarted, yet the next round is logged and seeded right below -
            // confirm that later rounds still have threads to process them.
            SpiderThread.Abort();
            AnalyseThread.Abort();
            //dbThread.Abort();
            Thread.Sleep(20000);
            LogThread.Abort();

            clsLog.AddLog(DateTime.Now.ToString(), "第" + Program.CurrSpiderTimes + "次抓取结束");
            Program.CurrSpiderTimes++;
            clsLog.AddLog(DateTime.Now.ToString(), "第" + Program.CurrSpiderTimes + "次开始");

            // Entry method: seed the next round.
            spiderMain();
        }

        Application.DoEvents();
    };

    // Appends each progress message to the on-screen text view.
    Program.helper.OntxtviewCompleted += (senders, es) =>
    {
        EventControllerArgs args = es as EventControllerArgs;
        if (args == null)
        {
            // Not a progress message; nothing to display.
            return;
        }

        txtview.AppendText(args.Msg + Environment.NewLine);
        Application.DoEvents();
    };
    #endregion

    // Entry method: seed the first round.
    spiderMain();
}
/// <summary>
/// Crawl entry point: logs the start, logs every account in <c>Program.userList</c>
/// in to the site (storing the resulting cookies on the account), then registers the
/// portal URLs that match the selection in <c>url_comb</c>.
/// </summary>
public void spiderMain()
{
    ClsLog clsLog = new ClsLog();
    clsLog.AddLog(DateTime.Now.ToString(), "入口抓取开始");
    Program.helper.OntxtviewCompleted(this, new EventControllerArgs() { IsSuccess = true, Msg = "入口抓取开始" });

    ClsPageUrl clsPageUrl = new ClsPageUrl();

    Program.helper.OntxtviewCompleted(this, new EventControllerArgs() { IsSuccess = true, Msg = "开始登陆" });
    foreach (user item in Program.userList)
    {
        CookieContainer cookie = new CookieContainer();
        HttpClient httpClient = new HttpClient("", 0, false, cookie);
        Program.helper.OntxtviewCompleted(this, new EventControllerArgs() { IsSuccess = true, Msg = item.userName + "登陆" });

        // Percent-encode the credentials so characters such as '&', '=', '+' or '%'
        // in a password cannot corrupt the form-encoded POST body.
        string postData = "pwd=" + Uri.EscapeDataString(item.psw ?? "")
            + "&userid=" + Uri.EscapeDataString(item.userName ?? "");

        // The response body is not used; only the cookies set by the login matter.
        httpClient.GetResponse("", "http://t.cjcyw.com:8081/login", "Post", postData);
        item.cookie = httpClient.Cookie;
        item.cookieContainer = httpClient.cookieContainer;
    }

    // NOTE(review): this switches off WinForms' cross-thread access check for every
    // control; presumably the worker threads touch the UI directly - confirm.
    Control.CheckForIllegalCrossThreadCalls = false;

    switch (url_comb.Text)
    {
        case "全部": // all three sources
            AddShipSourcePortal(clsPageUrl);
            AddCargoSourcePortal(clsPageUrl);
            AddBoatArchivePortals(clsPageUrl);
            break;
        case "船源": // ship sources only
            AddShipSourcePortal(clsPageUrl);
            break;
        case "货源": // cargo sources only
            AddCargoSourcePortal(clsPageUrl);
            break;
        case "船舶档案": // vessel archive only
            AddBoatArchivePortals(clsPageUrl);
            break;
    }
}

/// <summary>Registers the ship-source listing page, tagged "cyPortal".</summary>
private void AddShipSourcePortal(ClsPageUrl clsPageUrl)
{
    clsPageUrl.AddPageUrl("ProgramName", "", "", "cyPortal", "", "", "http://t.cjcyw.com:8081/ship/list", "GET", "", "utf-8", "", null, "", 1, 1);
}

/// <summary>Registers the cargo-source listing page, tagged "hyPortal".</summary>
private void AddCargoSourcePortal(ClsPageUrl clsPageUrl)
{
    clsPageUrl.AddPageUrl("ProgramName", "", "", "hyPortal", "", "", "http://t.cjcyw.com:8081/goods/list", "GET", "", "utf-8", "", null, "", 1, 1);
}

/// <summary>Registers vessel-archive pages 1 through <c>nmccda.Value</c>, tagged "cydaPortal".</summary>
private void AddBoatArchivePortals(ClsPageUrl clsPageUrl)
{
    decimal lastPage = nmccda.Value;
    for (int i = 1; i <= lastPage; i++)
    {
        clsPageUrl.AddPageUrl("ProgramName", "", "", "cydaPortal", "", "", "http://t.cjcyw.com:8081/Boat/BoatList.aspx?pageno=" + i + "&&", "GET", "", "utf-8", "", null, "", 1, 1);
    }
}