/// <summary>
/// Parses a fetched vessel-archive detail response and appends the extracted fields as one
/// tagged line to <c>down\船舶档案数据.txt</c>.
/// </summary>
/// <remarks>
/// If the response's <c>mobile</c> value is the server's "too many operations, try again later"
/// text, nothing is parsed; the page is re-queued through <c>clsPageUrl.AddPageUrl</c> using the
/// current user's token. The outcome is reported through <c>Program.helper.OntxtviewCompleted</c>
/// in every branch. Exceptions are logged through <c>ClsLog</c> and not rethrown.
/// </remarks>
/// <param name="entity">Fetched page; its <c>PContent</c> is the text that gets scanned.</param>
public void cydaDetailAnalysis(PageContentEntity entity)
{
    try
    {
        string content2 = entity.PContent;
        RegFunc rf = new RegFunc();
        var controllerArgs = new EventControllerArgs() { IsSuccess = false };
        user user = getuser();

        if (rf.GetStr(content2, "\"mobile\":\"", "\",") == "操作频繁稍后再试!")
        {
            controllerArgs.Msg = "操作频繁切换用户补抓" + user.token;
            string nexurl = "http://t.cjcyw.com:8081/Boat/getBoatById.aspx?userid=" + user.token + "&id=" + entity.APara;
            // NOTE(review): the page is re-queued with type "cyDetail" although this is the
            // vessel-archive parser - confirm that is the intended routing key.
            clsPageUrl.AddPageUrl(entity.ProgramName, entity.KeyWord, entity.PID, "cyDetail", entity.SiteUrl, entity.Url, nexurl, "GET", "", entity.EnCode, entity.APara, entity.CookieContent, entity.AContent, entity.TrySpiderTimes, entity.Depth + 1);
        }
        else if (!string.IsNullOrEmpty(content2))
        {
            // Builds "<name>value</name>"; rf.GetStr presumably returns the text found between
            // the begin and end markers. Generating both tags from one name keeps them matched.
            Func<string, string, string, string> element = (name, begin, end) =>
                "<" + name + ">" + rf.GetStr(content2, begin, end) + "</" + name + ">";
            // Value written as "key":"...",
            Func<string, string> quoted = key => element(key, "\"" + key + "\":\"", "\",");
            // Value written as "key":...,
            Func<string, string> bare = key => element(key, "\"" + key + "\":", ",");

            StringBuilder record = new StringBuilder();
            record.Append(bare("id"));
            // The original emitted "<dw>" ... "</hzimg>" here; the tag now matches the key read.
            // NOTE(review): confirm the field was not meant to be "dw".
            record.Append(bare("hzimg"));
            // NOTE(review): "cx" appears twice; kept so the record layout is unchanged.
            foreach (string tag in new[]
            {
                "cx", "sf", "cx", "city", "czxm", "sjhm", "date", "gkgs", "sfzh", "ch",
                "hc", "bz", "cc", "cg", "ck", "cs", "sfdb", "adress", "lxdh", "qq",
                "gmsj", "email", "frdb", "gsdh", "gsweb", "gsemail", "gsfax", "gsadress"
            })
            {
                record.Append(quoted(tag));
            }
            record.Append(bare("flag"));
            record.Append(bare("userid"));
            foreach (string tag in new[] { "lx", "ip", "hits", "ISCheck", "CB_Photo", "CB_Class", "ISTop" })
            {
                record.Append(quoted(tag));
            }
            // Topdate is terminated by the closing quote alone.
            record.Append(element("Topdate", "\"Topdate\":\"", "\""));
            record.Append("\r\n");

            // Make sure the output folder exists; the appending StreamWriter creates the file
            // itself but fails when the directory is missing.
            Directory.CreateDirectory("down");
            string filePath = "down\\船舶档案数据.txt";
            using (StreamWriter sw = new StreamWriter(filePath, true, Encoding.Default))
            {
                sw.Write(record.ToString());
            }

            controllerArgs.Msg = "已抓到" + rf.GetStr(content2, "\"ch\":\"", "\",") + "的船舶档案信息" + rf.GetStr(content2, "\"title\":\"", "\",");
        }

        // Reported for every branch so the rate-limit message is shown as well.
        Program.helper.OntxtviewCompleted(this, controllerArgs);
    }
    catch (Exception ex)
    {
        ClsLog clsLog = new ClsLog();
        clsLog.AddLog(DateTime.Now.ToString(), "分析数据失败" + ex.ToString());
        clsLog.AddLog(DateTime.Now.ToString(), entity.SType + ";" + entity.Url + ";");
    }
}
/// <summary>
/// Parses a fetched ship-source detail response and appends the extracted fields as one tagged
/// line to <c>down\船源数据.txt</c>.
/// </summary>
/// <remarks>
/// Flow, as implemented:
/// 1. If the <c>mobile</c> value is the server's "too many operations, try again later" text,
///    the detail request is re-queued with the cookies of the user returned by <c>getuser()</c>.
/// 2. Otherwise, if <c>publishTel</c> contains <c>*</c>, a <c>viewDetail</c> request is posted.
///    If that response mentions "积分", the page is re-queued; otherwise the detail is fetched again.
/// 3. The record is written only when <c>publishTel</c> no longer contains <c>*</c>.
/// The outcome is reported through <c>Program.helper.OntxtviewCompleted</c>. Exceptions are
/// logged through <c>ClsLog</c> and not rethrown.
/// </remarks>
/// <param name="entity">Fetched page; <c>PContent</c> is scanned and <c>APara</c> is the POST body.</param>
public void cyDetailAnalysis(PageContentEntity entity)
{
    try
    {
        string content2 = entity.PContent;
        RegFunc rf = new RegFunc();
        var controllerArgs = new EventControllerArgs() { IsSuccess = false };
        user user = getuser();

        // Re-queues the detail request; getuser() is evaluated at call time, as in each of the
        // two original call sites.
        Action requeueDetail = () =>
            clsPageUrl.AddPageUrl(entity.ProgramName, entity.KeyWord, entity.PID, "cyDetail", entity.SiteUrl, entity.Url, "http://t.cjcyw.com:8081/ship/detail", "POST", entity.APara, entity.EnCode, entity.APara, getuser().cookieContainer, entity.AContent, entity.TrySpiderTimes, entity.Depth + 1);

        if (rf.GetStr(content2, "\"mobile\":\"", "\",") == "操作频繁稍后再试!")
        {
            controllerArgs.Msg = "操作频繁切换用户补抓" + user.userName;
            requeueDetail();
        }
        else if (!string.IsNullOrEmpty(content2))
        {
            if (rf.GetStr(content2, "\"publishTel\":", ",").IndexOf("*") >= 0)
            {
                Program.helper.OntxtviewCompleted(this, new EventControllerArgs() { IsSuccess = true, Msg = "查看" });
                string nexurl = "http://t.cjcyw.com:8081/user/viewDetail?" + entity.APara;
                HttpClient httpClient = new HttpClient("", 0, false, entity.CookieContent);
                string viewpon = httpClient.GetResponse("", nexurl, "Post", entity.APara);
                if (viewpon.IndexOf("积分") >= 0)
                {
                    Program.helper.OntxtviewCompleted(this, new EventControllerArgs() { IsSuccess = true, Msg = "积分不足,切换账号" });
                    requeueDetail();
                }
                else
                {
                    content2 = httpClient.GetResponse("", "http://t.cjcyw.com:8081/ship/detail", "Post", entity.APara);
                }
            }

            // Re-checked because content2 may have been replaced above.
            if (rf.GetStr(content2, "\"publishTel\":", ",").IndexOf("*") < 0)
            {
                // Builds "<name>value</name>"; rf.GetStr presumably returns the text found
                // between the markers. One name feeds both tags, so they cannot drift apart
                // (the original wrote "/bz>" and "/userid>" without the leading '<').
                Func<string, string, string, string, string> element = (source, name, begin, end) =>
                    "<" + name + ">" + rf.GetStr(source, begin, end) + "</" + name + ">";
                // Value written as "key":"...",
                Func<string, string, string> quoted = (source, key) => element(source, key, "\"" + key + "\":\"", "\",");
                // Value written as "key":"..." and terminated by the closing quote alone.
                Func<string, string, string> quotedLast = (source, key) => element(source, key, "\"" + key + "\":\"", "\"");
                // Value written as "key":...,
                Func<string, string, string> bare = (source, key) => element(source, key, "\"" + key + "\":", ",");

                StringBuilder record = new StringBuilder();
                record.Append(bare(content2, "id"));
                record.Append(bare(content2, "boatid"));
                record.Append(quoted(content2, "Privince"));
                record.Append(quoted(content2, "city"));
                record.Append(quotedLast(content2, "bz"));
                record.Append(bare(content2, "userid"));
                foreach (string tag in new[] { "gsmc", "szg", "mdg", "cpdw", "cplx", "kclb", "zhrq1", "zhrq2", "bzlb" })
                {
                    record.Append(quoted(content2, tag));
                }
                record.Append(quotedLast(content2, "name"));
                record.Append(quoted(content2, "mobile"));

                // Fields read from the "czxx" section of the response.
                string content3 = rf.GetStr(content2, "\"czxx\":", "}]");
                record.Append(element(content3, "czxxid", "\"id\":", ","));
                foreach (string tag in new[] { "lx", "Qymc", "Uname", "name", "mobile" })
                {
                    record.Append(quoted(content3, tag));
                }
                foreach (string tag in new[] { "flag", "hppj", "ybpj", "cppj" })
                {
                    record.Append(bare(content3, tag));
                }
                record.Append(quotedLast(content3, "userimg"));

                // Fields read from the "ds" section; empty placeholders keep the column set
                // stable when the section has no "ch" value.
                string content4 = rf.GetStr(content2, "\"ds\":", "}");
                string[] dsTags = { "ch", "sf", "city", "sc", "cc", "ck" };
                if (!string.IsNullOrEmpty(rf.GetStr(content4, "\"ch\":\"", "\",")))
                {
                    foreach (string tag in dsTags)
                    {
                        record.Append(quoted(content4, tag));
                    }
                    record.Append(quotedLast(content4, "cs"));
                }
                else
                {
                    foreach (string tag in dsTags)
                    {
                        record.Append("<" + tag + "></" + tag + ">");
                    }
                    record.Append("<cs></cs>");
                }
                record.Append("\r\n");

                // Make sure the output folder exists; the appending StreamWriter creates the
                // file itself but fails when the directory is missing.
                Directory.CreateDirectory("down");
                string filePath = "down\\船源数据.txt";
                using (StreamWriter sw = new StreamWriter(filePath, true, Encoding.Default))
                {
                    sw.Write(record.ToString());
                }

                controllerArgs.Msg = "已抓到吨位" + rf.GetStr(content2, "\"cpdw\":\"", "\",") + "所在地" + rf.GetStr(content2, "\"szg\":\"", "\",") + rf.GetStr(content2, "\"cplx\":\"", "\",");
            }
        }

        Program.helper.OntxtviewCompleted(this, controllerArgs);
    }
    catch (Exception ex)
    {
        ClsLog clsLog = new ClsLog();
        clsLog.AddLog(DateTime.Now.ToString(), "分析数据失败" + ex.ToString());
        clsLog.AddLog(DateTime.Now.ToString(), entity.SType + ";" + entity.Url + ";");
    }
}
/// <summary>
/// Parses a fetched cargo-source detail response and appends the extracted fields as one tagged
/// line to <c>down\货源数据.txt</c>.
/// </summary>
/// <remarks>
/// If the response's <c>mobile</c> value is the server's "too many operations, try again later"
/// text, nothing is parsed; the page is re-queued through <c>clsPageUrl.AddPageUrl</c> using the
/// current user's token. The outcome is reported through <c>Program.helper.OntxtviewCompleted</c>.
/// Exceptions are logged through <c>ClsLog</c> and not rethrown.
/// </remarks>
/// <param name="entity">Fetched page; its <c>PContent</c> is the text that gets scanned.</param>
public void hyDetailAnalysis(PageContentEntity entity)
{
    try
    {
        string content2 = entity.PContent;
        RegFunc rf = new RegFunc();
        var controllerArgs = new EventControllerArgs() { IsSuccess = false };
        user user = getuser();

        if (rf.GetStr(content2, "\"mobile\":\"", "\",") == "操作频繁稍后再试!")
        {
            controllerArgs.Msg = "操作频繁切换用户补抓" + user.token;
            string nexurl = "http://t.cjcyw.com:8081//Goods/FindGoodsDetails.aspx?userid=" + user.token + "&hwid=" + entity.APara;
            // NOTE(review): the page is re-queued with type "cyDetail" although this is the
            // cargo parser - confirm that is the intended routing key.
            clsPageUrl.AddPageUrl(entity.ProgramName, entity.KeyWord, entity.PID, "cyDetail", entity.SiteUrl, entity.Url, nexurl, "GET", "", entity.EnCode, entity.APara, entity.CookieContent, entity.AContent, entity.TrySpiderTimes, entity.Depth + 1);
        }
        else if (!string.IsNullOrEmpty(content2))
        {
            // Builds "<name>value</name>"; rf.GetStr presumably returns the text found between
            // the markers. One name feeds both tags, so they cannot drift apart (the original
            // wrote "/hwUserid>" without the leading '<').
            Func<string, string, string, string> element = (name, begin, end) =>
                "<" + name + ">" + rf.GetStr(content2, begin, end) + "</" + name + ">";
            // Value written as "key":"...",
            Func<string, string> quoted = key => element(key, "\"" + key + "\":\"", "\",");
            // Value written as "key":"..." and terminated by the closing quote alone.
            Func<string, string> quotedLast = key => element(key, "\"" + key + "\":\"", "\"");
            // Value written as "key":...,
            Func<string, string> bare = key => element(key, "\"" + key + "\":", ",");

            StringBuilder record = new StringBuilder();
            foreach (string tag in new[] { "hzimg", "name", "mobile", "title" })
            {
                record.Append(quoted(tag));
            }
            record.Append(bare("hwUserid"));
            // NOTE(review): "cppj" is emitted twice around "ybpj"; the first may have been meant
            // as another rating field. Kept so the record layout is unchanged.
            foreach (string tag in new[] { "cppj", "ybpj", "cppj", "hits", "hymc", "ckyj", "hwds", "fhg", "ddg" })
            {
                record.Append(quoted(tag));
            }
            record.Append(quotedLast("ssss"));
            foreach (string tag in new[] { "CFPrivince", "CFCity", "bzxs", "fhrq", "jzrq", "lb" })
            {
                record.Append(quoted(tag));
            }
            record.Append(bare("hwid"));
            record.Append(quotedLast("bz"));
            record.Append("\r\n");

            // Make sure the output folder exists; the appending StreamWriter creates the file
            // itself but fails when the directory is missing.
            Directory.CreateDirectory("down");
            string filePath = "down\\货源数据.txt";
            using (StreamWriter sw = new StreamWriter(filePath, true, Encoding.Default))
            {
                sw.Write(record.ToString());
            }

            controllerArgs.Msg = "已抓到" + rf.GetStr(content2, "\"name\":\"", "\",") + "的货源" + rf.GetStr(content2, "\"title\":\"", "\",");
        }

        Program.helper.OntxtviewCompleted(this, controllerArgs);
    }
    catch (Exception ex)
    {
        ClsLog clsLog = new ClsLog();
        clsLog.AddLog(DateTime.Now.ToString(), "分析数据失败" + ex.ToString());
        clsLog.AddLog(DateTime.Now.ToString(), entity.SType + ";" + entity.Url + ";");
    }
}
/// <summary>
/// Start-button handler: validates prerequisites, stores the crawl settings, starts the log,
/// spider and analysis threads, subscribes the progress handlers and calls <c>spiderMain()</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnStart_Click(object sender, EventArgs e)
{
    // Validate everything before disabling the button, so a failed check leaves it clickable.
    if (Program.userList.Count <= 0)
    {
        MessageBox.Show("请导入用户账号");
        return;
    }

    // The original tested "Count < 0", which can never be true, so an empty proxy list was
    // never reported.
    if (useproxy.Checked && Program.IPList.Count <= 0)
    {
        MessageBox.Show("ip列表为空,请到ip.xml编辑");
        return;
    }

    btnStart.Enabled = false;

    // Store the settings chosen in the UI.
    Program.sysPara.BegSpiderIntervalTime = Convert.ToInt32(spidertime.Value * 1000);
    Program.sysPara.IsProxy = useproxy.Checked ? "true" : "false";

    // Log writer thread.
    ClsLog clsLog = new ClsLog();
    Thread logThread = new Thread(new ThreadStart(clsLog.WriteLog));
    logThread.Start();

    // Crawl thread.
    clsLog.AddLog(DateTime.Now.ToString(), "抓取开始");
    ClsPageUrl clsPageUrl = new ClsPageUrl();
    Thread spiderThread = new Thread(new ThreadStart(clsPageUrl.SpiderData));
    spiderThread.Start();

    // Analysis thread.
    ClsPageContent clsPageContent = new ClsPageContent();
    Thread analyseThread = new Thread(new ThreadStart(clsPageContent.AnalyseData));
    analyseThread.Start();

    // Handler for the "all items analysed" notification: once all three signals are zero the
    // worker threads are aborted and the next round is started.
    Program.helper.OnAllItemAnalyzeCompleted += (senders, es) =>
    {
        if (Program.clsUrlSignal == 0 && Program.clsContentSignal == 0 && Program.clsDBSignal == 0)
        {
            // NOTE(review): Thread.Abort is unsupported on modern .NET, and the two log entries
            // below are added after the log thread has been aborted - confirm they are still
            // written, and that restarting via spiderMain() without new threads is intended.
            spiderThread.Abort();
            analyseThread.Abort();
            Thread.Sleep(20000);
            logThread.Abort();
            clsLog.AddLog(DateTime.Now.ToString(), "第" + Program.CurrSpiderTimes + "次抓取结束");
            Program.CurrSpiderTimes++;
            clsLog.AddLog(DateTime.Now.ToString(), "第" + Program.CurrSpiderTimes + "次开始");
            // Entry point of the next round.
            spiderMain();
        }
        Application.DoEvents();
    };

    // Handler that appends each progress message to the text view.
    Program.helper.OntxtviewCompleted += (senders, es) =>
    {
        EventControllerArgs progress = es as EventControllerArgs;
        // Skip arguments of an unexpected type instead of dereferencing null.
        if (progress != null)
        {
            txtview.AppendText(progress.Msg + Environment.NewLine);
        }
        Application.DoEvents();
    };

    // Entry point of the first round.
    spiderMain();
}