/// <summary>
/// Registers one benlai.com account through the invitation activity: obtains a phone
/// number from the SMS platform, asks benlai to send a verification SMS, reads the code
/// back from the SMS platform, submits the receiveAndReg request and stores an
/// SmsHistory row describing the attempt.
/// </summary>
/// <returns>
/// "1" once the receiveAndReg request has been sent and the history row saved (the
/// response body is only logged, not inspected); an empty string when the SMS request
/// was not acknowledged or no verification code could be extracted.
/// </returns>
public string RegionUser()
{
    const string catid = "1773";

    var smsmanger = new SiteFactory().SmsApiManager;
    string phone = smsmanger.GetPhoneNum(catid);

    // NOTE(review): the cookie below is a captured browser session hard-coded in source.
    HtmlAnalysis request1 = new HtmlAnalysis();
    request1.RequestContentType = "application/x-www-form-urlencoded";
    request1.Headers.Add("Cookie", "JSESSIONID=A96903EAC0B275902F08953A3C87C808; RecommendCityStatus=1; burl=https%3a%2f%2fm%2ebenlai%2ecom%2factivity%2fpullNewReceive%3fcallback%3d0%26showtype%3d4%26invitatorCode%3dqVL4Y%26referSysNo%3dDECAA91A6206EDD4; ASP.NET_SessionId=rizc5rofdjadxcytxo3dhizb; uuk=657efcf3-a63d-4e3c-9718-f4d809c92353; userGuid=aafe002b-730b-4c90-8aa3-cad731bb4db720160618014311; _jzqckmp_v2=1/; _jzqckmp=1/; AppCity=*e5*ae*81*e6*b3*a2; curRecommendation=%e5%ae%81%e6%b3%a2; _pk_id.7.2b60=b6589fc6ab0dc82c.1466228584.1.1466228589.1466228584.; _pk_ses.7.2b60=*; recentcNo=\"135, \"; DeliverySysNo=135; WebSiteSysNo=3; CityPY=nb; city=*e5*ae*81*e6*b3*a2; hsc=1; ProvinceSysNo=28; localcity=135; backUrl=https%253A%252F%252Fm.benlai.com%252Factivity%252FpullNewReceive%253Fcallback%253D0%2526showtype%253D4%2526invitatorCode%253DqVL4Y%2526referSysNo%253DDECAA91A6206EDD4; _pk_id.9.2b60=b6589fc6ab0dc82c.1466228593.1.1466228593.1466228593.; _pk_ses.9.2b60=*; _qzja=1.430535276.1466228583963.1466228583963.1466228583963.1466228593005.1466228593012.https%253A%252F%252Fm_benlai_com.0.0.5.1; _qzjb=1.1466228583963.5.0.0.0; _qzjc=1; _qzjto=5.1.0; Hm_lvt_9a7d729a11da2966935bcb2908a98794=1465949409,1465953691,1466042258,1466121250; Hm_lpvt_9a7d729a11da2966935bcb2908a98794=1466228593; Hm_lvt_7feabb06873cfd158820492f754cc70b=1465949409,1465953691,1466042258,1466121250; Hm_lpvt_7feabb06873cfd158820492f754cc70b=1466228593; CSESSIONID=A96903EAC0B275902F08953A3C87C808; source=2");
    request1.RequestUserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36";
    request1.Headers.Add("X-Requested-With", "XMLHttpRequest");
    request1.Headers.Add("Origin", "https://m.benlai.com");
    request1.RequestReferer = "https://m.benlai.com/showReg?comeFromApp=0";

    // Step 1: ask benlai to send the verification SMS to the phone number.
    string url = "https://m.benlai.com/regPhoneVry?phoneNumber=" + phone;
    request1.RequestMethod = "post";
    var page = request1.HttpRequest(url);
    if (!page.Contains("'短信验证码生成成功"))
    {
        LogServer.WriteLog(page);
        return "";
    }

    // Step 2: read the SMS back from the SMS platform and extract the numeric code.
    var msm = smsmanger.GetValidateMsg(phone, catid);
    string code = Regex.Match(msm, "注册验证码为 (?<x>\\d+) \\(本来生活绝不会索取此验证码", ro).Groups["x"].Value;
    if (string.IsNullOrEmpty(code))
    {
        // Log the SMS text that failed to match; the code itself is empty here and
        // logging it (as the previous version did) carries no diagnostic value.
        LogServer.WriteLog(msm);
        return "";
    }

    // Step 3: submit the registration for the invitation activity.
    url = "https://m.benlai.com/activity/receiveAndReg?referSysNo=DECAA91A6206EDD4&invitatorCode=qVL4Y&cellphone=" + phone + "&code=" + code + "&unionId=";
    page = request1.HttpRequest(url);
    LogServer.WriteLog(url + "\t" + page);
    LogServer.WriteLog(phone + "\t" + code, "benlaishenhuo");

    // Step 4: persist what was done with this phone number.
    var shy = new SmsHistory
    {
        SmsServer = smsmanger.smsManger.ServerName,
        SmsUserName = smsmanger.smsManger.UserName,
        Phone = phone,
        CaseName = "本来生活168活动",
        Summary = "创建帐号并获取满200减40的券",
        MessageInfo = string.Format("[\"phone\":{0},\"pwd\":{1}]", phone, code),
        CreateDate = DateTime.Now
    };
    new SmsHistoryDB().AddSmsHistory(shy);
    return "1";
}
/// <summary>
/// Downloads the first comment page of a product and converts every matched comment
/// fragment into a CommentInfo via getCommentNode.
/// </summary>
/// <param name="itemUrl">
/// Product page URL. The text following "/product/" is used as the product id, and the
/// URL itself is sent as the request referer.
/// </param>
/// <returns>
/// The parsed comments with null results removed; null when the page contains the
/// "目前还没有用户评论" marker or when RegGroupCollection returns null.
/// </returns>
public List<CommentInfo> GetCommentsFirstPage(string itemUrl)
{
    string productId = RegGroupsX<string>(itemUrl, "/product/(?<x>.*?)$");
    string commentsUrl = string.Format(UrlMode, productId, 0);

    HtmlAnalysis client = new HtmlAnalysis();
    client.RequestReferer = itemUrl;
    string html = client.HttpRequest(commentsUrl);

    if (html.Contains("目前还没有用户评论"))
    {
        return null;
    }

    var fragments = RegGroupCollection(html, "<div style=\"margin-left:0.5em;\">(?<x>.*?)<span\nclass=\"crVotingButtons\">");
    if (fragments == null)
    {
        return null;
    }

    List<CommentInfo> comments = new List<CommentInfo>();
    foreach (Match fragment in fragments)
    {
        var comment = getCommentNode(fragment.Groups["x"].Value);
        if (comment != null)
        {
            comments.Add(comment);
        }
    }
    return comments;
}
/// <summary>
/// Walks the paged SFDA search listing for tableId=96 and passes the record ids found
/// on each page to addSigelSite. The total page count is read from the "共N页" text of
/// the listing; a failure on one page is logged and the loop moves on to the next page.
/// </summary>
public void GetAllSite()
{
    const string listUrlTemplate =
        "http://app1.sfda.gov.cn/datasearch/face3/search.jsp?tableId=96&State=1&bcId=139468294509280829793942689586&State=1&curstart={0}&State=1&tableName=TABLE96&State=1&viewtitleName=COLUMN1229&State=1&viewsubTitleName=COLUMN1227&State=1&tableView=%25E7%25BD%2591%25E4%25B8%258A%25E8%258D%25AF%25E5%25BA%2597&State=1";

    HtmlAnalysis client = new HtmlAnalysis
    {
        RequestMethod = "post",
        RequestAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        RequestContentType = "application/x-www-form-urlencoded; charset=UTF-8",
        RequestUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"
    };

    // Starts at one page; replaced by the real count once a listing page has been read.
    int totalPages = 1;
    for (int pageNo = 1; pageNo <= totalPages; pageNo++)
    {
        try
        {
            // From the second page on, the previous listing page is sent as referer.
            if (pageNo > 1)
            {
                client.RequestReferer = string.Format(listUrlTemplate, pageNo - 1);
            }

            string pageUrl = string.Format(listUrlTemplate, pageNo);
            string listingHtml = client.HttpRequest(pageUrl);
            if (listingHtml == "")
            {
                continue;
            }

            if (totalPages == 1)
            {
                totalPages = RegGroupsX<int>(listingHtml, "共(?<x>\\d+)页");
            }

            string tableHtml = RegGroupsX<string>(listingHtml, "<td height=30><p align=left>(?<x>.*?)</table>");
            var idMatches = RegGroupCollection(tableHtml, "\\&Id=(?<x>\\d+)");
            if (idMatches != null)
            {
                IEnumerable<string> ids = idMatches.Cast<Match>().Select(m => m.Groups["x"].Value);
                addSigelSite(ids);
            }

            // Logged for every page that returned content, with or without ids.
            LogServer.WriteLog("第" + pageNo + "页\t" + pageUrl, "sfda");
        }
        catch (Exception ex)
        {
            LogServer.WriteLog(ex);
        }
    }
}
/// <summary>
/// Requests the first page of the JSON comment feed for a product and converts each
/// entry of its "comments" array with getCommentNode.
/// </summary>
/// <param name="itemUrl">
/// Product URL. Its first run of digits is used as the product id and the URL is sent
/// as the request referer.
/// </param>
/// <returns>
/// The converted comments with null results removed; null when the response has no
/// "comments" text, contains an empty "comments" array, or lacks the "comments" property.
/// </returns>
public List<CommentInfo> GetCommentsFirstPage(string itemUrl)
{
    string productId = RegGroupsX<string>(itemUrl, "(?<x>\\d+)");

    // CommentType null/""/"1" selects UrlMode, "2" selects UrlModeimg;
    // any other value leaves the URL empty.
    string feedUrl;
    switch (CommentType)
    {
        case null:
        case "":
        case "1":
            feedUrl = string.Format(UrlMode, productId, 0);
            break;
        case "2":
            feedUrl = string.Format(UrlModeimg, productId, 0);
            break;
        default:
            feedUrl = "";
            break;
    }

    HtmlAnalysis client = new HtmlAnalysis();
    client.RequestReferer = itemUrl;
    string payload = client.HttpRequest(feedUrl);

    bool mentionsComments = payload.Contains("comments");
    bool hasEmptyArray = payload.Contains("\"comments\":[],");
    if (hasEmptyArray || !mentionsComments)
    {
        return null;
    }

    JObject root = JObject.Parse(payload);
    var commentsToken = root["comments"];
    if (commentsToken == null)
    {
        return null;
    }

    JArray entries = (JArray)commentsToken;
    List<CommentInfo> comments = new List<CommentInfo>();
    foreach (var entry in entries)
    {
        var comment = getCommentNode(entry);
        if (comment != null)
        {
            comments.Add(comment);
        }
    }
    return comments;
}
/// <summary>
/// Walks the paged SFDA search listing for tableId=36 and passes the detail links found
/// on each page to addproducts. The total page count is read from the "共N页" text of the
/// listing; a failure on one page is logged and the loop moves on to the next page.
/// </summary>
public void GetAllProducts()
{
    // Alternative listing kept for reference (tableId=25):
    // "http://app1.sfda.gov.cn/datasearch/face3/search.jsp?tableId=25&State=1&bcId=124356560303886909015737447882&State=1&tableName=TABLE25&State=1&viewtitleName=COLUMN167&State=1&viewsubTitleName=COLUMN166,COLUMN170,COLUMN821&State=1&curstart={0}&State=1&tableView=%25E5%259B%25BD%25E4%25BA%25A7%25E8%258D%25AF%25E5%2593%2581&State=1"
    const string listUrlTemplate =
        "http://app1.sfda.gov.cn/datasearch/face3/search.jsp?tableId=36&State=1&bcId=124356651564146415214424405468&State=1&curstart={0}&State=1&tableName=TABLE36&State=1&viewtitleName=COLUMN361&State=1&viewsubTitleName=COLUMN354,COLUMN355,COLUMN356,COLUMN823&State=1&tableView=%25E8%25BF%259B%25E5%258F%25A3%25E8%258D%25AF%25E5%2593%2581&State=1";

    HtmlAnalysis client = new HtmlAnalysis
    {
        RequestMethod = "post",
        RequestAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        RequestContentType = "application/x-www-form-urlencoded; charset=UTF-8",
        RequestUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"
    };

    // Starts at one page; replaced by the real count once a listing page has been read.
    int totalPages = 1;
    for (int pageNo = 1; pageNo <= totalPages; pageNo++)
    {
        try
        {
            string pageUrl = string.Format(listUrlTemplate, pageNo);
            string listingHtml = client.HttpRequest(pageUrl);
            if (listingHtml == "")
            {
                continue;
            }

            if (totalPages == 1)
            {
                totalPages = RegGroupsX<int>(listingHtml, "共(?<x>\\d+)页");
            }

            string tableHtml = RegGroupsX<string>(listingHtml, "<td height=30><p align=left>(?<x>.*?)</table>");
            var linkMatches = RegGroupCollection(tableHtml, "callbackC,'(?<x>.*?)',");
            if (linkMatches != null)
            {
                IEnumerable<string> ids = linkMatches.Cast<Match>().Select(m => m.Groups["x"].Value);
                addproducts(ids);
            }

            // Logged for every page that returned content, with or without links.
            LogServer.WriteLog("第" + pageNo + "页\t" + pageUrl, "sfda");
        }
        catch (Exception ex)
        {
            LogServer.WriteLog(ex);
        }
    }

    LogServer.WriteLog("抓取完毕共抓取到 " + totalPages, "sfda");
}
/// <summary>
/// Downloads the detail page of every given record id, reads the table rows of the page
/// into a MedicineSiteInfo and saves all successfully parsed records in one batch.
/// </summary>
/// <param name="ids">Record ids substituted into the content.jsp detail URL.</param>
private void addSigelSite(IEnumerable<string> ids)
{
    const string proUrlMode = "http://app1.sfda.gov.cn/datasearch/face3/content.jsp?tableId=96&tableName=TABLE96&tableView=%CD%F8%C9%CF%D2%A9%B5%EA&Id={0}";
    const string cellPattern = "width=83%>(?<x>.*?)</td>";
    // Row indexes 0..11 and 13 are read below, so at least 14 rows are required.
    // (The previous guard of 13 let a 13-row page through to an out-of-range access.)
    const int requiredRows = 14;

    HtmlAnalysis request = new HtmlAnalysis();
    request.RequestAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
    request.RequestContentType = "application/x-www-form-urlencoded; charset=UTF-8";
    request.RequestUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0";

    List<MedicineSiteInfo> list = new List<MedicineSiteInfo>();
    foreach (string page in ids.Select(proid => string.Format(proUrlMode, proid)).Select(request.HttpRequest))
    {
        var rows = RegGroupCollection(page, "<tr>(?<x>.*?)</tr>");
        // RegGroupCollection is null-checked at its other call sites; without this check
        // one unparsable page would abort the whole batch with a NullReferenceException.
        if (rows == null || rows.Count < requiredRows)
        {
            continue;
        }

        try
        {
            Func<int, string> rowHtml = index => rows[index].Groups["x"].Value;

            MedicineSiteInfo msite = new MedicineSiteInfo
            {
                CertificateNo = RegGroupsX<string>(rowHtml(0), cellPattern),
                ServerArea = RegGroupsX<string>(rowHtml(1), cellPattern),
                CompanyName = RegGroupsX<string>(rowHtml(2), cellPattern),
                Gerent = RegGroupsX<string>(rowHtml(3), cellPattern),
                CompanyAddress = RegGroupsX<string>(rowHtml(4), cellPattern),
                Province = RegGroupsX<string>(rowHtml(5), cellPattern),
                SiteName = RegGroupsX<string>(rowHtml(6), cellPattern),
                SiteIp = RegGroupsX<string>(rowHtml(7), cellPattern),
                Domian = RegGroupsX<string>(rowHtml(8), cellPattern),
                ReleaseTime = RegGroupsX<DateTime>(rowHtml(9), cellPattern),
                ValidityDate = RegGroupsX<DateTime>(rowHtml(10), cellPattern),
                PostNo = RegGroupsX<string>(rowHtml(11), cellPattern),
                // Row 12 is not read; the remark sits in row 13 inside a coloured span.
                Remark = RegGroupsX<string>(rowHtml(13), "000066\">(?<x>.*?)</span>"),
                CreateTime = DateTime.Now
            };

            // The previous call replaced ";" with ";" (a no-op).
            // NOTE(review): presumably the first argument was a full-width semicolon
            // (U+FF1B) lost in an encoding round-trip; confirm against source history.
            msite.Domian = msite.Domian.Replace("\uFF1B", ";");
            list.Add(msite);
        }
        catch (Exception ex)
        {
            // A page that cannot be parsed is logged and skipped; the batch continues.
            LogServer.WriteLog(ex);
        }
    }

    if (list.Count > 0)
    {
        new MedicineSiteDB().AddMedicineSite(list);
        list.Clear();
    }
}
/// <summary>
/// Downloads the detail page behind each relative link, converts it with getProDetial
/// and stores the results through MedicineDB in batches of 50, followed by one final
/// write for any remainder.
/// </summary>
/// <param name="ids">Relative detail links appended to the face3 base URL.</param>
private void addproducts(IEnumerable<string> ids)
{
    const string proUrlMode = "http://app1.sfda.gov.cn/datasearch/face3/{0}";
    const int batchSize = 50;

    HtmlAnalysis client = new HtmlAnalysis
    {
        RequestAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        RequestContentType = "application/x-www-form-urlencoded; charset=UTF-8",
        RequestUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"
    };

    // Lazily evaluated: each page is downloaded only when the loop reaches it.
    IEnumerable<string> detailPages = ids.Select(id => client.HttpRequest(string.Format(proUrlMode, id)));

    List<MedicineInfo> pending = new List<MedicineInfo>();
    foreach (string detailPage in detailPages)
    {
        pending.Add(getProDetial(detailPage));
        if (pending.Count >= batchSize)
        {
            new MedicineDB().AddMedicineInfo(pending);
            pending.Clear();
        }
    }

    if (pending.Count != 0)
    {
        new MedicineDB().AddMedicineInfo(pending);
        pending.Clear();
    }
}
/// <summary>
/// Creates the instance and assigns a new <c>HtmlAnalysis</c> object to the
/// <c>HtmlAnalysis</c> member.
/// </summary>
public BaseSiteInfo() { HtmlAnalysis = new HtmlAnalysis(); }
/// <summary>
/// Refreshes a Tmall category from its listing page. The breadcrumb entries of the page
/// are walked from the outermost to the innermost; any ancestor that is not yet in
/// HasBindClasslist is created, and the innermost entry is either added as a new
/// category or used to update the matching entry of HasBindClasslist. Newly created
/// categories collected in shopClasslist are saved at the end.
/// </summary>
/// <param name="item">
/// Category to refresh. Nothing happens unless its ParentClass is the empty string. It
/// is deleted when the page contains the "ResponseUri:http://www.tmall.com/" marker and
/// the category was last updated more than 15 days ago.
/// </param>
private void UpdateTmallNode(SiteClassInfo item)
{
    if (item.ParentClass != "")
    {
        return;
    }

    const string listUrlPrefix = "http://list.tmall.com/search_product.htm?cat=";

    string url = listUrlPrefix + item.ClassId;
    HtmlAnalysis reqest = new HtmlAnalysis();
    // NOTE(review): the cookie below is a captured logged-in session hard-coded in source.
    reqest.Headers.Add("Cookie", "_med=dw:1440&dh:900&pw:1440&ph:900&ist:0; pnm_cku822=126UW5TcyMNYQwiAiwQRHhBfEF8QXtHcklnMWc%3D%7CUm5Ockt%2BQXVPdUp%2BQH9Dfyk%3D%7CU2xMHDJxPk82UjVOI1h2VnhCbExiPl85VTJMNhhOGA%3D%3D%7CVGhXd1llXGlWYlhiXWlXaFRoX2JAekN3TndMdUB1S3RAeUx0TmA2%7CVWldfS0SMgwzCCgULg4gWz0ReEB2Aix6LA%3D%3D%7CVmhIGCcYJAQ%2FAyMXLRc3DTQNORklHCUYOAwxDCwQKRAtDTgDPmg%2B%7CV25Tbk5zU2xMcEl1VWtTaUlwJg%3D%3D; cq=ccp%3D1; tt=login.taobao.com; res=scroll%3A990*776-client%3A977*290-offset%3A977*290-screen%3A1440*900; hng=; uss=BqRyb7nd5KLIbC5D91VCamaiwt66iy8KP0cAS24EJNQWFeWsxGZv%2FwEo%2BAs%3D; cna=cFJaEEwJdRsCATyy24A1yMNe; l=AkZGKGJIZ/WDVSsY65u6dVSj1jLItYph; isg=Alpa8TTm1nmgf1rVi7OVW5M1rADEst5lFaLZTWTTFO241_oRTBsudSCt8xs0; OZ_1U_2061=vid=v801c15a894bb1.0&ctime=1478143053<ime=1476512356; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; x=__ll%3D-1%26_ato%3D0; _tb_token_=eeed7bb353eb5; ck1=; uc1=cookie14=UoW%2FX9QwsnjAzg%3D%3D&lng=zh_CN&cookie16=W5iHLLyFPlMGbLDwA%2BdvAGZqLg%3D%3D&existShop=false&cookie21=V32FPkk%2FhSg%2F&tag=0&cookie15=UIHiLt3xD8xYTw%3D%3D&pas=0; uc3=sg2=AQI4ctClVx2ycnFp5kyAa%2F3VFKDYjzhZBJFC8KK2LVw%3D&nk2=D9ZNP7htc6w%3D&id2=UU8Lx7%2BmPirPbw%3D%3D&vt3=F8dARHfHI%2BnGtn3VuNA%3D&lg2=UtASsssmOIJ0bQ%3D%3D; lgc=lunce188; tracknick=lunce188; cookie2=10682dca3e46d779e26f299924785699; cookie1=AV0h8l61cg4iTp3AqqPZRlYP3nQGpHHQCAg%2FB5Sm3VI%3D; unb=2731635449; t=65336f3349d3648c68445898ef92bec2; skt=2c4d55251dbb75a9; _nk_=lunce188; _l_g_=Ug%3D%3D; cookie17=UU8Lx7%2BmPirPbw%3D%3D; login=true");
    reqest.RequestUserAgent = "Mozilla/5.0 (SymbianOS/9.3; U; Series60/3.2 NokiaE75-1 /110.48.125 Profile/MIDP-2.1 Configuration/CLDC-1.1 ) AppleWebKit/413 (KHTML, like Gecko) Safari/413";
    string catPage = reqest.HttpRequest(url);

    // Random pause of 6-29 seconds after every listing request.
    Thread.Sleep(new Random().Next(6, 30) * 1000);

    if (catPage.Contains("ResponseUri:http://www.tmall.com/") && item.UpdateTime.AddDays(15) < DateTime.Now)
    {
        new SiteClassBll().delClass(item);
        return;
    }

    var crumbsList = RegGroupCollection(catPage, "<li data-tag=\"cat\">(?<x>.*?)</li>");
    if (crumbsList == null)
    {
        return;
    }

    SiteClassInfo catinfo = new SiteClassInfo();
    string paraCatId = "";
    string paraName = "";
    for (int i = 0; i < crumbsList.Count; i++)
    {
        // On entry paraCatId/paraName describe breadcrumb i-1, i.e. the parent of
        // breadcrumb i (both are empty for the first breadcrumb).
        catinfo.ParentName = paraName;
        catinfo.ParentClass = paraCatId;
        catinfo.ParentUrl = paraCatId == "" ? "" : listUrlPrefix + paraCatId;

        // Add the parent category when it is not known yet.
        if (paraCatId != "" && !HasBindClasslist.Exists(c => c.ClassId == paraCatId))
        {
            string grandCatId = "";
            string grandUrl = "";
            string grandName = "";
            if (i > 1)
            {
                // Breadcrumb i-2 is the parent of the category being created. Its id is
                // read from its own link; the previous version parsed the id out of the
                // URL of breadcrumb i-1, which made the new category its own parent.
                string grandInfo = crumbsList[i - 2].Groups["x"].Value;
                string grandHref = RegGroupsX<string>(grandInfo, "href=\"(?<x>.*?)\"");
                grandCatId = RegGroupsX<string>(grandHref, "cat=(?<x>\\d+)");
                grandUrl = listUrlPrefix + grandCatId;
                grandName = RegGroupsX<string>(grandInfo, "title=\"(?<x>.*?)\"");
            }

            SiteClassInfo catPareInfo = new SiteClassInfo
            {
                ClassName = paraName,
                ClassId = paraCatId,
                SiteId = Baseinfo.SiteId,
                CreateDate = DateTime.Now,
                UpdateTime = DateTime.Now,
                IsHide = false,
                ParentUrl = grandUrl,
                ParentName = grandName,
                ClassCrumble = (grandCatId + ",").TrimEnd(','),
                ParentClass = grandCatId,
                Urlinfo = listUrlPrefix + paraCatId,
                IsDel = false,
                HasChild = true,
                IsBind = false
            };
            HasBindClasslist.Add(catPareInfo);
            shopClasslist.Add(catPareInfo);
            // Sibling categories of the parent.
            GetAllBrotherCats(catPareInfo);
            // Child categories.
            GetChildCats(catPareInfo, catPage);
        }

        // Read breadcrumb i; its values become the "parent" of the next iteration.
        string paraInfo = crumbsList[i].Groups["x"].Value;
        string paraUrl = RegGroupsX<string>(paraInfo, "href=\"(?<x>.*?)\"");
        paraCatId = RegGroupsX<string>(paraUrl, "cat=(?<x>\\d+)");
        paraName = RegGroupsX<string>(paraInfo, "title=\"(?<x>.*?)\"");

        catinfo.ClassName = paraName;
        catinfo.ClassId = paraCatId;
        catinfo.SiteId = Baseinfo.SiteId;
        catinfo.CreateDate = DateTime.Now;
        catinfo.UpdateTime = DateTime.Now;
        catinfo.Urlinfo = listUrlPrefix + paraCatId;
        catinfo.TotalProduct = RegGroupsX<int>(catPage, "共<span> (?<x>\\d+)</span>件相关商品");
        catinfo.IsHide = false;
        catinfo.IsBind = false;
        catinfo.IsDel = false;
        // Accumulates the ids of all breadcrumbs seen so far, comma separated.
        catinfo.ClassCrumble += paraCatId + ",";
        GetAllBrotherCats(catinfo);
    }

    if (string.IsNullOrEmpty(catinfo.ClassId))
    {
        return;
    }

    if (regIsMatch(catPage, "<div class=\"cateAttrs\" data-spm=\".*?\">(?<x>.*?)<div class=\"propAttrs\""))
    {
        GetChildCats(catinfo, catPage);
        catinfo.HasChild = true;
    }
    else
    {
        catinfo.HasChild = false;
    }

    catinfo.ClassCrumble = catinfo.ClassCrumble.TrimEnd(',');
    var oldCatInfo = HasBindClasslist.Find(c => c.ClassId == catinfo.ClassId);
    if (oldCatInfo == null)
    {
        // NOTE(review): this overrides the HasChild value computed just above; kept as is.
        catinfo.HasChild = true;
        HasBindClasslist.Add(catinfo);
        shopClasslist.Add(catinfo);
    }
    else
    {
        oldCatInfo.Urlinfo = catinfo.Urlinfo;
        oldCatInfo.ClassId = catinfo.ClassId;
        oldCatInfo.ClassName = catinfo.ClassName;
        oldCatInfo.TotalProduct = catinfo.TotalProduct;
        oldCatInfo.ParentUrl = catinfo.ParentUrl;
        oldCatInfo.ParentClass = catinfo.ParentClass;
        oldCatInfo.UpdateTime = DateTime.Now;
        new SiteClassBll().UpdateSiteCat(oldCatInfo);
    }

    if (shopClasslist.Count > 0)
    {
        new SiteClassInfoDB().AddSiteClass(shopClasslist);
        shopClasslist.Clear();
    }
}
/// <summary>
/// Updates a category: reads its product count from the listing page, then queries the
/// asynchronous search endpoint (up to three attempts) for the category object, adds
/// any unknown sibling and child categories, and saves the refreshed category.
/// </summary>
/// <param name="catinfo">Category to refresh; it is modified in place and saved.</param>
private void UpdateCat(SiteClassInfo catinfo)
{
    string listingHtml = HtmlAnalysis.Gethtmlcode(catinfo.Urlinfo);
    catinfo.TotalProduct = RegGroupsX<int>(listingHtml, "共(?<x>\\d+)商品|共 <em id=\"searchTotalNumber\">(?<x>\\d+)</em> 个商品");
    if (catinfo.TotalProduct == 0)
    {
        return;
    }

    var knownParent = HasBindClasslist.FirstOrDefault(c => c.ClassId == catinfo.ParentClass);
    if (knownParent != null)
    {
        catinfo.ParentName = knownParent.ClassName;
    }

    string asyncUrl = string.Format(AsynSearchMoth, catinfo.ClassId);
    HtmlAnalysis client = new HtmlAnalysis
    {
        RequestMethod = "POST",
        RequestUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0",
        RequestAccept = "application/json, text/javascript, */*; q=0.01",
        RequestContentType = "application/x-www-form-urlencoded; charset=UTF-8"
    };

    // Up to three attempts; every failed attempt waits five seconds and is logged.
    string catTxt = "";
    bool succeeded = false;
    for (int attempt = 0; attempt < 3 && !succeeded; attempt++)
    {
        catTxt = client.HttpRequest(asyncUrl);
        if (catTxt.Contains("success"))
        {
            succeeded = true;
        }
        else
        {
            Thread.Sleep(5000);
            LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误\r\n" + catTxt, "AddClassError");
        }
    }
    if (!succeeded)
    {
        return;
    }

    // Walk response -> pageJson -> content -> catObj, stopping at the first missing level.
    JToken catObj = JObject.Parse(catTxt);
    foreach (string key in new[] { "response", "pageJson", "content", "catObj" })
    {
        if (catObj == null)
        {
            break;
        }
        catObj = catObj[key];
    }
    if (catObj == null)
    {
        LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误\r\n" + catTxt, "AddClassError");
        return;
    }

    // Siblings share this category's parent data.
    if (catObj["brothers"] != null)
    {
        SiteClassInfo[] newSiblings = catObj["brothers"]
            .Where(sibling => !HasBindClasslist.Exists(c => c.ClassId == (string)sibling["catId"]))
            .Select(sibling => new SiteClassInfo
            {
                ClassId = (string)sibling["catId"],
                ParentClass = catinfo.ParentClass ?? "",
                ClassName = (string)sibling["catName"],
                HasChild = true,
                ParentName = catinfo.ParentName,
                IsDel = false,
                IsHide = false,
                ParentUrl = catinfo.ParentUrl,
                SiteId = catinfo.SiteId,
                Urlinfo = string.Format("http://list.gome.com.cn/{0}.html", (string)sibling["catId"]),
                CreateDate = DateTime.Now,
                UpdateTime = DateTime.Now,
                IsBind = false
            })
            .ToArray();
        if (newSiblings.Length > 0)
        {
            shopClasslist.AddRange(newSiblings);
            HasBindClasslist.AddRange(newSiblings);
        }
    }

    // Children hang below this category.
    if (catObj["children"] != null)
    {
        SiteClassInfo[] newChildren = catObj["children"]
            .Where(child => !HasBindClasslist.Exists(c => c.ClassId == (string)child["catId"]))
            .Select(child => new SiteClassInfo
            {
                ClassId = (string)child["catId"],
                ClassCrumble = "",
                BindClassId = 0,
                ParentClass = catinfo.ClassId,
                ClassName = (string)child["catName"],
                HasChild = true,
                ParentName = catinfo.ClassName,
                IsDel = false,
                IsHide = false,
                ParentUrl = catinfo.Urlinfo,
                SiteId = catinfo.SiteId,
                Urlinfo = string.Format("http://list.gome.com.cn/{0}.html", (string)child["catId"]),
                CreateDate = DateTime.Now,
                UpdateTime = DateTime.Now,
                IsBind = false
            })
            .ToArray();
        if (newChildren.Length > 0)
        {
            shopClasslist.AddRange(newChildren);
            HasBindClasslist.AddRange(newChildren);
        }
    }

    // HasChild is computed against the id held before it is replaced from the response.
    catinfo.HasChild = HasBindClasslist.Any(c => c.ParentClass == catinfo.ClassId);
    catinfo.IsDel = false;
    catinfo.UpdateTime = DateTime.Now;
    catinfo.ClassName = catObj["catName"].ToString();
    catinfo.ClassId = catObj["catId"].ToString();
    catinfo.ParentClass = catObj["parentId"].ToString();
    catinfo.Urlinfo = string.Format("http://list.gome.com.cn/{0}.html", catinfo.ClassId);
    if (catinfo.ParentClass == "homeStoreRootCategory")
    {
        // This parent id is stored as "no parent".
        catinfo.ParentClass = "";
    }
    new SiteClassBll().UpdateSiteCat(catinfo);

    if (shopClasslist.Any())
    {
        new SiteClassInfoDB().AddSiteClass(shopClasslist);
        shopClasslist.Clear();
    }
}
/// <summary>
/// Updates an Amazon category from its listing page: validates the page, reads the
/// breadcrumb (crawling unknown ancestors through GetAmazonNode), saves the refreshed
/// category and then crawls every child category that is not yet in HasBindClasslist.
/// The original author marked this method as problematic and in need of optimisation.
/// </summary>
/// <param name="html">Request object used to download the listing page.</param>
/// <param name="siteClassInfo">
/// Category to refresh. It is deleted when the page reports no results, and left
/// untouched when the page is a redirect, a captcha page or cannot be parsed.
/// </param>
private void UpdateCat(HtmlAnalysis html, SiteClassInfo siteClassInfo)
{
    const string crumblePattern = "<div class=\"s-first-column\">(?<x>.*?)<span id=\"breadcrumbSearchSeperator\">|<h2 id=\"s-result-count\"(?<x>.*?)</h2>";
    const string nodeIdPattern = "node=(?<x>\\d+)|nodeId=(?<x>\\d+)|bbn=(?<x>\\d+)";

    string url = string.Format(CatListMode, siteClassInfo.ClassId);
    string pageinfo = html.HttpRequest(url);

    // "No results" page (either the message or the empty result-count markup): delete.
    if (pageinfo.Contains("没有找到任何与") || pageinfo.Contains("<h2 id=\"s-result-count\" class=\"a-size-base a-text-normal\"><b> </b>\n </h2></div>\n </div><div class=\"a-column a-span4 a-text-right a-spacing-none a-span-last\"><div class=\"a-row a-spacing-micro a-spacing-top-micro\"><div class=\"s-last-column\">\n </div>\n </div></div></div></div><script type=\"text/javascript\">\n \n function viewCompleteImageLoaded(image, time, resultCount, shouldUseCSMScopes) {\n if (image) {\n image.onload = image.onerror = image.onabort = null;\n }\n\n amzn.sx.utils.jsDepMgr.when('clickToViewLogger', 'viewCompleteImageLoaded',\n function(clickToViewLogger) {\n if (typeof ctvcL == 'undefined') {ctvcL = new clickToViewLogger.ClickToViewCompleteLogger(\"false\");}\n ctvcL.iL(image, time, resultCount, shouldUseCSMScopes);\n }\n );\n };\n</script>\n<div class=\"img_header hdr noborder\" id=\"bottomBar\">\n <div id=\"pagn\" class=\"pagnHy\" >\n <br clear=\"all\" />"))
    {
        new SiteClassBll().delClass(siteClassInfo);
        return;
    }

    // Redirect-to-home marker or captcha page: skip without changing anything.
    // (The "no results" text needs no second check here; it already returned above.)
    if (pageinfo.Contains("ResponseUri:http://www.amazon.cn/") || pageinfo.Contains("请输入您在下方看到的字符"))
    {
        return;
    }

    string crumble = RegGroupsX<string>(pageinfo, crumblePattern);
    if (crumble == null)
    {
        // Retry once with the second result page.
        url = url.Replace("page=1", "page=2");
        pageinfo = HtmlAnalysis.Gethtmlcode(url);
        crumble = RegGroupsX<string>(pageinfo, crumblePattern);
        if (crumble == null)
        {
            LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误1\turl:" + url, "AddClassError");
            return;
        }
    }

    string currentName = RegGroupsX<string>(crumble, "<span class=\"a-color-state a-text-bold\">(?<x>.*?)</span>|<strong>(?<x>.*?)$");
    if (!ValidCatName(currentName))
    {
        LogServer.WriteLog(Baseinfo.SiteName + "分类抓名称匹配错误\turl:" + url, "AddClassError");
        return;
    }

    var crumlist = RegGroupCollection(crumble, "<a class=\"a-link-normal a-color-base a-text-bold a-text-normal\" href=\"(?<x>.*?)\">(?<y>.*?)</a>|<a href=\"(?<x>.*?)\">(?<y>.*?)</a>");

    // After the loop these hold the values of the last breadcrumb link, which is stored
    // as the parent of the current category.
    string pcatUrl = "";
    string pcatName = "";
    string pcatId = "";
    string classCrumble = "";
    if (crumlist != null)
    {
        foreach (Match match in crumlist)
        {
            pcatUrl = match.Groups["x"].Value;
            if (string.IsNullOrEmpty(pcatUrl))
            {
                continue;
            }
            if (!pcatUrl.Contains("http"))
            {
                pcatUrl = "http://www.amazon.cn" + pcatUrl;
            }

            var pcatList = RegGroupCollection(pcatUrl, "%3A(?<x>\\d+)");
            if (pcatList != null)
            {
                // Every node id in the link is crawled unless it is already known.
                foreach (Match item in pcatList)
                {
                    pcatId = item.Groups["x"].Value;
                    if (!string.IsNullOrEmpty(pcatId) && !HasBindClasslist.Exists(p => p.ClassId == pcatId))
                    {
                        GetAmazonNode(string.Format(CatListMode, pcatId));
                    }
                }
            }
            else
            {
                pcatId = RegGroupsX<string>(pcatUrl, "3A(?<x>\\d+)\\&bbn|/s/ref=sr_hi_\\d+\\?rh=n%3A(?<x>\\d+)|rh=n%3A(?<x>\\d+)\\&");
                if (pcatId == null)
                {
                    pcatId = RegGroupsX<string>(pcatUrl, nodeIdPattern);
                }
            }

            if (!string.IsNullOrEmpty(pcatId))
            {
                classCrumble += pcatId + ",";
            }
            pcatName = WordCenter.FilterHtml(match.Groups["y"].Value);
        }
    }

    SiteClassInfo catInfo = new SiteClassInfo();
    catInfo.ClassName = currentName;
    catInfo.ParentName = pcatName;
    catInfo.ParentClass = pcatId;
    catInfo.ParentUrl = pcatUrl;
    catInfo.ClassCrumble = classCrumble;

    string count = RegGroupsX<string>(pageinfo, "<span>显示(?<x>.*?)个结果|<span>显示所有(?<x>.*?)个结果</span>|共(?<x>.*?)条|<div id=\"resultCount\" class=\"toTheEdge searchListHeader\">\n(?<x>.*?) 条结果</div>");
    if (count != null)
    {
        // An unparsable count leaves procount at 0.
        int procount;
        int.TryParse(count.Replace(" ", ""), out procount);
        catInfo.TotalProduct = procount;
    }

    catInfo.Urlinfo = url;
    catInfo.ClassId = RegGroupsX<string>(url, "node=(?<x>\\d+)|nodeId=(?<x>\\d+)|bbn=(?<x>\\d+)|rh=n%3A(?<x>\\d+)%2Cp_\\d|ref=lp_(?<x>\\d+)_pg");
    if (!ValidCatId(catInfo.ClassId))
    {
        LogServer.WriteLog("ClassId:" + catInfo.ClassId + "验证失败\turl:" + url, "AddClassError");
        return;
    }
    if (siteClassInfo.ClassId != catInfo.ClassId)
    {
        LogServer.WriteLog(Baseinfo.SiteName + "抓取分类id不一致 old:" + siteClassInfo.ClassId + "new:" + catInfo.ClassId);
        return;
    }

    // Update the current category.
    siteClassInfo.Urlinfo = catInfo.Urlinfo;
    siteClassInfo.ClassId = catInfo.ClassId;
    siteClassInfo.ClassName = catInfo.ClassName;
    siteClassInfo.TotalProduct = catInfo.TotalProduct;
    siteClassInfo.ParentUrl = catInfo.ParentUrl;
    siteClassInfo.ParentClass = catInfo.ParentClass;
    siteClassInfo.UpdateTime = DateTime.Now;
    new SiteClassBll().UpdateSiteCat(siteClassInfo);

    // Locate the department list; two page layouts are supported.
    string classList;
    if (pageinfo.Contains("data-typeid=\"n\""))
    {
        classList = RegGroupsX<string>(pageinfo, "<ul id=\"ref_\\d+\" data-typeid=\"n\"(?<x>.*?)</ul>");
    }
    else
    {
        classList = RegGroupsX<string>(pageinfo, "<div class=\"sbDepartmentLabel\">(?<x>.*?)<form id=\"bottomSearchForm\"|<div class=\"categoryRefinementsSection\">(?<x>.*?)<div class=\"shoppingEngineSectionHeaders\">");
    }
    if (classList == null)
    {
        LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误1\turl:" + url, "AddClassError");
        return;
    }

    var classInfo = RegGroupCollection(classList, "<span class=\"deptLevel\\d+( deptCurrent)?\">(?<x>.*?)</span>|<li.*?>(?<x>.*?)</li>");
    if (classInfo == null || classInfo.Count == 0)
    {
        return;
    }

    for (int i = 0; i < classInfo.Count; i++)
    {
        var item = classInfo[i].ToString();
        string tempUrl = RegGroupsX<string>(item, "href=\"(?<x>.*?)\"");
        var pcatList = RegGroupCollection(tempUrl, "n%3A(?<x>\\d{7,16})");
        if (pcatList != null)
        {
            foreach (Match obj in pcatList)
            {
                var tempcatId = obj.Groups["x"].Value;
                // Tests the child's own id. The previous version tested pcatId (the
                // breadcrumb parent id) here, so a category without breadcrumb links
                // never had its children crawled.
                if (!string.IsNullOrEmpty(tempcatId) && !HasBindClasslist.Exists(p => p.ClassId == tempcatId))
                {
                    GetAmazonNode(string.Format(CatListMode, tempcatId));
                }
            }
        }
        else
        {
            string tempid = "";
            if (tempUrl != null)
            {
                // Decode the HTML-escaped ampersands of the href so that the "&bbn"
                // pattern below can match (the previous call replaced "&" with "&").
                tempUrl = "http://www.amazon.cn" + tempUrl.Replace("&amp;", "&");
                tempid = RegGroupsX<string>(tempUrl, "3A(?<x>\\d{7,16})\\&bbn");
                if (tempid == null)
                {
                    tempid = RegGroupsX<string>(item, nodeIdPattern);
                }
            }
            if (!string.IsNullOrEmpty(tempid) && !HasBindClasslist.Exists(p => p.ClassId == tempid))
            {
                GetAmazonNode(tempUrl);
            }
        }
    }

    if (shopClasslist.Count > 0)
    {
        new SiteClassInfoDB().AddSiteClass(shopClasslist);
        shopClasslist.Clear();
    }
}
/// <summary>
/// Loads every stored category of the current site, ordered by UpdateTime ascending,
/// into HasBindClasslist and runs UpdateCat on each entry with one shared HtmlAnalysis
/// instance. An exception from one category is logged and does not stop the run.
/// </summary>
public void UpdateSiteCat()
{
    var storedCategories = new SiteClassInfoDB().getAllSiteCatInfo(Baseinfo.SiteId);
    HasBindClasslist = storedCategories.OrderBy(p => p.UpdateTime).ToList();

    HtmlAnalysis html = new HtmlAnalysis();

    // Index-based on purpose: Count is re-read on every pass.
    // NOTE(review): presumably the list can grow while UpdateCat runs — confirm.
    int index = 0;
    while (index < HasBindClasslist.Count)
    {
        try
        {
            UpdateCat(html, HasBindClasslist[index]);
        }
        catch (Exception ex)
        {
            LogServer.WriteLog(ex);
        }
        index++;
    }
}
/// <summary>
/// Sends <paramref name="contents"/> to the open.t.qq.com "t/add" endpoint. The method
/// also issues requests to the "friends/fanslist" endpoint; all responses are discarded.
/// </summary>
/// <param name="contents">
/// Text placed after "content=" in the parameter list. It is inserted verbatim.
/// NOTE(review): no URL-encoding is applied here — confirm whether callers pre-encode.
/// </param>
public void write(string contents)
{
    string paramlist = "content=" + contents + Baseparam;
    string url = "https://open.t.qq.com/api/t/add?" + paramlist;
    const string url1 = "http://open.t.qq.com/api/friends/fanslist?format=json&reqnum=20&startindex=0&mode=0&install=0&sex=0&oauth_consumer_key=" + ClientId + "&access_token=" + AccessToken + "&openid=" + Openid + "&clientip=122.245.206.107&oauth_version=2.a";

    // The previous version initialised RequestMethod to "POST" and overwrote it with
    // "GET" on the next statement; only the effective value is set now.
    HtmlAnalysis analysis = new HtmlAnalysis { RequestMethod = "GET" };

    // NOTE(review): the request sequence is unchanged from the previous version; the
    // fanslist calls and the GET to the add endpoint look like debugging leftovers.
    analysis.HttpRequest(url1);
    analysis.HttpRequest(url);
    HtmlAnalysis.HttpRequestFromPost(url, paramlist, "utf-8");
    HtmlAnalysis.HttpRequestFromPost(url1, paramlist, "utf-8");
}