/// <summary>
/// Advanced crawl: posts an empty body to <c>initurl</c> with a fixed header string
/// and writes the returned text to the page response.
/// </summary>
public void AdvCrawler()
{
    // NOTE(review): the header fields in this literal are separated by single spaces and most
    // carry a leading "//" that is part of the string itself; the whole set also appears twice.
    // Confirm this is what httpPost expects.
    const string requestHeaders = @"Accept:text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8 //Accept-Encoding:gzip, deflate //Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 //Cache-Control:max-age=0 //Connection:keep-alive //Cookie:cy=1; _lxsdk_cuid=15ffc822338c…3fb990e3e-b37-f9f-cd5%7C%7C20 //Host:www.dianping.com //Upgrade-Insecure-Requests:1 //Accept:text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8 //Accept-Encoding:gzip, deflate //Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 //Cache-Control:max-age=0 //Connection:keep-alive //Cookie:cy=1; _lxsdk_cuid=15ffc822338c…3fb990e3e-b37-f9f-cd5%7C%7C20 //Host:www.dianping.com //Upgrade-Insecure-Requests:1 //User-Agent:Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/60.0";

    // The request body is intentionally empty.
    string requestBody = "";

    ClassHttpRequestClient client = new ClassHttpRequestClient(true);
    string page = client.httpPost(initurl, requestHeaders, requestBody, Encoding.UTF8);

    // Echo the fetched text straight back to the caller.
    Response.Write(page);
}
/// <summary>
/// Advanced crawl: posts an empty body to <c>initurl</c> with a fixed header string
/// and writes the returned text to the page response.
/// </summary>
public void AdvCrawler()
{
    // NOTE(review): the fields in this literal are separated by single spaces, and several of
    // them (content-encoding, date, server, status, ...) look like response headers rather than
    // request headers. Confirm this is what httpPost expects.
    const string requestHeaders = @"at_bucketid: sbucket_7 at_cat: 2 at_mall_pro_re: 1635 at_rn: 0397de31c2ae1cbafdf3f6ded7be1fe0 at_type: search at_vmarket: 0 content-encoding: gzip content-language: zh-CN content-type: text/html;charset=GBK date: Wed, 10 Jun 2020 01:28:36 GMT eagleeye-traceid: 0b5218bb15917525159411294e7f2d s_group: tao-session s_ip: 4547514b653151352b3059752f6a633155746b3d s_read_unit: [CN:CENTER] s_status: STATUS_NOT_EXISTED s_tag: 283674001342464|4294967296^1|^^ s_tid: 0b5218bb15917525159411294e7f2d s_ucode: CN:CENTER s_v: 4.0.2.6 server: Tengine/Aserver status: 200 strict-transport-security: max-age=31536000 timing-allow-origin: * ufe-result: A6 vary: Accept-Encoding";

    // The request body is intentionally empty.
    string requestBody = "";

    ClassHttpRequestClient client = new ClassHttpRequestClient(true);
    string page = client.httpPost(initurl, requestHeaders, requestBody, Encoding.UTF8);

    // Echo the fetched text straight back to the caller.
    Response.Write(page);
}
/// <summary>
/// Fetches a search page, parses it with HtmlAgilityPack and writes, for every
/// <c>div class="txt"</c> element, the text of its title link and of the first two
/// links in its comment block to the page response.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // NOTE(review): the header fields are joined with single spaces as found, the Host value
    // differs from the host in the URL below, and the Referer value is truncated ("…").
    // Confirm these against what httpPost expects.
    string heads = @"Accept: application/json, text/javascript, */* q=0.01 " + @"Accept-Encoding: gzip, deflate " + @"Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 " + @"Connection: keep-alive " + @"Cookie: s_ViewType=10; _lxsdk_cuid=1729cd29d3dc8-04d80d1c3b31398-4c302c7d-144000-1729cd29d3ec8; _lxsdk=1729cd29d3dc8-04d80d1c3b31398-4c302c7d-144000-1729cd29d3ec8; _hc.v=6c48a318-c117-5df7-478a-f0f694f1570e.1591768948; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1591768950,1591788446; _lxsdk_s=1729dfc18eb-4f6-3ef-94c%7C%7C19; Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1591788446 " + @"Host: catdot.dianping.com " + @"Referer: http:/www.dianping.com/search…/0_%E8%8B%B1%AF%AD%E5%9F%B9%AE " + @"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0";

    // The keyword segment was missing bytes ("%AF%AD", "%AE" are not valid UTF-8 on their own);
    // restored to the complete percent-encoded UTF-8 sequence.
    string url = @"http://www.dianping.com/search/keyword/1/0_%E8%8B%B1%E8%AF%AD%E5%9F%B9%E8%AE%AD";

    ClassHttpRequestClient s = new ClassHttpRequestClient(true);
    string content = "";
    string response = s.httpPost(url, heads, content, Encoding.UTF8);

    // Nothing to parse: leave the page output empty instead of failing inside the parser.
    if (string.IsNullOrEmpty(response))
    {
        return;
    }

    // The document must be loaded with the fetched markup before it can be queried;
    // querying a freshly constructed, empty document never matches anything.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(response);

    // SelectNodes returns null (not an empty collection) when no node matches.
    HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
    if (collection == null)
    {
        return;
    }

    StringBuilder sb = new StringBuilder();
    foreach (HtmlAgilityPack.HtmlNode item in collection)
    {
        // The title block is the "tit" child of the "txt" container.
        HtmlAgilityPack.HtmlNode divtit = item.SelectSingleNode("div[@class=\"tit\"]");
        HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
        if (divtit == null || divcomment == null)
        {
            continue;
        }

        HtmlAgilityPack.HtmlNode aname = divtit.SelectSingleNode("a[1]");       // first link under the title block
        HtmlAgilityPack.HtmlNode anum = divcomment.SelectSingleNode("a[1]");    // first link under the comment block
        HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]");  // second link under the comment block
        if (aname == null || anum == null || aprice == null)
        {
            // Skip entries that do not have the expected three links.
            continue;
        }

        sb.AppendFormat("{0}—{1}—{2}", aname.InnerText, anum.InnerText, aprice.InnerText);
    }

    Response.Write(sb);
}
/// <summary>
/// Fetches a search page, parses it with HtmlAgilityPack and writes, for every
/// <c>div class="txt"</c> element, one line containing the text of its title link and
/// of the first two links in its comment block to the page response.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // NOTE(review): the header fields in this literal are separated by single spaces as found;
    // confirm this matches what httpPost expects.
    string heads = @"Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 Accept-Encoding:gzip, deflate Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 Cache-Control:max-age=0 Connection:keep-alive Cookie:showNav=#nav-tab|0|0; navCtgScroll=0; cy=1; cye=shanghai; _lxsdk_cuid=1693813fdecc8-062bf66f365a768-11666e4a-384000-1693813fdecc8; _lxsdk_s=1693813fded-ea2-2e7-d89%7C%7C51; _lxsdk=1693813fdecc8-062bf66f365a768-11666e4a-384000-1693813fdecc8; _hc.v=7216e9e3-be12-eff4-1836-49d9b0c4b0ce.1551424029; s_ViewType=10 Host:www.dianping.com Upgrade-Insecure-Requests:1 User-Agent:Mozilla/5.0 (Windows NT 10.0; WOW64; rv:65.0) Gecko/20100101 Firefox/65.0";
    string url = "http://www.dianping.com/search/keyword/1/0_%E8%8B%B1%E8%AF%AD%E5%9F%B9%E8%AE%AD/r842";

    ClassHttpRequestClient s = new ClassHttpRequestClient(true);
    string content = "";
    string response = s.httpPost(url, heads, content, Encoding.UTF8);

    // Nothing to parse: leave the page output empty instead of failing inside the parser.
    if (string.IsNullOrEmpty(response))
    {
        return;
    }

    // Step 1: create the HtmlAgilityPack document. Step 2: load the fetched markup into it.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(response);

    // SelectNodes returns null (not an empty collection) when no node matches.
    HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
    if (collection == null)
    {
        return;
    }

    StringBuilder sb = new StringBuilder();
    foreach (HtmlAgilityPack.HtmlNode item in collection)
    {
        HtmlAgilityPack.HtmlNode divtit = item.SelectSingleNode("div[@class=\"tit\"]");
        HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
        if (divtit == null || divcomment == null)
        {
            continue;
        }

        HtmlAgilityPack.HtmlNode aname = divtit.SelectSingleNode("a[1]");       // first link under the title block
        HtmlAgilityPack.HtmlNode anum = divcomment.SelectSingleNode("a[1]");    // first link under the comment block
        HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]");  // second link under the comment block
        if (aname == null || anum == null || aprice == null)
        {
            // Skip entries that do not have the expected three links.
            continue;
        }

        sb.AppendFormat("{0}---{1}---{2}</br>", aname.InnerText, anum.InnerText, aprice.InnerText);
    }

    Response.Write(sb);
}
/// <summary>
/// Fetches a listing page, parses it with HtmlAgilityPack and writes, for every
/// <c>div class="txt"</c> element, one line containing the text of its title link and
/// of the first two links in its comment block to the page response.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // NOTE(review): the header fields in this literal are separated by single spaces as found,
    // several values are truncated ("…"), and the Accept..Upgrade-Insecure-Requests group
    // appears twice. Confirm this matches what httpPost expects.
    string heads = @"Accept:text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8 Accept-Encoding:gzip, deflate Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 Cache-Control:max-age=0 Connection:keep-alive Cookie:cy=1; _lxsdk_cuid=15ffc822338c…3fb990e3e-b37-f9f-cd5%7C%7C20 Host:www.dianping.com Upgrade-Insecure-Requests:1 Accept:text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8 Accept-Encoding:gzip, deflate Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 Cache-Control:max-age=0 Connection:keep-alive Cookie:cy=1; _lxsdk_cuid=15ffc822338c…3fb990e3e-b37-f9f-cd5%7C%7C20 Host:www.dianping.com Upgrade-Insecure-Requests:1 User-Agent:Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/60.0";
    string url = "http://www.dianping.com/shanghai/ch75/g3032";

    ClassHttpRequestClient s = new ClassHttpRequestClient(true);
    string content = "";
    string response = s.httpPost(url, heads, content, Encoding.UTF8);

    // Nothing to parse: leave the page output empty instead of failing inside the parser.
    if (string.IsNullOrEmpty(response))
    {
        return;
    }

    // Step 1: create the HtmlAgilityPack document. Step 2: load the fetched markup into it.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(response);

    // SelectNodes returns null (not an empty collection) when no node matches.
    HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
    if (collection == null)
    {
        return;
    }

    StringBuilder sb = new StringBuilder();
    foreach (HtmlAgilityPack.HtmlNode item in collection)
    {
        HtmlAgilityPack.HtmlNode divtit = item.SelectSingleNode("div[@class=\"tit\"]");
        HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
        if (divtit == null || divcomment == null)
        {
            continue;
        }

        HtmlAgilityPack.HtmlNode aname = divtit.SelectSingleNode("a[1]");       // first link under the title block
        HtmlAgilityPack.HtmlNode anum = divcomment.SelectSingleNode("a[1]");    // first link under the comment block
        HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]");  // second link under the comment block
        if (aname == null || anum == null || aprice == null)
        {
            // Skip entries that do not have the expected three links.
            continue;
        }

        sb.AppendFormat("{0}---{1}---{2}</br>", aname.InnerText, anum.InnerText, aprice.InnerText);
    }

    Response.Write(sb);
}