Пример #1
0
        /// <summary>
        /// Advanced crawl: passes the canned header block below, together with an empty
        /// request body, to <c>ClassHttpRequestClient.httpPost</c> for the address held in
        /// <c>initurl</c>, then writes the returned text straight into the page response.
        /// </summary>
        public void AdvCrawler()
        {
            ClassHttpRequestClient client = new ClassHttpRequestClient(true);

            // Header block handed to httpPost unchanged.
            // NOTE(review): every line after the first starts with "//" inside the verbatim
            // literal, so those characters are sent as text rather than being C# comments.
            // Confirm that httpPost tolerates (or deliberately skips) such lines.
            string requestHeaders = @"Accept:text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8
//Accept-Encoding:gzip, deflate
//Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
//Cache-Control:max-age=0
//Connection:keep-alive
//Cookie:cy=1; _lxsdk_cuid=15ffc822338c…3fb990e3e-b37-f9f-cd5%7C%7C20
//Host:www.dianping.com
//Upgrade-Insecure-Requests:1
//Accept:text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8
//Accept-Encoding:gzip, deflate
//Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
//Cache-Control:max-age=0
//Connection:keep-alive
//Cookie:cy=1; _lxsdk_cuid=15ffc822338c…3fb990e3e-b37-f9f-cd5%7C%7C20
//Host:www.dianping.com
//Upgrade-Insecure-Requests:1
//User-Agent:Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/60.0";

            // The request body is intentionally empty.
            string reply = client.httpPost(initurl, requestHeaders, string.Empty, Encoding.UTF8);

            Response.Write(reply);
        }
Пример #2
0
        /// <summary>
        /// Advanced crawl: passes the canned header block below, together with an empty
        /// request body, to <c>ClassHttpRequestClient.httpPost</c> for the address held in
        /// <c>initurl</c>, then writes the returned text straight into the page response.
        /// </summary>
        public void AdvCrawler()
        {
            ClassHttpRequestClient client = new ClassHttpRequestClient(true);

            // Header block handed to httpPost unchanged.
            // NOTE(review): entries such as "status: 200", "server:", "date:" and
            // "content-encoding:" look like captured *response* headers rather than
            // request headers. Confirm this is what the target expects.
            string requestHeaders = @"at_bucketid: sbucket_7
at_cat: 2
at_mall_pro_re: 1635
at_rn: 0397de31c2ae1cbafdf3f6ded7be1fe0
at_type: search
at_vmarket: 0
content-encoding: gzip
content-language: zh-CN
content-type: text/html;charset=GBK
date: Wed, 10 Jun 2020 01:28:36 GMT
eagleeye-traceid: 0b5218bb15917525159411294e7f2d
s_group: tao-session
s_ip: 4547514b653151352b3059752f6a633155746b3d
s_read_unit: [CN:CENTER]
s_status: STATUS_NOT_EXISTED
s_tag: 283674001342464|4294967296^1|^^
s_tid: 0b5218bb15917525159411294e7f2d
s_ucode: CN:CENTER
s_v: 4.0.2.6
server: Tengine/Aserver
status: 200
strict-transport-security: max-age=31536000
timing-allow-origin: *
ufe-result: A6
vary: Accept-Encoding";

            // The request body is intentionally empty.
            string reply = client.httpPost(initurl, requestHeaders, string.Empty, Encoding.UTF8);

            Response.Write(reply);
        }
Пример #3
0
        /// <summary>
        /// Page load handler: requests a Dianping keyword-search page through
        /// <c>ClassHttpRequestClient.httpPost</c>, parses the returned markup with
        /// HtmlAgilityPack and writes one "title—first comment link—second comment link"
        /// entry per <c>div.txt</c> listing into the page response.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // NOTE(review): the header entries are joined with spaces instead of line breaks,
            // "*/* q=0.01" lacks its ';', the Referer starts with "http:/" and is truncated,
            // and Host names catdot.dianping.com while the request goes to www.dianping.com.
            // The literal is left untouched because httpPost's parsing rules are not visible
            // here - confirm and correct against that helper.
            string heads = @"Accept: application/json, text/javascript, */* q=0.01 " +
                           @"Accept-Encoding: gzip, deflate " +
                           @"Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 " +
                           @"Connection: keep-alive " +
                           @"Cookie: s_ViewType=10; _lxsdk_cuid=1729cd29d3dc8-04d80d1c3b31398-4c302c7d-144000-1729cd29d3ec8; _lxsdk=1729cd29d3dc8-04d80d1c3b31398-4c302c7d-144000-1729cd29d3ec8; _hc.v=6c48a318-c117-5df7-478a-f0f694f1570e.1591768948; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1591768950,1591788446; _lxsdk_s=1729dfc18eb-4f6-3ef-94c%7C%7C19; Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1591788446 " +
                           @"Host: catdot.dianping.com " +
                           @"Referer: http:/www.dianping.com/search…/0_%E8%8B%B1%AF%AD%E5%9F%B9%AE " +
                           @"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0";

            // The keyword segment is the UTF-8 percent-encoding of the search term. The
            // previous value had lost several bytes (%E8 lead bytes and a trailing %AD),
            // which made it an invalid UTF-8 sequence.
            string url = @"http://www.dianping.com/search/keyword/1/0_%E8%8B%B1%E8%AF%AD%E5%9F%B9%E8%AE%AD";
            ClassHttpRequestClient s = new ClassHttpRequestClient(true);
            string content           = "";
            string response          = s.httpPost(url, heads, content, Encoding.UTF8);

            // Nothing to parse: LoadHtml rejects null and an empty page has no listings.
            if (string.IsNullOrEmpty(response))
            {
                return;
            }

            // The document must be loaded with the fetched markup before it is queried;
            // without this step every XPath query runs against an empty document.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(response);

            // SelectNodes returns null (not an empty collection) when nothing matches.
            HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
            if (collection == null)
            {
                return;
            }

            StringBuilder sb = new StringBuilder();

            foreach (HtmlAgilityPack.HtmlNode item in collection)
            {
                // Each item already is the "txt" div; the title lives in its "tit" child.
                HtmlAgilityPack.HtmlNode divtit     = item.SelectSingleNode("div[@class=\"tit\"]");
                HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
                if (divtit == null || divcomment == null)
                {
                    continue; // incomplete listing - skip instead of failing the whole page
                }

                HtmlAgilityPack.HtmlNode aname  = divtit.SelectSingleNode("a[1]");     // first link under the title div
                HtmlAgilityPack.HtmlNode anum   = divcomment.SelectSingleNode("a[1]"); // first link under the comment div
                HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]"); // second link under the comment div
                if (aname == null || anum == null || aprice == null)
                {
                    continue;
                }

                sb.Append(string.Format("{0}—{1}—{2}", aname.InnerText, anum.InnerText, aprice.InnerText));
            }
            Response.Write(sb);
        }
        /// <summary>
        /// Page load handler: requests a Dianping keyword-search page through
        /// <c>ClassHttpRequestClient.httpPost</c>, parses the returned markup with
        /// HtmlAgilityPack and writes one "title---first comment link---second comment link"
        /// line per <c>div.txt</c> listing into the page response.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // NOTE(review): the continuation lines of this verbatim literal carry their
            // source indentation (and one line has trailing blanks) into the header text.
            // Confirm httpPost trims each line.
            string heads             = @"Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
                            Accept-Encoding:gzip, deflate
                            Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
                            Cache-Control:max-age=0
                            Connection:keep-alive
                            Cookie:showNav=#nav-tab|0|0; navCtgScroll=0; cy=1; cye=shanghai; _lxsdk_cuid=1693813fdecc8-062bf66f365a768-11666e4a-384000-1693813fdecc8; _lxsdk_s=1693813fded-ea2-2e7-d89%7C%7C51; _lxsdk=1693813fdecc8-062bf66f365a768-11666e4a-384000-1693813fdecc8; _hc.v=7216e9e3-be12-eff4-1836-49d9b0c4b0ce.1551424029; s_ViewType=10
                            Host:www.dianping.com
                            Upgrade-Insecure-Requests:1                           
                            User-Agent:Mozilla/5.0 (Windows NT 10.0; WOW64; rv:65.0) Gecko/20100101 Firefox/65.0";
            string url               = "http://www.dianping.com/search/keyword/1/0_%E8%8B%B1%E8%AF%AD%E5%9F%B9%E8%AE%AD/r842";
            ClassHttpRequestClient s = new ClassHttpRequestClient(true);
            string content           = "";
            string response          = s.httpPost(url, heads, content, Encoding.UTF8);

            // Nothing to parse: LoadHtml rejects null and an empty page has no listings.
            if (string.IsNullOrEmpty(response))
            {
                return;
            }

            // Step 1: create the HtmlAgilityPack document instance.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            // Step 2: load the fetched markup into it.
            doc.LoadHtml(response);

            // SelectNodes returns null (not an empty collection) when nothing matches,
            // e.g. when the site answers with a verification page instead of results.
            HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
            if (collection == null)
            {
                return;
            }

            StringBuilder sb = new StringBuilder();

            foreach (HtmlAgilityPack.HtmlNode item in collection)
            {
                HtmlAgilityPack.HtmlNode divtit     = item.SelectSingleNode("div[@class=\"tit\"]");
                HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
                if (divtit == null || divcomment == null)
                {
                    continue; // incomplete listing - skip instead of failing the whole page
                }

                HtmlAgilityPack.HtmlNode aname  = divtit.SelectSingleNode("a[1]");     // first link under the title div
                HtmlAgilityPack.HtmlNode anum   = divcomment.SelectSingleNode("a[1]"); // first link under the comment div
                HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]"); // second link under the comment div
                if (aname == null || anum == null || aprice == null)
                {
                    continue;
                }

                sb.Append(string.Format("{0}---{1}---{2}</br>", aname.InnerText, anum.InnerText, aprice.InnerText));
            }
            Response.Write(sb);
        }
Пример #5
0
        /// <summary>
        /// Page load handler: requests a Dianping category page through
        /// <c>ClassHttpRequestClient.httpPost</c>, parses the returned markup with
        /// HtmlAgilityPack and writes one "title---first comment link---second comment link"
        /// line per <c>div.txt</c> listing into the page response.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // NOTE(review): the Accept..Upgrade-Insecure-Requests group appears twice in this
            // literal and several values are truncated with an ellipsis character. Confirm
            // httpPost copes with duplicate header names.
            string heads = @"Accept:text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8
Accept-Encoding:gzip, deflate
Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
Cache-Control:max-age=0
Connection:keep-alive
Cookie:cy=1; _lxsdk_cuid=15ffc822338c…3fb990e3e-b37-f9f-cd5%7C%7C20
Host:www.dianping.com
Upgrade-Insecure-Requests:1
Accept:text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8
Accept-Encoding:gzip, deflate
Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
Cache-Control:max-age=0
Connection:keep-alive
Cookie:cy=1; _lxsdk_cuid=15ffc822338c…3fb990e3e-b37-f9f-cd5%7C%7C20
Host:www.dianping.com
Upgrade-Insecure-Requests:1
User-Agent:Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/60.0";

            string url = "http://www.dianping.com/shanghai/ch75/g3032";
            ClassHttpRequestClient s = new ClassHttpRequestClient(true);
            string content           = "";
            string response          = s.httpPost(url, heads, content, Encoding.UTF8);

            // Nothing to parse: LoadHtml rejects null and an empty page has no listings.
            if (string.IsNullOrEmpty(response))
            {
                return;
            }

            // Step 1: create the HtmlAgilityPack document instance.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            // Step 2: load the fetched markup into it.
            doc.LoadHtml(response);

            // SelectNodes returns null (not an empty collection) when nothing matches,
            // e.g. when the site answers with a verification page instead of results.
            HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
            if (collection == null)
            {
                return;
            }

            StringBuilder sb = new StringBuilder();

            foreach (HtmlAgilityPack.HtmlNode item in collection)
            {
                HtmlAgilityPack.HtmlNode divtit     = item.SelectSingleNode("div[@class=\"tit\"]");
                HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
                if (divtit == null || divcomment == null)
                {
                    continue; // incomplete listing - skip instead of failing the whole page
                }

                HtmlAgilityPack.HtmlNode aname  = divtit.SelectSingleNode("a[1]");     // first link under the title div
                HtmlAgilityPack.HtmlNode anum   = divcomment.SelectSingleNode("a[1]"); // first link under the comment div
                HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]"); // second link under the comment div
                if (aname == null || anum == null || aprice == null)
                {
                    continue;
                }

                sb.Append(string.Format("{0}---{1}---{2}</br>", aname.InnerText, anum.InnerText, aprice.InnerText));
            }
            Response.Write(sb);
        }