Beispiel #1
0
        /// <summary>
        /// Scrapes Sogou Maps POI search results for every city/keyword pair entered in
        /// <c>textBox1</c> (cities) and <c>textBox2</c> (keywords), one entry per line, and
        /// adds one row per POI to <c>dataGridView1</c>
        /// (columns: row index, caption, phones, address, keyword, city).
        /// </summary>
        /// <remarks>
        /// Runs synchronously on the calling thread; <c>Application.DoEvents()</c> is pumped
        /// between requests so the form can process input. Scraping stops as soon as
        /// <c>visualButton2.Text</c> reads "已停止". Any exception aborts the run and its
        /// details are appended to the progress log in <c>textBox3</c>.
        /// </remarks>
        public void sougou()
        {
            // NOTE(review): hard-coded session cookie, presumably captured from a browser session;
            // consider moving it to configuration — confirm how long it stays valid.
            string cookie = "CXID=D44C63D34623066DA36D11B4B82C488C; SUV=1518337255419884; SMAPUVID=1518337255419884; SUV=1801190926013760; IPLOC=CN3213; sct=1; SNUID=D67B2A4373761425DCE0226F733E2FA7; ad=zMGYjlllll2zYIpclllllVr1fv6lllllGq6poyllll9llllljZlll5@@@@@@@@@@; SUID=A10659313565860A5A6291B800040AA1; wP_w=544ebe2c0329~HXcgwyvcH_c_5BDcHXULrZvNyX9XwbmSJXPsNNDN3XNBbbDJbdNhb; activecity=%u5BBF%u8FC1%2C13168867%2C3999623%2C12; ho_co=";
            string url     = "http://map.sogou.com/EngineV6/search/json";
            string charset = "gb2312";

            // Upper bound on the number of result pages requested per city/keyword pair.
            int pages = 200;

            // Loop-invariant values, hoisted out of the nested loops.
            System.Text.Encoding utf8 = System.Text.Encoding.UTF8;
            string dataIdPattern  = @"""dataid"":""([\s\S]*?)""";
            string captionPattern = @"""caption"":""([\s\S]*?)""";
            string phonePattern   = @"""phone"":""([\s\S]*?)""";
            string addressPattern = @"""address"":""([\s\S]*?)""";

            try
            {
                string[] citys    = textBox1.Text.Trim().Split(new string[] { "\r\n" }, StringSplitOptions.None);
                string[] keywords = textBox2.Text.Trim().Split(new string[] { "\r\n" }, StringSplitOptions.None);

                foreach (string city in citys)
                {
                    // Both the city and the keyword have to be URL-encoded twice for the search request.
                    string cityutf8 = System.Web.HttpUtility.UrlEncode(
                        System.Web.HttpUtility.UrlEncode(city, utf8), utf8);

                    foreach (string keyword in keywords)
                    {
                        string keywordutf8 = System.Web.HttpUtility.UrlEncode(
                            System.Web.HttpUtility.UrlEncode(keyword, utf8), utf8);

                        for (int i = 1; i <= pages; i++)
                        {
                            string postData = "what=keyword%3A" + keywordutf8 + "&range=bound%3A00000000.5%2C0000000.5%2C99999999.5%2C9999999.5%3A0&othercityflag=1&appid=1361&thiscity=" + cityutf8 + "&lastcity=" + cityutf8 + "&userdata=3&encrypt=1&pageinfo=" + i + "%2C10&locationsort=0&version=7.0&ad=0&level=12&exact=1&type=&attr=&order=&submittime=0&resultTypes=poi&sort=0&reqid=1526008949358471&cb=parent.IFMS.search";

                            string html = Method.PostUrl(url, postData, cookie, charset);

                            // Every "dataid" value on the result page identifies one POI detail page.
                            MatchCollection poiMatches = Regex.Matches(html, dataIdPattern, RegexOptions.IgnoreCase);

                            // No ids on this page: skip the remaining pages and move on to the next keyword.
                            if (poiMatches.Count == 0)
                            {
                                break;
                            }

                            // Progress line, stamped with the current system time.
                            textBox3.Text += DateTime.Now.ToString() + "-->正在采集" + city + "" + keyword + "第" + i + "页\r\n";

                            Application.DoEvents();
                            Thread.Sleep(100);

                            foreach (Match poiMatch in poiMatches)
                            {
                                // Honour the stop request before adding a row or issuing another
                                // request, so stopping never leaves an empty row behind.
                                if (visualButton2.Text == "已停止")
                                {
                                    return;
                                }

                                string poid    = poiMatch.Groups[1].Value;
                                string strhtml = GetUrl("http://map.sogou.com/poi/1_" + poid + ".htm");

                                MatchCollection titles  = Regex.Matches(strhtml, captionPattern);
                                MatchCollection tell    = Regex.Matches(strhtml, phonePattern);
                                MatchCollection address = Regex.Matches(strhtml, addressPattern);

                                int index = this.dataGridView1.Rows.Add();
                                var row   = this.dataGridView1.Rows[index];

                                // When several captions match, the last one wins; the index column
                                // and the grid selection are only set when a caption was found.
                                if (titles.Count > 0)
                                {
                                    row.Cells[0].Value = index;
                                    row.Cells[1].Value = titles[titles.Count - 1].Groups[1].Value;
                                    this.dataGridView1.CurrentCell = row.Cells[0];
                                }

                                // All phone numbers are collected into one cell, each followed by a comma.
                                foreach (Match match in tell)
                                {
                                    row.Cells[2].Value += match.Groups[1].Value.Trim() + ",";
                                }

                                // When several addresses match, the last one wins.
                                if (address.Count > 0)
                                {
                                    row.Cells[3].Value = address[address.Count - 1].Groups[1].Value;
                                }

                                row.Cells[4].Value = keyword;
                                row.Cells[5].Value = city;

                                Application.DoEvents();
                                Thread.Sleep(500);   // Pause between detail requests; could be made configurable.
                            }
                        }
                    }
                }
            }
            catch (System.Exception ex)
            {
                // Append rather than overwrite, so the progress log leading up to the failure is kept.
                textBox3.Text += ex.ToString() + "\r\n";
            }
        }