コード例 #1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and displays its raw HTML in <c>textBox2</c>.
        /// </summary>
        /// <param name="url">Address of the page to fetch via <c>HttpRequestUtil.GetPageHtml</c>.</param>
        /// <param name="host">
        /// Host of the page. When <c>null</c> it is resolved with <c>GetHost(url)</c>.
        /// </param>
        /// <remarks>
        /// Earlier revisions carried disabled code for de-duplicating visited URLs and for
        /// downloading matching images; that dead code and the two regular expressions that only
        /// served it were removed because nothing in the active path used them.
        /// </remarks>
        private void Crawling(string url, string host)
        {
            // The resolved host is not consumed by any active code in this method. The call is
            // retained so that any exception GetHost raises for a bad URL still surfaces here
            // exactly as it did before.
            if (host == null)
            {
                host = GetHost(url);
            }

            // Show the fetched markup as-is so it can be inspected in the UI.
            textBox2.Text = HttpRequestUtil.GetPageHtml(url);
        }
コード例 #2
0
        /// <summary>
        /// For line numbers 1 through 8, queries the ditu.amap.com <c>poiInfo</c> service with the
        /// keyword "成都地铁{n}号线", parses the returned station list and bulk-copies the rows
        /// into the <c>SubWayLocationT</c> table. Progress is appended to <c>rbox_info</c> and
        /// exception details to <c>rbox_errInfo</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        /// <remarks>
        /// Each line is processed independently: a failure is reported and the loop moves on to
        /// the next line.
        /// NOTE(review): the HTTP call, the bulk copy and <c>Thread.Sleep</c> all run on the
        /// calling thread; if this is the UI thread the form will not repaint until the handler
        /// returns — consider moving the work to a background task.
        /// </remarks>
        private void btn_Start_Click(object sender, EventArgs e)
        {
            const string cityName = "成都";
            const int firstLineId = 1;
            const int lastLineId = 8;
            const int pauseBetweenLinesMs = 20000;

            // NOTE(review): credentials are hard-coded in source; they should come from
            // configuration (the original author had sketched a ConnectionStrings lookup).
            const string conStr = "Data Source=127.0.0.1;Initial Catalog=EFDemo;User ID=sa;Password=123456";

            DataTable dt = GetTable();

            // Loop-invariant: captures everything from the first {"type" object up to (not
            // including) the ,{"type":"polyline" object in the response.
            Regex regTypeVehicle = new Regex("{\"type\"[\\s\\S]*?(?=,{\"type\":\"polyline\")");

            for (int lineId = firstLineId; lineId <= lastLineId; lineId++)
            {
                string lineName = string.Format("{0}地铁{1}号线", cityName, lineId);

                try
                {
                    // Drop any rows left behind by a previous attempt that failed part-way, so
                    // they are never inserted together with this line's stations.
                    dt.Clear();

                    string tempLineName = System.Web.HttpUtility.UrlEncode(lineName, Encoding.UTF8);
                    string pageHtml     = HttpRequestUtil.GetPageHtml("http://ditu.amap.com/service/poiInfo?query_type=TQUERY&pagesize=20&pagenum=1&qii=true&cluster_state=5&need_utd=true&utd_sceneid=1000&div=PC1000&addr_poi_merge=true&is_classify=true&city=510100&geoobj=104.05071%7C30.584681%7C104.06101%7C30.591479&keywords=" + tempLineName);

                    Match match = regTypeVehicle.Match(pageHtml);
                    if (!match.Success)
                    {
                        // Report a meaningful reason instead of an index-out-of-range error.
                        throw new InvalidOperationException("No station data found in the response for " + lineName);
                    }

                    SubWayReturnEntity listSubWay = JsonHelper.JSON.parse <SubWayReturnEntity>(match.Value);

                    foreach (var item in listSubWay.busData)
                    {
                        dt.Rows.Add(0, cityName, lineId, lineName, item.sequence, item.name, item.location.lng, item.location.lat);
                    }

                    // Persist this line's stations.
                    SqlBuckCopyHelper.SqlBulkCopyByDatatable(conStr, "SubWayLocationT", dt);

                    // Progress
                    this.rbox_info.Text += string.Format("{0} 数据获取成功\r\n", lineName);

                    // Pause between requests; there is nothing to wait for after the last line.
                    if (lineId < lastLineId)
                    {
                        Thread.Sleep(pauseBetweenLinesMs);
                    }
                }
                catch (Exception ex)
                {
                    this.rbox_errInfo.Text += ex.ToString();
                    this.rbox_info.Text    += string.Format("{0} 数据获取失败\r\n", lineName);
                }
            }
        }
コード例 #3
0
        /// <summary>
        /// Fetches list pages <c>List_1.html</c> through <c>List_13.html</c> under
        /// <c>http://qq.yh31.com/dm/bl/</c>, extracts the <c>src</c> of image tags found inside
        /// matched anchors and downloads each one via <c>HttpRequestUtil.HttpDownloadFile</c>.
        /// Shows a completion message once every page has been attempted.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        /// <remarks>
        /// The crawl is best-effort: a failing page or image is logged to the console and
        /// skipped, and processing continues with the next one.
        /// </remarks>
        private void btn_attack_Click(object sender, EventArgs e)
        {
            const int lastPage = 13;
            const string srcMarker = "src=\"";

            // Loop-invariant patterns. NOTE(review): both are greedy, so several tags on one
            // line are captured as a single match and only the first src is extracted.
            Regex regA   = new Regex("<a href=.*img.*</a>");
            Regex regImg = new Regex("<img.*/>");

            for (int j = 1; j <= lastPage; j++)
            {
                try
                {
                    string          pageHtml = HttpRequestUtil.GetPageHtml("http://qq.yh31.com/dm/bl/List_" + j.ToString() + ".html");
                    MatchCollection anchors  = regA.Matches(pageHtml);
                    Console.WriteLine($"网页共有{anchors.Count}");

                    foreach (Match anchor in anchors)
                    {
                        foreach (Match imgTag in regImg.Matches(anchor.Value))
                        {
                            string tag = imgTag.Value;

                            // Skip tags without a double-quoted src attribute instead of
                            // slicing at a bogus offset or aborting the rest of the page.
                            int start = tag.IndexOf(srcMarker, StringComparison.Ordinal);
                            if (start < 0)
                            {
                                continue;
                            }

                            start += srcMarker.Length;
                            int end = tag.IndexOf('"', start);
                            if (end < 0)
                            {
                                continue;
                            }

                            string imgUrl = tag.Substring(start, end - start);
                            try
                            {
                                HttpRequestUtil.HttpDownloadFile("http://qq.yh31.com/" + imgUrl);
                                Console.WriteLine($"图片地址{imgUrl}");
                            }
                            catch (Exception ex)
                            {
                                // One failed download must not stop the remaining images.
                                Console.WriteLine($"图片下载失败 {imgUrl}: {ex.Message}");
                            }

                            Thread.Sleep(1);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // One failed page must not stop the remaining pages.
                    Console.WriteLine($"第{j}页处理失败: {ex.Message}");
                }
            }

            // Previously guarded by a counter compared against 100, which a 13-page loop could
            // never reach, so the message was never displayed.
            MessageBox.Show("完成!");
        }