/// <summary>
/// Crawls a single page: downloads its HTML and shows the raw markup in <c>textBox2</c>.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <param name="host">
/// Host used to resolve relative links; when <c>null</c> it is derived from
/// <paramref name="url"/> through <c>GetHost</c>.
/// </param>
private void Crawling(string url, string host)
{
    // The resolved host is only consumed by the image download step, which is
    // currently disabled. GetHost is still invoked so that this method keeps
    // calling it (and surfacing any error it raises) exactly as before.
    if (host == null)
    {
        host = GetHost(url);
    }

    textBox2.Text = HttpRequestUtil.GetPageHtml(url);

    // Disabled features (previously kept here as commented-out code):
    //  * skipping URLs already recorded by VisitedHelper;
    //  * extracting <input name="..."> elements, e.g.
    //    <input name="P8" type="text" size="22" maxlength="22" value="湿度:%46.8 温度:+19.6C">
    //    (presumably a humidity / temperature reading - confirm against the target page);
    //  * matching <a href> and <img src> (jpg/jpeg/png/gif) tags and downloading images that
    //    are at least m_MinWidth x m_MinHeight, skipping "javascript" URLs and prefixing
    //    the host to URLs that do not start with "http".
    // The link/image regular expressions were removed together with that code because
    // nothing read them any more; restore them from version control when the download
    // step is re-enabled.
}
/// <summary>
/// Fetches the station list of metro lines 1 to 8 of the configured city ("成都", Chengdu)
/// from the AMap POI service and bulk-copies each line's stations into the
/// <c>SubWayLocationT</c> table. Progress is appended to <c>rbox_info</c> and exception
/// details to <c>rbox_errInfo</c>; a failing line does not stop the remaining lines.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btn_Start_Click(object sender, EventArgs e)
{
    const string cityName = "成都";
    const string queryUrlPrefix = "http://ditu.amap.com/service/poiInfo?query_type=TQUERY&pagesize=20&pagenum=1&qii=true&cluster_state=5&need_utd=true&utd_sceneid=1000&div=PC1000&addr_poi_merge=true&is_classify=true&city=510100&geoobj=104.05071%7C30.584681%7C104.06101%7C30.591479&keywords=";

    // NOTE(review): credentials are hard-coded in source. A configuration-based lookup
    // (ConfigurationManager.ConnectionStrings["constrLocal"]) existed here as commented-out
    // code; prefer it once the configuration entry is available.
    const string conStr = "Data Source=127.0.0.1;Initial Catalog=EFDemo;User ID=sa;Password=123456";

    // Loop-invariant: captures the JSON object that precedes the first
    // ,{"type":"polyline" entry in the response.
    Regex regTypeVehicle = new Regex("{\"type\"[\\s\\S]*?(?=,{\"type\":\"polyline\")");

    DataTable dt = GetTable();

    for (int lineId = 1; lineId < 9; lineId++)
    {
        string lineName = $"{cityName}地铁{lineId}号线";

        try
        {
            string encodedLineName = System.Web.HttpUtility.UrlEncode(lineName, Encoding.UTF8);
            string pageHtml = HttpRequestUtil.GetPageHtml(queryUrlPrefix + encodedLineName);

            Match match = regTypeVehicle.Match(pageHtml);
            if (!match.Success)
            {
                // Report a meaningful error instead of the index-out-of-range that
                // reading the first element of an empty match collection would raise.
                throw new InvalidOperationException("No station data block found in the response for " + lineName);
            }

            SubWayReturnEntity listSubWay = JsonHelper.JSON.parse<SubWayReturnEntity>(match.Value);

            foreach (var item in listSubWay.busData)
            {
                dt.Rows.Add(0, cityName, lineId, lineName, item.sequence, item.name, item.location.lng, item.location.lat);
            }

            // Write the collected rows to the database.
            SqlBuckCopyHelper.SqlBulkCopyByDatatable(conStr, "SubWayLocationT", dt);

            // Progress report.
            this.rbox_info.Text += $"{lineName} 数据获取成功\r\n";
        }
        catch (Exception ex)
        {
            this.rbox_errInfo.Text += ex.ToString();
            this.rbox_info.Text += $"{lineName} 数据获取失败\r\n";
        }
        finally
        {
            // Always empty the buffer, including after a failure; otherwise rows of a
            // failed line would be written again together with the next line's rows.
            dt.Clear();
        }
    }
}
/// <summary>
/// Walks list pages 1 to 13 under <c>http://qq.yh31.com/dm/bl/</c>, downloads every image
/// found inside an anchor on those pages, and shows a completion message once all pages
/// have been processed. Failures on a page or on a single image are logged to the console
/// and do not stop the run.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btn_attack_Click(object sender, EventArgs e)
{
    const string srcMarker = "src=\"";

    // Loop-invariant patterns: an anchor that contains an image, and the image tag itself.
    Regex regA = new Regex("<a href=.*img.*</a>");
    Regex regImg = new Regex("<img.*/>");

    for (int j = 1; j <= 13; j++)
    {
        try
        {
            string pageHtml = HttpRequestUtil.GetPageHtml("http://qq.yh31.com/dm/bl/List_" + j.ToString() + ".html");

            MatchCollection mc = regA.Matches(pageHtml);
            Console.WriteLine($"网页共有{mc.Count}");

            foreach (Match match in mc)
            {
                foreach (Match matchImgPage in regImg.Matches(match.Value))
                {
                    string imgTag = matchImgPage.Value;

                    // Extract the value of the src="..." attribute. A tag without the
                    // attribute or without a closing quote is skipped on its own instead
                    // of throwing and abandoning the rest of the page.
                    int start = imgTag.IndexOf(srcMarker, StringComparison.Ordinal);
                    if (start < 0)
                    {
                        continue;
                    }

                    int urlStart = start + srcMarker.Length;
                    int end = imgTag.IndexOf('"', urlStart);
                    if (end < 0)
                    {
                        continue;
                    }

                    string imgUrl = imgTag.Substring(urlStart, end - urlStart);

                    try
                    {
                        HttpRequestUtil.HttpDownloadFile("http://qq.yh31.com/" + imgUrl);
                        Console.WriteLine($"图片地址{imgUrl}");
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one failed download must not stop the others.
                        Console.WriteLine($"Failed to download image {imgUrl}: {ex.Message}");
                    }

                    Thread.Sleep(1);
                }
            }
        }
        catch (Exception ex)
        {
            // Best effort: a page that cannot be fetched or parsed is logged and skipped.
            Console.WriteLine($"Failed to process list page {j}: {ex.Message}");
        }
    }

    // Shown once after the last page. The previous check compared a per-page counter
    // with 100, a value the 13-iteration loop could never reach.
    MessageBox.Show("完成!");
}