/// <summary>
/// Crawls nuomi.com shop listings for the city shown in <c>label6</c> and appends one
/// grid row (name, phone, address) per shop detail page.
/// </summary>
/// <param name="item">Category identifier inserted into the listing URL path.</param>
public void run(int item)
{
    string city = label6.Text;
    if (city == "")
    {
        MessageBox.Show("请选择城市!");
        return;
    }

    for (int i = 1; i < 100; i++)
    {
        String Url = "https://" + city + ".nuomi.com/" + item + "-page" + i;
        string html = Method.GetUrl(Url);

        MatchCollection TitleMatchs = Regex.Matches(html, @"<li class=""shop-infoo-list-item clearfix"">([\s\S]*?)<a href=""([\s\S]*?)""", RegexOptions.IgnoreCase | RegexOptions.Multiline);

        // Collect the detail-page links found on this listing page.
        ArrayList lists = new ArrayList();
        foreach (Match NextMatch in TitleMatchs)
        {
            lists.Add("https:" + NextMatch.Groups[2].Value);
        }

        // A page without links means there is nothing left to page through.
        if (lists.Count == 0)
        {
            break;
        }

        foreach (string list in lists)
        {
            int index = this.skinDataGridView1.Rows.Add();
            string strhtml = Method.GetUrl(list);

            Match name = Regex.Match(strhtml, @"name:'([\s\S]*?)'");
            Match addr = Regex.Match(strhtml, @"address:'([\s\S]*?)'");
            Match tel = Regex.Match(strhtml, @"phone:'([\s\S]*?)'");

            this.skinDataGridView1.Rows[index].Cells[0].Value = name.Groups[1].Value;
            this.skinDataGridView1.Rows[index].Cells[2].Value = addr.Groups[1].Value;
            this.skinDataGridView1.Rows[index].Cells[1].Value = tel.Groups[1].Value;

            // Scroll the grid to the row just added. This must use the row index returned by
            // Rows.Add(); the page counter i points at an unrelated row and can be out of range.
            this.skinDataGridView1.CurrentCell = this.skinDataGridView1.Rows[index].Cells[0];

            Application.DoEvents();
            System.Threading.Thread.Sleep(800);
        }
    }
}
/// <summary>
/// Click handler: downloads the IP page, and if it lists the expected address starts
/// <c>run</c> on a new thread; otherwise shows an "IP mismatch" message.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    #region Common validation
    bool value = false;
    string html = Method.GetUrl("http://acaiji.com/success/ip.php");

    // NOTE(review): localip is fetched but never compared; the check below uses a
    // hard-coded literal. Confirm whether the comparison was meant to use localip.
    string localip = Method.GetIP();

    MatchCollection ips = Regex.Matches(html, @"<td style='color:red;'>([\s\S]*?)</td>", RegexOptions.IgnoreCase | RegexOptions.Multiline);
    foreach (Match ip in ips)
    {
        if (ip.Groups[1].Value.Trim() == "9.9.9.9")
        {
            value = true;
            break;
        }
    }

    if (value == true)
    {
        // Disable the cross-thread check BEFORE the worker starts; setting it after
        // Start() lets the worker touch a control while the check is still active.
        Control.CheckForIllegalCrossThreadCalls = false;

        Thread thread = new Thread(new ThreadStart(run));
        thread.Start();
    }
    else
    {
        MessageBox.Show("IP不符");
    }
    #endregion
}
/// <summary>
/// Queries the Baidu map search endpoint for <paramref name="cityName"/> and returns the
/// first numeric <c>"code"</c> value found in the response.
/// </summary>
/// <param name="cityName">City name to search for.</param>
/// <returns>The parsed city code, or 1 when the request fails or no usable code is found.</returns>
public int getcityId(string cityName)
{
    const int fallbackCityId = 1;

    try
    {
        // Escape the name so reserved characters (&, #, spaces, ...) cannot corrupt the query string.
        string encodedName = System.Uri.EscapeDataString(cityName ?? "");

        String Url = "https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=s&da_src=searchBox.button&wd=" + encodedName + "&c=289&src=0&wd2=&pn=0&sug=0&l=12&b=(13461858.87,3636969.979999999;13584738.87,3670185.979999999)&from=webmap&biz_forward={%22scaler%22:1,%22styles%22:%22pl%22}&sug_forward=&tn=B_NORMAL_MAP&nn=0&u_loc=13166533,3998088&ie=utf-8";
        string html = Method.GetUrl(Url);

        // Only the first "code" occurrence is used.
        Match codeMatch = Regex.Match(html, @"""code"":([\s\S]*?),", RegexOptions.IgnoreCase);

        int cityId;
        if (codeMatch.Success && int.TryParse(codeMatch.Groups[1].Value, out cityId))
        {
            return cityId;
        }

        return fallbackCityId;
    }
    catch (System.Exception ex)
    {
        // Keep the best-effort fallback, but leave a trace instead of discarding the error.
        System.Diagnostics.Debug.WriteLine(ex);
        return fallbackCityId;
    }
}
/// <summary>
/// Crawls ganji.com used-car listings for the city in <c>label6</c>. Listing pages 1..page
/// are fetched; detail URLs are staged in <c>textBox14</c>, then each detail page is
/// downloaded and written to the grid (title, contact, phone, city).
/// Progress is appended to <c>textBox3</c>; any exception text replaces it.
/// </summary>
public void Che()
{
    try
    {
        string cityCode = label6.Text;
        if (cityCode == string.Empty)
        {
            MessageBox.Show("请选择城市!");
            return;
        }

        RegexOptions listingOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;
        string[] lineBreak = new string[] { "\r\n" };

        // page, time and index are members declared outside this method.
        for (int pageNo = 1; pageNo <= page; pageNo++)
        {
            string listingHtml = Method.GetUrl("http://" + cityCode + ".ganji.com/ershouche/a1o" + pageNo + "/");
            MatchCollection ids = Regex.Matches(listingHtml, @"id=""puid-([\s\S]*?)""", listingOptions);

            // Stage one mobile detail URL per listing id, keeping the box scrolled to the end.
            for (int m = 0; m < ids.Count; m++)
            {
                textBox14.Text += "https://3g.ganji.com/" + cityCode + "_ershouche/" + ids[m].Groups[1].Value + "x" + "\r\n";
                textBox14.SelectionStart = textBox14.Text.Length;
                textBox14.ScrollToCaret();
            }

            // Timestamped progress line.
            textBox3.Text += DateTime.Now.ToString() + "-->正在采集第" + pageNo + "页\r\n";
            textBox3.SelectionStart = textBox3.Text.Length;
            textBox3.ScrollToCaret();

            Application.DoEvents();
            System.Threading.Thread.Sleep(500); // fixed delay between listing pages

            // The staged text ends with a line break, so the last split element is skipped.
            string[] detailUrls = textBox14.Text.Split(lineBreak, StringSplitOptions.None);
            int usable = detailUrls.Length - 1;
            for (int n = 0; n < usable; n++)
            {
                this.index = this.skinDataGridView1.Rows.Add();
                string detailHtml = Method.GetUrl(detailUrls[n]);

                Match titleMatch = Regex.Match(detailHtml, @"title>([\s\S]*?)</title>");
                Match contactMatch = Regex.Match(detailHtml, @"联系人</span>([\s\S]*?)</span>");
                Match phoneMatch = Regex.Match(detailHtml, @"data-phone=""([\s\S]*?)""");
                Match regionMatch = Regex.Match(detailHtml, @"province=([\s\S]*?);city=([\s\S]*?);");

                this.skinDataGridView1.Rows[index].Cells[0].Value = titleMatch.Groups[1].Value;
                // Scroll the grid to the current row.
                this.skinDataGridView1.CurrentCell = this.skinDataGridView1.Rows[index].Cells[0];

                // Strip tags first, then all whitespace, from the contact fragment.
                string contactText = Regex.Replace(Regex.Replace(contactMatch.Groups[1].Value, "<[^>]*>", ""), "\\s+", "");
                this.skinDataGridView1.Rows[index].Cells[1].Value = contactText;
                this.skinDataGridView1.Rows[index].Cells[2].Value = phoneMatch.Groups[1].Value;
                this.skinDataGridView1.Rows[index].Cells[3].Value = regionMatch.Groups[2].Value;

                // The stop button's caption is the cancellation signal.
                if (skinButton13.Text == "已停止")
                {
                    return;
                }

                Application.DoEvents();
                System.Threading.Thread.Sleep(time); // configurable delay between detail pages
            }

            textBox14.Text = "";
        }
    }
    catch (System.Exception ex)
    {
        textBox3.Text = ex.ToString();
    }
}
/// <summary>
/// Crawls 58.com second-hand housing listings for the city in <c>label6</c>. Detail URLs are
/// built from the <c>esf_id</c> values on each listing page, and each detail page fills one
/// grid row (title, contact, phone, area, community, price, URL).
/// Progress is appended to <c>textBox3</c>; any exception text replaces it.
/// </summary>
public void ershoufang()
{
    try
    {
        setDatagridview(skinDataGridView1, 7, new string[] { "标题", "联系人", "电话", "地区", "小区", "价格", "网址" });

        string cityCode = label6.Text;
        if (cityCode == string.Empty)
        {
            MessageBox.Show("请选择城市!");
            return;
        }

        // page and index are members declared outside this method.
        for (int pageNo = 1; pageNo <= page; pageNo++)
        {
            string listingHtml = Method.GetUrl("https://" + cityCode + ".58.com/ershoufang/0/pn" + pageNo + "/");
            MatchCollection ids = Regex.Matches(listingHtml, @"esf_id:([\s\S]*?),", RegexOptions.IgnoreCase | RegexOptions.Multiline);

            ArrayList detailUrls = new ArrayList();
            for (int m = 0; m < ids.Count; m++)
            {
                detailUrls.Add("https://" + cityCode + ".58.com/ershoufang/" + ids[m].Groups[1].Value + "x.shtml");
            }

            // Timestamped progress line.
            textBox3.Text += DateTime.Now.ToString() + "-->正在采集第" + pageNo + "页\r\n";
            textBox3.SelectionStart = textBox3.Text.Length;
            textBox3.ScrollToCaret();

            for (int n = 0; n < detailUrls.Count; n++)
            {
                string url = (string)detailUrls[n];
                this.index = this.skinDataGridView1.Rows.Add();
                string detailHtml = Method.GetUrl(url);

                Match titleMatch = Regex.Match(detailHtml, @"<h1 class=""c_333 f20"">([\s\S]*?)</h1>");
                Match contactMatch = Regex.Match(detailHtml, @"""linkman"":""([\s\S]*?)"""); // value is passed through Unicode2String below
                Match phoneMatch = Regex.Match(detailHtml, @"<p class='phone-num'>([\s\S]*?)<");
                Match areaMatch = Regex.Match(detailHtml, @"quyu'\)"">([\s\S]*?)<");
                Match communityMatch = Regex.Match(detailHtml, @"xiaoqu_name'\)"">([\s\S]*?)<");
                Match priceMatch = Regex.Match(detailHtml, @"MinPrice':'([\s\S]*?)'");

                string rawContact = contactMatch.Groups[1].Value.Trim();
                textBox14.Text = rawContact;

                this.skinDataGridView1.Rows[index].Cells[0].Value = titleMatch.Groups[1].Value.Trim();
                this.skinDataGridView1.Rows[index].Cells[1].Value = Unicode2String(rawContact);
                this.skinDataGridView1.Rows[index].Cells[2].Value = phoneMatch.Groups[1].Value.Trim();
                this.skinDataGridView1.Rows[index].Cells[3].Value = label14.Text + "" + areaMatch.Groups[1].Value.Trim();
                this.skinDataGridView1.Rows[index].Cells[4].Value = communityMatch.Groups[1].Value.Trim();
                this.skinDataGridView1.Rows[index].Cells[5].Value = priceMatch.Groups[1].Value.Trim();
                this.skinDataGridView1.Rows[index].Cells[6].Value = url;

                // Scroll the grid to the current row.
                this.skinDataGridView1.CurrentCell = this.skinDataGridView1.Rows[index].Cells[0];

                // The stop button's caption is the cancellation signal.
                if (skinButton13.Text == "已停止")
                {
                    return;
                }

                Application.DoEvents();
                System.Threading.Thread.Sleep(1000); // delay between detail pages
            }
        }
    }
    catch (System.Exception ex)
    {
        textBox3.Text = ex.ToString();
    }
}
/// <summary>
/// Crawls 58.com parking-space ("cheku") listings for the city in <c>label6</c>. Detail links
/// are staged in <c>textBox14</c>, then each detail page fills one grid row
/// (title, contact, phone, province, URL).
/// Progress is appended to <c>textBox3</c>; any exception text replaces it.
/// </summary>
public void chewei()
{
    try
    {
        string cityCode = label6.Text;
        if (cityCode == string.Empty)
        {
            MessageBox.Show("请选择城市!");
            return;
        }

        RegexOptions listingOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;
        string[] lineBreak = new string[] { "\r\n" };

        // page, time and index are members declared outside this method.
        for (int pageNo = 1; pageNo <= page; pageNo++)
        {
            string listingHtml = Method.GetUrl("http://" + cityCode + ".58.com/cheku/0/pn" + pageNo + "/");
            MatchCollection links = Regex.Matches(listingHtml, @"<h2 class='title'>([\s\S]*?)<a href=""([\s\S]*?)""", listingOptions);

            // Stage every detail link, keeping the box scrolled to the end.
            for (int m = 0; m < links.Count; m++)
            {
                textBox14.Text += links[m].Groups[2].Value + "\r\n";
                textBox14.SelectionStart = textBox14.Text.Length;
                textBox14.ScrollToCaret();
            }

            // Timestamped progress line.
            textBox3.Text += DateTime.Now.ToString() + "-->正在采集第" + pageNo + "页\r\n";
            textBox3.SelectionStart = textBox3.Text.Length;
            textBox3.ScrollToCaret();

            Application.DoEvents();
            System.Threading.Thread.Sleep(500); // fixed delay between listing pages

            // The staged text ends with a line break, so the last split element is skipped.
            string[] detailUrls = textBox14.Text.Split(lineBreak, StringSplitOptions.None);
            int usable = detailUrls.Length - 1;
            for (int n = 0; n < usable; n++)
            {
                this.index = this.skinDataGridView1.Rows.Add();
                string detailUrl = detailUrls[n];
                string detailHtml = Method.GetUrl(detailUrl);

                Match titleMatch = Regex.Match(detailHtml, @"<title>([\s\S]*?)</title>");
                Match contactMatch = Regex.Match(detailHtml, @"<span class='f14 c_333 jjrsay'>([\s\S]*?)</span>");
                Match phoneMatch = Regex.Match(detailHtml, @"<p class='phone-num'>([\s\S]*?)</p>");
                Match regionMatch = Regex.Match(detailHtml, @"province=([\s\S]*?);");

                this.skinDataGridView1.Rows[index].Cells[0].Value = titleMatch.Groups[1].Value;
                this.skinDataGridView1.Rows[index].Cells[1].Value = contactMatch.Groups[1].Value;
                this.skinDataGridView1.Rows[index].Cells[2].Value = phoneMatch.Groups[1].Value;
                this.skinDataGridView1.Rows[index].Cells[3].Value = regionMatch.Groups[1].Value;
                this.skinDataGridView1.Rows[index].Cells[4].Value = detailUrl;

                this.skinDataGridView1.Columns[2].FillWeight = 40; // column width weight
                // Scroll the grid to the current row.
                this.skinDataGridView1.CurrentCell = this.skinDataGridView1.Rows[index].Cells[0];

                // The stop button's caption is the cancellation signal.
                if (skinButton13.Text == "已停止")
                {
                    return;
                }

                Application.DoEvents();
                System.Threading.Thread.Sleep(time); // configurable delay between detail pages
            }

            textBox14.Text = "";
        }
    }
    catch (System.Exception ex)
    {
        textBox3.Text = ex.ToString();
    }
}
/// <summary>
/// Crawls 58.com second-hand housing search results for the city selected in
/// <c>comboBox1</c> and the keyword in <c>textBox1</c>. For each result, the desktop page
/// supplies title and phone and the mobile page supplies the contact name; one grid row is
/// added per result. Exceptions inside the crawl are shown in a message box.
/// </summary>
public void run()
{
    // Guard against "nothing selected" before dereferencing SelectedItem.
    object selectedCity = comboBox1.SelectedItem;
    if (selectedCity == null)
    {
        MessageBox.Show("请选择城市!");
        return;
    }

    string city = Method.Get58pinyin(selectedCity.ToString());
    try
    {
        if (city == "")
        {
            MessageBox.Show("请选择城市!");
            return;
        }

        for (int i = 1; i < 71; i++)
        {
            String Url = "http://" + city + ".58.com/ershoufang/pn" + i + "/" + "?key=" + textBox1.Text.Trim();
            string html = Method.GetUrl(Url);

            MatchCollection TitleMatchs = Regex.Matches(html, @"<li logr=""([\s\S]*?)_([\s\S]*?)_([\s\S]*?)_([\s\S]*?)_", RegexOptions.IgnoreCase | RegexOptions.Multiline);

            ArrayList lists = new ArrayList();
            foreach (Match NextMatch in TitleMatchs)
            {
                lists.Add("http://" + city + ".58.com/ershoufang/" + NextMatch.Groups[4].Value + "x.shtml");
            }

            // Stop paging once a page yields no results. The previous "< 0" test could
            // never be true, so empty pages were still requested up to the page limit.
            if (lists.Count == 0)
            {
                return;
            }

            for (int j = 0; j < lists.Count; j++)
            {
                this.index = this.dataGridView1.Rows.Add();
                String Url1 = lists[j].ToString();

                // The last 21 characters of the desktop URL are reused as the tail of the mobile URL.
                string Url2 = "http://m.58.com/" + city + "/ershoufang/" + Url1.Substring(Url1.Length - 21);
                textBox2.Text = Url2;

                string strhtml = Method.GetUrl(Url1);  // desktop page
                string strhtml2 = Method.GetUrl(Url2); // mobile page

                Match titles = Regex.Match(strhtml, @"<h1 class=""c_333 f20"">([\s\S]*?)</h1>");      // title
                Match contacts = Regex.Match(strhtml2, @"<h2 class=""agent-title"">([\s\S]*?)</h2>"); // contact, from the mobile page
                Match tell = Regex.Match(strhtml, @"<p class='phone-num'>([\s\S]*?)<");               // phone

                this.dataGridView1.Rows[index].Cells[0].Value = titles.Groups[1].Value;
                this.dataGridView1.Rows[index].Cells[1].Value = contacts.Groups[1].Value;
                this.dataGridView1.Rows[index].Cells[2].Value = tell.Groups[1].Value;
                this.dataGridView1.CurrentCell = this.dataGridView1.Rows[index].Cells[0];

                // Hold here while zanting is false, still pumping window messages.
                // The short sleep keeps the wait from pinning a CPU core.
                while (this.zanting == false)
                {
                    Application.DoEvents();
                    System.Threading.Thread.Sleep(50);
                }

                if (this.status == false)
                {
                    return;
                }

                Application.DoEvents();
                System.Threading.Thread.Sleep(1000); // delay between detail pages
            }
        }
    }
    catch (System.Exception ex)
    {
        MessageBox.Show(ex.ToString());
    }
}
/// <summary>
/// Crawls AMap (Gaode) POI search results for every city line in <c>textBox1</c> combined with
/// every keyword line in <c>textBox2</c>. Each POI detail page fills one grid row
/// (row number, title, phones, address, keyword, city).
/// Progress is appended to <c>textBox3</c>; any exception text replaces it.
/// </summary>
public void gaode()
{
    try
    {
        string[] lineBreak = new string[] { "\r\n" };
        string[] cityNames = textBox1.Text.Trim().Split(lineBreak, StringSplitOptions.None);
        string[] searchWords = textBox2.Text.Trim().Split(lineBreak, StringSplitOptions.None);
        const int lastPage = 200;

        foreach (string city in cityNames)
        {
            int cityCode = gaodeCityId(city);

            foreach (string keyword in searchWords)
            {
                string encodedKeyword = System.Web.HttpUtility.UrlEncode(keyword, System.Text.Encoding.GetEncoding("utf-8"));

                for (int pageNo = 0; pageNo <= lastPage; pageNo++)
                {
                    string searchHtml = Method.GetUrl("http://m.amap.com/service/poi/keywords.json?pagenum=" + pageNo + "&user_loc=undefined&geoobj=&city=" + cityCode + "&keywords=" + encodedKeyword);
                    MatchCollection found = Regex.Matches(searchHtml, @"""diner_flag"":([\s\S]*?),""id"":""([\s\S]*?)""", RegexOptions.IgnoreCase);

                    ArrayList poiIds = new ArrayList();
                    for (int m = 0; m < found.Count; m++)
                    {
                        poiIds.Add(found[m].Groups[2].Value);
                    }

                    // No ids on this page: move on to the next keyword.
                    if (poiIds.Count == 0)
                    {
                        break;
                    }

                    // Timestamped progress line.
                    textBox3.Text += DateTime.Now.ToString() + "-->正在采集" + city + "" + keyword + "第" + pageNo + "页\r\n";
                    Application.DoEvents();
                    Thread.Sleep(100);

                    for (int p = 0; p < poiIds.Count; p++)
                    {
                        string poid = (string)poiIds[p];
                        int index = this.dataGridView1.Rows.Add();
                        string detailHtml = Method.GetUrl("https://www.amap.com/detail/" + poid + "?citycode=" + cityCode);

                        MatchCollection titles = Regex.Matches(detailHtml, @"<h4 class=""detail_title"">([\s\S]*?)</h4>");
                        MatchCollection phones = Regex.Matches(detailHtml, @"""telephone"":""([\s\S]*?)""");
                        MatchCollection addresses = Regex.Matches(detailHtml, @"""address"":""([\s\S]*?)""");

                        // The stop button's caption is the cancellation signal.
                        if (visualButton2.Text == "已停止")
                        {
                            return;
                        }

                        // Row number and title are written only when a title matched; the last match wins.
                        for (int t = 0; t < titles.Count; t++)
                        {
                            this.dataGridView1.Rows[index].Cells[0].Value = index;
                            this.dataGridView1.Rows[index].Cells[1].Value = titles[t].Groups[1].Value;
                            this.dataGridView1.CurrentCell = this.dataGridView1.Rows[index].Cells[0];
                        }

                        // All phone numbers are accumulated, comma-terminated.
                        for (int t = 0; t < phones.Count; t++)
                        {
                            this.dataGridView1.Rows[index].Cells[2].Value += phones[t].Groups[1].Value.Trim() + ",";
                        }

                        for (int t = 0; t < addresses.Count; t++)
                        {
                            this.dataGridView1.Rows[index].Cells[3].Value = addresses[t].Groups[1].Value;
                        }

                        this.dataGridView1.Rows[index].Cells[4].Value = keyword;
                        this.dataGridView1.Rows[index].Cells[5].Value = city;

                        Application.DoEvents();
                        Thread.Sleep(500); // delay between detail pages
                    }
                }
            }
        }
    }
    catch (System.Exception ex)
    {
        textBox3.Text = ex.ToString();
    }
}
/// <summary>
/// Crawls Tencent map (map.qq.com) place search results for every city line in
/// <c>textBox1</c> combined with every keyword line in <c>textBox2</c>. A grid row is added
/// for each POI, but its cells are filled only when the detail page exposes a phone number.
/// Progress is written to <c>textBox3</c>; any exception text replaces it.
/// </summary>
public void tengxun()
{
    try
    {
        string[] lineBreak = new string[] { "\r\n" };
        string[] cityNames = textBox1.Text.Trim().Split(lineBreak, StringSplitOptions.None);
        string[] searchWords = textBox2.Text.Trim().Split(lineBreak, StringSplitOptions.None);
        const int lastPage = 250;

        foreach (string city in cityNames)
        {
            string encodedCity = System.Web.HttpUtility.UrlEncode(city, System.Text.Encoding.GetEncoding("utf-8"));

            foreach (string keyword in searchWords)
            {
                string encodedKeyword = System.Web.HttpUtility.UrlEncode(keyword, System.Text.Encoding.GetEncoding("utf-8"));

                for (int pageNo = 0; pageNo <= lastPage; pageNo++)
                {
                    string searchUrl = "http://map.qq.com/m/place/result/city=" + encodedCity + "&word=" + encodedKeyword + "&bound=&page=" + pageNo + "&cpos=&mode=list";
                    // The log box is replaced with the current search URL before the request.
                    textBox3.Text = searchUrl;

                    string searchHtml = Method.GetUrl(searchUrl);
                    MatchCollection found = Regex.Matches(searchHtml, @"poid=([\s\S]*?)""", RegexOptions.IgnoreCase);

                    ArrayList poiIds = new ArrayList();
                    for (int m = 0; m < found.Count; m++)
                    {
                        poiIds.Add(found[m].Groups[1].Value);
                    }

                    // No ids on this page: move on to the next keyword.
                    if (poiIds.Count == 0)
                    {
                        break;
                    }

                    // Timestamped progress line.
                    textBox3.Text += DateTime.Now.ToString() + "-->正在采集" + city + "" + keyword + "第" + pageNo + "页\r\n";
                    Application.DoEvents();
                    Thread.Sleep(100);

                    for (int p = 0; p < poiIds.Count; p++)
                    {
                        string poid = (string)poiIds[p];
                        int index = this.dataGridView1.Rows.Add();
                        string detailHtml = Method.GetUrl("http://map.qq.com/m/detail/poi/poid=" + poid);

                        Match titleMatch = Regex.Match(detailHtml, @"<div class=""poiDetailTitle "">([\s\S]*?)</div>");
                        Match phoneMatch = Regex.Match(detailHtml, @"<a href=""tel:([\s\S]*?)""");
                        Match addressMatch = Regex.Match(detailHtml, @"span class=""poiDetailAddrTxt"">([\s\S]*?)</span>");

                        // The stop button's caption is the cancellation signal.
                        if (visualButton2.Text == "已停止")
                        {
                            return;
                        }

                        string phone = phoneMatch.Groups[1].Value.Trim();
                        if (phone != "")
                        {
                            this.dataGridView1.Rows[index].Cells[0].Value = index;
                            this.dataGridView1.Rows[index].Cells[1].Value = titleMatch.Groups[1].Value;
                            this.dataGridView1.CurrentCell = this.dataGridView1.Rows[index].Cells[0];
                            this.dataGridView1.Rows[index].Cells[2].Value += phone + ",";
                            this.dataGridView1.Rows[index].Cells[3].Value = addressMatch.Groups[1].Value;
                            this.dataGridView1.Rows[index].Cells[4].Value = keyword;
                            this.dataGridView1.Rows[index].Cells[5].Value = city;
                        }

                        Application.DoEvents();
                        Thread.Sleep(500); // delay between detail pages
                    }
                }
            }
        }
    }
    catch (System.Exception ex)
    {
        textBox3.Text = ex.ToString();
    }
}
/// <summary>
/// Crawls Baidu map search results for every city line in <c>textBox1</c> combined with every
/// keyword line in <c>textBox2</c>. Each page's JSON is deserialized into <c>JsonParser</c>
/// and one grid row is written per entry (row number, name, phone, address, keyword, city).
/// Progress is appended to <c>textBox3</c>; any exception text replaces it.
/// </summary>
public void txMap()
{
    try
    {
        string[] citys = textBox1.Text.Trim().Split(new string[] { "\r\n" }, StringSplitOptions.None);
        string[] keywords = textBox2.Text.Trim().Split(new string[] { "\r\n" }, StringSplitOptions.None);
        int pages = 200;

        foreach (string city in citys)
        {
            int cityid = getcityId(city + "市"); // resolve the city code

            foreach (string keyword in keywords)
            {
                // Escape the keyword so reserved characters cannot break the query string.
                string encodedKeyword = System.Uri.EscapeDataString(keyword);

                for (int i = 0; i <= pages; i++)
                {
                    String Url = "https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=con&from=webmap&c=" + cityid + "&wd=" + encodedKeyword + "&wd2=&pn=" + i + "&nn=" + i + "0&db=0&sug=0&addr=0&&da_src=pcmappg.poi.page&on_gel=1&src=7&gr=3&l=13&tn=B_NORMAL_MAP&u_loc=13167420,3999298&ie=utf-8";
                    string html = Method.GetUrl(Url);

                    MatchCollection TitleMatchs = Regex.Matches(html, @"""primary_uid"":""([\s\S]*?)""", RegexOptions.IgnoreCase);
                    ArrayList lists = new ArrayList();
                    foreach (Match NextMatch in TitleMatchs)
                    {
                        lists.Add(NextMatch.Groups[1].Value);
                    }

                    // No ids on this page: move on to the next keyword.
                    if (lists.Count == 0)
                    {
                        break;
                    }

                    string tm1 = DateTime.Now.ToString();
                    textBox3.Text += tm1 + "-->正在采集" + city + "" + keyword + "第" + i + "页\r\n";

                    JsonParser jsonParser = JsonConvert.DeserializeObject<JsonParser>(html);

                    // A response without a content array is treated as an empty page, not an error.
                    if (jsonParser != null && jsonParser.Content != null)
                    {
                        foreach (Content content in jsonParser.Content)
                        {
                            int index = this.dataGridView1.Rows.Add();
                            this.dataGridView1.Rows[index].Cells[0].Value = index;
                            this.dataGridView1.Rows[index].Cells[1].Value = content.name;
                            this.dataGridView1.Rows[index].Cells[2].Value = content.tel;
                            this.dataGridView1.Rows[index].Cells[3].Value = content.addr;
                            this.dataGridView1.Rows[index].Cells[4].Value = keyword.Trim();
                            this.dataGridView1.Rows[index].Cells[5].Value = city;
                            // Scroll the grid to the current row.
                            this.dataGridView1.CurrentCell = this.dataGridView1.Rows[index].Cells[0];

                            // The stop button's caption is the cancellation signal.
                            if (visualButton2.Text == "已停止")
                            {
                                return;
                            }
                        }
                    }

                    Application.DoEvents();
                    Thread.Sleep(100); // delay between result pages
                }
            }
        }
    }
    catch (System.Exception ex)
    {
        // Report in the log box. textBox1 holds the city list read at the top of this
        // method, so writing the error there would overwrite the user's input.
        textBox3.Text = ex.ToString();
    }
}
/// <summary>
/// Crawls b2b.huangye88.com company listings for the city in <c>label6</c> and the category in
/// <c>label1</c> (pages 1..50). Detail links are staged in <c>textBox14</c>, then each detail
/// page fills one grid row (company, mobile, contact, description).
/// Progress is appended to <c>textBox1</c>; exceptions are shown in a message box.
/// </summary>
public void run()
{
    try
    {
        string cityCode = label6.Text;
        if (cityCode == string.Empty)
        {
            MessageBox.Show("请选择城市!");
            return;
        }

        string category = label1.Text;
        if (category == string.Empty)
        {
            MessageBox.Show("请选择分类!");
            return;
        }

        const int lastPage = 50;
        string[] lineBreak = new string[] { "\r\n" };

        for (int pageNo = 1; pageNo <= lastPage; pageNo++)
        {
            string listingHtml = Method.GetUrl("http://b2b.huangye88.com/" + cityCode + "/" + category + "/pn" + pageNo + "/");
            MatchCollection links = Regex.Matches(listingHtml, @"<h4><a href=""([\s\S]*?)""", RegexOptions.IgnoreCase | RegexOptions.Multiline);

            // Stage every detail link, keeping the box scrolled to the end.
            for (int m = 0; m < links.Count; m++)
            {
                textBox14.Text += links[m].Groups[1].Value + "\r\n";
                textBox14.SelectionStart = textBox14.Text.Length;
                textBox14.ScrollToCaret();
            }

            // Timestamped progress line.
            textBox1.Text += DateTime.Now.ToString() + "-->正在采集第" + pageNo + "页\r\n";
            textBox1.SelectionStart = textBox1.Text.Length;
            textBox1.ScrollToCaret();

            Application.DoEvents();
            System.Threading.Thread.Sleep(500); // fixed delay between listing pages

            // The staged text ends with a line break, so the last split element is skipped.
            string[] detailUrls = textBox14.Text.Split(lineBreak, StringSplitOptions.None);
            int usable = detailUrls.Length - 1;
            for (int n = 0; n < usable; n++)
            {
                int index = this.skinDataGridView1.Rows.Add();
                string detailHtml = Method.GetUrl(detailUrls[n]);

                MatchCollection company = Regex.Matches(detailHtml, @"<dl class=""bottom"">([\s\S]*?)名称:([\s\S]*?)<");        // company name
                MatchCollection mobile = Regex.Matches(detailHtml, @"<dl class=""bottom"">([\s\S]*?)手机:([\s\S]*?)<");         // mobile number
                MatchCollection contact = Regex.Matches(detailHtml, @"联系人:([\s\S]*?)rel=""nofollow"">([\s\S]*?)</a>");        // contact person
                MatchCollection description = Regex.Matches(detailHtml, @"<meta name=""Description"" content=""([\s\S]*?)"""); // description

                // For each field the last match wins.
                for (int t = 0; t < company.Count; t++)
                {
                    this.skinDataGridView1.Rows[index].Cells[0].Value = company[t].Groups[2].Value;
                    // Scroll the grid to the current row.
                    this.skinDataGridView1.CurrentCell = this.skinDataGridView1.Rows[index].Cells[0];
                }

                for (int t = 0; t < mobile.Count; t++)
                {
                    this.skinDataGridView1.Rows[index].Cells[1].Value = mobile[t].Groups[2].Value;
                }

                for (int t = 0; t < contact.Count; t++)
                {
                    this.skinDataGridView1.Rows[index].Cells[2].Value = contact[t].Groups[2].Value;
                }

                for (int t = 0; t < description.Count; t++)
                {
                    this.skinDataGridView1.Rows[index].Cells[3].Value = description[t].Groups[1].Value;
                }

                // The stop button's caption is the cancellation signal.
                if (skinButton2.Text == "已停止")
                {
                    skinButton1.Enabled = false;
                    return;
                }

                Application.DoEvents();
                System.Threading.Thread.Sleep(500); // delay between detail pages
            }

            textBox14.Text = "";
        }
    }
    catch (System.Exception ex)
    {
        MessageBox.Show(ex.ToString());
    }
}
//string temp = Regex.Replace(NextMatch.Groups[1].Value, "<[^>]*>", ""); // strip all tags
#region Business transfer, shop rental, shop sale
/// <summary>
/// Crawls a 58.com shop category for the city in <c>label6</c>. Detail URLs are staged in
/// <c>textBox14</c>, then each detail page fills one grid row
/// (title, contact, phone, address, URL). Progress is appended to <c>textBox3</c>.
/// </summary>
/// <param name="item">Category path segment; its <c>ToString()</c> is inserted into the listing URL.</param>
public void shangpu(object item)
{
    string[] headers = { "标题", "联系人", "电话", "地区", "网址" };
    setDatagridview(skinDataGridView1, 5, headers);

    try
    {
        string city = label6.Text;
        if (city == "")
        {
            MessageBox.Show("请选择城市!");
            return;
        }

        // page, time and index are members declared outside this method.
        for (int i = 1; i <= page; i++)
        {
            String Url = "http://" + city + ".58.com/" + item.ToString() + "/0/pn" + i + "/";
            string html = Method.GetUrl(Url);

            MatchCollection TitleMatchs = Regex.Matches(html, @"<li logr=""([\s\S]*?)_0_([\s\S]*?)_([\s\S]*?)_", RegexOptions.IgnoreCase | RegexOptions.Multiline);
            foreach (Match NextMatch in TitleMatchs)
            {
                // NOTE(review): detail URLs always use the "shangpu" path regardless of item — confirm this is intended.
                textBox14.Text += "https://" + city + ".58.com/shangpu/" + NextMatch.Groups[3].Value + "x.shtml" + "\r\n";
                textBox14.SelectionStart = textBox14.Text.Length;
                textBox14.ScrollToCaret();
            }

            // Timestamped progress line.
            string tm1 = DateTime.Now.ToString();
            textBox3.Text += tm1 + "-->正在采集第" + i + "页\r\n";
            textBox3.SelectionStart = textBox3.Text.Length;
            textBox3.ScrollToCaret();

            Application.DoEvents();
            Thread.Sleep(500); // fixed delay between listing pages

            // The staged text ends with a line break, so the last split element is skipped.
            string[] lines = textBox14.Text.Split(new string[] { "\r\n" }, StringSplitOptions.None);
            for (int j = 0; j < lines.Length - 1; j++)
            {
                this.index = this.skinDataGridView1.Rows.Add();
                String Url1 = lines[j];
                string strhtml = Method.GetUrl(Url1);

                Match titles = Regex.Match(strhtml, @"<h1 class=""c_000 f20"">([\s\S]*?)</h1>");
                Match contacts = Regex.Match(strhtml, @"<span class='f14 c_333 jjrsay'>([\s\S]*?)</span>");
                Match tell = Regex.Match(strhtml, @"<p class='phone-num'>([\s\S]*?)</p>");
                Match region = Regex.Match(strhtml, @"详细地址:</span>([\s\S]*?)</span>");

                this.skinDataGridView1.Rows[index].Cells[0].Value = titles.Groups[1].Value.Trim();
                this.skinDataGridView1.Rows[index].Cells[1].Value = contacts.Groups[1].Value.Trim();
                this.skinDataGridView1.Rows[index].Cells[2].Value = tell.Groups[1].Value;

                // Strip tags from the address fragment, then remove the blanks left behind.
                string temp = Regex.Replace(region.Groups[1].Value, "<[^>]*>", "");
                this.skinDataGridView1.Rows[index].Cells[3].Value = temp.Trim().Replace(" ", "").Replace(" ", "");
                this.skinDataGridView1.Rows[index].Cells[4].Value = lines[j];

                // Scroll the grid to the current row.
                this.skinDataGridView1.CurrentCell = this.skinDataGridView1.Rows[index].Cells[0];

                // The stop button's caption is the cancellation signal.
                if (skinButton13.Text == "已停止")
                {
                    return;
                }

                Application.DoEvents();
                Thread.Sleep(time); // configurable delay between detail pages
            }

            textBox14.Text = "";
        }
    }
    catch (System.Exception ex)
    {
        // Surface the failure in the log box instead of discarding it silently.
        textBox3.Text = ex.ToString();
    }
}
/// <summary>
/// Crawls 58.com second-hand housing listings for the city in <c>label6</c>. Detail URLs are
/// staged in <c>textBox14</c>; for each one the desktop page supplies title and phone, and the
/// mobile page supplies contact, area, community, size and price. One grid row is written per
/// listing. Progress is appended to <c>textBox3</c>; any exception text replaces it.
/// </summary>
public void ershoufang()
{
    try
    {
        setDatagridview(skinDataGridView1, 8, new string[] { "标题", "联系人", "电话", "地区", "小区", "面积", "价格", "网址" });

        string cityCode = label6.Text;
        if (cityCode == string.Empty)
        {
            MessageBox.Show("请选择城市!");
            return;
        }

        string[] lineBreak = new string[] { "\r\n" };

        // page, time and index are members declared outside this method.
        for (int pageNo = 1; pageNo <= page; pageNo++)
        {
            string listingHtml = Method.GetUrl("http://" + cityCode + ".58.com/ershoufang/0/pn" + pageNo + "/");
            MatchCollection entries = Regex.Matches(listingHtml, @"<li logr=""([\s\S]*?)_0_([\s\S]*?)_([\s\S]*?)_", RegexOptions.IgnoreCase | RegexOptions.Multiline);

            // Stage one desktop detail URL per listing, keeping the box scrolled to the end.
            for (int m = 0; m < entries.Count; m++)
            {
                textBox14.Text += "http://" + cityCode + ".58.com/ershoufang/" + entries[m].Groups[3].Value + "x.shtml" + "\r\n";
                textBox14.SelectionStart = textBox14.Text.Length;
                textBox14.ScrollToCaret();
            }

            // Timestamped progress line.
            textBox3.Text += DateTime.Now.ToString() + "-->正在采集第" + pageNo + "页\r\n";
            textBox3.SelectionStart = textBox3.Text.Length;
            textBox3.ScrollToCaret();

            // The staged text ends with a line break, so the last split element is skipped.
            string[] detailUrls = textBox14.Text.Split(lineBreak, StringSplitOptions.None);
            int usable = detailUrls.Length - 1;
            for (int n = 0; n < usable; n++)
            {
                this.index = this.skinDataGridView1.Rows.Add();
                string desktopUrl = detailUrls[n];

                // The last 21 characters of the desktop URL are reused as the tail of the mobile URL.
                string mobileUrl = "http://m.58.com/" + cityCode + "/ershoufang/" + desktopUrl.Substring(desktopUrl.Length - 21);

                string desktopHtml = Method.GetUrl(desktopUrl);
                string mobileHtml = Method.GetUrl(mobileUrl);

                // NOTE(review): the address pattern below contains a literal line break inside the
                // verbatim string; it is reproduced exactly — confirm that is the intended pattern.
                string areaPattern = @"<li class=""address-info"">([\s\S]*?) 
-";

                Match titleMatch = Regex.Match(desktopHtml, @"<h1 class=""c_333 f20"">([\s\S]*?)</h1>");     // title
                Match contactMatch = Regex.Match(mobileHtml, @"<h2 class=""agent-title"">([\s\S]*?)</h2>"); // contact (mobile page)
                Match phoneMatch = Regex.Match(desktopHtml, @"<p class='phone-num'>([\s\S]*?)<");           // phone
                Match areaMatch = Regex.Match(mobileHtml, areaPattern);                                     // area (mobile page)
                Match communityMatch = Regex.Match(mobileHtml, @"小区:([\s\S]*?)</h2>");                     // community (mobile page)
                Match sizeMatch = Regex.Match(mobileHtml, @"面积</p>([\s\S]*?)</p>");                        // size (mobile page)
                Match priceMatch = Regex.Match(mobileHtml, @"售价</p>([\s\S]*?)</p>");                       // price (mobile page)

                this.skinDataGridView1.Rows[index].Cells[0].Value = titleMatch.Groups[1].Value;
                this.skinDataGridView1.Rows[index].Cells[1].Value = contactMatch.Groups[1].Value;
                this.skinDataGridView1.Rows[index].Cells[2].Value = phoneMatch.Groups[1].Value;
                this.skinDataGridView1.Rows[index].Cells[3].Value = label14.Text + "" + areaMatch.Groups[1].Value;
                this.skinDataGridView1.Rows[index].Cells[4].Value = communityMatch.Groups[1].Value;

                // Size and price fragments still contain markup; strip tags, then trim.
                this.skinDataGridView1.Rows[index].Cells[5].Value = Regex.Replace(sizeMatch.Groups[1].Value, "<[^>]*>", "").Trim();
                this.skinDataGridView1.Rows[index].Cells[6].Value = Regex.Replace(priceMatch.Groups[1].Value, "<[^>]*>", "").Trim();
                this.skinDataGridView1.Rows[index].Cells[7].Value = desktopUrl;

                // Scroll the grid to the current row.
                this.skinDataGridView1.CurrentCell = this.skinDataGridView1.Rows[index].Cells[0];

                // The stop button's caption is the cancellation signal.
                if (skinButton13.Text == "已停止")
                {
                    return;
                }

                Application.DoEvents();
                System.Threading.Thread.Sleep(time); // configurable delay between detail pages
            }

            textBox14.Text = "";
        }
    }
    catch (System.Exception ex)
    {
        textBox3.Text = ex.ToString();
    }
}
//string temp = Regex.Replace(NextMatch.Groups[1].Value, "<[^>]*>", ""); // strip all tags
#region Business transfer, shop rental, shop sale
/// <summary>
/// Crawls a 58.com shop category for the city in <c>label6</c>. Distinct detail URLs are
/// taken from each listing page, and each detail page fills one grid row
/// (title, contact, phone, area, URL, size). Progress is appended to <c>textBox3</c>.
/// </summary>
/// <param name="item">Category path segment; its <c>ToString()</c> is inserted into the listing URL.</param>
public void shangpu(object item)
{
    string[] headers = { "标题", "联系人", "电话", "地区", "网址", "面积" };
    setDatagridview(skinDataGridView1, 6, headers);

    try
    {
        string city = label6.Text;
        if (city == "")
        {
            MessageBox.Show("请选择城市!");
            return;
        }

        // page, time and index are members declared outside this method.
        for (int i = 1; i <= page; i++)
        {
            String Url = "https://" + city + ".58.com/" + item.ToString() + "/0/pn" + i + "/";
            string html = Method.GetUrl(Url);

            MatchCollection TitleMatchs = Regex.Matches(html, @"https://[a-z]+.58.com/[a-z]+/[0-9]+x.shtml", RegexOptions.IgnoreCase | RegexOptions.Multiline);

            // The same link can occur several times in a page; keep the first occurrence only.
            ArrayList lists = new ArrayList();
            foreach (Match NextMatch in TitleMatchs)
            {
                if (!lists.Contains(NextMatch.Groups[0].Value))
                {
                    lists.Add(NextMatch.Groups[0].Value);
                }
            }

            // Timestamped progress line.
            string tm1 = DateTime.Now.ToString();
            textBox3.Text += tm1 + "-->正在采集第" + i + "页\r\n";
            textBox3.SelectionStart = textBox3.Text.Length;
            textBox3.ScrollToCaret();

            Application.DoEvents();
            Thread.Sleep(500); // fixed delay between listing pages

            foreach (string list in lists)
            {
                this.index = this.skinDataGridView1.Rows.Add();
                String Url1 = list;
                string strhtml = Method.GetUrl(Url1);

                Match titles = Regex.Match(strhtml, @"<title>([\s\S]*?)-");
                Match contacts = Regex.Match(strhtml, @"<p class=""poster-name"">([\s\S]*?)</p>");
                Match tell = Regex.Match(strhtml, @"""phone"":""([\s\S]*?)""");
                Match region = Regex.Match(strhtml, @"域:</span>([\s\S]*?)</p>");
                Match mianji = Regex.Match(strhtml, @"<span class=""up"">([\s\S]*?)</span>");

                this.skinDataGridView1.Rows[index].Cells[0].Value = titles.Groups[1].Value.Trim();
                this.skinDataGridView1.Rows[index].Cells[1].Value = contacts.Groups[1].Value.Trim();
                this.skinDataGridView1.Rows[index].Cells[2].Value = tell.Groups[1].Value;

                // Strip tags from the area fragment, then remove the blanks left behind.
                string temp = Regex.Replace(region.Groups[1].Value, "<[^>]*>", "");
                this.skinDataGridView1.Rows[index].Cells[3].Value = temp.Trim().Replace(" ", "").Replace(" ", "");
                this.skinDataGridView1.Rows[index].Cells[4].Value = list;
                this.skinDataGridView1.Rows[index].Cells[5].Value = mianji.Groups[1].Value.Trim();

                // Scroll the grid to the current row.
                this.skinDataGridView1.CurrentCell = this.skinDataGridView1.Rows[index].Cells[0];

                // The stop button's caption is the cancellation signal.
                if (skinButton13.Text == "已停止")
                {
                    return;
                }

                Application.DoEvents();
                Thread.Sleep(time); // configurable delay between detail pages
            }

            textBox14.Text = "";
        }
    }
    catch (System.Exception ex)
    {
        // Surface the failure in the log box instead of discarding it silently.
        textBox3.Text = ex.ToString();
    }
}
/// <summary>
/// Crawls 360 map (ditu.so.com) search results for every city line in <c>textBox1</c> combined
/// with every keyword line in <c>textBox2</c>. Each POI detail page fills one grid row
/// (row number, name, phones, address, keyword, city).
/// Progress is appended to <c>textBox3</c>; any exception text replaces it.
/// </summary>
public void map_360()
{
    try
    {
        string[] lineBreak = new string[] { "\r\n" };
        string[] cityNames = textBox1.Text.Trim().Split(lineBreak, StringSplitOptions.None);
        string[] searchWords = textBox2.Text.Trim().Split(lineBreak, StringSplitOptions.None);
        const int lastPage = 100;

        foreach (string city in cityNames)
        {
            string encodedCity = System.Web.HttpUtility.UrlEncode(city, System.Text.Encoding.GetEncoding("utf-8"));

            foreach (string keyword in searchWords)
            {
                string encodedKeyword = System.Web.HttpUtility.UrlEncode(keyword, System.Text.Encoding.GetEncoding("utf-8"));

                for (int pageNo = 1; pageNo <= lastPage; pageNo++)
                {
                    // NOTE(review): each request asks for batches pageNo..pageNo+4 while pageNo advances
                    // by one, so consecutive requests overlap — confirm against the endpoint's semantics.
                    string batchList = pageNo + "%2c" + (pageNo + 1) + "%2c" + (pageNo + 2) + "%2c" + (pageNo + 3) + "%2c" + (pageNo + 4);
                    string searchHtml = Method.GetUrl("https://ditu.so.com/app/pit?jsoncallback=jQuery18308131636402501483_1525852464213&keyword=" + encodedKeyword + "&cityname=" + encodedCity + "&batch=" + batchList + "&number=10");

                    MatchCollection found = Regex.Matches(searchHtml, @"""pguid"":""([\s\S]*?)""", RegexOptions.IgnoreCase);
                    ArrayList poiIds = new ArrayList();
                    for (int m = 0; m < found.Count; m++)
                    {
                        poiIds.Add(found[m].Groups[1].Value);
                    }

                    // No ids on this page: move on to the next keyword.
                    if (poiIds.Count == 0)
                    {
                        break;
                    }

                    // Timestamped progress line.
                    textBox3.Text += DateTime.Now.ToString() + "-->正在采集" + city + "" + keyword + "第" + pageNo + "页\r\n";
                    Application.DoEvents();
                    Thread.Sleep(100);

                    for (int p = 0; p < poiIds.Count; p++)
                    {
                        string poid = (string)poiIds[p];
                        int index = this.dataGridView1.Rows.Add();
                        string detailHtml = Method.GetUrl("https://m.map.so.com/onebox/?type=detail&id=" + poid + "&mso_x=&mso_y=&d=mobile&src=map_wap&fields=movies_all");

                        MatchCollection titles = Regex.Matches(detailHtml, @"data_poi_name = ""([\s\S]*?)""");
                        MatchCollection phones = Regex.Matches(detailHtml, @"href=""tel:([\s\S]*?)""");
                        MatchCollection addresses = Regex.Matches(detailHtml, @"data_poi_address = ""([\s\S]*?)""");

                        // The stop button's caption is the cancellation signal.
                        if (visualButton2.Text == "已停止")
                        {
                            return;
                        }

                        // Row number and name are written only when a name matched; the last match wins.
                        for (int t = 0; t < titles.Count; t++)
                        {
                            this.dataGridView1.Rows[index].Cells[0].Value = index;
                            this.dataGridView1.Rows[index].Cells[1].Value = titles[t].Groups[1].Value;
                            this.dataGridView1.CurrentCell = this.dataGridView1.Rows[index].Cells[0];
                        }

                        // All phone numbers are accumulated, comma-terminated.
                        for (int t = 0; t < phones.Count; t++)
                        {
                            this.dataGridView1.Rows[index].Cells[2].Value += phones[t].Groups[1].Value.Trim() + ",";
                        }

                        for (int t = 0; t < addresses.Count; t++)
                        {
                            this.dataGridView1.Rows[index].Cells[3].Value = addresses[t].Groups[1].Value;
                        }

                        this.dataGridView1.Rows[index].Cells[4].Value = keyword;
                        this.dataGridView1.Rows[index].Cells[5].Value = city;

                        Application.DoEvents();
                        Thread.Sleep(500); // delay between detail pages
                    }
                }
            }
        }
    }
    catch (System.Exception ex)
    {
        textBox3.Text = ex.ToString();
    }
}
/// <summary>
/// Crawls hc360.com company search results for every keyword line in <c>skinTextBox2</c> and
/// the city in <c>label1</c> (up to 100 pages per keyword). Company links are staged in
/// <c>textBox14</c>, then each company page fills one grid row
/// (name, contact, phone, mobile, address).
/// Progress is appended to <c>textBox1</c>; any exception text replaces it.
/// </summary>
public void Run()
{
    try
    {
        int page = 100;
        string[] keywords = skinTextBox2.Text.Split(new string[] { "\r\n" }, StringSplitOptions.None);

        string city = label1.Text;
        if (city == "")
        {
            MessageBox.Show("请选择城市!");
            return;
        }

        foreach (string keyword in keywords)
        {
            for (int i = 1; i <= page; i++)
            {
                string Url = "https://s.hc360.com/company/search.html?kwd=" + keyword + "&k=0&z=" + city + "&pnum=" + i;

                // Start every page with an empty link buffer. Previously the search URL was
                // written here, so the "no links" check below could never fire and the first
                // company link of each page was glued onto the search URL.
                textBox14.Text = "";

                string strhtml = Method.GetUrl(Url);
                MatchCollection all = Regex.Matches(strhtml, @"<h3><a data-exposurelog=""([\s\S]*?)"" href=""([\s\S]*?)""");
                foreach (Match NextMatch in all)
                {
                    textBox14.Text += NextMatch.Groups[2].Value + "\r\n";
                    textBox14.SelectionStart = textBox14.Text.Length;
                    textBox14.ScrollToCaret();
                }

                // No links on this page: move on to the next keyword.
                if (textBox14.Text == "")
                {
                    break;
                }

                // Timestamped progress line.
                string tm1 = DateTime.Now.ToString();
                textBox1.Text += tm1 + "-->正在采集" + label1.Text + "第" + i + "页" + keyword + "\r\n";
                textBox1.SelectionStart = textBox1.Text.Length;
                textBox1.ScrollToCaret();

                Application.DoEvents();
                System.Threading.Thread.Sleep(1000);

                // The staged text ends with a line break, so the last split element is skipped.
                string[] lines1 = textBox14.Text.Split(new string[] { "\r\n" }, StringSplitOptions.None);
                for (int j = 0; j < lines1.Length - 1; j++)
                {
                    // Rows.Add() returns the index of the new row, which addresses its cells below.
                    int index = this.skinDataGridView1.Rows.Add();
                    string strhtml1 = Method.GetUrl(lines1[j]);

                    MatchCollection name = Regex.Matches(strhtml1, @"infoname=""([\s\S]*?)""");
                    MatchCollection contacts = Regex.Matches(strhtml1, @"联系人</span><span>:([\s\S]*?)</span>");
                    MatchCollection phone = Regex.Matches(strhtml1, @"电话</span><span>:([\s\S]*?)</span>");
                    MatchCollection tell = Regex.Matches(strhtml1, @"手机</span><span>:([\s\S]*?)</span>");
                    MatchCollection addr = Regex.Matches(strhtml1, @"<p>地址:([\s\S]*?)&");

                    // For each field the last match wins.
                    foreach (Match NextMatch in name)
                    {
                        this.skinDataGridView1.Rows[index].Cells[0].Value = StripTagsAndWhitespace(NextMatch.Groups[1].Value);
                        // Scroll the grid to the current row.
                        this.skinDataGridView1.CurrentCell = this.skinDataGridView1.Rows[index].Cells[0];
                    }

                    foreach (Match NextMatch in contacts)
                    {
                        this.skinDataGridView1.Rows[index].Cells[1].Value = StripTagsAndWhitespace(NextMatch.Groups[1].Value);
                    }

                    foreach (Match NextMatch in phone)
                    {
                        this.skinDataGridView1.Rows[index].Cells[2].Value = StripTagsAndWhitespace(NextMatch.Groups[1].Value);
                    }

                    foreach (Match NextMatch in tell)
                    {
                        this.skinDataGridView1.Rows[index].Cells[3].Value = StripTagsAndWhitespace(NextMatch.Groups[1].Value);
                    }

                    foreach (Match NextMatch in addr)
                    {
                        this.skinDataGridView1.Rows[index].Cells[4].Value = NextMatch.Groups[1].Value.Trim();
                    }

                    // The stop button's caption is the cancellation signal.
                    if (button3.Text == "已停止")
                    {
                        button2.Enabled = false;
                        return;
                    }

                    Application.DoEvents();
                    System.Threading.Thread.Sleep(1000);
                }

                textBox14.Text = "";
            }
        }
    }
    catch (System.Exception ex)
    {
        textBox1.Text = ex.ToString();
    }
}

// Removes all markup tags and then all whitespace from an HTML fragment.
private static string StripTagsAndWhitespace(string fragment)
{
    string withoutTags = Regex.Replace(fragment, "<[^>]*>", "");
    return Regex.Replace(withoutTags, "\\s+", "");
}
/// <summary>
/// Queries the Baidu Map search endpoint for every city/keyword combination and appends
/// one row per returned place to dataGridView1.
/// </summary>
/// <remarks>
/// Cities are read from textBox1 and keywords from textBox2, one per line. For each pair,
/// result pages 0..10 are requested; paging ends at the first response without a
/// "primary_uid" entry. Grid columns filled: 0 = row index, 1 = name, 2 = tel, 3 = addr,
/// 4 = keyword, 5 = city. The method returns early when visualButton2.Text reads "已停止".
/// Progress and any exception text are appended to textBox3.
/// </remarks>
public void baidu()
{
    try
    {
        string[] citys = textBox1.Text.Trim().Split(new string[] { "\r\n" }, StringSplitOptions.None);
        string[] keywords = textBox2.Text.Trim().Split(new string[] { "\r\n" }, StringSplitOptions.None);
        int pages = 10;

        foreach (string city in citys)
        {
            int cityid = getcityId(city + "市"); // look up the city code used by the "c" query parameter

            foreach (string keyword in keywords)
            {
                // Escape the keyword so characters such as '&', '#', '+' or spaces cannot
                // break the query string; the request declares ie=utf-8.
                string encodedKeyword = Uri.EscapeDataString(keyword.Trim());

                for (int i = 0; i <= pages; i++)
                {
                    // "nn" is sent as j followed by a literal "0", i.e. (i - 1) * 10 clamped at 0.
                    // NOTE(review): pages 0 and 1 therefore both send nn=00 — confirm this is intended.
                    int j = i - 1 > 0 ? i - 1 : 0;
                    String Url = "https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=con&from=webmap&c=" + cityid
                        + "&wd=" + encodedKeyword
                        + "&wd2=&pn=" + i
                        + "&nn=" + j
                        + "0&db=0&sug=0&addr=0&&da_src=pcmappg.poi.page&on_gel=1&src=7&gr=3&l=13.2&auth=6GNgMOxNx%40CM2KzLPeYAvKP725L6c0z5uxHLVTTxHNNtBnlQADZZzy1uVt1GgvPUDZYOYIZuVt1cv3uztHee%40ewWvPWv3GuxtVwi04960vyACFIMOSU7ucEWe1GD8zv7u%40ZPuHt0A%3DH73uzCCyoET1jlBhlADM5ZYYDMJ7zlp55CKBvaaZyY&device_ratio=1&tn=B_NORMAL_MAP&u_loc=13167726,4000141&ie=utf-8";
                    string html = Method.GetUrl(Url);

                    // A response without any primary_uid has no results: go to the next keyword.
                    MatchCollection TitleMatchs = Regex.Matches(html, @"""primary_uid"":""([\s\S]*?)""", RegexOptions.IgnoreCase);
                    if (TitleMatchs.Count == 0)
                    {
                        break;
                    }

                    string tm1 = DateTime.Now.ToString();
                    textBox3.Text += tm1 + "-->正在采集" + city + "" + keyword + "第" + i + "页\r\n";

                    JsonParser jsonParser = JsonConvert.DeserializeObject<JsonParser>(html);
                    // Guard against a payload that deserializes to nothing instead of
                    // failing with a NullReferenceException.
                    if (jsonParser == null || jsonParser.Content == null)
                    {
                        break;
                    }

                    foreach (Content content in jsonParser.Content)
                    {
                        int index = this.dataGridView1.Rows.Add();
                        this.dataGridView1.Rows[index].Cells[0].Value = index;
                        this.dataGridView1.Rows[index].Cells[1].Value = content.name;
                        this.dataGridView1.Rows[index].Cells[2].Value = content.tel;
                        this.dataGridView1.Rows[index].Cells[3].Value = content.addr;
                        this.dataGridView1.Rows[index].Cells[4].Value = keyword.Trim();
                        this.dataGridView1.Rows[index].Cells[5].Value = city;
                        // Selecting a cell of the new row scrolls the grid to it.
                        this.dataGridView1.CurrentCell = this.dataGridView1.Rows[index].Cells[0];

                        // Stop request: the button caption is used as the stop flag.
                        if (visualButton2.Text == "已停止")
                        {
                            return;
                        }
                    }

                    Application.DoEvents();
                    Thread.Sleep(10); // pause between result pages
                }
            }
        }
    }
    catch (System.Exception ex)
    {
        // Report the failure in the log box instead of discarding it.
        textBox3.Text += ex.ToString() + "\r\n";
    }
}
/// <summary>
/// Scrapes 58.com listing pages for the selected city, area and category and appends one
/// row per linked company page to skinDataGridView1.
/// </summary>
/// <remarks>
/// City comes from label6, area from label10 and category from label1. Up to 70 listing
/// pages are requested; paging ends at the first page without a company link. Grid columns
/// filled: 0 = company, 1 = phone, 2 = contact, 3 = address (tags and whitespace removed),
/// 4 = text of the "retTit" heading. The method returns when button3.Text reads "已停止";
/// any exception text is written to label3.
/// </remarks>
public void run()
{
    try
    {
        string city = label6.Text;
        string area = label10.Text;
        if (city == "")
        {
            MessageBox.Show("请选择城市!");
            return;
        }

        string item = label1.Text;
        if (item == "")
        {
            MessageBox.Show("请选择分类!");
            return;
        }

        int page = 70;
        for (int i = 1; i <= page; i++)
        {
            // Honour a stop request between pages too, so the run can be cancelled
            // even when pages yield no company links.
            if (button3.Text == "已停止")
            {
                return;
            }

            String Url = "http://" + city + ".58.com/" + area + "/" + item + "/pn" + i + "/";
            string html = Method.GetUrl(Url);
            MatchCollection TitleMatchs = Regex.Matches(html, @"<a href=""https://qy.58.com([\s\S]*?)""", RegexOptions.IgnoreCase | RegexOptions.Multiline);

            // No company links on this page: nothing more to collect.
            if (TitleMatchs.Count == 0)
            {
                break;
            }

            string tm1 = DateTime.Now.ToString();
            label3.Text = tm1 + "-->正在采集第" + i + "页\r\n";
            Application.DoEvents();
            System.Threading.Thread.Sleep(500); // fixed pause between listing pages

            foreach (Match NextMatch in TitleMatchs)
            {
                int index = this.skinDataGridView1.Rows.Add();
                // The captured remainder of the qy.58.com link is appended to the mobile detail URL.
                String Url1 = "http://qy.m.58.com/m_detail/" + NextMatch.Groups[1].Value;
                string strhtml = Method.GetUrl(Url1);

                string rxg = @"<h1>([\s\S]*?)</h1>";                                   // company
                string Rxg = @"<a href=""tel:([\s\S]*?)""";                           // phone
                string Rxg1 = @"</span><span>([\s\S]*?)</span>";                       // contact
                string Rxg2 = @"<dt>公司地址:</dt>([\s\S]*?)</dd>";                    // company address
                string Rxg3 = @"<div class=""retTit""><strong>([\s\S]*?)</strong>";   // heading stored in column 4
                Match company = Regex.Match(strhtml, rxg);
                Match tel = Regex.Match(strhtml, Rxg);
                Match contacts = Regex.Match(strhtml, Rxg1);
                Match addr = Regex.Match(strhtml, Rxg2);
                Match job = Regex.Match(strhtml, Rxg3);

                this.skinDataGridView1.Rows[index].Cells[0].Value = company.Groups[1].Value;
                // Selecting a cell of the new row scrolls the grid to it.
                this.skinDataGridView1.CurrentCell = this.skinDataGridView1.Rows[index].Cells[0];
                this.skinDataGridView1.Rows[index].Cells[1].Value = tel.Groups[1].Value;
                this.skinDataGridView1.Rows[index].Cells[2].Value = contacts.Groups[1].Value;

                string temp = Regex.Replace(addr.Groups[1].Value, "<[^>]*>", ""); // strip HTML tags
                string temp1 = Regex.Replace(temp, "\\s+", "");                   // strip whitespace
                this.skinDataGridView1.Rows[index].Cells[3].Value = temp1;
                this.skinDataGridView1.Rows[index].Cells[4].Value = job.Groups[1].Value;

                // Stop request: the button caption is used as the stop flag.
                if (button3.Text == "已停止")
                {
                    return;
                }

                Application.DoEvents();
                System.Threading.Thread.Sleep(1000); // pause between company pages
            }
        }
    }
    catch (System.Exception ex)
    {
        label3.Text = ex.ToString();
    }
}
/// <summary>
/// Queries the Baidu Map search endpoint for every city/keyword combination and appends
/// one row per returned place to dataGridView1.
/// </summary>
/// <remarks>
/// Cities are read from textBox1 and keywords from textBox2, one per line. For each pair,
/// result pages 0..200 are requested; paging ends at the first response without a
/// "primary_uid" entry. Grid columns filled: 0 = row index, 1 = name, 2 = tel, 3 = addr,
/// 4 = keyword, 5 = city. The method returns early when visualButton2.Text reads "已停止".
/// Progress and any exception text are appended to textBox3.
/// </remarks>
public void baidu()
{
    try
    {
        string[] citys = textBox1.Text.Trim().Split(new string[] { "\r\n" }, StringSplitOptions.None);
        string[] keywords = textBox2.Text.Trim().Split(new string[] { "\r\n" }, StringSplitOptions.None);
        int pages = 200;

        foreach (string city in citys)
        {
            int cityid = getcityId(city + "市"); // look up the city code used by the "c" query parameter

            foreach (string keyword in keywords)
            {
                // Escape the keyword so characters such as '&', '#', '+' or spaces cannot
                // break the query string; the request declares ie=utf-8.
                string encodedKeyword = Uri.EscapeDataString(keyword.Trim());

                for (int i = 0; i <= pages; i++)
                {
                    // "nn" is sent as j followed by a literal "0", i.e. (i - 1) * 10 clamped at 0.
                    // NOTE(review): pages 0 and 1 therefore both send nn=00 — confirm this is intended.
                    int j = i - 1 > 0 ? i - 1 : 0;
                    String Url = "https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=con&from=webmap&c=" + cityid
                        + "&wd=" + encodedKeyword
                        + "&wd2=&pn=" + i
                        + "&nn=" + j
                        + "0&db=0&sug=0&addr=0&&da_src=pcmappg.poi.page&on_gel=1&src=7&gr=3&l=9.557396536572428&auth=Y%3DyW9VVIUF45OV8VGZCZb85Tzf4HRUPIuxHENxTzTzztComRB199Ay1uVt1GgvPUDZYOYIZuVt1cv3uVtGccZcuVtPWv3GuNtZComRdXmB1F234Q6W89AcEWe1GD8zv7u%40ZPuNtkB1FAprGnrFHQNWHaJ9caEmqQQDICfy9GUIsxA2wEjjg2JK&device_ratio=1&tn=B_NORMAL_MAP&u_loc=13177040,3994297&ie=utf-8&b=(12730445.232221898,4765711.534422691;13205701.20474543,4916707.941505569)&t=1547192932734";
                    string html = Method.GetUrl(Url);

                    // A response without any primary_uid has no results: go to the next keyword.
                    MatchCollection TitleMatchs = Regex.Matches(html, @"""primary_uid"":""([\s\S]*?)""", RegexOptions.IgnoreCase);
                    if (TitleMatchs.Count == 0)
                    {
                        break;
                    }

                    string tm1 = DateTime.Now.ToString();
                    textBox3.Text += tm1 + "-->正在采集" + city + "" + keyword + "第" + i + "页\r\n";

                    JsonParser jsonParser = JsonConvert.DeserializeObject<JsonParser>(html);
                    // Guard against a payload that deserializes to nothing instead of
                    // failing with a NullReferenceException.
                    if (jsonParser == null || jsonParser.Content == null)
                    {
                        break;
                    }

                    foreach (Content content in jsonParser.Content)
                    {
                        int index = this.dataGridView1.Rows.Add();
                        this.dataGridView1.Rows[index].Cells[0].Value = index;
                        this.dataGridView1.Rows[index].Cells[1].Value = content.name;
                        this.dataGridView1.Rows[index].Cells[2].Value = content.tel;
                        this.dataGridView1.Rows[index].Cells[3].Value = content.addr;
                        this.dataGridView1.Rows[index].Cells[4].Value = keyword.Trim();
                        this.dataGridView1.Rows[index].Cells[5].Value = city;
                        // Selecting a cell of the new row scrolls the grid to it.
                        this.dataGridView1.CurrentCell = this.dataGridView1.Rows[index].Cells[0];

                        // Stop request: the button caption is used as the stop flag.
                        if (visualButton2.Text == "已停止")
                        {
                            return;
                        }
                    }

                    Application.DoEvents();
                    Thread.Sleep(10); // pause between result pages
                }
            }
        }
    }
    catch (System.Exception ex)
    {
        // Report the failure in the log box instead of discarding it.
        textBox3.Text += ex.ToString() + "\r\n";
    }
}