/// <summary>
/// Downloads the page at <c>index.Url</c>, extracts one <c>WeatherData</c> entry from
/// every "table_day" block found in the HTML and appends each entry to <c>weatherList</c>.
/// </summary>
/// <remarks>
/// A URL that does not match the URL pattern is reported in <c>lbxErrorCity</c>.
/// Any exception raised while downloading is logged and reported in <c>lbxErrorCity</c>.
/// An exception raised while parsing a single day block is logged and that block is skipped.
/// The method always blocks the calling thread for one second before returning.
/// </remarks>
/// <param name="index">City entry supplying the URL and the city names.</param>
private void GetData(WeatherItemFF index)
{
    try
    {
        string urlPat = @"((ht|f)tps?):\/\/([\w\-]+(\.[\w\-]+)*\/)*[\w\-]+(\.[\w\-]+)*\/?(\?([\w\-\.,@?^=%&:\/~\+#]*)+)?";
        Regex urlPg = new Regex(urlPat, RegexOptions.Multiline);

        if (urlPg.IsMatch(index.Url))
        {
            // Direct cast: the URL pattern also admits ftp URLs, for which WebRequest.Create
            // does not return an HttpWebRequest. The cast then fails with a descriptive
            // InvalidCastException (handled by the outer catch) instead of a bare
            // NullReferenceException on the next line.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri(index.Url));
            request.Method = "GET";
            request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36";

            // Read the whole body first and release the response (and its connection)
            // before parsing; text2 stays null when the status is not 200 OK.
            string text2 = null;
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                if (response.StatusCode == HttpStatusCode.OK)
                {
                    using (StreamReader sr2 = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("utf-8")))
                    {
                        text2 = sr2.ReadToEnd();
                    }
                }
            }

            if (text2 != null)
            {
                string patten = @"<div[\s]*class=\""table_day(.|\s)*?\"">(.|\s)*?</div>";
                Regex rg = new Regex(patten, RegexOptions.Multiline);
                MatchCollection mchisList = rg.Matches(text2);

                string datePat = @"<h3><b>(.|\s)*?</b>";
                Regex dateRg = new Regex(datePat, RegexOptions.Multiline);
                string tempPat = @"<li class=\""temp\"">(.|\s)*?<\/li>";
                Regex tempRg = new Regex(tempPat, RegexOptions.Multiline);
                string weatherPat = @"[\u4e00-\u9fa5]+";
                Regex weatherRg = new Regex(weatherPat, RegexOptions.Singleline);
                string num = @"(\-|\+)?\d+(\.\d+)?";
                Regex numRg = new Regex(num, RegexOptions.Multiline);

                this.lbxGetList.Items.Add(string.Format("{0},{1},{2},{3}", index.CityName, index.CityShortName, index.CityName_EN, index.Url));
                this.lbxGetList.TopIndex = this.lbxGetList.Items.Count - 1;

                for (int i = 0; i < mchisList.Count; i++)
                {
                    try
                    {
                        string dayHtml = mchisList[i].Value;
                        WeatherData item = new WeatherData();

                        // Header fragment of the form "<h3><b>...</b>". Matching once is enough:
                        // re-applying the same pattern to its own match returns the same text.
                        string ret = dateRg.Match(dayHtml).Value;
                        int indexM = ret.IndexOf('月');
                        int indexD = ret.IndexOf('日');

                        // DateKey = year of (today + i days) + the two characters before '月'
                        // + the two characters before '日'. When either marker is missing or
                        // too close to the start, Substring throws and the block is logged
                        // and skipped by the catch below.
                        // NOTE(review): assumes month and day are rendered as two digits - confirm
                        // against the page; a single-digit value would pull markup into the key.
                        item.DateKey = DateTime.Now.AddDays(i).Year.ToString() + ret.Substring(indexM - 2, 2) + ret.Substring(indexD - 2, 2);
                        item.CityName = index.CityName;

                        // The temperature <li> is matched once and reused below.
                        string tempHtml = tempRg.Match(dayHtml).Value;
                        item.Weather = weatherRg.Match(tempHtml).Value;

                        // Both temperatures default to "0"; one number fills MinTemp only,
                        // exactly two numbers fill MinTemp then MaxTemp; any other count keeps the defaults.
                        item.MinTemp = "0";
                        item.MaxTemp = "0";
                        MatchCollection numbers = numRg.Matches(tempHtml);
                        if (numbers.Count == 1)
                        {
                            item.MinTemp = numbers[0].Value;
                        }
                        else if (numbers.Count == 2)
                        {
                            item.MinTemp = numbers[0].Value;
                            item.MaxTemp = numbers[1].Value;
                        }

                        weatherList.Add(item);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a malformed day block must not abort the remaining ones.
                        logger.Error(index.CityName + "-" + ex.Message);
                    }
                }
            }
        }
        else
        {
            this.lbxErrorCity.Items.Add(string.Format("抓取[{0}]的网址是错误", index.CityName));
            this.lbxErrorCity.TopIndex = this.lbxErrorCity.Items.Count - 1;
        }
    }
    catch (Exception ex)
    {
        logger.Error("抓取[" + index.CityName + "]数据发生错误," + index.Url, ex);
        this.lbxErrorCity.Items.Add(string.Format("抓取[{0}]数据发生错误", index.CityName));
        this.lbxErrorCity.TopIndex = this.lbxErrorCity.Items.Count - 1;
    }

    // Runs on every path (success, invalid URL, failure).
    // NOTE(review): presumably throttles successive requests - confirm with the caller.
    Thread.Sleep(1000);
}
/// <summary>
/// Downloads the page at <c>index.Url</c> (decoded as GB2312), locates the "tqtongji2"
/// block, and turns every <c>&lt;ul&gt;</c> row inside it that contains at least two numeric
/// <c>&lt;li&gt;</c> cells into a <c>WeatherData</c> entry appended to <c>weatherList</c>.
/// </summary>
/// <remarks>
/// A URL that does not match the URL pattern is reported in <c>lbxErrorCity</c>.
/// Any exception is logged and reported in <c>lbxErrorCity</c>.
/// The method always blocks the calling thread for one second before returning.
/// </remarks>
/// <param name="index">City/date entry supplying the URL, the city names and the date key.</param>
private void GetData(WeatherItem index)
{
    try
    {
        string urlPat = @"((ht|f)tps?):\/\/([\w\-]+(\.[\w\-]+)*\/)*[\w\-]+(\.[\w\-]+)*\/?(\?([\w\-\.,@?^=%&:\/~\+#]*)+)?";
        Regex urlPg = new Regex(urlPat, RegexOptions.Multiline);

        if (urlPg.IsMatch(index.Url))
        {
            // Direct cast: the URL pattern also admits ftp URLs, for which WebRequest.Create
            // does not return an HttpWebRequest. The cast then fails with a descriptive
            // InvalidCastException (handled by the catch below) instead of a bare
            // NullReferenceException on the next line.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri(index.Url));
            request.Method = "GET";
            request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36";

            // Read the whole body first and release the response (and its connection)
            // before parsing; text2 stays null when the status is not 200 OK.
            string text2 = null;
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                if (response.StatusCode == HttpStatusCode.OK)
                {
                    using (StreamReader sr2 = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GB2312")))
                    {
                        text2 = sr2.ReadToEnd();
                    }
                }
            }

            if (text2 != null)
            {
                string patten = @"<div\s*class=\""tqtongji2\"">(.|\s)*?</div>";
                Regex rg = new Regex(patten, RegexOptions.Multiline);
                string pat2 = @"<ul>(.|\s)*?</ul>";
                Regex rg2 = new Regex(pat2, RegexOptions.Multiline);

                // Rows are searched only inside the first "tqtongji2" block.
                MatchCollection mchisList = rg2.Matches(rg.Match(text2).Value);

                string datePat = @"<a[\s\S]*href=\""[\s\S]*\"">(.|\s)*?</a>";
                Regex dateRg = new Regex(datePat, RegexOptions.Multiline);
                string datePat2 = @">(.|\s)*?<";
                Regex dateRg2 = new Regex(datePat2, RegexOptions.Multiline);
                string tempPat = @"<li>(\-|\+)?\d+(\.\d+)?</li>";
                Regex tempRg = new Regex(tempPat, RegexOptions.Multiline);
                string weatherPat = @"<li>[\u4e00-\u9fa5]*</li>";
                Regex weatherRg = new Regex(weatherPat, RegexOptions.Multiline);
                string chinesePat = @"[\u4e00-\u9fa5]+";
                Regex chineseRg = new Regex(chinesePat, RegexOptions.Multiline);
                string num = @"(\-|\+)?\d+(\.\d+)?";
                Regex numRg = new Regex(num, RegexOptions.Multiline);

                this.lbxGetList.Items.Add(string.Format("{0},{1},{2},{3},{4}", index.CityName, index.DateKey, index.CityShortName, index.CityName_EN, index.Url));
                this.lbxGetList.TopIndex = this.lbxGetList.Items.Count - 1;

                for (int i = 0; i < mchisList.Count; i++)
                {
                    string rowHtml = mchisList[i].Value;

                    // Numeric <li> cells are matched once per row and reused below.
                    MatchCollection temps = tempRg.Matches(rowHtml);
                    if (temps.Count < 2)
                    {
                        // Rows without two numeric cells are ignored.
                        continue;
                    }

                    WeatherData item = new WeatherData();

                    // Text between '>' and '<' of the row's anchor, with '-', '<' and '>' removed.
                    item.DateKey = dateRg2.Match(dateRg.Match(rowHtml).Value).Value.Replace("-", "").Replace("<", "").Replace(">", "");
                    item.CityName = index.CityName;

                    // The first numeric cell is stored as the maximum, the second as the minimum.
                    item.MaxTemp = numRg.Match(temps[0].Value).Value;
                    item.MinTemp = numRg.Match(temps[1].Value).Value;

                    // The first <li> holding only CJK characters (possibly none) supplies the weather text.
                    Match weatherCell = weatherRg.Match(rowHtml);
                    if (weatherCell.Success)
                    {
                        item.Weather = chineseRg.Match(weatherCell.Value).Value;
                    }

                    weatherList.Add(item);
                }
            }
        }
        else
        {
            this.lbxErrorCity.Items.Add(string.Format("抓取[{0}]的网址是错误", index.CityName));
            this.lbxErrorCity.TopIndex = this.lbxErrorCity.Items.Count - 1;
        }
    }
    catch (Exception ex)
    {
        logger.Error("抓取[" + index.CityName + "]数据发生错误", ex);
        this.lbxErrorCity.Items.Add(string.Format("抓取[{0}]、[{1}]数据发生错误", index.CityName, index.DateKey));
        this.lbxErrorCity.TopIndex = this.lbxErrorCity.Items.Count - 1;
    }

    // Runs on every path (success, invalid URL, failure).
    // NOTE(review): presumably throttles successive requests - confirm with the caller.
    Thread.Sleep(1000);
}