/// <summary>
/// Downloads the listing page at <paramref name="strLink"/>, builds one
/// <c>PackageInfo</c> per title that matches <c>page.GetValidKey</c>, and
/// appends each successfully parsed entry to <paramref name="rList"/>.
/// </summary>
/// <param name="page">Fetcher used for the listing page and every detail page; also supplies the keyword pattern.</param>
/// <param name="strLink">URL of the listing page to scan.</param>
/// <param name="rList">Destination list; parsed entries are appended in page order.</param>
/// <remarks>
/// Any exception raised while parsing a single entry is passed to
/// <c>SaveInfo.WriteError</c> and that entry is not added; the loop then continues.
/// </remarks>
private void ReadInfo(GetPage page, string strLink, List<PackageInfo> rList)
{
    const string strUrl = "http://www.gdgpo.gov.cn";
    string strContent = page.GetPageInfo(strLink);
    string validKeyPattern = page.GetValidKey;

    Regex titleRegex = new Regex("title=\"[^\"]*");                 // project name
    Regex timeRegex = new Regex("<em>.*<");                         // time
    Regex keywordRegex = new Regex(validKeyPattern);                // keyword filter applied to each title
    Regex detailPathRegex = new Regex("/showNotice/id[^\"]*");      // detail page URL path (index)
    Regex numberRegex = new Regex("项目编号[^<]*</span>");           // project number
    // NOTE(review): as written, "(元)" is a regex group rather than literal parentheses - confirm intended.
    Regex priceRegex = new Regex("预算金额(元)[^(span)]*span");      // price
    Regex sellerRegex = new Regex("受.*的委托");                     // school

    // Scan the listing once per field instead of once per loop iteration.
    // NOTE(review): the i-th title is paired with the i-th time and i-th detail
    // link purely by position - confirm the page always emits them in step.
    MatchCollection titles = titleRegex.Matches(strContent);
    MatchCollection times = timeRegex.Matches(strContent);
    MatchCollection detailPaths = detailPathRegex.Matches(strContent);

    for (int i = 0; i < titles.Count; i++)
    {
        string titleText = titles[i].Value;
        if (!keywordRegex.IsMatch(titleText))
        {
            // Keyword not found in this title.
            continue;
        }

        try
        {
            PackageInfo p = new PackageInfo();
            p.Name = titleText.Substring(7);                        // skip the leading title="
            p.Time = times[i].Value.Substring(4);                   // skip the leading <em>
            p.Time = p.Time.Substring(0, p.Time.Length - 1);        // drop the trailing '<'
            p.Index = detailPaths[i].Value;
            p.Url = strUrl;

            string strDetail = page.GetPageInfo(strUrl + p.Index);
            if (strDetail.Length > 0)
            {
                // Each field skips a fixed-length label prefix, then is cut at the next '<'.
                // A missing match makes Substring throw, which drops this entry via the catch below.
                p.Number = numberRegex.Match(strDetail).Value.Substring(5);
                p.Number = p.Number.Substring(0, p.Number.IndexOf('<'));
                p.Price = priceRegex.Match(strDetail).Value.Substring(8);
                p.Price = p.Price.Substring(0, p.Price.IndexOf('<'));
                p.Seller = sellerRegex.Match(strDetail).Value;
            }

            rList.Add(p);
        }
        catch (Exception e)
        {
            new SaveInfo().WriteError(e);
        }
    }
}
/// <summary>
/// Fetches the bill list page of www.gdedulscg.cn and builds one
/// <c>PackageInfo</c> per project-name row whose markup matches
/// <c>page.GetValidKey</c>.
/// </summary>
/// <returns>
/// The parsed entries in reverse page order (each new entry is inserted at
/// index 0). Rows that fail to parse are logged through
/// <c>SaveInfo.WriteError</c> and omitted.
/// </returns>
public List<PackageInfo> GetPackage()
{
    const string strUrl = "http://www.gdedulscg.cn/";
    GetPage page = new GetPage();
    string strUrlLink = "http://www.gdedulscg.cn/home/bill/billlist";
    string strContent = page.GetPageInfo(strUrlLink);
    string validKeyPattern = page.GetValidKey;

    Regex numberRowRegex = new Regex("list_title_num_data.*</div>");         // number
    Regex sellerRowRegex = new Regex("list_title_unit_data.*</div>");        // school (purchasing unit)
    Regex themeRowRegex = new Regex("<div.*list_title_theme_data.*</div>");  // project name
    Regex priceRowRegex = new Regex("list_title_high_data.*</div>");         // school's quoted price
    Regex timeRowRegex = new Regex("list_title_time_data.*</div>");          // publish time
    Regex keywordRegex = new Regex(validKeyPattern);                         // keyword filter applied to each row
    Regex quotedValueRegex = new Regex("=\"[^\"]*\"");                       // project name - the actual text
    Regex innerTextRegex = new Regex(">.*</div>");                           // number, price, school, time
    Regex indexRegex = new Regex("see_info.*;");                             // sequence number

    List<PackageInfo> rList = new List<PackageInfo>();

    // Scan the page once per column instead of once per loop iteration.
    // NOTE(review): columns are paired with the i-th project-name row purely by
    // position - confirm every row on the page carries all five columns.
    MatchCollection themeRows = themeRowRegex.Matches(strContent);
    MatchCollection numberRows = numberRowRegex.Matches(strContent);
    MatchCollection priceRows = priceRowRegex.Matches(strContent);
    MatchCollection sellerRows = sellerRowRegex.Matches(strContent);
    MatchCollection timeRows = timeRowRegex.Matches(strContent);

    for (int i = 0; i < themeRows.Count; i++)
    {
        string themeRow = themeRows[i].Value;
        if (!keywordRegex.IsMatch(themeRow))
        {
            // Keyword not found in this row.
            continue;
        }

        try
        {
            // Substring(2) skips the leading ="; Substring(1) skips the leading '>';
            // Substring(8) skips the first 8 characters of the see_info match.
            // NOTE(review): the trailing quote / "</div>" is left in the values - confirm consumers expect that.
            // Indexing [0] / [i] throws when a match is missing, which skips the row via the catch below.
            rList.Insert(0, new PackageInfo()
            {
                Name = quotedValueRegex.Matches(themeRow)[0].Value.Substring(2),
                Number = innerTextRegex.Matches(numberRows[i].Value)[0].Value.Substring(1),
                Index = indexRegex.Matches(themeRow)[0].Value.Substring(8),
                Price = innerTextRegex.Matches(priceRows[i].Value)[0].Value.Substring(1),
                Seller = innerTextRegex.Matches(sellerRows[i].Value)[0].Value.Substring(1),
                Time = innerTextRegex.Matches(timeRows[i].Value)[0].Value.Substring(1),
                Url = strUrl
            });
        }
        catch (Exception e)
        {
            new SaveInfo().WriteError(e);
        }
    }

    return rList;
}
/// <summary>
/// Collects package entries from both www.gdgpo.gov.cn listing pages.
/// </summary>
/// <returns>
/// A new list holding whatever <c>ReadInfo</c> appended for each listing URL,
/// in the order the URLs are visited.
/// </returns>
public List<PackageInfo> GetPackage()
{
    GetPage fetcher = new GetPage();
    string[] listingUrls =
    {
        "http://www.gdgpo.gov.cn/queryMoreCityCountyInfoList2.do",
        "http://www.gdgpo.gov.cn/queryMoreCityCountyInfoList2/channelCode/00051.html"
    };
    List<PackageInfo> packages = new List<PackageInfo>();

    // One fetcher is shared across both listings; results accumulate in a single list.
    for (int i = 0; i < listingUrls.Length; i++)
    {
        ReadInfo(fetcher, listingUrls[i], packages);
    }

    return packages;
}
/// <summary>
/// Looks up today's episode in the CCTV search results, downloads its page and
/// returns the text between the "本期节目主要内容" marker and the
/// "(《新闻联播》" marker.
/// </summary>
/// <returns>
/// The extracted text (starting at the first marker); <see cref="string.Empty"/>
/// when the search results contain no entry for today; or the literal
/// "内容解析错误" when the markers cannot be located in the episode page.
/// </returns>
private string GetNewsContent()
{
    GetPage page = new GetPage();
    string strUrlLink = "https://search.cctv.com/ifsearch.php?page=1&qtext=%E6%96%B0%E9%97%BB%E8%81%94%E6%92%AD&sort=relevance&pageSize=20&type=video&vtime=-1&datepid=1&channel=&pageflag=0&qtext_str=%E6%96%B0%E9%97%BB%E8%81%94%E6%92%AD";
    string strContent = page.GetPageInfo(strUrlLink);

    // Read the date once so both stamps agree, and format it culture-invariantly
    // so a non-Gregorian default calendar cannot change the digits.
    DateTime today = DateTime.Today;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    string strRegex = today.ToString("yyyyMMdd", invariant)
        + ".*http[^(\")]*shtml.*"
        + today.ToString("yyyy-MM-dd", invariant);

    // Narrow the search results to the span carrying both of today's date stamps.
    Regex rDateUrl = new Regex(strRegex);
    strContent = rDateUrl.Match(strContent).Value;
    if (strContent.Length == 0)
    {
        return string.Empty;
    }

    // Pull the episode URL out of that span and undo the "\/" escaping before fetching it.
    Regex rUrl = new Regex("http[^(\")]*shtml");
    string episodeUrl = rUrl.Match(strContent).Value;
    strContent = page.GetPageInfo(episodeUrl.Replace("\\/", "/"));

    // The content sometimes contains line breaks, which defeats a <p>...</p>
    // regex, so the summary is located by its start and end markers instead.
    int strStart = strContent.IndexOf("本期节目主要内容", StringComparison.Ordinal);
    int strEnd = strContent.IndexOf("(《新闻联播》", StringComparison.Ordinal);

    // IndexOf returns -1 when a marker is absent; index 0 is a valid hit.
    if (strStart >= 0 && strEnd > strStart)
    {
        strContent = strContent.Substring(strStart, strEnd - strStart);
    }
    else
    {
        strContent = "内容解析错误";
    }

    return strContent;
}