/// <summary>
/// Click handler that imports one ydniu.com prediction list page.
/// The page number is read from <c>txtPage</c>; every detail link found under
/// <c>div.zx_leftssq</c> is downloaded and its <c>&lt;title&gt;</c> is split into
/// weekday, match number, league, home team and away team. Parsed predictions
/// are stored in one batch through <c>AppYuceDAL.InsertAppYuceList</c>; pages
/// that are not predictions, or that fail to parse, are stored through
/// <c>AppYuceDAL.InsertBadUrl</c> so they can be handled manually.
/// The success message is shown only when no error reached the outer handler.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnImportYDN_Click(object sender, EventArgs e)
{
    string prefix = @"http://www.ydniu.com";

    try
    {
        // List page, e.g. http://www.ydniu.com/Zx/jingcai/List_3_2_1.aspx
        string tempurl = @"http://www.ydniu.com/Zx/jingcai/List_3_2_{0}.aspx";
        string totalUrl = string.Format(tempurl, txtPage.Text);
        string totalHtmlData = GetHtmlFromUrl(totalUrl);

        HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
        document.LoadHtml(totalHtmlData);

        // SelectSingleNode / SelectNodes return null when nothing matches, so
        // fail with a message naming the page instead of a NullReferenceException.
        HtmlNode parentNode = document.DocumentNode.SelectSingleNode(@"//div[@class='zx_leftssq']");
        HtmlNodeCollection dds = parentNode == null ? null : parentNode.SelectNodes("./dl/dd");
        if (dds == null)
        {
            throw new InvalidOperationException("未找到预测列表: " + totalUrl);
        }

        // Collect the detail URLs (relative to prefix) from the first child of each <dd>.
        List<string> detailUrls = new List<string>();
        foreach (HtmlNode dd in dds)
        {
            if (dd.ChildNodes.Count == 0)
            {
                continue;
            }

            HtmlAttribute href = dd.ChildNodes[0].Attributes["href"];
            if (href == null)
            {
                // First child is not a link (e.g. a text node); nothing to follow.
                continue;
            }

            detailUrls.Add(href.Value);
        }

        List<AppYuce> yuces = new List<AppYuce>();
        foreach (string detailUrl in detailUrls)
        {
            string realUrl = prefix + detailUrl;
            string title = "";
            bool parsed = false;

            try
            {
                string detailHtml = GetHtmlFromUrl(realUrl);
                document.LoadHtml(detailHtml);
                HtmlNode detailRootNode = document.DocumentNode;

                // The <title> carries weekday, match number, league and both teams.
                title = detailRootNode.SelectSingleNode(@"//title").InnerText.Trim();
                Console.WriteLine(detailUrl);
                Console.WriteLine(title);

                // Only titles containing the prediction marker are parsed.
                if (title.IndexOf("预测分析") != -1)
                {
                    int firstSpaceIndex = title.IndexOf(" ");
                    int lastSpaceIndex = title.LastIndexOf(" ");
                    string firstP = title.Substring(0, firstSpaceIndex);
                    // secondP starts AT the last space, so it keeps that leading space.
                    string secondP = title.Substring(lastSpaceIndex);

                    string weekday = firstP.Substring(0, 2);
                    string bianhao;
                    string liansai;

                    Match threeDigits = Regex.Match(firstP, @"\d{3}");
                    if (threeDigits.Success)
                    {
                        // Three-digit match number: league name starts at index 5.
                        bianhao = threeDigits.ToString();
                        liansai = firstP.Substring(5);
                    }
                    else
                    {
                        Match twoDigits = Regex.Match(firstP, @"\d{2}");
                        if (twoDigits.Success)
                        {
                            // Two-digit match number: left-pad to three digits.
                            bianhao = "0" + twoDigits.ToString();
                            liansai = firstP.Substring(4);
                        }
                        else
                        {
                            // No match number in the title.
                            bianhao = "";
                            liansai = firstP.Substring(2);
                        }
                    }

                    AppYuce yuce = new AppYuce();
                    yuce.bianhao = bianhao;
                    yuce.weekday = weekday;
                    yuce.liansai = liansai;

                    int vsIndex = secondP.IndexOf("VS", StringComparison.OrdinalIgnoreCase);
                    // Away team: text after "VS" minus a 4-character tail
                    // (presumably the "预测分析" suffix - confirm against real titles).
                    yuce.kedui = secondP.Substring(vsIndex + 2, secondP.Length - vsIndex - 6);
                    // Home team: everything before "VS" (leading space retained, as before).
                    yuce.zhudui = secondP.Substring(0, vsIndex);

                    // The first 19 characters of div.fuzu are parsed as the publish time.
                    HtmlNode node = detailRootNode.SelectSingleNode(@"//div[@class='fuzu']");
                    string fuzuString = node.InnerText.Trim();
                    string timeString = fuzuString.Substring(0, 19);
                    DateTime publishTime = Convert.ToDateTime(timeString);
                    DateTime bisaiTime = GetMatchTime(publishTime, weekday).Date;
                    yuce.riqi = bisaiTime.ToString("yyyy-MM-dd");

                    yuce.author = GetAuthor(fuzuString);
                    yuce.url = realUrl;
                    yuce.title = title;
                    yuce.operPerson = "system";
                    yuce.operateTime = DateTime.Now;

                    // Recommendation text is extracted from the raw HTML by GetSpfResult
                    // (defined elsewhere). The original note says a plain
                    // "推荐:" ... "</span>" slice only worked for one author's pages.
                    yuce.spfresult = GetSpfResult(detailHtml);

                    yuces.Add(yuce);
                    parsed = true;
                }
            }
            catch (Exception ex)
            {
                // A single malformed page must not discard the rest of the batch;
                // it is queued for manual handling below.
                Console.WriteLine(realUrl + ": " + ex.Message);
            }

            if (!parsed)
            {
                // Queue the page in the manual-handling table.
                AppYuceBadUrl badUrl = new AppYuceBadUrl();
                badUrl.title = title;
                badUrl.url = realUrl;
                badUrl.prefix = prefix;
                badUrl.creator = "system";
                badUrl.createtime = DateTime.Now;
                new AppYuceDAL().InsertBadUrl(badUrl);
            }
        }

        // Store all parsed predictions in one call.
        new AppYuceDAL().InsertAppYuceList(yuces);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
        MessageBox.Show(ex.Message);
        return; // do not report success after a failure
    }

    MessageBox.Show("操作成功!");
}
/// <summary>
/// Downloads and imports a single ydniu.com prediction detail page.
/// The page <c>&lt;title&gt;</c> is split into weekday, match number, league,
/// home team and away team; the resulting <c>AppYuce</c> is stored through
/// <c>AppYuceDAL.InsertAppYuceList</c>. Pages whose title is not a prediction,
/// or that fail to download/parse, are stored through
/// <c>AppYuceDAL.InsertBadUrl</c> for manual handling.
/// No exception leaves this method; failures are written to the console.
/// </summary>
/// <param name="param">
/// Detail URL relative to http://www.ydniu.com; <c>ToString()</c> is used, so
/// any object whose string form is that path is accepted.
/// </param>
private void GetYuceDetailInThread(Object param)
{
    if (param == null)
    {
        Console.WriteLine("GetYuceDetailInThread: param is null");
        return;
    }

    string prefix = @"http://www.ydniu.com";
    string detailUrl = param.ToString();
    string realUrl = prefix + detailUrl;
    string title = "";
    AppYuce yuce = null;

    // Phase 1: download and parse. Any failure leaves yuce == null.
    try
    {
        string detailHtml = GetHtmlFromUrl(realUrl);
        HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
        document.LoadHtml(detailHtml);
        HtmlNode detailRootNode = document.DocumentNode;

        // The <title> carries weekday, match number, league and both teams.
        // Non-breaking / ideographic spaces (as entity or as character) are
        // turned into plain spaces so the space-based split below works.
        title = detailRootNode.SelectSingleNode(@"//title").InnerText
            .Replace("&nbsp;", " ")
            .Replace('\u00A0', ' ')
            .Replace('\u3000', ' ')
            .Trim();
        Console.WriteLine(detailUrl);
        Console.WriteLine(title);

        // Only titles containing the prediction marker are parsed.
        if (title.IndexOf("预测分析") != -1)
        {
            int firstSpaceIndex = title.IndexOf(" ");
            int lastSpaceIndex = title.LastIndexOf(" ");
            string firstP = title.Substring(0, firstSpaceIndex);
            // secondP starts AT the last space, so it keeps that leading space.
            string secondP = title.Substring(lastSpaceIndex);

            string weekday = firstP.Substring(0, 2);
            string bianhao;
            string liansai;

            Match threeDigits = Regex.Match(firstP, @"\d{3}");
            if (threeDigits.Success)
            {
                // Three-digit match number: league name starts at index 5.
                bianhao = threeDigits.ToString();
                liansai = firstP.Substring(5);
            }
            else
            {
                Match twoDigits = Regex.Match(firstP, @"\d{2}");
                if (twoDigits.Success)
                {
                    // Two-digit match number: left-pad to three digits.
                    bianhao = "0" + twoDigits.ToString();
                    liansai = firstP.Substring(4);
                }
                else
                {
                    // No match number in the title.
                    bianhao = "";
                    liansai = firstP.Substring(2);
                }
            }

            AppYuce parsed = new AppYuce();
            parsed.bianhao = bianhao;
            parsed.weekday = weekday;
            parsed.liansai = liansai;

            int vsIndex = secondP.IndexOf("VS", StringComparison.OrdinalIgnoreCase);
            // Away team: text after "VS" minus a 4-character tail
            // (presumably the "预测分析" suffix - confirm against real titles).
            parsed.kedui = secondP.Substring(vsIndex + 2, secondP.Length - vsIndex - 6);
            // Home team: everything before "VS" (leading space retained, as before).
            parsed.zhudui = secondP.Substring(0, vsIndex);

            // The first 19 characters of div.fuzu are parsed as the publish time.
            HtmlNode node = detailRootNode.SelectSingleNode(@"//div[@class='fuzu']");
            string fuzuString = node.InnerText.Trim();
            string timeString = fuzuString.Substring(0, 19);
            DateTime publishTime = Convert.ToDateTime(timeString);
            DateTime bisaiTime = GetMatchTime(publishTime, weekday).Date;
            parsed.riqi = bisaiTime.ToString("yyyy-MM-dd");

            parsed.author = GetAuthor(fuzuString);
            parsed.url = realUrl;
            parsed.title = title;
            parsed.operPerson = "system";
            parsed.operateTime = DateTime.Now;

            // Recommendation text is extracted from the raw HTML by GetSpfResult
            // (defined elsewhere). The original note says a plain
            // "推荐:" ... "</span>" slice only worked for one author's pages.
            parsed.spfresult = GetSpfResult(detailHtml);

            // Publish only a completely filled record.
            yuce = parsed;
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(realUrl + ": " + ex.Message);
    }

    // Phase 2: persist. Kept in its own try so a database error is not
    // mistaken for a parse error and can never escape the worker method.
    try
    {
        if (yuce == null)
        {
            // Queue the page in the manual-handling table.
            AppYuceBadUrl badUrl = new AppYuceBadUrl();
            badUrl.title = title;
            badUrl.url = realUrl;
            badUrl.prefix = prefix;
            badUrl.creator = "system";
            badUrl.createtime = DateTime.Now;
            new AppYuceDAL().InsertBadUrl(badUrl);
        }
        else
        {
            List<AppYuce> yuces = new List<AppYuce>();
            yuces.Add(yuce);
            new AppYuceDAL().InsertAppYuceList(yuces);
            Console.WriteLine(param.ToString());
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(realUrl + ": " + ex.Message);
    }
}
/// <summary>
/// Click handler that imports a range of ydniu.com prediction list pages.
/// The inclusive page range is read from <c>txtPage</c> and <c>txtPageEnd</c>.
/// For every page the detail links under <c>ul.zx_list</c> are downloaded,
/// the <c>h1.title</c> text is split into weekday, match number, league and
/// teams, and the recommendation text is located inside <c>div.zx_article</c>.
/// Parsed predictions are stored per page through
/// <c>AppYuceDAL.InsertAppYuceList</c>; pages that are not predictions or fail
/// to parse are stored through <c>AppYuceDAL.InsertBadUrl</c>.
/// The success message is shown only when no error reached the outer handler.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button3_Click(object sender, EventArgs e)
{
    try
    {
        int startPage = Convert.ToInt32(txtPage.Text);
        int endPage = Convert.ToInt32(txtPageEnd.Text);
        string prefix = "http://www.ydniu.com";

        for (int i = startPage; i <= endPage; i++)
        {
            string tempurl = @"http://www.ydniu.com/info/jczq/cpyc/{0}/";
            string totalUrl = string.Format(tempurl, i.ToString());
            string totalHtmlData = GetHtmlFromUrl(totalUrl);

            HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
            document.LoadHtml(totalHtmlData);

            // SelectSingleNode / SelectNodes return null when nothing matches, so
            // fail with a message naming the page instead of a NullReferenceException.
            HtmlNode parentNode = document.DocumentNode.SelectSingleNode(@"//ul[@class='zx_list']");
            HtmlNodeCollection links = parentNode == null ? null : parentNode.SelectNodes("./li/div/a");
            if (links == null)
            {
                throw new InvalidOperationException("未找到预测列表: " + totalUrl);
            }

            List<string> detailUrls = new List<string>();
            foreach (HtmlNode link in links)
            {
                HtmlAttribute href = link.Attributes["href"];
                if (href != null)
                {
                    detailUrls.Add(href.Value);
                }
            }

            List<AppYuce> yuces = new List<AppYuce>();
            foreach (string detailUrl in detailUrls)
            {
                string realUrl = prefix + detailUrl;
                // Reset per URL: a failure before the title is read must not
                // record the previous page's title against this URL.
                string title = "";
                bool imported = false;

                try
                {
                    string detailHtml = GetHtmlFromUrl(realUrl);
                    document.LoadHtml(detailHtml);
                    HtmlNode detailRootNode = document.DocumentNode;

                    title = detailRootNode.SelectSingleNode(@"//h1[@class='title']").InnerText.Trim();
                    // Turn non-breaking / ideographic spaces (entity or character)
                    // into plain spaces so the space-based split below works.
                    title = title.Replace("&nbsp;", " ").Replace('\u00A0', ' ').Replace('\u3000', ' ');

                    // A usable title starts with "周", contains "VS" (or "vs"),
                    // contains "推荐分析" and has at least one space.
                    bool looksLikePrediction =
                        title.Length > 0
                        && title[0] == '周'
                        && title.Replace("vs", "VS").IndexOf("VS") != -1
                        && title.IndexOf("推荐分析") != -1
                        && title.IndexOf(" ") != -1;

                    if (looksLikePrediction)
                    {
                        // Weekday (2 chars), match number (2 chars, left-padded with
                        // "0"), league (up to the first space), then home/away teams.
                        string xingqi = title.Substring(0, 2);
                        string bianhao = "0" + title.Substring(2, 2);
                        int spaceindex = title.IndexOf(" ");
                        string liansai = title.Substring(4, spaceindex - 4);
                        int vsIndex = title.Replace("vs", "VS").IndexOf("VS");
                        int tuijianIndex = title.IndexOf("推荐分析");
                        // Home team starts AT the space, so it keeps that leading space.
                        string zhudui = title.Substring(spaceindex, vsIndex - spaceindex);
                        string kedui = title.Substring(vsIndex + 2, tuijianIndex - vsIndex - 2);

                        AppYuce yuce = new AppYuce();
                        yuce.bianhao = bianhao;
                        yuce.weekday = xingqi;
                        yuce.liansai = liansai;
                        yuce.zhudui = zhudui;
                        yuce.kedui = kedui;
                        yuce.title = title;
                        yuce.url = realUrl;
                        yuce.operateTime = DateTime.Now;
                        yuce.operPerson = "wulin";

                        // First 10 characters of the article time are used as the date string.
                        string time = detailRootNode.SelectSingleNode("//div[@class='zx_article']/span[@class='time']").InnerText;
                        string dateString = time.Trim().Substring(0, 10);
                        yuce.riqi = GetRiqiFromWeekday(dateString, xingqi);

                        string yucespfstring = FindYdnSpfText(detailRootNode);
                        yuce.spfrawresult = yucespfstring;

                        // Normalise ASCII parentheses to full-width ones (the stated
                        // intent of the original comment) and drop slashes.
                        yucespfstring = yucespfstring
                            .Replace("(", "\uFF08")
                            .Replace(")", "\uFF09")
                            .Replace("/", "");

                        // Result is everything after the first colon (ASCII or
                        // full-width); with no colon the whole text is kept.
                        int colonIndex = yucespfstring.IndexOfAny(new char[] { ':', '\uFF1A' });
                        yuce.spfresult = yucespfstring.Substring(colonIndex + 1);

                        // Handicap: text between the parentheses, "0" when absent.
                        yuce.rangqiushu = "0";
                        int openIndex = yucespfstring.IndexOf('\uFF08');
                        if (openIndex != -1)
                        {
                            int closeIndex = yucespfstring.IndexOf('\uFF09');
                            yuce.rangqiushu = yucespfstring.Substring(openIndex + 1, closeIndex - openIndex - 1);
                        }

                        yuces.Add(yuce);
                        imported = true;
                    }
                }
                catch (Exception ex)
                {
                    // One malformed page is queued for manual handling below;
                    // the remaining pages are still processed.
                    Console.WriteLine(realUrl + ": " + ex.Message);
                }

                if (!imported)
                {
                    // Queue the page in the manual-handling table.
                    AppYuceBadUrl badUrl = new AppYuceBadUrl();
                    badUrl.title = title;
                    badUrl.url = realUrl;
                    badUrl.prefix = prefix;
                    badUrl.creator = "system";
                    badUrl.createtime = DateTime.Now;
                    new AppYuceDAL().InsertBadUrl(badUrl);
                }
            }

            new AppYuceDAL().InsertAppYuceList(yuces);
            Console.WriteLine("一定牛第" + i.ToString() + "页处理完毕");
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
        return; // do not report success after a failure
    }

    MessageBox.Show("操作成功!");
}

/// <summary>
/// Locates the recommendation text inside <c>div.zx_article</c> of a detail page.
/// A fixed list of XPath locations is tried in order and the first usable node's
/// <c>InnerText</c> is returned; two of the locations are only accepted when the
/// text contains the "推荐" marker followed by a colon. If none matches, the
/// trimmed text of the first paragraph containing "竞彩推荐" is returned.
/// </summary>
/// <param name="detailRootNode">Root node of the loaded detail page.</param>
/// <returns>The raw recommendation text, or an empty string when nothing matched.</returns>
/// <exception cref="InvalidOperationException">
/// The article has no <c>p</c> elements at all (the caller treats this as a bad URL).
/// </exception>
private static string FindYdnSpfText(HtmlNode detailRootNode)
{
    // Candidate locations in priority order; requiresMarker[i] tells whether
    // the node text must contain the recommendation marker to be accepted.
    string[] xpaths = new string[]
    {
        "//div[@class='zx_article']/p/a/span",
        "//div[@class='zx_article']/p/a",
        "//div[@class='zx_article']/p/strong/span",
        "//div[@class='zx_article']/p/span/a",
        "//div[@class='zx_article']/p/a/strong/span",
        "//div[@class='zx_article']/p/strong/strong/span",
        "//div[@class='zx_article']/blockquote/p/a/span",
        "//div[@class='zx_article']/p/span/strong/span",
        "//div[@class='zx_article']/p/span"
    };
    bool[] requiresMarker = new bool[]
    {
        false, true, false, false, false, false, false, false, true
    };

    for (int i = 0; i < xpaths.Length; i++)
    {
        HtmlNode candidate = detailRootNode.SelectSingleNode(xpaths[i]);
        if (candidate == null)
        {
            continue;
        }

        string text = candidate.InnerText;
        // Accept the marker with either an ASCII or a full-width colon.
        if (requiresMarker[i] && !(text.Contains("推荐:") || text.Contains("推荐\uFF1A")))
        {
            continue;
        }

        return text;
    }

    // Last resort: scan every paragraph of the article.
    HtmlNodeCollection paragraphs = detailRootNode.SelectNodes("//div[@class='zx_article']/p");
    if (paragraphs == null)
    {
        throw new InvalidOperationException("zx_article contains no <p> elements");
    }

    foreach (HtmlNode paragraph in paragraphs)
    {
        if (paragraph.InnerText.Contains("竞彩推荐"))
        {
            return paragraph.InnerText.Trim();
        }
    }

    return "";
}