/// <summary>
/// Downloads the page at <paramref name="url"/>, extracts every <c>&lt;li&gt;...&lt;/li&gt;</c>
/// entry from the list region and adds one <c>BookData</c> per entry to <c>BookList</c>.
/// </summary>
/// <param name="url">Address of the page to fetch (handed to <c>GetHtml</c>).</param>
/// <param name="contentRegex">Pattern that narrows the page down to the list region; read through the named group <c>content</c>.</param>
/// <param name="nameRegex">Pattern for the title text of one entry; read through the named group <c>name</c>.</param>
/// <param name="dateRegex">Pattern for the last-update time; read through the named group <c>date</c>.</param>
/// <param name="countRegex">Pattern for the chapter count; read through the named group <c>count</c>.</param>
/// <param name="webRegex">Pattern for the number of sources; read through the named group <c>web</c>.</param>
/// <param name="urlRegex">Pattern for the query part of the book link; read through the named group <c>url</c>.</param>
/// <remarks>
/// The optional patterns default to an empty string. An empty pattern (or a pattern that
/// does not match) yields an empty string for the corresponding field, because a named
/// group that did not participate in the match has an empty <c>Value</c>.
/// </remarks>
void GetList(string url, string contentRegex, string nameRegex, string dateRegex = "", string countRegex = "", string webRegex = "", string urlRegex = "")
{
    string html = GetHtml(url);

    // GetHtml is defined elsewhere; guard against a null/empty page so Regex.Match
    // does not throw ArgumentNullException. An empty page produces no entries anyway.
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    // Narrow the page down to the list region.
    string content = Regex.Match(html, contentRegex).Groups["content"].Value;

    // Drop HTML comments so commented-out entries are not picked up.
    content = Regex.Replace(content, @"\<!--[\s\S]+?--\>", "");

    // Each <li>...</li> element is one book entry.
    MatchCollection entries = Regex.Matches(content, @"\<li\>[\s\S]+?\</li\>");

    foreach (Match item in entries)
    {
        BookData book = new BookData();

        // Title text: strip all whitespace first, then any remaining HTML tags.
        string rawTitle = Regex.Match(item.Value, nameRegex).Groups["name"].Value;
        string title = Regex.Replace(Regex.Replace(rawTitle, @"\s+", ""), @"\<.+?\>", "");

        // The category is written in square brackets inside the title.
        book.Kind = Regex.Match(title, @"\[(?<kind>.+?)\]").Groups["kind"].Value;

        // Remove the bracketed category, leaving "name-writer".
        string nameAndWriter = Regex.Replace(title, @"\[.+?\]", "");

        // Split into book name and writer at the first '-'.
        Match match = Regex.Match(nameAndWriter, @"(?<name>.+?)-(?<writer>[^-]+)");
        if (match.Success)
        {
            book.Name = match.Groups["name"].Value;
            book.Writer = match.Groups["writer"].Value;
        }
        else
        {
            // No "name-writer" separator: keep the whole cleaned title as the name
            // instead of silently dropping it.
            book.Name = nameAndWriter;
            book.Writer = "";
        }

        // Last-update time.
        book.DateTime = Regex.Match(item.Value, dateRegex).Groups["date"].Value;

        // Chapter count.
        book.Count = Regex.Match(item.Value, countRegex).Groups["count"].Value;

        // Number of sources; stays empty when the entry does not list one.
        book.Web = Regex.Match(item.Value, webRegex).Groups["web"].Value;

        // Query part of the link (the "md=" segment); the detail URL is only set when present.
        string md = Regex.Match(item.Value, urlRegex).Groups["url"].Value;
        if (!string.IsNullOrEmpty(md))
        {
            book.Url = "http://k.sogou.com/list?" + md;
        }

        BookList.Items.Add(book);
    }
}
/// <summary>
/// Handles a double-click on <c>BookList</c>. If the selected book has a URL it is opened
/// with the system default browser; otherwise the book's name is copied into <c>NameTb</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Mouse event data (unused).</param>
private void BookList_MouseDoubleClick(object sender, MouseButtonEventArgs e)
{
    // Nothing to do without a valid selection.
    if (BookList.SelectedIndex < 0 || BookList.SelectedIndex >= BookList.Items.Count)
    {
        return;
    }

    // `as` yields null if the selected item is not a BookData; bail out instead of
    // dereferencing null below.
    BookData book = BookList.SelectedItem as BookData;
    if (book == null)
    {
        return;
    }

    if (!string.IsNullOrEmpty(book.Url))
    {
        // Open the page via the OS shell so the default browser handles the URL.
        // UseShellExecute is set explicitly because its default is false on .NET Core/5+,
        // where Process.Start(url) would otherwise fail; on .NET Framework it is already true.
        System.Diagnostics.ProcessStartInfo startInfo = new System.Diagnostics.ProcessStartInfo(book.Url);
        startInfo.UseShellExecute = true;
        System.Diagnostics.Process.Start(startInfo);
    }
    else
    {
        // No URL means the entry came from the form data: put its name back into the name box.
        NameTb.Text = book.Name;
    }
}
/// <summary>
/// Downloads the page at <paramref name="url"/>, extracts every <c>&lt;li&gt;...&lt;/li&gt;</c>
/// entry from the list region and adds one <c>BookData</c> per entry to <c>BookList</c>.
/// </summary>
/// <param name="url">Address of the page to fetch (handed to <c>GetHtml</c>).</param>
/// <param name="contentRegex">Pattern that narrows the page down to the list region; read through the named group <c>content</c>.</param>
/// <param name="nameRegex">Pattern for the title text of one entry; read through the named group <c>name</c>.</param>
/// <param name="dateRegex">Pattern for the last-update time; read through the named group <c>date</c>.</param>
/// <param name="countRegex">Pattern for the chapter count; read through the named group <c>count</c>.</param>
/// <param name="webRegex">Pattern for the number of sources; read through the named group <c>web</c>.</param>
/// <param name="urlRegex">Pattern for the query part of the book link; read through the named group <c>url</c>.</param>
/// <remarks>
/// The optional patterns default to an empty string. An empty pattern (or a pattern that
/// does not match) yields an empty string for the corresponding field, because a named
/// group that did not participate in the match has an empty <c>Value</c>.
/// </remarks>
void GetList(string url, string contentRegex, string nameRegex, string dateRegex = "", string countRegex = "", string webRegex = "", string urlRegex = "")
{
    string html = GetHtml(url);

    // GetHtml is defined elsewhere; guard against a null/empty page so Regex.Match
    // does not throw ArgumentNullException. An empty page produces no entries anyway.
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    // Narrow the page down to the list region.
    string content = Regex.Match(html, contentRegex).Groups["content"].Value;

    // Drop HTML comments so commented-out entries are not picked up.
    content = Regex.Replace(content, @"\<!--[\s\S]+?--\>", "");

    // Each <li>...</li> element is one book entry.
    MatchCollection entries = Regex.Matches(content, @"\<li\>[\s\S]+?\</li\>");

    foreach (Match item in entries)
    {
        BookData book = new BookData();

        // Title text: strip all whitespace first, then any remaining HTML tags.
        string rawTitle = Regex.Match(item.Value, nameRegex).Groups["name"].Value;
        string title = Regex.Replace(Regex.Replace(rawTitle, @"\s+", ""), @"\<.+?\>", "");

        // The category is written in square brackets inside the title.
        book.Kind = Regex.Match(title, @"\[(?<kind>.+?)\]").Groups["kind"].Value;

        // Remove the bracketed category, leaving "name-writer".
        string nameAndWriter = Regex.Replace(title, @"\[.+?\]", "");

        // Split into book name and writer at the first '-'.
        Match match = Regex.Match(nameAndWriter, @"(?<name>.+?)-(?<writer>[^-]+)");
        if (match.Success)
        {
            book.Name = match.Groups["name"].Value;
            book.Writer = match.Groups["writer"].Value;
        }
        else
        {
            // No "name-writer" separator: keep the whole cleaned title as the name
            // instead of silently dropping it.
            book.Name = nameAndWriter;
            book.Writer = "";
        }

        // Last-update time.
        book.DateTime = Regex.Match(item.Value, dateRegex).Groups["date"].Value;

        // Chapter count.
        book.Count = Regex.Match(item.Value, countRegex).Groups["count"].Value;

        // Number of sources; stays empty when the entry does not list one.
        book.Web = Regex.Match(item.Value, webRegex).Groups["web"].Value;

        // Query part of the link (the "md=" segment); the detail URL is only set when present.
        string md = Regex.Match(item.Value, urlRegex).Groups["url"].Value;
        if (!string.IsNullOrEmpty(md))
        {
            book.Url = "http://k.sogou.com/list?" + md;
        }

        BookList.Items.Add(book);
    }
}