Exemplo n.º 1
0
        /// <summary>
        /// Downloads a listing page and adds one <c>BookData</c> entry to <c>BookList</c>
        /// for every <c>&lt;li&gt;</c> element found inside the matched content area.
        /// </summary>
        /// <param name="url">Address of the page to download through <c>GetHtml</c>.</param>
        /// <param name="contentRegex">Pattern with a named group <c>content</c> that narrows the page down to the list markup.</param>
        /// <param name="nameRegex">Pattern with a named group <c>name</c> capturing the title text; after cleanup it is parsed as "[kind]name-writer".</param>
        /// <param name="dateRegex">Pattern with a named group <c>date</c> (last update time); leave empty to skip.</param>
        /// <param name="countRegex">Pattern with a named group <c>count</c> (number of chapters); leave empty to skip.</param>
        /// <param name="webRegex">Pattern with a named group <c>web</c> (number of sources); leave empty to skip.</param>
        /// <param name="urlRegex">Pattern with a named group <c>url</c> capturing the query part of the book link; leave empty to skip.</param>
        void GetList(string url, string contentRegex, string nameRegex, string dateRegex = "", string countRegex = "", string webRegex = "", string urlRegex = "")
        {
            string html = GetHtml(url);
            if (string.IsNullOrEmpty(html))
            {
                // Nothing was downloaded, so there is nothing to parse
                // (Regex.Match would throw on a null input).
                return;
            }

            // Narrow the page down to the region that holds the list.
            string content = Regex.Match(html, contentRegex).Groups["content"].Value;

            // Drop HTML comments. "*?" (not "+?") so that an empty comment "<!---->"
            // is removed on its own instead of swallowing markup up to the next "-->".
            content = Regex.Replace(content, @"\<!--[\s\S]*?--\>", "");

            // Each attribute-less <li>...</li> element is one book entry.
            MatchCollection listItems = Regex.Matches(content, @"\<li\>[\s\S]+?\</li\>");

            foreach (Match item in listItems)
            {
                string itemHtml = item.Value;
                BookData book = new BookData();

                // Title text: strip all whitespace first, then any remaining tags.
                string rawTitle = Regex.Match(itemHtml, nameRegex).Groups["name"].Value;
                string title = Regex.Replace(Regex.Replace(rawTitle, @"\s+", ""), @"\<.+?\>", "");

                // The category is the text inside the first [...] pair.
                book.Kind = Regex.Match(title, @"\[(?<kind>.+?)\]").Groups["kind"].Value;

                // Remove the category markers, then split "name-writer" at the first dash.
                string nameAndWriter = Regex.Replace(title, @"\[.+?\]", "");
                Match parts = Regex.Match(nameAndWriter, @"(?<name>.+?)-(?<writer>[^-]+)");
                book.Name   = parts.Groups["name"].Value;
                book.Writer = parts.Groups["writer"].Value;

                // Optional fields stay empty when no pattern is supplied or nothing matches.
                book.DateTime = GetGroupValueOrEmpty(itemHtml, dateRegex, "date");
                book.Count    = GetGroupValueOrEmpty(itemHtml, countRegex, "count");
                // NOTE(review): per the original author's note, entries with a single
                // source may not match webRegex; Web is then an empty string.
                book.Web      = GetGroupValueOrEmpty(itemHtml, webRegex, "web");

                // The captured "url" group is the query string (the "md=..." part) of the book link.
                string md = GetGroupValueOrEmpty(itemHtml, urlRegex, "url");
                if (md != "")
                {
                    book.Url = "http://k.sogou.com/list?" + md;
                }

                BookList.Items.Add(book);
            }
        }

        /// <summary>
        /// Returns the value of the named group from the first match of <paramref name="pattern"/>
        /// in <paramref name="input"/>, or an empty string when the pattern is null or empty
        /// or when nothing matches.
        /// </summary>
        private static string GetGroupValueOrEmpty(string input, string pattern, string groupName)
        {
            if (string.IsNullOrEmpty(pattern))
            {
                return "";
            }

            return Regex.Match(input, pattern).Groups[groupName].Value;
        }
Exemplo n.º 2
0
 /// <summary>
 /// Handles a double-click on the book list: opens the selected book's URL with the
 /// system default handler, or copies the book name into <c>NameTb</c> when the entry
 /// has no URL.
 /// </summary>
 /// <param name="sender">Event source (not used).</param>
 /// <param name="e">Mouse event data (not used).</param>
 private void BookList_MouseDoubleClick(object sender, MouseButtonEventArgs e)
 {
     // Ignore double-clicks that do not land on a valid selection.
     if (BookList.SelectedIndex < 0 || BookList.SelectedIndex >= BookList.Items.Count)
     {
         return;
     }

     // The "as" cast yields null for an item that is not a BookData; bail out
     // instead of dereferencing it.
     BookData book = BookList.SelectedItem as BookData;
     if (book == null)
     {
         return;
     }

     if (!string.IsNullOrEmpty(book.Url))
     {
         // Open the page in the system default browser. UseShellExecute is set
         // explicitly because its default differs between .NET Framework (true)
         // and .NET Core / .NET 5+ (false), where starting a bare URL fails.
         System.Diagnostics.Process.Start(
             new System.Diagnostics.ProcessStartInfo(book.Url) { UseShellExecute = true });
     }
     else
     {
         // No URL means the entry came from the form data, so put its name
         // back into the name text box.
         NameTb.Text = book.Name;
     }
 }
        /// <summary>
        /// Fetches the page at <paramref name="url"/>, extracts every <c>&lt;li&gt;</c> entry from
        /// the matched content area and appends a <c>BookData</c> item to <c>BookList</c> for each.
        /// </summary>
        /// <param name="url">Address of the page to download through <c>GetHtml</c>.</param>
        /// <param name="contentRegex">Pattern whose named group <c>content</c> selects the list markup.</param>
        /// <param name="nameRegex">Pattern whose named group <c>name</c> selects the title text ("[kind]name-writer" after cleanup).</param>
        /// <param name="dateRegex">Pattern whose named group <c>date</c> selects the last update time; optional.</param>
        /// <param name="countRegex">Pattern whose named group <c>count</c> selects the chapter count; optional.</param>
        /// <param name="webRegex">Pattern whose named group <c>web</c> selects the number of sources; optional.</param>
        /// <param name="urlRegex">Pattern whose named group <c>url</c> selects the query part of the book link; optional.</param>
        void GetList(string url, string contentRegex, string nameRegex, string dateRegex = "", string countRegex = "", string webRegex = "", string urlRegex = "")
        {
            string html = GetHtml(url);
            if (string.IsNullOrEmpty(html))
            {
                // No page source: nothing to parse, and Regex.Match rejects a null input.
                return;
            }

            // Restrict parsing to the list region of the page.
            string content = Regex.Match(html, contentRegex).Groups["content"].Value;

            // Strip HTML comments; "*?" also removes empty comments ("<!---->") without
            // running on to the next "-->" and deleting live markup.
            content = Regex.Replace(content, @"\<!--[\s\S]*?--\>", "");

            // One attribute-less <li>...</li> element per book.
            foreach (Match item in Regex.Matches(content, @"\<li\>[\s\S]+?\</li\>"))
            {
                string itemHtml = item.Value;

                // Title text with all whitespace removed first and leftover tags removed second.
                string title = Regex.Match(itemHtml, nameRegex).Groups["name"].Value;
                title = Regex.Replace(title, @"\s+", "");
                title = Regex.Replace(title, @"\<.+?\>", "");

                BookData book = new BookData();

                // Category is whatever sits inside the first [...] pair of the title.
                book.Kind = Regex.Match(title, @"\[(?<kind>.+?)\]").Groups["kind"].Value;

                // Without the category markers the title reads "name-writer".
                Match nameAndWriter = Regex.Match(Regex.Replace(title, @"\[.+?\]", ""), @"(?<name>.+?)-(?<writer>[^-]+)");
                book.Name = nameAndWriter.Groups["name"].Value;
                book.Writer = nameAndWriter.Groups["writer"].Value;

                // Optional fields: skip the regex entirely when no pattern was supplied;
                // an unmatched pattern yields an empty string as well.
                book.DateTime = string.IsNullOrEmpty(dateRegex)
                    ? ""
                    : Regex.Match(itemHtml, dateRegex).Groups["date"].Value;
                book.Count = string.IsNullOrEmpty(countRegex)
                    ? ""
                    : Regex.Match(itemHtml, countRegex).Groups["count"].Value;
                // NOTE(review): the original author noted that entries with only one
                // source may not match webRegex; Web is left empty in that case.
                book.Web = string.IsNullOrEmpty(webRegex)
                    ? ""
                    : Regex.Match(itemHtml, webRegex).Groups["web"].Value;

                // The "url" group holds the query string (the "md=..." part) of the book link.
                string md = string.IsNullOrEmpty(urlRegex)
                    ? ""
                    : Regex.Match(itemHtml, urlRegex).Groups["url"].Value;
                if (md != "")
                {
                    book.Url = "http://k.sogou.com/list?" + md;
                }

                BookList.Items.Add(book);
            }
        }