/// <summary>
/// Walks a paginated book-list page and imports every requested book that is not stored yet,
/// then collects that book's chapters.
/// </summary>
/// <remarks>
/// For each list entry matched by <paramref name="BookUrlRule"/> the method skips titles that
/// were not requested and titles for which <c>BookHelper.SearchBook</c> already returns results.
/// For the remaining entries it downloads the book page, and if <paramref name="BookInfoRule"/>
/// matches, adds the book and calls <c>Collect</c> for its chapter list. When
/// <paramref name="NextPageUrl"/> matches on the list page, the same work is repeated for the
/// next page.
/// </remarks>
/// <param name="BookNeedCollect">
/// Titles to import, compared by exact equality with the title found on the list page.
/// A list whose only element is <c>"*"</c> disables the filter.
/// </param>
/// <param name="ListPageUrl">URL of the first list page; also the base used to resolve links found on it.</param>
/// <param name="NextPageUrl">Pattern applied to the list page; its <c>key</c> group is the link to the next list page.</param>
/// <param name="BookUrlRule">Pattern applied to the list page; its <c>url</c> and <c>title</c> groups describe one book.</param>
/// <param name="BookInfoRule">
/// Pattern applied to the book page; the groups <c>title</c>, <c>author</c>, <c>class</c>,
/// <c>length</c> and <c>intro</c> are read from it.
/// </param>
/// <param name="ChapterListUrl">Pattern applied to the book page; its first result is used as the chapter-list link.</param>
/// <param name="encoding">Encoding name handed to <c>Url.GetHtml</c> and to <c>Collect</c>.</param>
/// <param name="urlTitleRule">Passed through unchanged to <c>Collect</c>.</param>
/// <param name="ContentRule">Passed through unchanged to <c>Collect</c>.</param>
public void CollectBooks(List<string> BookNeedCollect, string ListPageUrl, string NextPageUrl, string BookUrlRule, string BookInfoRule, string ChapterListUrl, string encoding, string urlTitleRule, string ContentRule)
{
    BookHelper bh = new BookHelper("http://www.aizr.net/");

    while (true)
    {
        string listHtml = Url.GetHtml(ListPageUrl, encoding);

        // The advance lives in the for-increment so that every path through the body
        // (skip, failed info match, successful import) moves on to the next list entry.
        // Without it, an entry whose info rule never matches would be fetched forever.
        for (Match bookMatch = listHtml.GetMatchGroup(BookUrlRule); bookMatch.Success; bookMatch = bookMatch.NextMatch())
        {
            string bookUrl = bookMatch.Groups["url"].Value.AppendToDomain(ListPageUrl);
            string bookTitle = bookMatch.Groups["title"].Value;

            // A single "*" entry means "collect every title".
            bool collectAll = BookNeedCollect.Count == 1 && BookNeedCollect[0] == "*";
            if (!collectAll && !BookNeedCollect.Contains(bookTitle))
            {
                continue; // not one of the requested books
            }

            if (bh.SearchBook(bookTitle, "", "").Count > 0)
            {
                continue; // book already exists
            }

            string bookInfoHtml = Url.GetHtml(bookUrl, encoding);
            Match bookInfo = bookInfoHtml.GetMatchGroup(BookInfoRule);
            if (!bookInfo.Success)
            {
                continue; // book page did not match the info rule; nothing to import
            }

            // Book details were found: register the book.
            string title = bookInfo.Groups["title"].Value;
            string author = bookInfo.Groups["author"].Value;
            string cls = bookInfo.Groups["class"].Value;
            // The matched length is scaled by 1024 before being stored
            // (presumably a KB figure on the page; confirm against the target site).
            string length = (bookInfo.Groups["length"].Value.ToInt32() * 1024).ToS();
            string intro = bookInfo.Groups["intro"].Value;

            // Resolve the category, then add the book.
            Class c = bh.GetClass(cls);
            bh.BookAdd(title, author, c.ID, intro, length.ToInt64());

            // Collect the chapters.
            // NOTE(review): First() throws if ChapterListUrl matches nothing on the book page;
            // confirm that aborting the whole run is the intended reaction.
            string chapterListUrl = bookInfoHtml.GetMatch(ChapterListUrl).First().AppendToDomain(bookUrl);
            Collect(chapterListUrl, title, urlTitleRule, ContentRule, encoding);
        }

        // Finished this list page; continue only if it links to a next page.
        Match nextPage = listHtml.GetMatchGroup(NextPageUrl);
        if (!nextPage.Success)
        {
            return;
        }

        ListPageUrl = nextPage.Groups["key"].Value.AppendToDomain(ListPageUrl);
    }
}
/// <summary>
/// Walks a paginated book-list page, registers requested books that are not stored yet and
/// collects chapters for every requested book, reporting progress through <c>SetStatus</c>.
/// </summary>
/// <remarks>
/// Unlike a pure import, chapters are collected for books that already exist as well.
/// When a book is new but <paramref name="BookInfoRule"/> does not match its page, the book is
/// not added, yet chapter collection still runs using the title taken from the list page.
/// </remarks>
/// <param name="BookNeedCollect">
/// Titles to process, compared by exact equality with the HTML-trimmed title from the list page.
/// A list whose only element is <c>"*"</c> disables the filter.
/// </param>
/// <param name="ListPageUrl">URL of the first list page; also the base used to resolve links found on it.</param>
/// <param name="NextPageUrl">Pattern applied to the list page; its <c>key</c> group is the link to the next list page.</param>
/// <param name="BookUrlRule">Pattern applied to the list page; its <c>url</c> and <c>title</c> groups describe one book.</param>
/// <param name="BookInfoRule">
/// Pattern applied to the book page; the groups <c>title</c>, <c>author</c>, <c>class</c>,
/// <c>length</c>, <c>intro</c> and <c>image</c> are read from it.
/// </param>
/// <param name="ChapterListUrl">Pattern applied to the book page; its first result is used as the chapter-list link.</param>
/// <param name="encoding">Encoding name handed to <c>Url.GetHtml</c> and to <c>Collect</c>.</param>
/// <param name="urlTitleRule">Passed through unchanged to <c>Collect</c>.</param>
/// <param name="ContentRule">Passed through unchanged to <c>Collect</c>.</param>
/// <param name="NextContentUrl">Passed through unchanged to <c>Collect</c>.</param>
public void CollectBooks(List<string> BookNeedCollect, string ListPageUrl, string NextPageUrl, string BookUrlRule, string BookInfoRule, string ChapterListUrl, string encoding, string urlTitleRule, string ContentRule, string NextContentUrl)
{
    BookHelper bh = new BookHelper("http://aizr.net/");

    while (true)
    {
        SetStatus("打开列表页面");
        string listHtml = Url.GetHtml(ListPageUrl, encoding);

        for (Match entry = listHtml.GetMatchGroup(BookUrlRule); entry.Success; entry = entry.NextMatch())
        {
            string bookUrl = entry.Groups["url"].Value.AppendToDomain(ListPageUrl);
            string bookTitle = entry.Groups["title"].Value.TrimHTML();

            // A single "*" entry means "process every title".
            bool wildcardOnly = BookNeedCollect.Count == 1 && BookNeedCollect[0] == "*";
            if (!wildcardOnly && !BookNeedCollect.Contains(bookTitle))
            {
                continue; // not one of the requested books
            }

            SetStatus("验证是否存在");
            string bookInfoHtml = Url.GetHtml(bookUrl, encoding);

            bool alreadyStored = bh.SearchBook(bookTitle, "", "").Count > 0;
            if (!alreadyStored)
            {
                // Book is unknown so far: try to read its details and register it.
                SetStatus("打开书籍页面");
                Match details = bookInfoHtml.GetMatchGroup(BookInfoRule);
                if (details.Success)
                {
                    string title = details.Groups["title"].Value.TrimHTML();
                    string author = details.Groups["author"].Value;
                    string cls = details.Groups["class"].Value;
                    string length = details.Groups["length"].Value;
                    string intro = details.Groups["intro"].Value.TrimHTML();
                    // Only the (currently disabled) cover-download step uses this value.
                    string coverUrl = details.Groups["image"].Value.ToS();

                    // From here on the title from the book page replaces the list-page title.
                    bookTitle = title;

                    // Resolve the category, falling back to a default name when none was matched.
                    string className = cls;
                    if (className.Length == 0)
                    {
                        className = "其他";
                    }
                    Class category = bh.GetClass(className);

                    // Add the book.
                    Book addedBook = bh.BookAdd(title, author, category.ID, intro, length.ToInt64());

                    // Cover handling is disabled. It used to download the image at coverUrl,
                    // cut a 120x150 thumbnail and attach it to addedBook via SetBookFace.
                }
            }

            // Collect the chapters (runs for existing books as well).
            string chapterListUrl = bookInfoHtml.GetMatch(ChapterListUrl).First().AppendToDomain(bookUrl);
            Collect(chapterListUrl, bookTitle, urlTitleRule, ContentRule, NextContentUrl, encoding);
        }

        // Finished this list page; continue only if it links to a next page.
        Match nextPage = listHtml.GetMatchGroup(NextPageUrl);
        if (!nextPage.Success)
        {
            break;
        }

        ListPageUrl = nextPage.Groups["key"].Value.AppendToDomain(ListPageUrl);
    }
}