/// <summary>
/// Parses a complete book starting from the given URL.
/// </summary>
/// <remarks>
/// The parsing rule is looked up through <c>RuleManager.GetBookRule</c> using
/// "http://" plus the host of <paramref name="url"/>, rather than being hard-coded here.
/// Exceptions raised while parsing propagate to the caller unchanged.
/// </remarks>
/// <param name="url">Absolute URL of the book page.</param>
/// <returns>The book populated by ParseCatalog and ParseContent.</returns>
public static Book ParseSingleBook(string url)
{
    // No try/catch here: the previous "catch (Exception err) { throw err; }" added
    // nothing except resetting the exception's stack trace.
    Uri uri = new Uri(url);
    BookRule rule = RuleManager.GetBookRule("", "http://" + uri.Host);

    Book book = new Book();
    book.rule = rule.name;
    book.baseurl = url;

    ParseCatalog(uri, book, rule);
    ParseContent(uri, book, rule);
    return book;
}
/// <summary>
/// Parses a preview of a book: the catalog plus at most the first five chapters.
/// </summary>
/// <remarks>
/// Exceptions raised while parsing propagate to the caller unchanged.
/// </remarks>
/// <param name="url">Absolute URL of the book page.</param>
/// <param name="rule">Parsing rule to apply to the page.</param>
/// <returns>The book object.</returns>
public static Book ParsePreviewBook(string url, BookRule rule)
{
    // No try/catch here: the previous "catch (Exception err) { throw err; }" added
    // nothing except resetting the exception's stack trace.
    Uri uri = new Uri(url);

    Book book = new Book();
    book.rule = rule.name;
    book.baseurl = url;

    ParseCatalog(uri, book, rule);
    ParseContent(uri, book, rule, 5); // preview: limit to 5 chapters
    return book;
}
/// <summary>
/// Click handler: asks the user to choose a file and, if one is confirmed,
/// hands it to <c>Book.SendMail</c> together with the address typed in <c>txtemail</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click_1(object sender, EventArgs e)
{
    // Nothing to do when the dialog is dismissed without confirming a file.
    if (openFileDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    string selectedPath = openFileDialog1.FileName;
    Book mailer = new Book();

    // NOTE(review): the meaning of the literal 2 is defined by Book.SendMail — confirm there.
    mailer.SendMail(txtemail.Text, 2, selectedPath);

    // The success message is shown as soon as SendMail returns.
    MessageBox.Show("操作成功!");
}
/// <summary>
/// Parses a page as a book and, if it looks like one, saves it as
/// "bookxml/{title}.xml".
/// </summary>
/// <param name="baseurl">Absolute URL of the page; its host selects the parsing rule.</param>
/// <param name="html">
/// HTML of the page, forwarded to ParseCatalog. When null, ParseCatalog loads the
/// document from <paramref name="baseurl"/> instead.
/// </param>
/// <returns>
/// <c>true</c> when the book was saved; <c>false</c> when the page has no title,
/// no author or no catalog and is therefore not treated as a book.
/// </returns>
private static bool ParseHtml(string baseurl, string html)
{
    Uri baseuri = new Uri(baseurl);
    BookRule rule = RuleManager.GetBookRule("", "http://" + baseuri.Host);

    Book book = new Book();
    book.rule = rule.name;
    book.baseurl = baseuri.ToString();

    ParseCatalog(baseuri, html, book, rule);

    // Bail out if this is not a book. The fields tested here are filled in by
    // ParseCatalog, so the check runs before ParseContent to avoid queuing
    // chapter work for a page that is going to be rejected anyway.
    if (string.IsNullOrEmpty(book.title) || string.IsNullOrEmpty(book.auther) || book.catalogs == null)
    {
        return false;
    }

    ParseContent(baseuri, book, rule);

    // NOTE(review): book.title comes from the scraped page and is used in the file
    // path unsanitized — consider stripping invalid/path-separator characters.
    book.SaveBook("bookxml/" + book.title + ".xml");
    return true;
}
/// <summary>
/// Queues the book's catalog entries on a MultiThreadingWorker (20 threads,
/// ParsePage as the work callback) and starts it.
/// </summary>
/// <param name="uri">Not used by this method.</param>
/// <param name="book">Book whose catalog entries are queued; nothing happens if it has no catalog.</param>
/// <param name="rule">Parsing rule; nothing happens when its content rule is the empty string.</param>
/// <param name="pagenum">
/// Maximum number of catalog entries to queue (preview mode). Zero or a negative
/// value queues every entry.
/// </param>
private static void ParseContent(Uri uri, Book book, BookRule rule, int pagenum)
{
    if (rule.content_rule == "" || book.catalogs == null)
    {
        return;
    }

    MultiThreadingWorker worker = new MultiThreadingWorker();
    worker.threadCount = 20;
    worker.workContent = new WorkContent(ParsePage);

    // A positive pagenum caps how many entries are queued; otherwise there is no cap.
    int limit = pagenum > 0 ? pagenum : int.MaxValue;
    for (int index = 0; index < book.catalogs.Count && index < limit; index++)
    {
        worker.AddWork(book.catalogs[index]);
    }

    worker.Start();
}
/// <summary>
/// Queues every catalog entry of the book for content parsing (no preview limit).
/// </summary>
/// <param name="uri">Forwarded to the four-argument overload.</param>
/// <param name="book">Book whose catalog entries are processed.</param>
/// <param name="rule">Parsing rule to apply.</param>
private static void ParseContent(Uri uri, Book book, BookRule rule)
{
    // A page count of 0 tells the four-argument overload not to cap the entries.
    const int noPageLimit = 0;
    ParseContent(uri, book, rule, noPageLimit);
}
/// <summary>
/// Fills in the book's title, author, cover, introduction and catalog list by
/// applying the rule's selectors to the page.
/// </summary>
/// <param name="uri">Page URL; used to load the document when <paramref name="html"/> is null, and stored as each catalog entry's base URL.</param>
/// <param name="html">HTML of the page, or null to load the document from <paramref name="uri"/>.</param>
/// <param name="book">Book to populate. Fields whose selector is null/empty are left untouched.</param>
/// <param name="rule">Selectors (and charset) describing how to read the page.</param>
private static void ParseCatalog(Uri uri, string html, Book book, BookRule rule)
{
    NSoup.Nodes.Document doc;
    if (html == null)
    {
        string buffer; // required by the out parameter; its value is not used here
        doc = NSoupHelper.GetNSoupDoc(uri.ToString(), rule.charset, out buffer);
    }
    else
    {
        doc = NSoupHelper.GetNSoupDoc(html);
    }

    if (doc == null)
    {
        return;
    }

    // Selectors are tested with IsNullOrEmpty (not just != "") so that a rule with
    // a missing (null) selector is skipped instead of being passed to doc.Select.
    if (!string.IsNullOrEmpty(rule.title_rule))
    {
        book.title = doc.Select(rule.title_rule).Text;
    }

    if (!string.IsNullOrEmpty(rule.auther_rule))
    {
        book.auther = doc.Select(rule.auther_rule).Text;
    }

    if (!string.IsNullOrEmpty(rule.cover_rule))
    {
        book.cover = doc.Select(rule.cover_rule).Text;
    }

    if (!string.IsNullOrEmpty(rule.introduction_rule))
    {
        book.introduction = doc.Select(rule.introduction_rule).Text;
    }

    if (string.IsNullOrEmpty(rule.catalog_rule))
    {
        return;
    }

    var catalog = doc.Select(rule.catalog_rule);
    if (catalog.Count == 0)
    {
        // No matches: book.catalogs is deliberately left as it was (callers test it for null).
        return;
    }

    book.catalogs = new List<BookCatalog>();
    for (int i = 0; i < catalog.Count; i++)
    {
        BookCatalog bc = new BookCatalog();
        bc.index = i;
        bc.url = catalog[i].Attr("href");
        bc.text = catalog[i].Text();
        bc.baseurl = uri.ToString();
        bc.bookrule = rule;
        book.catalogs.Add(bc);
    }
}
/// <summary>
/// Populates the book's metadata and catalog by loading the page from the URL.
/// </summary>
/// <param name="uri">URL of the book page.</param>
/// <param name="book">Book to populate.</param>
/// <param name="rule">Parsing rule to apply.</param>
private static void ParseCatalog(Uri uri, Book book, BookRule rule)
{
    // Passing null HTML makes the four-argument overload load the document from the URL.
    string noHtml = null;
    ParseCatalog(uri, noHtml, book, rule);
}