/// <summary>
/// Interactively builds a new <c>BookRule</c>: for every public, writable instance property
/// the property name is printed, one line is read from the console, the text is converted
/// to the property's type and assigned. The rule is saved once all properties are filled in.
/// </summary>
static void NewRule()
{
    BookRule r = new BookRule();
    PropertyInfo[] props = typeof(BookRule).GetProperties(BindingFlags.Public | BindingFlags.Instance);

    foreach (PropertyInfo p in props)
    {
        // PropertyInfo.SetValue throws for a property that has no setter, so do not
        // prompt for values that could never be stored.
        if (!p.CanWrite)
        {
            continue;
        }

        Console.WriteLine(string.Format("{0}:", p.Name));
        var value = Console.ReadLine();

        if (p.PropertyType == typeof(string))
        {
            p.SetValue(r, value, null);
        }
        else if (p.PropertyType == typeof(int))
        {
            // Int properties
            p.SetValue(r, value.ToInt32(), null);
        }
        else
        {
            // Every remaining property type is treated as Boolean data.
            p.SetValue(r, value.ToBoolean(), null);
        }
    }

    r.Save();
}
/// <summary>
/// Opens a book information page: downloads it, extracts the book info with
/// <c>r.InfoRule</c>, filters the intro text, resolves the current book, handles the
/// cover image when one is present, and finally hands the chapter list HTML to
/// <see cref="OpenChapterList"/>.
/// </summary>
/// <param name="r">Rule supplying the character set and the match rules used on the page.</param>
/// <param name="url">Address of the book information page.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>r.InfoRule</c> yields no book info for the downloaded page.
/// </exception>
public void OpenInfoPage(BookRule r, string url)
{
    w("打开书籍页面:" + url);
    string html = Url.GetHtml(url, r.CharSet);

    var info = (BookInfo)SetMatchResult(typeof(BookInfo), html, r.InfoRule).FirstOrDefault();
    if (info == null)
    {
        // FirstOrDefault() gives null when nothing matched; fail with a message the
        // caller can log instead of a bare NullReferenceException.
        throw new InvalidOperationException(string.Format("书籍信息匹配失败:{0}", url));
    }

    ContentFilter f = new ContentFilter();
    info.intro = f.Filter(info.intro);

    CurBook = GetCurrentBook(info);

    // Download the cover, build the thumbnail and upload it.
    if (info.image.IsNullOrEmpty() == false)
    {
        GetImage(r, info.image.AppendToDomain(RootUrl));

        // Same thumbnail path that GetImage writes to.
        string nPath = string.Format("{0}{1}.jpg", System.AppDomain.CurrentDomain.BaseDirectory, "xxx");
        string upUrl = string.Format("{0}?a=savebookface&id={1}", ApiUrl, CurBook.ID);
        Url.UpLoadFile(nPath, upUrl, false);
    }

    // Decide whether a separate chapter list page has to be opened; by default the
    // chapter list is read from the information page itself.
    string chapterListHtml = html;
    if (r.ChapterListUrlRule.IsNullOrEmpty() == false
        && html.GetMatchGroup(r.ChapterListUrlRule).Groups.Count > 0)
    {
        // NOTE(review): if GetMatchGroup returns a regex Match, Groups.Count is at least 1
        // even for a failed match, so the extracted link itself is validated here.
        string chapterListPath = html.GetMatch(r.ChapterListUrlRule).FirstOrDefault();
        if (!string.IsNullOrEmpty(chapterListPath))
        {
            string url_ChapterList = chapterListPath.AppendToDomain(RootUrl);
            chapterListHtml = Url.GetHtml(url_ChapterList, r.CharSet);
        }
    }

    OpenChapterList(r, chapterListHtml);
}
/// <summary>
/// Downloads a cover image and produces a resized copy of it.
/// The raw download is stored as <c>xxx_old.jpg</c> and the thumbnail as <c>xxx.jpg</c>,
/// both directly under the application's base directory.
/// </summary>
/// <param name="r">Rule supplying the thumbnail size (<c>FaceWidth</c> / <c>FaceHeight</c>).</param>
/// <param name="url">Address of the cover image to download.</param>
public void GetImage(BookRule r, string url)
{
    string baseDirectory = System.AppDomain.CurrentDomain.BaseDirectory;

    string downloadedPath = string.Format("{0}{1}_old.jpg", baseDirectory, "xxx");
    string thumbnailPath = string.Format("{0}{1}.jpg", baseDirectory, "xxx");

    Url.DownFile(url, downloadedPath);
    Voodoo.IO.ImageHelper.MakeThumbnail(downloadedPath, thumbnailPath, r.FaceWidth, r.FaceHeight);
}
/// <summary>
/// Opens a book list page, processes every listed book through <see cref="OpenInfoPage"/>
/// (skipping black-listed titles) and then follows the "next page" link, if the rule
/// defines one and the page contains it.
/// </summary>
/// <param name="r">Rule supplying the list URL, character set and match rules.</param>
/// <param name="url">List page to open; when null or empty, <c>r.ListUrl</c> is used.</param>
/// <remarks>
/// Failures are logged and not rethrown: a failing book is skipped, a failing list page
/// ends the processing of that page.
/// </remarks>
public void OpenListPage(BookRule r, string url = "")
{
    if (url.IsNullOrEmpty())
    {
        url = r.ListUrl;
    }

    w("打开列表页面:" + url);
    try
    {
        string listHtml = Url.GetHtml(url, r.CharSet);
        var books = Convert<TitleAndUrl>(SetMatchResult(typeof(TitleAndUrl), listHtml, r.ListRule));

        foreach (var book in books)
        {
            try
            {
                if (Common.ContentFilter.GetBlackList().Contains(book.title))
                {
                    red();
                    w(string.Format("黑名单:{0}", book.title));
                    white();
                    continue;
                }

                OpenInfoPage(r, book.url.AppendToDomain(RootUrl));
            }
            catch (Exception ex)
            {
                // One broken book must not stop the rest of the list.
                red();
                w(string.Format("打开书籍页面失败:{0}", ex.Message));
                white();
            }
        }

        // List paging.
        if (!r.ListUrlNextRule.IsNullOrEmpty()
            && listHtml.GetMatchGroup(r.ListUrlNextRule).Groups.Count > 0)
        {
            // The last page has no "next" link. FirstOrDefault() plus an explicit check
            // ends paging quietly instead of throwing and logging a list-page failure.
            string nextPath = listHtml.GetMatch(r.ListUrlNextRule).FirstOrDefault();
            if (!string.IsNullOrEmpty(nextPath))
            {
                OpenListPage(r, nextPath.AppendToDomain(RootUrl));
            }
        }
    }
    catch (Exception ex)
    {
        red();
        w(string.Format("打开列表页面失败:{0}", ex.Message));
        white();
    }
}
/// <summary>
/// Processes a chapter list page: extracts the chapter entries with
/// <c>r.ChapterListRule</c>, drops the entries up to and including the book's last
/// known chapter, and opens each remaining chapter through <see cref="OpenChapterPage"/>.
/// </summary>
/// <param name="r">Rule supplying the chapter list match rule.</param>
/// <param name="html">HTML of the page that contains the chapter list.</param>
/// <remarks>
/// Processing stops at the first entry with an empty title, at the first chapter the
/// database query reports as existing, or at the first chapter that fails to open.
/// </remarks>
public void OpenChapterList(BookRule r, string html)
{
    var chapters = Convert<TitleAndUrl>(SetMatchResult(typeof(TitleAndUrl), html, r.ChapterListRule));

    // Skip what is already stored: when the book has a last chapter, remove entries from
    // the front until the one titled like that chapter has been removed as well.
    // A LastChapterID of 0 means the book has no chapters yet, so nothing is skipped.
    if (chapters.Count > 0 && CurBook.LastChapterID != 0)
    {
        while (chapters.Count > 0)
        {
            var c = chapters.First();
            if (c.title.IsNullOrEmpty())
            {
                break;
            }

            bool isLastKnownChapter = c.title == CurBook.LastChapterTitle;
            chapters.Remove(c);
            if (isLastKnownChapter)
            {
                break;
            }
        }
    }

    while (chapters.Count > 0)
    {
        var chapter = chapters.First();

        // An entry without a title ends the run; checked first so that no database
        // round trip is spent on it.
        if (chapter.title.IsNullOrEmpty())
        {
            break;
        }

        using (DataEntities ent = new DataEntities())
        {
            // NOTE(review): the filter compares the chapter's ID with CurBook.ID; it looks
            // like a book-id column may have been intended - confirm against the schema.
            bool alreadyStored = (from l in ent.BookChapter
                                  where l.ID == CurBook.ID && l.Title == chapter.title
                                  select l).Any();
            if (alreadyStored)
            {
                return; // this chapter already exists, so stop collecting the whole book
            }
        }

        try
        {
            OpenChapterPage(r, chapter.url.AppendToDomain(RootUrl));
            chapters.Remove(chapter);
        }
        catch (Exception ex)
        {
            // When one chapter cannot be opened, the remaining chapters are not collected.
            red();
            w(ex.Message);
            white();
            break;
        }
    }
}
/// <summary>
/// Walks through every rule returned by <c>BookRule.GetAll()</c>. For each rule the
/// connection string, source root URL and target API URL are set from the rule, then
/// the rule's list page is opened.
/// </summary>
public void FechRules()
{
    foreach (var bookRule in BookRule.GetAll())
    {
        // Per-rule state read by the page handlers.
        Connstr = bookRule.ConnStr;
        RootUrl = string.Format("http://{0}/", bookRule.SiteDomain);
        ApiUrl = string.Format("{0}e/api/xmlrpc.aspx", bookRule.TargetSiteUrl);

        OpenListPage(bookRule);
    }
}
/// <summary>
/// Extracts the chapter text from a chapter page and, when the rule defines a
/// continuation-page rule that matches, downloads the continuation page and appends
/// its text recursively.
/// </summary>
/// <param name="r">Rule supplying the content rule, continuation rule and character set.</param>
/// <param name="html">HTML of the chapter page (or of one of its continuation pages).</param>
/// <returns>The text of this page followed by the text of all continuation pages.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>r.ContentRule</c> does not match the page.
/// </exception>
public string GetChapterContent(BookRule r, string html)
{
    var regexResult = (ChapterContent)SetMatchResult(typeof(ChapterContent), html, r.ContentRule).FirstOrDefault();
    if (regexResult == null)
    {
        // FirstOrDefault() gives null when nothing matched; report that explicitly
        // instead of failing with a NullReferenceException.
        throw new InvalidOperationException("章节正文匹配失败。");
    }

    StringBuilder sb = new StringBuilder();
    sb.Append(regexResult.content);

    if (r.NextContentRule.IsNullOrEmpty() == false
        && html.GetMatchGroup(r.NextContentRule).Groups.Count > 0)
    {
        string nextPath = html.GetMatch(r.NextContentRule).FirstOrDefault();
        if (!string.IsNullOrEmpty(nextPath))
        {
            // Continuation pages are fetched with the rule's character set, like every
            // other page download in this class.
            string nextHtml = Url.GetHtml(nextPath.AppendToDomain(RootUrl), r.CharSet);
            sb.Append(GetChapterContent(r, nextHtml));
        }
    }

    return sb.ToS();
}
/// <summary>
/// Opens a chapter page, extracts and filters its text, saves the chapter, and then
/// continues with the following chapter for as long as <c>r.NextChapterUrlRule</c>
/// yields a link.
/// </summary>
/// <param name="r">Rule supplying the character set and the match rules.</param>
/// <param name="url">Address of the first chapter page to open.</param>
/// <exception cref="Exception">
/// Thrown when a chapter page still fails after three attempts; the last failure is
/// available as the inner exception.
/// </exception>
public void OpenChapterPage(BookRule r, string url)
{
    const int maxAttempts = 3;
    string currentUrl = url;

    // Chapters are followed in a loop, not by recursion. Each chapter gets its own
    // three attempts, and a failure further down the chain can no longer cause an
    // already saved chapter to be downloaded and saved again.
    while (true)
    {
        string nextUrl = null;
        Exception lastError = null;

        for (int attempt = 1; attempt <= maxAttempts; attempt++)
        {
            try
            {
                Console.WriteLine(string.Format("打开章节:{0}", currentUrl));
                string html = Url.GetHtml(currentUrl, r.CharSet);

                var result = (ChapterContent)SetMatchResult(typeof(ChapterContent), html, r.ContentRule).FirstOrDefault();
                string chapterContent = GetChapterContent(r, html);

                ContentFilter f = new ContentFilter();
                chapterContent = f.Filter(chapterContent);
                chapterContent = chapterContent.HtmlDeCode();

                // Work out the following chapter before saving, so that saving is the
                // last step of an attempt and a retry never follows a successful save.
                string candidateUrl = null;
                if (r.NextChapterUrlRule.IsNullOrEmpty() == false
                    && html.GetMatchGroup(r.NextChapterUrlRule).Groups.Count > 0)
                {
                    string nextPath = html.GetMatch(r.NextChapterUrlRule).FirstOrDefault();
                    if (!string.IsNullOrEmpty(nextPath))
                    {
                        candidateUrl = nextPath.AppendToDomain(RootUrl);
                    }
                }

                SaveChapter(result, chapterContent);

                nextUrl = candidateUrl;
                lastError = null;
                break;
            }
            catch (Exception ex)
            {
                lastError = ex;
            }
        }

        if (lastError != null)
        {
            // Same exception type and message as before; the cause is kept as InnerException.
            throw new Exception("章节打开分析失败。", lastError);
        }

        if (nextUrl == null)
        {
            return;
        }

        // Move on to the next chapter page.
        currentUrl = nextUrl;
    }
}