Exemple #1
0
        /// <summary>
        /// Interactively builds a new <c>BookRule</c>: prompts on the console for each
        /// public, settable instance property, converts the typed text to the
        /// property's type, assigns it via reflection and finally calls <c>Save()</c>.
        /// </summary>
        static void NewRule()
        {
            BookRule r = new BookRule();

            PropertyInfo[] props = typeof(BookRule).GetProperties(BindingFlags.Public | BindingFlags.Instance);
            foreach (PropertyInfo p in props)
            {
                // SetValue(obj, value, null) throws for properties without a setter and
                // for indexers, so do not prompt for those at all.
                if (!p.CanWrite || p.GetIndexParameters().Length > 0)
                {
                    continue;
                }

                Console.WriteLine(string.Format("{0}:", p.Name));

                var value = Console.ReadLine();
                if (p.PropertyType == typeof(string))
                {
                    p.SetValue(r, value, null);
                }
                else if (p.PropertyType == typeof(int))
                {
                    // int property
                    p.SetValue(r, value.ToInt32(), null);
                }
                else
                {
                    // Every remaining property is treated as a Boolean.
                    // NOTE(review): a property of any other type would make SetValue throw — confirm
                    // that BookRule only exposes string / int / bool properties.
                    p.SetValue(r, value.ToBoolean(), null);
                }
            }
            r.Save();
        }
Exemple #2
0
        /// <summary>
        /// Opens a book's information page: extracts the book info with <c>r.InfoRule</c>,
        /// filters its intro, resolves the current book, downloads and uploads the cover
        /// when one is present, and then hands the chapter list HTML to
        /// <see cref="OpenChapterList"/>.
        /// </summary>
        /// <param name="r">Rule supplying the character set and the matching patterns.</param>
        /// <param name="url">Address of the book information page.</param>
        /// <exception cref="InvalidOperationException">
        /// The page produced no match for <c>r.InfoRule</c>.
        /// </exception>
        public void OpenInfoPage(BookRule r, string url)
        {
            w("打开书籍页面:" + url);
            string html = Url.GetHtml(url, r.CharSet);
            var    info = (BookInfo)SetMatchResult(typeof(BookInfo), html, r.InfoRule).FirstOrDefault();

            // FirstOrDefault yields null when nothing matched; fail with a clear message
            // instead of a NullReferenceException on the next line.
            if (info == null)
            {
                throw new InvalidOperationException("未能匹配到书籍信息:" + url);
            }

            ContentFilter f = new ContentFilter();

            info.intro = f.Filter(info.intro);

            CurBook = GetCurrentBook(info);

            // Download the cover, then upload the generated thumbnail
            if (info.image.IsNullOrEmpty() == false)
            {
                GetImage(r, info.image.AppendToDomain(RootUrl));
                string nPath = string.Format("{0}{1}.jpg", System.AppDomain.CurrentDomain.BaseDirectory, "xxx");
                string upUrl = string.Format("{0}?a=savebookface&id={1}", ApiUrl, CurBook.ID);
                Url.UpLoadFile(nPath, upUrl, false);
            }

            // Decide whether the chapter list lives on a separate page.
            // NOTE(review): if GetMatchGroup returns a System.Text.RegularExpressions.Match,
            // Groups.Count is at least 1 even when nothing matched — confirm. The null/empty
            // check on the extracted link below covers the "no match" case either way.
            string chapterListPath = null;
            if (r.ChapterListUrlRule.IsNullOrEmpty() == false &&
                html.GetMatchGroup(r.ChapterListUrlRule).Groups.Count > 0
                )
            {
                chapterListPath = html.GetMatch(r.ChapterListUrlRule).FirstOrDefault();
            }

            if (string.IsNullOrEmpty(chapterListPath))
            {
                // No separate chapter list page: the chapters are on the info page itself
                OpenChapterList(r, html);
            }
            else
            {
                string url_ChapterList  = chapterListPath.AppendToDomain(RootUrl);
                string html_ChapterList = Url.GetHtml(url_ChapterList, r.CharSet);
                OpenChapterList(r, html_ChapterList);
            }
        }
Exemple #3
0
        /// <summary>
        /// Downloads a cover image and produces a thumbnail from it.
        /// </summary>
        /// <param name="r">Rule supplying the thumbnail size (<c>FaceWidth</c> / <c>FaceHeight</c>).</param>
        /// <param name="url">Address of the cover image to download.</param>
        public void GetImage(BookRule r, string url)
        {
            string baseDir = System.AppDomain.CurrentDomain.BaseDirectory;

            // The raw download is stored as "xxx_old.jpg"; the thumbnail is written to "xxx.jpg".
            string downloadedFile = string.Format("{0}{1}_old.jpg", baseDir, "xxx");
            string thumbnailFile  = string.Format("{0}{1}.jpg", baseDir, "xxx");

            Url.DownFile(url, downloadedFile);
            Voodoo.IO.ImageHelper.MakeThumbnail(downloadedFile, thumbnailFile, r.FaceWidth, r.FaceHeight);
        }
Exemple #4
0
        /// <summary>
        /// Walks the book list starting at <paramref name="url"/>: every book matched by
        /// <c>r.ListRule</c> is opened with <see cref="OpenInfoPage"/> unless its title is
        /// black-listed, then the "next page" link (<c>r.ListUrlNextRule</c>) is followed
        /// until no further page is found or a page fails to load.
        /// </summary>
        /// <param name="r">Rule supplying the list address, character set and patterns.</param>
        /// <param name="url">List page to start from; <c>r.ListUrl</c> is used when empty.</param>
        public void OpenListPage(BookRule r, string url = "")
        {
            // Pages are followed in a loop rather than by recursion so that a long
            // list does not grow the call stack by one frame per page.
            while (true)
            {
                if (url.IsNullOrEmpty())
                {
                    url = r.ListUrl;
                }

                w("打开列表页面:" + url);

                string nextUrl = null;
                try
                {
                    string listHtml = Url.GetHtml(url, r.CharSet);

                    var books = Convert <TitleAndUrl>(SetMatchResult(typeof(TitleAndUrl), listHtml, r.ListRule));
                    foreach (var book in books)
                    {
                        try
                        {
                            if (Common.ContentFilter.GetBlackList().Contains(book.title))
                            {
                                red();
                                w(string.Format("黑名单:{0}", book.title));
                                white();
                                continue;
                            }

                            OpenInfoPage(r, book.url.AppendToDomain(RootUrl));
                        }
                        catch (Exception ex)
                        {
                            // One failing book must not stop the rest of the list
                            red();
                            w(string.Format("打开书籍页面失败:{0}", ex.Message));
                            white();
                        }
                    }

                    // List paging.
                    // NOTE(review): if GetMatchGroup returns a System.Text.RegularExpressions.Match,
                    // Groups.Count is at least 1 even when nothing matched — confirm. FirstOrDefault
                    // plus the empty check below handles the last page without throwing.
                    if (!r.ListUrlNextRule.IsNullOrEmpty() &&
                        listHtml.GetMatchGroup(r.ListUrlNextRule).Groups.Count > 0
                        )
                    {
                        string nextPath = listHtml.GetMatch(r.ListUrlNextRule).FirstOrDefault();
                        if (!string.IsNullOrEmpty(nextPath))
                        {
                            nextUrl = nextPath.AppendToDomain(RootUrl);
                        }
                    }
                }
                catch (Exception ex)
                {
                    red();
                    w(string.Format("打开列表页面失败:{0}", ex.Message));
                    white();
                    return;
                }

                // Stop when there is no next page, or when the page links to itself
                // (which would otherwise loop forever).
                if (string.IsNullOrEmpty(nextUrl) || nextUrl == url)
                {
                    return;
                }

                url = nextUrl;
            }
        }
Exemple #5
0
        /// <summary>
        /// Processes a chapter list page: extracts the chapters with
        /// <c>r.ChapterListRule</c>, skips everything up to and including the chapter
        /// whose title equals <c>CurBook.LastChapterTitle</c> (only when
        /// <c>CurBook.LastChapterID</c> is non-zero), and opens each remaining chapter
        /// with <see cref="OpenChapterPage"/>.
        /// </summary>
        /// <param name="r">Rule supplying the matching patterns.</param>
        /// <param name="html">HTML of the page that contains the chapter list.</param>
        public void OpenChapterList(BookRule r, string html)
        {
            var chapters = Convert <TitleAndUrl>(SetMatchResult(typeof(TitleAndUrl), html, r.ChapterListRule));

            // Phase 1: drop the chapters that come before (and including) the last
            // collected one. A LastChapterID of 0 means the book has no chapters yet,
            // so nothing is dropped.
            if (CurBook.LastChapterID != 0)
            {
                while (chapters.Count > 0)
                {
                    var c = chapters.First();
                    if (c.title.IsNullOrEmpty())
                    {
                        break;
                    }

                    chapters.Remove(c);
                    if (c.title == CurBook.LastChapterTitle)
                    {
                        break;
                    }
                }
            }

            // Phase 2: collect the remaining chapters in order.
            foreach (var chapter in chapters)
            {
                // Checked before the database lookup so an empty title costs no query
                if (chapter.title.IsNullOrEmpty())
                {
                    break;
                }

                var bookId       = CurBook.ID;
                var chapterTitle = chapter.title;

                using (DataEntities ent = new DataEntities())
                {
                    // NOTE(review): this compares BookChapter.ID with the book's ID; it looks like
                    // a book-id column may have been intended — confirm against the entity model.
                    if ((from l in ent.BookChapter where l.ID == bookId && l.Title == chapterTitle select l).Any())
                    {
                        return; // The chapter already exists: stop collecting this book
                    }
                }

                try
                {
                    OpenChapterPage(r, chapter.url.AppendToDomain(RootUrl));
                }
                catch (Exception ex)
                {
                    // When a chapter cannot be opened, the remaining chapters are not collected
                    red();
                    w(ex.Message);
                    white();
                    break;
                }
            }
        }
Exemple #6
0
        /// <summary>
        /// Runs every rule returned by <c>BookRule.GetAll()</c>: sets the connection
        /// string, the source site root address and the target API address from the
        /// rule, then starts at the rule's list page.
        /// </summary>
        public void FechRules()
        {
            foreach (var currentRule in BookRule.GetAll())
            {
                Connstr = currentRule.ConnStr;

                // Source site root and the XML-RPC endpoint of the target site
                string siteRoot    = string.Format("http://{0}/", currentRule.SiteDomain);
                RootUrl = siteRoot;

                string apiEndpoint = string.Format("{0}e/api/xmlrpc.aspx", currentRule.TargetSiteUrl);
                ApiUrl = apiEndpoint;

                OpenListPage(currentRule);
            }
        }
Exemple #7
0
        /// <summary>
        /// Extracts the chapter text from <paramref name="html"/> with
        /// <c>r.ContentRule</c> and, while <c>r.NextContentRule</c> yields a link,
        /// downloads the continuation page and appends its text as well.
        /// </summary>
        /// <param name="r">Rule supplying the character set and the matching patterns.</param>
        /// <param name="html">HTML of the first page of the chapter.</param>
        /// <returns>The concatenated text of all pages of the chapter.</returns>
        /// <exception cref="InvalidOperationException">
        /// A page produced no match for <c>r.ContentRule</c>.
        /// </exception>
        public string GetChapterContent(BookRule r, string html)
        {
            StringBuilder sb       = new StringBuilder();
            string        pageHtml = html;

            // Continuation pages are followed in a loop rather than by recursion.
            while (true)
            {
                var regexResult = (ChapterContent)SetMatchResult(typeof(ChapterContent), pageHtml, r.ContentRule).FirstOrDefault();
                if (regexResult == null)
                {
                    // FirstOrDefault yields null when the content pattern did not match
                    throw new InvalidOperationException("未能匹配到章节正文。");
                }

                sb.Append(regexResult.content);

                // NOTE(review): if GetMatchGroup returns a System.Text.RegularExpressions.Match,
                // Groups.Count is at least 1 even when nothing matched — confirm. The empty
                // check on the extracted link below ends the loop on the last page either way.
                if (r.NextContentRule.IsNullOrEmpty() ||
                    pageHtml.GetMatchGroup(r.NextContentRule).Groups.Count <= 0)
                {
                    break;
                }

                string nextPath = pageHtml.GetMatch(r.NextContentRule).FirstOrDefault();
                if (string.IsNullOrEmpty(nextPath))
                {
                    break;
                }

                // Use the rule's character set, as every other page download does
                pageHtml = Url.GetHtml(nextPath.AppendToDomain(RootUrl), r.CharSet);
            }

            return(sb.ToS());
        }
Exemple #8
0
        /// <summary>
        /// Opens a chapter page, extracts and filters its text, saves it with
        /// <c>SaveChapter</c>, and then follows the "next chapter" link
        /// (<c>r.NextChapterUrlRule</c>) until there is none. Each chapter is attempted
        /// up to three times.
        /// </summary>
        /// <param name="r">Rule supplying the character set and the matching patterns.</param>
        /// <param name="url">Address of the first chapter page to open.</param>
        /// <exception cref="Exception">
        /// A chapter still failed after three attempts; the last error is attached as
        /// the inner exception.
        /// </exception>
        public void OpenChapterPage(BookRule r, string url)
        {
            const int maxAttempts = 3;
            string    currentUrl  = url;

            // Following chapters are handled in this loop, outside the retry block, so
            // that a failure in a later chapter never re-downloads and re-saves an
            // earlier chapter that was already stored.
            while (true)
            {
                string    html      = null;
                Exception lastError = null;

                for (int attempt = 1; attempt <= maxAttempts; attempt++)
                {
                    try
                    {
                        Console.WriteLine(string.Format("打开章节:{0}", currentUrl));
                        html = Url.GetHtml(currentUrl, r.CharSet);
                        var result = (ChapterContent)SetMatchResult(typeof(ChapterContent), html, r.ContentRule).FirstOrDefault();

                        string        chapterContent = GetChapterContent(r, html);
                        ContentFilter f = new ContentFilter();
                        chapterContent = f.Filter(chapterContent);

                        chapterContent = chapterContent.HtmlDeCode();

                        SaveChapter(result, chapterContent);

                        lastError = null;
                        break;
                    }
                    catch (Exception ex)
                    {
                        lastError = ex;
                    }
                }

                if (lastError != null)
                {
                    // Same type and message as before; the root cause is kept as InnerException
                    throw new Exception("章节打开分析失败。", lastError);
                }

                // Decide whether there is a following chapter page.
                // NOTE(review): if GetMatchGroup returns a System.Text.RegularExpressions.Match,
                // Groups.Count is at least 1 even when nothing matched — confirm. The empty
                // check on the extracted link below ends the loop on the last chapter either way.
                string nextUrl = null;
                if (r.NextChapterUrlRule.IsNullOrEmpty() == false &&
                    html.GetMatchGroup(r.NextChapterUrlRule).Groups.Count > 0
                    )
                {
                    string nextPath = html.GetMatch(r.NextChapterUrlRule).FirstOrDefault();
                    if (!string.IsNullOrEmpty(nextPath))
                    {
                        nextUrl = nextPath.AppendToDomain(RootUrl);
                    }
                }

                // Stop when there is no next chapter, or when the page links to itself
                if (string.IsNullOrEmpty(nextUrl) || nextUrl == currentUrl)
                {
                    return;
                }

                currentUrl = nextUrl;
            }
        }