Beispiel #1
0
        /// <summary>
        /// Walks a paginated book list, registers every requested book that is not
        /// yet known to the <c>BookHelper</c> and hands its chapter list to <c>Collect</c>.
        /// </summary>
        /// <param name="BookNeedCollect">Titles to collect; a list containing only "*" means every listed book.</param>
        /// <param name="ListPageUrl">URL of the first list page to fetch.</param>
        /// <param name="NextPageUrl">Pattern applied to the list page; its "key" group holds the next page URL.</param>
        /// <param name="BookUrlRule">Pattern applied to the list page; exposes the "url" and "title" groups per book.</param>
        /// <param name="BookInfoRule">Pattern applied to the book page; exposes "title", "author", "class", "length" and "intro".</param>
        /// <param name="ChapterListUrl">Pattern applied to the book page; its first match is used as the chapter list URL.</param>
        /// <param name="encoding">Encoding name passed to every <c>Url.GetHtml</c> call.</param>
        /// <param name="urlTitleRule">Passed through unchanged to <c>Collect</c>.</param>
        /// <param name="ContentRule">Passed through unchanged to <c>Collect</c>.</param>
        public void CollectBooks(List<string> BookNeedCollect, string ListPageUrl, string NextPageUrl, string BookUrlRule, string BookInfoRule, string ChapterListUrl, string encoding, string urlTitleRule, string ContentRule)
        {
            BookHelper bh = new BookHelper("http://www.aizr.net/");

            // One iteration per list page; the loop ends when no next-page link is found.
            while (true)
            {
                string listHtml = Url.GetHtml(ListPageUrl, encoding);

                // The match is advanced in the for-iterator so that EVERY path moves on to
                // the next book. Previously a processed book (or a book whose info page did
                // not match BookInfoRule) left the match untouched and the loop could spin
                // on the same entry forever.
                for (Match m_books = listHtml.GetMatchGroup(BookUrlRule); m_books.Success; m_books = m_books.NextMatch())
                {
                    string bookUrl   = m_books.Groups["url"].Value.AppendToDomain(ListPageUrl);
                    string bookTitle = m_books.Groups["title"].Value;

                    bool collectAll = BookNeedCollect.Count == 1 && BookNeedCollect[0] == "*";
                    if (!collectAll && !BookNeedCollect.Contains(bookTitle))
                    {
                        continue; // book was not requested
                    }

                    if (bh.SearchBook(bookTitle, "", "").Count > 0)
                    {
                        continue; // book already exists
                    }

                    string bookInfoHtml = Url.GetHtml(bookUrl, encoding);
                    Match  m_bookInfo   = bookInfoHtml.GetMatchGroup(BookInfoRule);
                    if (!m_bookInfo.Success)
                    {
                        continue; // book page did not match the info rule; nothing to add
                    }

                    // Book details were found: add the book to the system.
                    string title  = m_bookInfo.Groups["title"].Value;
                    string author = m_bookInfo.Groups["author"].Value;
                    string cls    = m_bookInfo.Groups["class"].Value;
                    string intro  = m_bookInfo.Groups["intro"].Value;

                    // Scale in 64-bit arithmetic so large values cannot overflow an int.
                    // NOTE(review): the factor 1024 presumably converts KB to bytes - confirm.
                    long length = m_bookInfo.Groups["length"].Value.ToInt64() * 1024;

                    // Resolve the category, then add the book.
                    Class c = bh.GetClass(cls);
                    bh.BookAdd(title, author, c.ID, intro, length);

                    // Process the chapters.
                    // NOTE(review): First() presumably throws when ChapterListUrl matches nothing - confirm
                    // whether skipping the book would be preferable.
                    string chapterListUrl = bookInfoHtml.GetMatch(ChapterListUrl).First().AppendToDomain(bookUrl);
                    Collect(chapterListUrl, title, urlTitleRule, ContentRule, encoding);
                }

                // All books on this page handled: check whether there is a next page.
                Match m_NextPage = listHtml.GetMatchGroup(NextPageUrl);
                if (!m_NextPage.Success)
                {
                    break;
                }

                ListPageUrl = m_NextPage.Groups["key"].Value.AppendToDomain(ListPageUrl);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Walks a paginated book list, registers requested books that are not yet known
        /// to the <c>BookHelper</c>, and collects the chapters of every requested book
        /// (including books that already exist).
        /// </summary>
        /// <param name="BookNeedCollect">Titles to collect; a list containing only "*" means every listed book.</param>
        /// <param name="ListPageUrl">URL of the first list page to fetch.</param>
        /// <param name="NextPageUrl">Pattern applied to the list page; its "key" group holds the next page URL.</param>
        /// <param name="BookUrlRule">Pattern applied to the list page; exposes the "url" and "title" groups per book.</param>
        /// <param name="BookInfoRule">Pattern applied to the book page; exposes "title", "author", "class", "length" and "intro".</param>
        /// <param name="ChapterListUrl">Pattern applied to the book page; its first match is used as the chapter list URL.</param>
        /// <param name="encoding">Encoding name passed to every <c>Url.GetHtml</c> call.</param>
        /// <param name="urlTitleRule">Passed through unchanged to <c>Collect</c>.</param>
        /// <param name="ContentRule">Passed through unchanged to <c>Collect</c>.</param>
        /// <param name="NextContentUrl">Passed through unchanged to <c>Collect</c>.</param>
        public void CollectBooks(List<string> BookNeedCollect, string ListPageUrl, string NextPageUrl, string BookUrlRule, string BookInfoRule, string ChapterListUrl, string encoding, string urlTitleRule, string ContentRule, string NextContentUrl)
        {
            BookHelper bh = new BookHelper("http://aizr.net/");

            // One iteration per list page; the loop ends when no next-page link is found.
            while (true)
            {
                SetStatus("打开列表页面");
                string listHtml = Url.GetHtml(ListPageUrl, encoding);

                for (Match m_books = listHtml.GetMatchGroup(BookUrlRule); m_books.Success; m_books = m_books.NextMatch())
                {
                    string bookUrl   = m_books.Groups["url"].Value.AppendToDomain(ListPageUrl);
                    string bookTitle = m_books.Groups["title"].Value.TrimHTML();

                    bool collectAll = BookNeedCollect.Count == 1 && BookNeedCollect[0] == "*";
                    if (!collectAll && !BookNeedCollect.Contains(bookTitle))
                    {
                        continue; // book was not requested
                    }

                    SetStatus("验证是否存在");
                    // The book page is needed in both cases: for the book details when the
                    // book is new, and for the chapter list URL either way.
                    string bookInfoHtml = Url.GetHtml(bookUrl, encoding);

                    if (bh.SearchBook(bookTitle, "", "").Count == 0)
                    {
                        // Book does not exist yet.
                        SetStatus("打开书籍页面");
                        Match m_bookInfo = bookInfoHtml.GetMatchGroup(BookInfoRule);
                        if (m_bookInfo.Success)
                        {
                            // Book details were found: add the book to the system.
                            string title  = m_bookInfo.Groups["title"].Value.TrimHTML();
                            string author = m_bookInfo.Groups["author"].Value;
                            string cls    = m_bookInfo.Groups["class"].Value;
                            string length = m_bookInfo.Groups["length"].Value;
                            string intro  = m_bookInfo.Groups["intro"].Value.TrimHTML();

                            // From here on the title from the book page is used instead of
                            // the one from the list page.
                            bookTitle = title;

                            // Resolve the category; an empty category falls back to "其他".
                            Class c = bh.GetClass(cls.Length > 0 ? cls : "其他");

                            // Add the book. The cover image is not downloaded here.
                            bh.BookAdd(title, author, c.ID, intro, length.ToInt64());
                        }
                    }

                    // Process the chapters (also for books that already exist).
                    // NOTE(review): First() presumably throws when ChapterListUrl matches nothing - confirm
                    // whether skipping the book would be preferable.
                    string chapterListUrl = bookInfoHtml.GetMatch(ChapterListUrl).First().AppendToDomain(bookUrl);
                    Collect(chapterListUrl, bookTitle, urlTitleRule, ContentRule, NextContentUrl, encoding);
                }

                // All books on this page handled: check whether there is a next page.
                Match m_NextPage = listHtml.GetMatchGroup(NextPageUrl);
                if (!m_NextPage.Success)
                {
                    break;
                }

                ListPageUrl = m_NextPage.Groups["key"].Value.AppendToDomain(ListPageUrl);
            }
        }