예제 #1
0
        /// <summary>
        /// Walks a paginated collection listing, starting at <paramref name="firstColectionUrl"/>,
        /// and creates Kindle files for every book link found on each page.
        /// </summary>
        /// <param name="firstColectionUrl">Absolute URL of the first listing page of the collection.</param>
        public override void GetWholeCollection(string firstColectionUrl)
        {
            var uri    = new Uri(firstColectionUrl);
            var webber = new Webber(uri.Scheme + "://" + uri.Host);

            var url = firstColectionUrl;

            // Pages already fetched; stops the loop if a "next" link points back to a page we have seen.
            var visited = new System.Collections.Generic.HashSet<string>(StringComparer.Ordinal);

            do
            {
                visited.Add(url);

                var html = webber.GetStringAsync(url).Result;
                var doc  = new HtmlDocument();
                doc.LoadHtml(html);

                var div      = doc.DocumentNode.SelectSingleNode("//*[@class='tbTabContent']");
                var nextPage = doc.DocumentNode.SelectSingleNode("//a[@class='tbPagingNext icon']");

                // get list of content
                if (div != null)
                {
                    foreach (var anchor in div.Descendants("a"))
                    {
                        // get link for each book; anchors without an href (e.g. named anchors) are skipped
                        var link = anchor.GetAttributeValue("href", string.Empty);

                        if (string.IsNullOrEmpty(link))
                        {
                            continue;
                        }

                        var setting    = new ThuVienHoaSen();
                        var bookHelper = new BookHelper(setting);
                        // create kindle file for each book
                        bookHelper.CreateKindleFiles(link);
                    }
                }

                // fetch next load! A missing paging anchor, or one without an href, ends the loop.
                url = nextPage != null ? nextPage.GetAttributeValue("href", string.Empty) : string.Empty;
            } while (!string.IsNullOrEmpty(url) && !url.Contains("javascript") && !visited.Contains(url));
        }
        /// <summary>
        /// Gets the whole content of a book, starting from its first page, and returns it as book data.
        /// </summary>
        /// <remarks>
        /// When <c>domainHost</c> is null or empty, <paramref name="firstpage"/> is read as a local file
        /// and only the book information extracted from its content div is returned (no chapters are downloaded).
        /// Otherwise the first page is downloaded, its chapter links are parsed, and every chapter page is
        /// downloaded and appended to the book in order.
        /// </remarks>
        /// <param name="firstpage">URL (or local file URI/path) of the first page of the book.</param>
        /// <returns>The book built from the downloaded or locally read content.</returns>
        public Book GetOneWholeHtml(string firstpage)
        {
            string html;

            // 1. download

            // special for note
            if (string.IsNullOrEmpty(domainHost))
            {
                var uri = new Uri(firstpage);
                // LocalPath is the unescaped file-system path; AbsolutePath is percent-encoded
                // (a space becomes "%20"), which File.ReadAllText cannot resolve.
                html = File.ReadAllText(uri.LocalPath);
                return GetBookInformation(GetContentDiv(html));
            }

            // continue as normal
            webber = new Webber(domainHost);
            html   = webber.GetStringAsync(firstpage).Result;

            // 2. parse to get links of chapters
            links = GetLinks(html);
            // 3. get content div
            var contentDiv = GetContentDiv(html);
            // 4. get book information: title, publisher, author
            var book = GetBookInformation(contentDiv);

            // 5. only 1 page! Treat the first page itself as the single chapter.
            if (links == null)
            {
                links = new List<KeyValuePair<string, string>>
                {
                    new KeyValuePair<string, string>(book.Title, firstpage)
                };
            }

            // 6. get table of content to book
            book.TableOfContent = HtmlTableOfContent();
            // 7. loop and download each page per chapter
            var count = 1;

            foreach (var link in links)
            {
                // current chapter
                System.Console.WriteLine(link.Key);
                // 8. download each page/content
                html = webber.GetStringAsync(link.Value).Result;
                // 9. get main content of chapter/page
                var div = GetContentDiv(html);
                // 10. download images
                var images = FixImages(div);
                // 11. add to book chapter
                book.Chapters.Add(new Chapter
                {
                    Title   = link.Key,
                    Content = div,
                    Number  = count,
                    Images  = images
                });
                count++;
            }

            return book;
        }
예제 #3
0
 /// <summary>
 /// Creates a site bound to <paramref name="domainHost"/>: stores the host, builds a
 /// <c>Webber</c> for it, and uses that Webber's <c>GetStringAsync</c> as the data delegate.
 /// </summary>
 /// <param name="domainHost">Host value stored on the instance and passed to the <c>Webber</c> constructor.</param>
 public GeneralSite(string domainHost)
 {
     var client = new Webber(domainHost);

     this.domainHost = domainHost;
     webber          = client;
     dataDeligate    = client.GetStringAsync;
 }