/// <summary>
/// Walks a paginated collection listing, starting at <paramref name="firstColectionUrl"/>,
/// and asks a <c>BookHelper</c> to create Kindle files for every book link found on each page.
/// </summary>
/// <param name="firstColectionUrl">Absolute URL of the first listing page; its scheme and host
/// are used as the base address for the downloader.</param>
/// <remarks>
/// Paging stops when there is no "next" anchor, when the next link is empty or contains
/// "javascript", or when the next link points at a page that was already fetched.
/// </remarks>
public override void GetWholeCollection(string firstColectionUrl)
{
    var uri = new Uri(firstColectionUrl);
    var webber = new Webber(uri.Scheme + "://" + uri.Host);

    // Pages fetched so far; prevents an endless loop when pagination links back to itself.
    var visited = new HashSet<string>(StringComparer.Ordinal);
    var url = firstColectionUrl;

    do
    {
        visited.Add(url);

        // The override is synchronous, so the download is awaited by blocking.
        var html = webber.GetStringAsync(url).Result;
        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        var div = doc.DocumentNode.SelectSingleNode("//*[@class='tbTabContent']");
        var nextPage = doc.DocumentNode.SelectSingleNode("//a[@class='tbPagingNext icon']");

        // get list of content
        if (div != null)
        {
            foreach (var anchor in div.Descendants("a"))
            {
                // get link for each book; anchors without a usable href are skipped
                var hrefAttribute = anchor.Attributes["href"];
                if (hrefAttribute == null || string.IsNullOrEmpty(hrefAttribute.Value))
                {
                    continue;
                }

                var setting = new ThuVienHoaSen();
                var bookHelper = new BookHelper(setting);

                // create kindle file for each book
                bookHelper.CreateKindleFiles(hrefAttribute.Value);
            }
        }

        // fetch next load!
        var nextHref = nextPage != null ? nextPage.Attributes["href"] : null;
        url = nextHref != null ? nextHref.Value : string.Empty;
    }
    while (!string.IsNullOrEmpty(url) && !url.Contains("javascript") && !visited.Contains(url));
}
/// <summary>
/// Gets the whole content of a book and returns it as a <c>Book</c>.
/// </summary>
/// <param name="firstpage">
/// Address of the book's first page. When <c>domainHost</c> is null or empty this is parsed as a
/// URI and its local path is read from disk instead of being downloaded.
/// </param>
/// <returns>
/// The book information extracted from the first page. In the download path the table of
/// content and one chapter per discovered link are added; in the local-file path the result of
/// <c>GetBookInformation</c> is returned as is.
/// </returns>
public Book GetOneWholeHtml(string firstpage)
{
    var html = string.Empty;

    // 1. download
    // special for note: no host configured, so the page is read from a local file
    if (string.IsNullOrEmpty(domainHost))
    {
        var uri = new Uri(firstpage);

        // LocalPath is the unescaped file-system path; AbsolutePath stays percent-encoded
        // (e.g. "%20" for a space) and would not match the file on disk.
        html = File.ReadAllText(uri.LocalPath);
        return GetBookInformation(GetContentDiv(html));
    }

    // continue as normal
    webber = new Webber(domainHost);
    html = webber.GetStringAsync(firstpage).Result;

    // 2. parse to get links of chapters
    links = GetLinks(html);

    // 3. get content div
    var contentDiv = GetContentDiv(html);

    // 4. get book information: title, publisher, author
    var book = GetBookInformation(contentDiv);

    // only 1 page! treat the first page itself as the single chapter
    if (links == null)
    {
        links = new List<KeyValuePair<string, string>>()
        {
            new KeyValuePair<string, string>(book.Title, firstpage)
        };
    }

    // 6. get table of content to book
    book.TableOfContent = HtmlTableOfContent();

    // 7. loop and download each page per chapter
    var count = 1;
    foreach (var link in links)
    {
        // current chapter
        System.Console.WriteLine(link.Key);

        // 8. download each page/content
        html = webber.GetStringAsync(link.Value).Result;

        // 9. get main content of chapter/page
        var div = GetContentDiv(html);

        // 10. download images
        var images = FixImages(div);

        // 11. add to book chapter
        book.Chapters.Add(new Chapter
        {
            Title = link.Key,
            Content = div,
            Number = count,
            Images = images
        });

        count = count + 1;
    }

    return book;
}
/// <summary>
/// Initializes the site with the host it downloads from.
/// </summary>
/// <param name="domainHost">Host address stored on the instance and handed to the downloader.</param>
public GeneralSite(string domainHost)
{
    // Build the downloader first so the field and the delegate share one instance.
    var downloader = new Webber(domainHost);

    this.domainHost = domainHost;
    webber = downloader;
    dataDeligate = downloader.GetStringAsync;
}