/*
 * Finds the cover image URL for a book via Goodreads.
 *
 * Steps:
 *   1. Passes the Goodreads search URL and book.uriBookTitle to
 *      WebRequestHandler.ProcessWebRequest to get the search result HTML.
 *   2. Uses GetCoverImageLink to pull the relative link of the book page
 *      (e.g. /book/show/77566.Hyperion) out of that HTML.
 *   3. Loads the book page and looks for the <img> whose id is "coverImage".
 *
 * Side effect: when a cover URL is found, "." plus everything after the last
 * '.' of that URL is appended to book.coverImageLocation.
 *
 * Returns the value of the image's src attribute, or "" when the page could
 * not be loaded, has no such image, or the image has no src.
 */
public string GetCoverImage(EBook book)
{
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    WebRequestHandler wrh = new WebRequestHandler();

    //Search URL: https://www.goodreads.com/search?q=
    //Search Result URL: /book/show/77566.Hyperion
    //has the link to the page with the image we want
    string hostSiteSearchPage = "https://www.goodreads.com/search?q=";
    string htmlData = wrh.ProcessWebRequest(hostSiteSearchPage, book.uriBookTitle);
    string coverImageLink = GetCoverImageLink(htmlData, book);

    //Page that has the image we want
    hostSiteSearchPage = "https://www.goodreads.com";
    htmlData = wrh.ProcessWebRequest(hostSiteSearchPage, coverImageLink);
    if (string.IsNullOrEmpty(htmlData))
    {
        //Nothing to parse; LoadHtml would reject a null string.
        return "";
    }

    htmlDoc.LoadHtml(htmlData);

    //FirstOrDefault instead of First: a page without a cover image is an
    //expected outcome and is reported as "" rather than as an exception.
    var coverImage = htmlDoc.DocumentNode.Descendants("img")
        .FirstOrDefault(x => x.Attributes["id"] != null && x.Attributes["id"].Value == "coverImage");
    if (coverImage == null)
    {
        return "";
    }

    var srcAttribute = coverImage.Attributes["src"];
    if (srcAttribute == null || string.IsNullOrEmpty(srcAttribute.Value))
    {
        return "";
    }

    string coverImageHref = srcAttribute.Value;

    //Everything after the last '.' is treated as the file extension. When the
    //URL has no '.', LastIndexOf returns -1 and the whole URL is used, which
    //matches the previous Split('.') based behaviour.
    string extension = coverImageHref.Substring(coverImageHref.LastIndexOf('.') + 1);
    book.coverImageLocation += "." + extension;

    return coverImageHref;
}
/*
 * Main function for program.
 *
 * Reads the book title, author and download folder from the form, searches
 * libgen for the book, downloads the file that the scraper resolves, and
 * hands the book plus the address in tbKindleEmail to EmailHandler.SendEmail.
 * For .pdf and .epub files a cover image is looked up and downloaded as well.
 *
 * Progress and failures are written to tbLog / lblDownloadPercent; nothing is
 * returned and no exception escapes the try block.
 *
 * TODO: init
 */
public void DownloadBook()
{
    EBook book = new EBook();
    WebRequestHandler wrh = new WebRequestHandler();
    HTMLScraper scraper = new HTMLScraper();
    EmailHandler emailHandler = new EmailHandler();

    string hostSiteSearchPage = "http://libgen.io/search.php?req=";

    try
    {
        tbLog.Text += "Initializing";
        lblDownloadPercent.Text = "Downloading";

        book.bookTitle = tbBookName.Text;
        book.author = tbAuthor.Text;
        string downloadFolder = tbDownloadFolder.Text;
        Directory.CreateDirectory(downloadFolder);

        string titleAndAuthor = book.bookTitle + " - " + book.author;
        book.uriBookTitle = Regex.Replace(titleAndAuthor, @"\s+", "+");

        //Path of the downloaded file WITHOUT its extension; the extension is
        //appended exactly once, after the scraper has determined the type.
        //C:\users\Alex\Downloads\Outliers-Malcolm Gladwell.mobi
        string filePath = downloadFolder + "\\" + titleAndAuthor;
        book.coverImageLocation = downloadFolder + "\\" + book.bookTitle + " Cover";

        //Parse htmldata with XPath and return link for file types mobi, epub, pdf
        //returns htmlData page for initial search
        string htmlData = wrh.ProcessWebRequest(hostSiteSearchPage, book.uriBookTitle);

        //Will set book.filetype, returns html with link to download
        string downloadPage = scraper.GetBookLinks(htmlData, book);
        book.fileLocation = filePath + book.fileType;

        htmlData = wrh.ProcessWebRequest(downloadPage, "");
        string downloadLink = scraper.GetDownloadLink(htmlData);

        if (downloadLink != "")
        {
            lblDownloadPercent.Text = "Downloading " + book.fileType;

            //Download the EBook
            Download(downloadLink, book.fileLocation, book);

            if (book.fileType == ".pdf" || book.fileType == ".epub")
            {
                string coverImageHref = scraper.GetCoverImage(book);

                //GetCoverImage returns "" when it has no cover URL; there is
                //nothing to download in that case.
                if (!string.IsNullOrEmpty(coverImageHref))
                {
                    Download(coverImageHref, book.coverImageLocation, book);
                }
            }

            emailHandler.SendEmail(book, tbKindleEmail.Text);
        }
        else
        {
            tbLog.Text += "No link results";
        }
    }
    catch (Exception ex)
    {
        //Report the failure in the log box instead of silently discarding it.
        //TODO Log error to a persistent log as well
        tbLog.Text += "Error: " + ex.Message;
    }
}