/// <summary>
/// Retrieves the book's description, image URL, and rating from the book's Amazon URL.
/// Downloads the page, parses it with auto-closing enabled and hands the parsed
/// document to the <c>GetAmazonInfo</c> overload that takes a document.
/// </summary>
/// <param name="amazonUrl">Book's Amazon URL. Nothing happens when it is null or empty.</param>
public void GetAmazonInfo(string amazonUrl)
{
    // Treat a missing URL the same as an empty one instead of handing null to the downloader.
    if (string.IsNullOrEmpty(amazonUrl))
    {
        return;
    }

    HtmlDocument bookDoc = new HtmlDocument() { OptionAutoCloseOnEnd = true };
    bookDoc.LoadHtml(HttpDownloader.GetPageHtml(amazonUrl));
    GetAmazonInfo(bookDoc);
}
/// <summary>
/// Searches Amazon's Kindle store for a book and returns the first search result
/// when that result's text contains the requested title (case-insensitive).
/// </summary>
/// <param name="title">Book title; anything from the first " (" onwards is dropped before searching.</param>
/// <param name="author">Author name; stored unchanged on the returned book.</param>
/// <returns>
/// A <c>BookInfo</c> built from the result's link text, the given author and the result's
/// <c>data-asin</c> attribute, with <c>amazonUrl</c> set to the link's href;
/// null when no matching first result (or no link inside it) is found.
/// </returns>
public static BookInfo AmazonSearchBook(string title, string author)
{
    BookInfo result = null;

    // Remove the space in front of every " X." initial. The previous loop rebuilt the
    // string from the untouched author on each pass, so only the last distinct initial
    // was ever collapsed.
    string authorTrim = Regex.Replace(author, @" ([A-Z]\.)", "$1");

    // Drop a trailing parenthesised part such as " (Series Name, #2)".
    int parenIndex = title.IndexOf(" (");
    if (parenIndex >= 0)
    {
        title = title.Substring(0, parenIndex);
    }

    string searchUrl = @"http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Ddigital-text&field-keywords=" +
        Uri.EscapeDataString(title + " " + authorTrim + " kindle edition");
    HAP.HtmlDocument searchDoc = new HAP.HtmlDocument();
    searchDoc.LoadHtml(HttpDownloader.GetPageHtml(searchUrl));
    HAP.HtmlNode node = searchDoc.DocumentNode.SelectSingleNode("//li[@id='result_0']");

    //At least attempt to verify it might be the same book?
    //Ignore case of title
    if (node != null && node.InnerText.IndexOf(title, StringComparison.OrdinalIgnoreCase) >= 0)
    {
        string foundASIN = node.GetAttributeValue("data-asin", "");
        node = node.SelectSingleNode(".//div/div/div/div[@class='a-fixed-left-grid-col a-col-right']/div/a");
        if (node != null)
        {
            result = new BookInfo(node.InnerText, author, foundASIN);
            // Grab the true link for good measure
            result.amazonUrl = node.GetAttributeValue("href", "");
        }
    }

    return result;
}
/// <summary>
/// Creates an <c>HttpDownloader</c> for the given address and returns the result of its
/// <c>GetPage</c> call.
/// </summary>
/// <param name="url">Address passed to the downloader's constructor.</param>
/// <returns>Whatever <c>GetPage</c> returns for that address.</returns>
public static string GetPageHtml(string url)
{
    return new HttpDownloader(url).GetPage();
}
/// <summary>
/// Reads a UTF-8 JSON file and fills the form's controls from the sections found under
/// its "data" object: series position, popular highlights, book description, author
/// biography, author recommendations, reading time and previous book in the series.
/// Sections that are absent are skipped.
/// </summary>
/// <param name="inputFile">Path of the JSON file to load.</param>
public async Task Populate(string inputFile)
{
    string input;
    using (StreamReader streamReader = new StreamReader(inputFile, Encoding.UTF8))
    {
        input = streamReader.ReadToEnd();
    }

    ilOtherBooks.Images.Clear();
    dgvOtherBooks.Rows.Clear();

    JObject sa = JObject.Parse(input);

    // Series position: the previous-book controls are hidden for book 1 or for a stand-alone book.
    var tempData = sa["data"]["seriesPosition"];
    if (tempData != null)
    {
        string position = tempData["positionInSeries"].ToString();
        string total = tempData["totalInSeries"].ToString();
        string name = tempData["seriesName"].ToString();
        lblSeries.Text = $"This is book {position} of {total} in {name}";
        if (position == "1")
        {
            pbPreviousCover.Visible = false;
            lblPreviousHeading.Visible = false;
            lblPreviousTitle.Visible = false;
            lblSeries.Left = 12;
            lblSeries.Width = 312;
        }
        else
        {
            lblSeries.Left = 80;
            lblSeries.Width = 244;
            pbPreviousCover.Visible = true;
            lblPreviousHeading.Visible = true;
            lblPreviousTitle.Visible = true;
        }
    }
    else
    {
        lblSeries.Text = "This book is not part of a series...";
        pbPreviousCover.Image = Resources.missing_image;
        lblPreviousHeading.Visible = false;
        lblPreviousTitle.Visible = false;
    }

    // Popular highlights: only the "N passages have been highlighted M times" part is shown.
    tempData = sa["data"]["popularHighlightsText"]?["localizedText"]?["en-US"];
    if (tempData != null)
    {
        Match popularHighlightsText = Regex.Match(tempData.ToString(), @"((\d+) passages have been highlighted (\d+) times)");
        if (popularHighlightsText.Success)
        {
            lblHighlights.Text = popularHighlightsText.Groups[1].Value;
        }
    }

    // Book description, rating and review count.
    tempData = sa["data"]["bookDescription"];
    if (tempData != null)
    {
        lblTitle.Text = tempData["title"].ToString();
        lblAuthor.Text = tempData["authors"][0].ToString();
        // NOTE(review): titlePopup receives the author text, not the title - confirm this is intended.
        titlePopup = lblAuthor.Text;
        lblDescription.Text = tempData["description"].ToString();
        descriptionPopup = lblDescription.Text;
        // The first run of digits in the rating selects the STAR<n> image resource.
        Match rating = Regex.Match(tempData["amazonRating"].ToString(), @"(\d+)");
        if (rating.Success)
        {
            pbRating.Image = (Image)Resources.ResourceManager.GetObject($"STAR{rating.Groups[1].Value}");
        }
        lblVotes.Text = $"({tempData["numberOfReviews"]} votes)";
    }

    // Author photo and biography (first author only).
    tempData = sa["data"]["authorBios"]?["authors"]?[0];
    if (tempData != null)
    {
        string imageUrl = tempData["imageUrl"]?.ToString() ?? "";
        if (imageUrl != "")
        {
            pbAuthorImage.Image = Functions.MakeGrayscale3(await HttpDownloader.GetImage(imageUrl));
        }
        lblBiography.Text = tempData["bio"]?.ToString();
        biographyPopup = lblBiography.Text;
    }

    // Other books by the author: one grid row per recommendation.
    tempData = sa["data"]["authorRecs"]?["recommendations"];
    if (tempData != null)
    {
        foreach (var rec in tempData)
        {
            string imageUrl = rec["imageUrl"]?.ToString() ?? "";
            string author = rec["authors"][0].ToString();
            string title = rec["title"].ToString();

            // Every row reads the last entry of the image list, so every row must add one.
            // Without a placeholder a cover-less first row indexed Images[-1] and later
            // cover-less rows silently showed the previous book's cover.
            if (imageUrl != "")
            {
                ilOtherBooks.Images.Add(Functions.MakeGrayscale3(await HttpDownloader.GetImage(imageUrl)));
            }
            else
            {
                ilOtherBooks.Images.Add(Resources.missing_image);
            }
            dgvOtherBooks.Rows.Add(ilOtherBooks.Images[ilOtherBooks.Images.Count - 1], $"{title}\n{author}");
        }
    }

    // Reading time, with the page count appended when available.
    tempData = sa["data"]["readingTime"];
    if (tempData != null)
    {
        lblReadingTime.Text = $"{tempData["hours"]} hours and {tempData["minutes"]} minutes to read";
        tempData = sa["data"]["readingPages"];
        if (tempData != null)
        {
            lblReadingTime.Text = $"{lblReadingTime.Text} ({tempData["pagesInBook"]} pages)";
        }
    }

    // Previous book in the series: title and, when a URL is present, its cover.
    tempData = sa["data"]["previousBookInTheSeries"];
    if (tempData != null)
    {
        lblPreviousTitle.Text = tempData["title"].ToString();
        string imageUrl = tempData["imageUrl"]?.ToString() ?? "";
        if (imageUrl != "")
        {
            pbPreviousCover.Image = Functions.MakeGrayscale3(await HttpDownloader.GetImage(imageUrl));
        }
    }
}
/// <summary>
/// Search Shelfari for series info, scrape series page, and return next title in series.
/// </summary>
/// <param name="searchHtmlDoc">
/// Book's Shelfari page, pre-downloaded. When the page names a series, this same document
/// object is reloaded with the series page, so its previous content is lost.
/// </param>
/// <returns>
/// Title of the next book, or "" when the page has no usable series information, the
/// current book is the last in the series, or no entry for the next number is found.
/// </returns>
private string GetNextInSeriesTitle2(HtmlAgilityPack.HtmlDocument searchHtmlDoc)
{
    //Check if book's Shelfari page contains series info
    HtmlAgilityPack.HtmlNode node = searchHtmlDoc.DocumentNode.SelectSingleNode("//span[@class='series']");
    if (node == null)
    {
        return "";
    }

    //Series name and book number; the text after the last space is the book number
    string series = node.InnerText.Trim();
    int currentSeriesIndex;
    Int32.TryParse(series.Substring(series.LastIndexOf(" ") + 1), out currentSeriesIndex);

    //Parse series Shelfari URL
    HtmlAgilityPack.HtmlNode seriesLink = node.SelectSingleNode("//span[@class='series']/a[@href]");
    if (seriesLink == null || node.FirstChild == null)
    {
        return "";
    }
    string seriesURL = seriesLink.GetAttributeValue("href", "");
    if (seriesURL == "")
    {
        return "";
    }
    string seriesShort = node.FirstChild.InnerText.Trim();

    // The URL is passed as-is: running it through String.Format would treat any
    // brace in it as a format placeholder and throw.
    searchHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(seriesURL));

    //Parse number of books in series and convert to integer
    int currentSeriesCount = 0;
    node = searchHtmlDoc.DocumentNode.SelectSingleNode("//h2[@class='f_m']");
    if (node != null && node.FirstChild != null)
    {
        Match match = Regex.Match(node.FirstChild.InnerText.Trim(), @"\d+");
        if (match.Success)
        {
            Int32.TryParse(match.Value, out currentSeriesCount);
        }
    }

    //Check if there is a next book
    if (currentSeriesIndex >= currentSeriesCount)
    {
        return "";
    }

    //Add series name and book number to log, if found
    main.Log(String.Format("This is book {0} of {1} in the {2} Series...", currentSeriesIndex, currentSeriesCount, seriesShort));

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var seriesItems = searchHtmlDoc.DocumentNode.SelectNodes(".//ol/li");
    if (seriesItems == null)
    {
        return "";
    }

    string nextNumber = (currentSeriesIndex + 1).ToString();
    foreach (HtmlAgilityPack.HtmlNode seriesItem in seriesItems)
    {
        node = seriesItem.SelectSingleNode(".//div/span[@class='series bold']");
        if (node == null || !node.InnerText.Contains(nextNumber))
        {
            continue;
        }

        //Parse title of the next book
        HtmlAgilityPack.HtmlNode titleNode = seriesItem.SelectSingleNode(".//h3/a");
        if (titleNode == null)
        {
            continue;
        }
        string nextTitle = titleNode.InnerText.Trim();

        //Add next book in series to log, if found
        main.Log(String.Format("The next book in this series is {0}!", nextTitle));
        return nextTitle;
    }

    return "";
}
/// <summary>
/// Finds the next book in the current book's series and, when a previous title is known
/// but not yet resolved, also fills in <c>curBook.previousInSeries</c>.
/// Each title is looked up in this order: the author's other books (next book only),
/// an Amazon search, then the ASIN embedded in the corresponding Shelfari page.
/// </summary>
/// <returns>The next book, or null when the book has no Shelfari URL or no next book was found.</returns>
private BookInfo GetNextInSeries()
{
    BookInfo nextBook = null;

    if (curBook.shelfariUrl == "")
    {
        return null;
    }

    // Get title of next book
    HtmlAgilityPack.HtmlDocument searchHtmlDoc = new HtmlAgilityPack.HtmlDocument();
    searchHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(curBook.shelfariUrl));
    string nextTitle = GetNextInSeriesTitle(searchHtmlDoc);

    if (nextTitle != "")
    {
        // Search author's other books for the book (assumes next in series was written by the same author...)
        // Returns the first one found, though there should probably not be more than 1 of the same name anyway
        nextBook = authorProfile.otherBooks.FirstOrDefault(bk => bk.title == nextTitle);
        if (nextBook == null)
        {
            // Attempt to search Amazon for the book instead
            nextBook = Functions.AmazonSearchBook(nextTitle, curBook.author);
            if (nextBook != null)
            {
                nextBook.GetAmazonInfo(nextBook.amazonUrl); //fill in desc, imageurl, and ratings
            }
        }
        // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
        if (nextBook == null)
        {
            nextBook = FindBookOnShelfariPage(nextTitle, nextShelfariUrl);
        }
        if (nextBook == null)
        {
            main.Log("Book was found to be part of a series, but next book could not be found.\r\n" +
                "Please report this book and the Shelfari URL and output log to improve parsing.");
        }
    }
    else if (curBook.seriesPosition != curBook.totalInSeries)
    {
        main.Log("Unable to find next book in series, the book may not be part of a series, or it is the latest release.");
    }

    if (previousTitle != "" && curBook.previousInSeries == null)
    {
        // Attempt to search Amazon for the book
        curBook.previousInSeries = Functions.AmazonSearchBook(previousTitle, curBook.author);
        if (curBook.previousInSeries != null)
        {
            curBook.previousInSeries.GetAmazonInfo(curBook.previousInSeries.amazonUrl); //fill in desc, imageurl, and ratings
        }
        // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
        if (curBook.previousInSeries == null)
        {
            curBook.previousInSeries = FindBookOnShelfariPage(previousTitle, previousShelfariUrl);
        }
        // Report the failure only after every lookup has been tried. This message used to
        // be logged when the previous book was already known, and never when it was missing.
        if (curBook.previousInSeries == null)
        {
            main.Log("Book was found to be part of a series, but previous book could not be found.\r\n" +
                "Please report this book and the Shelfari URL and output log to improve parsing.");
        }
    }

    return nextBook;
}

/// <summary>
/// Downloads a Shelfari book page, takes the first single-quoted ASIN found in its HTML
/// and builds a book from it, filling in its details from the matching Amazon page.
/// </summary>
/// <param name="title">Title to store on the created book.</param>
/// <param name="shelfariUrl">Shelfari page to scan; null or empty yields null.</param>
/// <returns>The created book, or null when no URL was given or no ASIN was found.</returns>
private BookInfo FindBookOnShelfariPage(string title, string shelfariUrl)
{
    if (string.IsNullOrEmpty(shelfariUrl))
    {
        return null;
    }

    HtmlDocument bookDoc = new HtmlDocument() { OptionAutoCloseOnEnd = true };
    bookDoc.LoadHtml(HttpDownloader.GetPageHtml(shelfariUrl));
    Match match = Regex.Match(bookDoc.DocumentNode.InnerHtml, "('B[A-Z0-9]{9}')");
    if (!match.Success)
    {
        return null;
    }

    string cleanASIN = match.Value.Replace("'", String.Empty);
    BookInfo book = new BookInfo(title, curBook.author, cleanASIN);
    book.GetAmazonInfo("http://www.amazon.com/dp/" + cleanASIN);
    return book;
}
public bool complete = false; //Set if constructor succeeds in gathering data

/// <summary>
/// Downloads the book's Amazon page, fills in the book's Amazon details and collects the
/// books listed in the page's carousel cards into <c>custAlsoBought</c>.
/// <c>complete</c> is set only when the constructor runs to the end; every failure path
/// logs a message and returns early with <c>complete</c> still false.
/// </summary>
/// <param name="ap">Author profile stored for later use.</param>
/// <param name="book">Book whose ASIN selects the Amazon page to download.</param>
/// <param name="erl">Value stored in <c>_erl</c>.</param>
/// <param name="frm">Main form, used for logging.</param>
//Requires an already-built AuthorProfile and the BaseEndActions.txt file
public EndActions(AuthorProfile ap, BookInfo book, long erl, frmMain frm)
{
    authorProfile = ap;
    curBook = book;
    _erl = erl;
    main = frm;

    main.Log("Attempting to find book on Amazon...");
    //Generate Book search URL from book's ASIN
    string ebookLocation = @"http://www.amazon.com/dp/" + book.asin;

    // Search Amazon for book
    main.Log("Book found on Amazon!");
    main.Log(String.Format("Book's Amazon page URL: {0}", ebookLocation));

    HtmlDocument bookHtmlDoc = new HtmlDocument { OptionAutoCloseOnEnd = true };
    try
    {
        bookHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(ebookLocation));
    }
    catch (Exception ex)
    {
        main.Log(String.Format("An error ocurred while downloading book's Amazon page: {0}\r\nYour ASIN may not be correct.", ex.Message));
        return;
    }

    // Optional debugging dump of the downloaded page; a failure here is logged but not fatal.
    if (Properties.Settings.Default.saveHtml)
    {
        try
        {
            main.Log("Saving book's Amazon webpage...");
            File.WriteAllText(Environment.CurrentDirectory +
                              String.Format(@"\dmp\{0}.bookpageHtml.txt", curBook.asin),
                              bookHtmlDoc.DocumentNode.InnerHtml);
        }
        catch (Exception ex)
        {
            main.Log(String.Format("An error ocurred saving bookpageHtml.txt: {0}", ex.Message));
        }
    }

    try
    {
        curBook.GetAmazonInfo(bookHtmlDoc);
    }
    catch (Exception ex)
    {
        main.Log(String.Format("An error ocurred parsing Amazon info: {0}", ex.Message));
        return;
    }

    main.Log("Gathering recommended book info...");
    //Parse Recommended Author titles and ASINs
    try
    {
        HtmlNodeCollection recList = bookHtmlDoc.DocumentNode.SelectNodes("//li[@class='a-carousel-card a-float-left']");
        if (recList == null)
        {
            main.Log("Could not find related book list page on Amazon.\r\nUnable to create End Actions.");
        }
        else
        {
            foreach (HtmlNode item in recList.Where(card => card != null))
            {
                HtmlNode nodeTitle = item.SelectSingleNode(".//div/a");
                HtmlNode nodeAuthor = item.SelectSingleNode(".//div/div");
                HtmlNode nodeAsin = item.SelectSingleNode(".//div");
                // A card without the expected markup is skipped; dereferencing the missing
                // node would otherwise abort the whole constructor through the outer catch.
                if (nodeTitle == null || nodeAuthor == null || nodeAsin == null)
                {
                    continue;
                }

                string nodeTitleCheck = nodeTitle.GetAttributeValue("title", "");
                string nodeUrl = nodeTitle.GetAttributeValue("href", "");
                if (nodeUrl != "")
                {
                    nodeUrl = "http://www.amazon.com" + nodeUrl;
                }
                if (nodeTitleCheck == "")
                {
                    // No title attribute: fall back to the link text.
                    //Remove CR, LF and TAB
                    nodeTitleCheck = nodeTitle.InnerText.CleanString();
                }
                string cleanAuthor = nodeAuthor.InnerText.CleanString();

                // Skip entries that are not a single regular book.
                if (Regex.IsMatch(nodeTitleCheck,
                        @"Series Reading Order|Edition|eSpecial|\([0-9]+ Book Series\)",
                        RegexOptions.IgnoreCase))
                {
                    continue;
                }

                BookInfo newBook = new BookInfo(nodeTitleCheck, cleanAuthor,
                    nodeAsin.GetAttributeValue("data-asin", ""));
                try
                {
                    //Gather book desc, image url, etc, if using new format
                    if (settings.useNewVersion)
                    {
                        newBook.GetAmazonInfo(nodeUrl);
                    }
                    custAlsoBought.Add(newBook);
                }
                catch (Exception ex)
                {
                    main.Log(String.Format("{0}\r\n{1}\r\nContinuing anyway...", ex.Message, nodeUrl));
                }
            }
        }
    }
    catch (Exception ex)
    {
        main.Log("An error occurred parsing the book's amazon page: " + ex.Message);
        return;
    }

    SetPaths();
    complete = true;
}
/// <summary>
/// Downloads the book's Shelfari page and extracts its wiki terms (characters,
/// organizations, settings, glossary) into <c>Terms</c> and, when the new version
/// setting is on, its quotations into <c>notableShelfariQuotes</c>.
/// </summary>
/// <returns>True when the page was downloaded and processed; false when every download attempt failed.</returns>
public bool GetShelfari()
{
    //Download HTML of Shelfari URL, try 3 times just in case it fails the first time
    main.Log(String.Format("Downloading Shelfari page... {0}", useSpoilers ? "SHOWING SPOILERS!" : ""));
    main.Log(String.Format("Shelfari URL: {0}", shelfariURL));

    const int maxTries = 3;
    string shelfariHtml = "";
    bool downloaded = false;
    for (int attempt = 1; attempt <= maxTries && !downloaded; attempt++)
    {
        try
        {
            //Enable cookies
            var jar = new CookieContainer();
            var client = new HttpDownloader(shelfariURL, jar, "", "");
            if (useSpoilers)
            {
                //Grab book ID from url (search for 5 digits between slashes) and create spoiler cookie
                Match idMatch = Regex.Match(shelfariURL, @"\/\d{5}");
                if (idMatch.Success)
                {
                    var bookId = idMatch.Value.Substring(1, 5);
                    var spoilers = new Cookie("ShelfariBookWikiSession", "", "/", "www.shelfari.com")
                    {
                        Value = "{\"SpoilerShowAll\":true%2C\"SpoilerShowCharacters\":true%2C\"SpoilerBookId\":"
                                + bookId + "%2C\"SpoilerShowPSS\":true%2C\"SpoilerShowQuotations\":true%2C\"SpoilerShowParents\":true%2C\"SpoilerShowThemes\":true}"
                    };
                    jar.Add(spoilers);
                }
                else
                {
                    // Without an ID no cookie can be built. Previously this threw inside the
                    // try block and was reported as a connection failure.
                    main.Log("Unable to find the book ID in the Shelfari URL; spoilers will not be shown.");
                }
            }
            shelfariHtml = client.GetPage();
            downloaded = true;
        }
        catch
        {
            if (attempt == maxTries)
            {
                main.Log("Failed to connect to Shelfari URL.");
                return false;
            }
        }
    }

    //Constants for wiki processing: page section id -> type of term it produces
    Dictionary<string, string> sections = new Dictionary<string, string>
    {
        {"WikiModule_Characters", "character"},
        {"WikiModule_Organizations", "topic"},
        {"WikiModule_Settings", "topic"},
        {"WikiModule_Glossary", "topic"}
    };

    //Parse elements from various headers listed in sections
    HtmlAgilityPack.HtmlDocument shelfariDoc = new HtmlAgilityPack.HtmlDocument();
    shelfariDoc.LoadHtml(shelfariHtml);
    foreach (KeyValuePair<string, string> section in sections)
    {
        string header = section.Key;
        if (!shelfariHtml.Contains(header))
        {
            continue; //Skip section if not found on page
        }

        //Select <li> nodes on page from within the <div id=header> tag, under <ul class=li_6>
        HtmlNodeCollection characterNodes = shelfariDoc.DocumentNode.SelectNodes(
            "//div[@id='" + header + "']//ul[@class='li_6']/li");
        if (characterNodes == null)
        {
            continue; //Header text is on the page but the section has no list items
        }

        foreach (HtmlNode li in characterNodes)
        {
            string tmpString = li.InnerText;
            Term newTerm = new Term(section.Value); //Create term as either character/topic

            // "Name: description" entries are split at the first colon.
            int colon = tmpString.IndexOf(':');
            if (colon >= 0)
            {
                newTerm.TermName = tmpString.Substring(0, colon);
                // Decode escaped ampersands (the previous Replace("&", "&") changed nothing).
                newTerm.Desc = tmpString.Substring(colon + 1).Replace("&amp;", "&").Trim();
            }
            else
            {
                newTerm.TermName = tmpString;
            }
            newTerm.DescSrc = "shelfari";

            //Use either the associated shelfari URL of the term or if none exists, use the book's url
            //Could use a wikipedia page instead as the xray plugin/site does but I decided not to
            newTerm.DescUrl = (li.InnerHtml.IndexOf("<a href") == 0
                ? li.InnerHtml.Substring(9, li.InnerHtml.IndexOf("\"", 9) - 9)
                : shelfariURL);

            if (header == "WikiModule_Glossary")
            {
                newTerm.MatchCase = false; //Default glossary terms to be case insensitive when searching through book
            }

            if (Terms.Any(t => t.TermName == newTerm.TermName))
            {
                main.Log("Duplicate term \"" + newTerm.TermName + "\" found. Ignoring this duplicate.");
            }
            else
            {
                Terms.Add(newTerm);
            }
        }
    }

    // Scrape quotes to attempt matching in ExpandRawML
    if (Properties.Settings.Default.useNewVersion)
    {
        HtmlNodeCollection quoteNodes = shelfariDoc.DocumentNode.SelectNodes(
            "//div[@id='WikiModule_Quotations']/div/ul[@class='li_6']/li");
        if (quoteNodes != null)
        {
            foreach (HtmlNode quoteNode in quoteNodes)
            {
                HtmlNode node = quoteNode.SelectSingleNode(".//blockquote");
                if (node == null)
                {
                    continue;
                }
                string quote = node.InnerText;

                string character = "";
                node = quoteNode.SelectSingleNode(".//cite");
                if (node != null)
                {
                    character = node.InnerText;
                }

                // Remove quotes (sometimes people put unnecessary quotes in the quote as well)
                quote = Regex.Replace(quote, "^(“){1,2}", "");
                quote = Regex.Replace(quote, "(”){1,2}$", "");
                notableShelfariQuotes.Add(new string[] {quote, character});
            }
        }
    }
    return true;
}
public bool complete = false; //Set if constructor succeeded in generating profile

/// <summary>
/// Builds the Author Profile file for a book: finds the author on Amazon, extracts the
/// biography, downloads and converts the author image (resize, crop to 185 px wide,
/// grayscale JPEG, base64), collects the author's other Kindle books and writes the
/// profile JSON to <c>ApPath</c>.
/// <c>complete</c> is set only when the constructor runs to the end; every failure path
/// logs a message and returns early with <c>complete</c> still false.
/// </summary>
/// <param name="nBook">Book the profile is generated for.</param>
/// <param name="frm">Main form, used for logging and for enabling the preview controls.</param>
/// <exception cref="NotSupportedException">No JPEG encoder is installed.</exception>
public AuthorProfile(BookInfo nBook, frmMain frm)
{
    this.curBook = nBook;
    this.main = frm;

    string outputDir;
    try
    {
        if (settings.android)
        {
            outputDir = settings.outDir + @"\Android\" + curBook.asin;
            Directory.CreateDirectory(outputDir);
        }
        else
        {
            outputDir = settings.useSubDirectories
                ? Functions.GetBookOutputDirectory(curBook.author, curBook.sidecarName)
                : settings.outDir;
        }
    }
    catch (Exception ex)
    {
        main.Log("Failed to create output directory: " + ex.Message + "\r\nFiles will be placed in the default output directory.");
        outputDir = settings.outDir;
    }
    ApPath = outputDir + @"\AuthorProfile.profile." + curBook.asin + ".asc";

    if (!Properties.Settings.Default.overwrite && File.Exists(ApPath))
    {
        main.Log("AuthorProfile file already exists... Skipping!\r\n" +
                 "Please review the settings page if you want to overwite any existing files.");
        return;
    }

    //Process GUID. If in decimal form, convert to hex.
    // The pattern must not be wrapped in slashes: .NET treats them as literal characters,
    // so a hex GUID never matched and was converted to "0" below.
    if (Regex.IsMatch(curBook.guid, "[a-zA-Z]"))
    {
        curBook.guid = curBook.guid.ToUpper();
    }
    else
    {
        long guidDec;
        long.TryParse(curBook.guid, out guidDec);
        curBook.guid = guidDec.ToString("X");
    }
    if (curBook.guid == "0")
    {
        main.Log("Something bad happened while converting the GUID.");
        return;
    }

    //Generate Author search URL from author's name
    string newAuthor = Functions.FixAuthor(curBook.author);
    string plusAuthorName = newAuthor.Replace(" ", "+");
    string amazonAuthorSearchUrl = @"http://www.amazon.com/s/?url=search-alias%3Dstripbooks&field-keywords=" +
                                   plusAuthorName;
    main.Log("Searching for author's page on Amazon...");

    // Search Amazon for Author
    HtmlDocument authorHtmlDoc = new HtmlDocument { OptionAutoCloseOnEnd = true };
    string authorsearchHtml = HttpDownloader.GetPageHtml(amazonAuthorSearchUrl);
    authorHtmlDoc.LoadHtml(authorsearchHtml);

    if (Properties.Settings.Default.saveHtml)
    {
        try
        {
            main.Log("Saving Amazon's author search webpage...");
            File.WriteAllText(Environment.CurrentDirectory +
                              String.Format(@"\dmp\{0}.authorsearchHtml.txt", curBook.asin),
                              authorHtmlDoc.DocumentNode.InnerHtml);
        }
        catch (Exception ex)
        {
            main.Log(String.Format("An error ocurred saving authorsearchHtml.txt: {0}", ex.Message));
        }
    }

    // Try to find Author's page from Amazon search
    HtmlNode node = authorHtmlDoc.DocumentNode.SelectSingleNode("//*[@id='result_1']");
    if (node == null || !node.OuterHtml.Contains("/e/B"))
    {
        main.Log("Could not find author's page on Amazon.\r\nUnable to create Author Profile.\r\nEnsure the author metadata field matches the author's name exactly.\r\nSearch results can be viewed at " + amazonAuthorSearchUrl);
        return;
    }

    // The author ASIN is the 10 characters following: data-asin="
    string resultHtml = node.OuterHtml;
    int index1 = resultHtml.IndexOf("data-asin");
    bool asinFound = index1 > 0 && index1 + 21 <= resultHtml.Length;
    if (asinFound)
    {
        authorAsin = resultHtml.Substring(index1 + 11, 10);
    }

    HtmlNode linkNode = node.SelectSingleNode("//*[@id='result_1']/div/div/div/div/a");
    string properAuthor = linkNode == null ? "not found" : linkNode.GetAttributeValue("href", "not found");
    if (!asinFound || properAuthor == "" || properAuthor == "not found" || properAuthor.IndexOf('/', 1) < 3)
    {
        main.Log("Found author's page, but could not parse URL properly. Report this URL on the MobileRead thread: " + amazonAuthorSearchUrl);
        return;
    }
    // The first path segment of the link is the author's name as Amazon spells it.
    properAuthor = properAuthor.Substring(1, properAuthor.IndexOf('/', 1) - 1);
    string authorAmazonWebsiteLocationLog = @"http://www.amazon.com/" + properAuthor + "/e/" + authorAsin;
    string authorAmazonWebsiteLocation = @"http://www.amazon.com/" + properAuthor + "/e/" + authorAsin +
                                         "/ref=la_" + authorAsin +
                                         "_rf_p_n_feature_browse-b_2?fst=as%3Aoff&rh=n%3A283155%2Cp_82%3A" +
                                         authorAsin +
                                         "%2Cp_n_feature_browse-bin%3A618073011&bbn=283155&ie=UTF8&qid=1432378570&rnid=618072011";

    main.Log("Author page found on Amazon!");
    main.Log(String.Format("Author's Amazon Page URL: {0}", authorAmazonWebsiteLocationLog));

    // Load Author's Amazon page
    string authorpageHtml = HttpDownloader.GetPageHtml(authorAmazonWebsiteLocation);
    authorHtmlDoc.LoadHtml(authorpageHtml);

    if (Properties.Settings.Default.saveHtml)
    {
        try
        {
            main.Log("Saving author's Amazon webpage...");
            File.WriteAllText(Environment.CurrentDirectory +
                              String.Format(@"\dmp\{0}.authorpageHtml.txt", curBook.asin),
                              authorHtmlDoc.DocumentNode.InnerHtml);
        }
        catch (Exception ex)
        {
            main.Log(String.Format("An error ocurred saving authorpageHtml.txt: {0}", ex.Message));
        }
    }

    // Try to find Author's Biography
    HtmlNode bio = authorHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='ap-bio' and @class='a-row']/div/div/span");
    string bioText = bio == null ? "" : bio.InnerText;
    //Trim authour biography to less than 1000 characters and/or replace more problematic characters.
    if (bioText.Trim().Length != 0)
    {
        const int maxBioLength = 1000;
        if (bioText.Length > maxBioLength)
        {
            // Look for a cut point inside the allowed length only. Searching the whole
            // text (as before) found the end of the biography and trimmed nothing.
            int searchFrom = maxBioLength - 2;
            int lastPunc = bioText.LastIndexOfAny(new char[] { '.', '!', '?' }, searchFrom);
            int lastSpace = bioText.LastIndexOf(' ', searchFrom);
            if (lastPunc > lastSpace)
            {
                BioTrimmed = bioText.Substring(0, lastPunc + 1);
            }
            else if (lastSpace >= 0)
            {
                BioTrimmed = bioText.Substring(0, lastSpace) + '\u2026';
            }
            else
            {
                // No space or sentence end at all: cut hard.
                BioTrimmed = bioText.Substring(0, searchFrom) + '\u2026';
            }
        }
        else
        {
            BioTrimmed = bioText;
        }
        BioTrimmed = Functions.CleanString(BioTrimmed);
        main.Log("Author biography found on Amazon!");
    }
    else
    {
        BioTrimmed = "No author biography found on Amazon!";
        main.Log("No author biography found on Amazon!");
    }

    // Try to download Author image
    HtmlNode imageXpath = authorHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='ap-image']/img");
    // A missing image node leaves the URL empty, which fails in the try block below
    // and is reported through the normal "Failed to download" path.
    authorImageUrl = imageXpath == null ? "" : imageXpath.GetAttributeValue("src", "");
    string downloadedAuthorImage = curBook.path + @"\DownloadedAuthorImage.jpg";
    try
    {
        main.Log("Downloading author image...");
        using (WebClient webClient = new WebClient())
        {
            webClient.DownloadFile(new Uri(authorImageUrl), downloadedAuthorImage);
        }
    }
    catch (Exception ex)
    {
        main.Log(String.Format("Failed to download author image: {0}", ex.Message));
        return;
    }

    main.Log("Resizing and cropping Author image...");
    //Resize Author image so it covers 185x278, keeping the aspect ratio
    int destWidth;
    int destHeight;
    using (Bitmap o = (Bitmap)Image.FromFile(downloadedAuthorImage))
    using (Bitmap nb = new Bitmap(o, o.Width, o.Height))
    {
        int sourceWidth = o.Width;
        int sourceHeight = o.Height;
        float nPercentW = (185 / (float)sourceWidth);
        float nPercentH = (278 / (float)sourceHeight);
        float nPercent = nPercentH > nPercentW ? nPercentH : nPercentW;
        destWidth = (int)(sourceWidth * nPercent);
        destHeight = (int)(sourceHeight * nPercent);

        using (Bitmap b = new Bitmap(destWidth, destHeight))
        using (Graphics g = Graphics.FromImage(b))
        {
            g.InterpolationMode = InterpolationMode.HighQualityBicubic;
            g.SmoothingMode = SmoothingMode.HighQuality;
            g.PixelOffsetMode = PixelOffsetMode.HighQuality;
            g.CompositingQuality = CompositingQuality.HighQuality;
            g.CompositingMode = CompositingMode.SourceOver;
            g.DrawImage(nb, 0, 0, destWidth, destHeight);
            b.Save(curBook.path + @"\ResizedAuthorImage.jpg");
        }
    }

    //Crop the resized image to a horizontally centred 185 pixel wide strip
    using (Bitmap target = new Bitmap(185, destHeight))
    {
        Rectangle cropRect = new Rectangle(((destWidth - 185) / 2), 0, 185, destHeight);
        using (Graphics g = Graphics.FromImage(target))
        using (Image resized = Image.FromFile(curBook.path + @"\ResizedAuthorImage.jpg"))
        {
            g.DrawImage(resized, new Rectangle(0, 0, target.Width, target.Height), cropRect, GraphicsUnit.Pixel);
        }
        target.Save(curBook.path + @"\CroppedAuthorImage.jpg");
    }

    //Convert Author image to Grayscale and save as jpeg
    int authorImageHeight;
    string base64ImageString;
    using (Bitmap bc = new Bitmap(curBook.path + @"\CroppedAuthorImage.jpg"))
    using (Bitmap bgs = Functions.MakeGrayscale3(bc))
    {
        ImageCodecInfo[] availableCodecs = ImageCodecInfo.GetImageEncoders();
        ImageCodecInfo jpgCodec = availableCodecs.FirstOrDefault(codec => codec.MimeType == "image/jpeg");
        if (jpgCodec == null)
        {
            throw new NotSupportedException("Encoder for JPEG not found.");
        }
        using (EncoderParameters encoderParams = new EncoderParameters(1))
        {
            encoderParams.Param[0] = new EncoderParameter(Encoder.ColorDepth, 8L);
            bgs.Save(curBook.path + @"\FinalImage.jpg", jpgCodec, encoderParams);
        }
        authorImageHeight = bgs.Height;

        //Convert final grayscale Author image to Base64 Format String
        base64ImageString = Functions.ImageToBase64(bgs, ImageFormat.Jpeg);
    }
    main.Log("Grayscale Base-64 encoded author image created!");

    main.Log("Gathering author's other books...");
    List<BookInfo> bookList = new List<BookInfo>();
    HtmlNodeCollection resultsNodes = authorHtmlDoc.DocumentNode.SelectNodes("//div[@id='mainResults']/ul/li");
    // SelectNodes returns null (not an empty collection) when nothing matches.
    if (resultsNodes != null)
    {
        foreach (HtmlNode result in resultsNodes)
        {
            if (!result.Id.StartsWith("result_"))
            {
                continue;
            }

            HtmlNode otherBook = result.SelectSingleNode(".//div[@class='a-row a-spacing-small']/a/h2");
            if (otherBook == null)
            {
                continue;
            }
            // Skip entries that are not a single regular book.
            Match match = Regex.Match(otherBook.InnerText,
                @"Series Reading Order|Edition|eSpecial|\([0-9]+ Book Series\)", RegexOptions.IgnoreCase);
            if (match.Success)
            {
                continue;
            }
            string name = otherBook.InnerText;

            // Only results that offer a Kindle edition carry the ASIN that is needed.
            otherBook = result.SelectSingleNode(".//*[@title='Kindle Edition']");
            if (otherBook == null)
            {
                continue;
            }
            string asin = "";
            match = Regex.Match(otherBook.OuterHtml, "dp/(B[A-Z0-9]{9})/");
            if (match.Success)
            {
                asin = match.Groups[1].Value;
            }

            if (name != "" && asin != "")
            {
                BookInfo newBook = new BookInfo(name, curBook.author, asin);
                newBook.amazonUrl = String.Format("http://www.amazon.com/dp/{0}", asin);
                bookList.Add(newBook);
            }
        }
    }

    main.Log("Gathering metadata for other books...");
    foreach (BookInfo book in bookList)
    {
        try
        {
            //Gather book desc, image url, etc, if using new format
            if (settings.useNewVersion)
            {
                book.GetAmazonInfo(book.amazonUrl);
            }
            otherBooks.Add(book);
        }
        catch (Exception ex)
        {
            main.Log(String.Format("{0}\r\nURL: {1}\r\nBook: {2}\r\nContinuing anyway...", ex.Message, book.amazonUrl, book.title));
        }
    }

    main.Log("Writing Author Profile to file...");

    //Create list of Asin numbers and titles
    // NOTE(review): values are inserted into the JSON text without escaping, so a title,
    // author name or biography containing a double quote produces invalid JSON.
    List<string> authorsOtherBookList = new List<string>();
    foreach (BookInfo bk in otherBooks)
    {
        authorsOtherBookList.Add(String.Format(@"{{""e"":1,""a"":""{0}"",""t"":""{1}""}}", bk.asin, bk.title));
    }

    //Create finalAuthorProfile.profile.ASIN.asc
    int unixTimestamp = (Int32)(DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1))).TotalSeconds;
    try
    {
        string authorProfileOutput = @"{""u"":[{""y"":" + authorImageHeight + @",""l"":[""" +
                                     string.Join(@""",""", otherBooks.Select(book => book.asin).ToArray()) +
                                     @"""],""n"":""" + curBook.author + @""",""a"":""" + authorAsin +
                                     @""",""b"":""" + BioTrimmed + @""",""i"":""" + base64ImageString +
                                     @"""}],""a"":""" +
                                     String.Format(@"{0}"",""d"":{1},""o"":[", curBook.asin, unixTimestamp) +
                                     string.Join(",", authorsOtherBookList.ToArray()) + "]}";
        File.WriteAllText(ApPath, authorProfileOutput);
        main.btnPreview.Enabled = true;
        main.cmsPreview.Items[0].Enabled = true;
        main.Log("Author Profile file created successfully!\r\nSaved to " + ApPath);
    }
    catch (Exception ex)
    {
        main.Log("An error occurred while writing the Author Profile file: " + ex.Message);
        return;
    }

    ApTitle = "About " + curBook.author;
    ApSubTitle = "Kindle Books By " + curBook.author;
    ApAuthorImage = Image.FromFile(curBook.path + @"\FinalImage.jpg");
    EaSubTitle = "More Books By " + curBook.author;
    complete = true;
}
// TODO: Review this...
/// <summary>
/// Generates the Author Profile file for the current book: searches Amazon for the author
/// (falling back to Amazon.com when the configured site yields nothing), obtains the
/// biography from a saved .bio file, Amazon or the user, downloads the author image,
/// gathers the author's other books and writes the serialized profile to disk.
/// </summary>
/// <returns>
/// True when the profile file was written; false when the file already exists and
/// overwriting is disabled, or when any required step failed (the reason is logged).
/// </returns>
public async Task<bool> Generate()
{
    string outputDir;
    try
    {
        if (_settings.Android)
        {
            outputDir = _settings.OutDir + @"\Android\" + _curBook.asin;
            Directory.CreateDirectory(outputDir);
        }
        else
        {
            outputDir = _settings.UseSubDirectories
                ? Functions.GetBookOutputDirectory(_curBook.author, _curBook.sidecarName, true)
                : _settings.OutDir;
        }
    }
    catch (Exception ex)
    {
        Logger.Log("An error occurred creating output directory: " + ex.Message + "\r\nFiles will be placed in the default output directory.");
        outputDir = _settings.OutDir;
    }
    string ApPath = outputDir + @"\AuthorProfile.profile." + _curBook.asin + ".asc";

    if (!Properties.Settings.Default.overwrite && File.Exists(ApPath))
    {
        Logger.Log("AuthorProfile file already exists... Skipping!\r\n" +
                   "Please review the settings page if you want to overwite any existing files.");
        return false;
    }

    DataSources.AuthorSearchResults searchResults = null;
    // Attempt to download from the alternate site, if present. If it fails in some way, try .com
    // If the .com search crashes, it will crash back to the caller in frmMain
    try
    {
        searchResults = await DataSources.Amazon.SearchAuthor(_curBook, _settings.AmazonTld);
    }
    catch (Exception ex)
    {
        Logger.Log("Error searching Amazon." + _settings.AmazonTld + ": " + ex.Message + "\r\n" + ex.StackTrace);
    }
    if (searchResults == null)
    {
        Logger.Log(String.Format("Failed to find {0} on Amazon." + _settings.AmazonTld, _curBook.author));
        if (_settings.AmazonTld != "com")
        {
            Logger.Log("Trying again with Amazon.com.");
            // The setting itself is switched, so later lookups in this run use .com as well.
            _settings.AmazonTld = "com";
            searchResults = await DataSources.Amazon.SearchAuthor(_curBook, _settings.AmazonTld);
        }
    }
    if (searchResults == null)
    {
        return false; // Already logged error in search function
    }
    authorAsin = searchResults.authorAsin;

    if (Properties.Settings.Default.saveHtml)
    {
        try
        {
            Logger.Log("Saving author's Amazon webpage...");
            File.WriteAllText(Environment.CurrentDirectory +
                              String.Format(@"\dmp\{0}.authorpageHtml.txt", _curBook.asin),
                              searchResults.authorHtmlDoc.DocumentNode.InnerHtml);
        }
        catch (Exception ex)
        {
            Logger.Log(String.Format("An error occurred saving authorpageHtml.txt: {0}", ex.Message));
        }
    }

    // Try to find author's biography
    string bioFile = Environment.CurrentDirectory + @"\ext\" + authorAsin + ".bio";
    if (_settings.SaveBio && File.Exists(bioFile))
    {
        if (!readBio(bioFile))
        {
            return false;
        }
    }
    if (BioTrimmed == "")
    {
        // TODO: Let users edit bio in same style as chapters and aliases
        HtmlNode bio = DataSources.Amazon.GetBioNode(searchResults, _settings.AmazonTld);
        //Trim authour biography to less than 1000 characters and/or replace more problematic characters.
        if (bio?.InnerText.Trim().Length > 0)
        {
            string bioText = bio.InnerText;
            const int maxBioLength = 1000;
            if (bioText.Length > maxBioLength)
            {
                // Look for a cut point inside the allowed length only. Searching the whole
                // text (as before) found the end of the biography and trimmed nothing.
                int searchFrom = maxBioLength - 2;
                int lastPunc = bioText.LastIndexOfAny(new [] { '.', '!', '?' }, searchFrom);
                int lastSpace = bioText.LastIndexOf(' ', searchFrom);
                if (lastPunc > lastSpace)
                {
                    BioTrimmed = bioText.Substring(0, lastPunc + 1);
                }
                else if (lastSpace >= 0)
                {
                    BioTrimmed = bioText.Substring(0, lastSpace) + '\u2026';
                }
                else
                {
                    // No space or sentence end at all: cut hard.
                    BioTrimmed = bioText.Substring(0, searchFrom) + '\u2026';
                }
            }
            else
            {
                BioTrimmed = bioText;
            }
            BioTrimmed = BioTrimmed.Clean();
            Logger.Log("Author biography found on Amazon!");
        }
        else
        {
            // This branch belongs to the "Amazon has no biography" case. It used to be
            // attached to the outer check, so it ran (and emptied the .bio file) exactly
            // when a biography had already been loaded.
            File.WriteAllText(bioFile, String.Empty);
            if (System.Windows.Forms.DialogResult.Yes ==
                System.Windows.Forms.MessageBox.Show(
                    "No author biography found on Amazon!\r\nWould you like to create a biography?", "Biography",
                    System.Windows.Forms.MessageBoxButtons.YesNo, System.Windows.Forms.MessageBoxIcon.Question,
                    System.Windows.Forms.MessageBoxDefaultButton.Button2))
            {
                Functions.RunNotepad(bioFile);
                if (!readBio(bioFile))
                {
                    return false;
                }
            }
            else
            {
                BioTrimmed = "No author biography found on Amazon!";
                Logger.Log("An error occurred finding the author biography on Amazon.");
            }
        }
    }

    if (_settings.SaveBio)
    {
        if (!File.Exists(bioFile))
        {
            try
            {
                Logger.Log("Saving biography to " + bioFile);
                using (var streamWriter = new StreamWriter(bioFile, false, System.Text.Encoding.UTF8))
                {
                    streamWriter.Write(BioTrimmed);
                }
            }
            catch (Exception ex)
            {
                Logger.Log("An error occurred while writing biography.\r\n" + ex.Message + "\r\n" + ex.StackTrace);
                return false;
            }
        }
        if (System.Windows.Forms.DialogResult.Yes ==
            System.Windows.Forms.MessageBox.Show(
                "Would you like to open the biography file in notepad for editing?", "Biography",
                System.Windows.Forms.MessageBoxButtons.YesNo, System.Windows.Forms.MessageBoxIcon.Question,
                System.Windows.Forms.MessageBoxDefaultButton.Button2))
        {
            Functions.RunNotepad(bioFile);
            if (!readBio(bioFile))
            {
                return false;
            }
        }
    }

    // Try to download Author image
    HtmlNode imageXpath = DataSources.Amazon.GetAuthorImageNode(searchResults, _settings.AmazonTld);
    if (imageXpath == null)
    {
        Logger.Log("An error occurred finding the author image on Amazon.");
        return false;
    }
    // Strip the "_..._." size modifier from the image file name.
    authorImageUrl = Regex.Replace(imageXpath.GetAttributeValue("src", ""), @"_.*?_\.", string.Empty);

    // cleanup to match retail file image links
    if (authorImageUrl.Contains(@"https://images-na.ssl-images-amazon"))
    {
        authorImageUrl = authorImageUrl.Replace(@"https://images-na.ssl-images-amazon", @"http://ecx.images-amazon");
    }
    _curBook.authorImageUrl = authorImageUrl;

    Bitmap ApAuthorImage;
    try
    {
        Logger.Log("Downloading author image...");
        ApAuthorImage = await HttpDownloader.GetImage(authorImageUrl);
        Logger.Log("Grayscale base64-encoded author image created!");
    }
    catch (Exception ex)
    {
        Logger.Log(String.Format("An error occurred downloading the author image: {0}", ex.Message));
        return false;
    }

    Logger.Log("Gathering author's other books...");
    var bookList = DataSources.Amazon.GetAuthorBooks(searchResults, _curBook.title, _curBook.author, _settings.AmazonTld)
                   ?? DataSources.Amazon.GetAuthorBooksNew(searchResults, _curBook.title, _curBook.author, _settings.AmazonTld);
    if (bookList != null)
    {
        Logger.Log("Gathering metadata for other books...");
        // Books are processed concurrently, so results are collected in a thread-safe bag.
        var bookBag = new ConcurrentBag<BookInfo>();
        await bookList.ParallelForEachAsync(async book =>
        {
            // TODO: retry a couple times if one fails maybe
            try
            {
                //Gather book desc, image url, etc, if using new format
                if (_settings.UseNewVersion)
                {
                    await book.GetAmazonInfo(book.amazonUrl);
                }
                bookBag.Add(book);
            }
            catch (Exception ex)
            {
                Logger.Log(String.Format("An error occurred gathering metadata for other books: {0}\r\nURL: {1}\r\nBook: {2}", ex.Message, book.amazonUrl, book.title));
                throw;
            }
        });
        otherBooks.AddRange(bookBag);
    }
    else
    {
        Logger.Log("Unable to find other books by this author. If there should be some, check the Amazon URL to ensure it is correct.");
    }

    Logger.Log("Writing Author Profile to file...");

    var authorOtherBooks = otherBooks.Select(book => new Model.AuthorProfile.Book
    {
        E = 1,
        Asin = book.asin,
        Title = book.title
    }).ToArray();

    var ap = new Model.AuthorProfile
    {
        Asin = _curBook.asin,
        CreationDate = Functions.UnixTimestampSeconds(),
        OtherBooks = authorOtherBooks,
        Authors = new []
        {
            new Model.AuthorProfile.Author
            {
                Asin = authorAsin,
                Bio = BioTrimmed,
                ImageHeight = ApAuthorImage.Height,
                Name = _curBook.author,
                OtherBookAsins = otherBooks.Select(book => book.asin).ToArray(),
                Picture = Functions.ImageToBase64(ApAuthorImage, ImageFormat.Jpeg)
            }
        }
    };

    string authorProfileOutput = JsonConvert.SerializeObject(ap);

    try
    {
        File.WriteAllText(ApPath, authorProfileOutput);
        Logger.Log("Author Profile file created successfully!\r\nSaved to " + ApPath);
    }
    catch (Exception ex)
    {
        Logger.Log("An error occurred while writing the Author Profile file: " + ex.Message + "\r\n" + ex.StackTrace);
        return false;
    }

    ApTitle = "About " + _curBook.author;
    ApSubTitle = "Kindle Books By " + _curBook.author;
    EaSubTitle = "More Books By " + _curBook.author;
    return true;
}
/// <summary>
/// Generate the necessities for both old and new formats.
/// Downloads the book's Amazon page (URL built from the configured TLD and the book's ASIN),
/// lets <c>curBook</c> parse its own details from that page, then collects the carousel
/// recommendations and any sponsored related titles into <c>custAlsoBought</c>.
/// </summary>
/// <returns>
/// <c>true</c> once the page has been parsed and <c>SetPaths</c> has been called;
/// <c>false</c> if the download, the book-info parse or the recommendation parse failed
/// (the failure is logged before returning).
/// </returns>
public async Task<bool> Generate()
{
    Logger.Log("Attempting to find book on Amazon...");
    //Generate Book search URL from book's ASIN
    string ebookLocation = String.Format(@"https://www.amazon.{0}/dp/{1}", _settings.AmazonTld, curBook.asin);

    // Search Amazon for book
    //Logger.Log(String.Format("Book's Amazon page URL: {0}", ebookLocation));
    HtmlDocument bookHtmlDoc = new HtmlDocument { OptionAutoCloseOnEnd = true };
    try
    {
        bookHtmlDoc.LoadHtml(await HttpDownloader.GetPageHtmlAsync(ebookLocation));
    }
    catch (Exception ex)
    {
        Logger.Log(String.Format("An error ocurred while downloading book's Amazon page: {0}\r\nYour ASIN may not be correct.", ex.Message));
        return false;
    }

    Logger.Log("Book found on Amazon!");
    if (Properties.Settings.Default.saveHtml)
    {
        try
        {
            Logger.Log("Saving book's Amazon webpage...");
            File.WriteAllText(Environment.CurrentDirectory + String.Format(@"\dmp\{0}.bookpageHtml.txt", curBook.asin),
                bookHtmlDoc.DocumentNode.InnerHtml);
        }
        catch (Exception ex)
        {
            // Best effort only: a failed debug dump is logged and generation continues.
            Logger.Log(String.Format("An error ocurred saving bookpageHtml.txt: {0}", ex.Message));
        }
    }

    try
    {
        curBook.GetAmazonInfo(bookHtmlDoc);
    }
    catch (Exception ex)
    {
        Logger.Log(String.Format("An error ocurred parsing Amazon info: {0}", ex.Message));
        return false;
    }

    Logger.Log("Gathering recommended book metadata...");
    //Parse Recommended Author titles and ASINs
    try
    {
        var recList = bookHtmlDoc.DocumentNode.SelectNodes("//ol[@class='a-carousel' and @role='list']/li[@class='a-carousel-card a-float-left']");
        if (recList != null)
        {
            var possibleBooks = new List<BookInfo>();
            foreach (HtmlNode item in recList.Where(node => node != null))
            {
                HtmlNode nodeTitle = item.SelectSingleNode(".//div/a");
                if (nodeTitle == null)
                {
                    // A card without a title link cannot be identified; skip it rather than
                    // letting a NullReferenceException abort the whole recommendation parse.
                    continue;
                }

                var nodeTitleCheck = nodeTitle.GetAttributeValue("title", "");
                var nodeUrl = nodeTitle.GetAttributeValue("href", "");
                if (nodeUrl != "")
                {
                    nodeUrl = "https://www.amazon." + _settings.AmazonTld + nodeUrl;
                }
                if (nodeTitleCheck == "")
                {
                    // No title attribute: fall back to the link text.
                    //Remove CR, LF and TAB
                    nodeTitleCheck = nodeTitle.InnerText.Clean();
                }

                //Check for duplicate by title
                if (possibleBooks.Any(bk => bk.title.Contains(nodeTitleCheck)))
                {
                    continue;
                }

                // The author node may be absent on some cards; use an empty author in that case.
                var cleanAuthor = item.SelectSingleNode(".//div/div")?.InnerText.Clean() ?? "";

                //Exclude the current book title from other books search
                // The title is escaped so that regex metacharacters in it are matched literally
                // instead of being interpreted as (possibly invalid) pattern syntax.
                Match match = Regex.Match(nodeTitleCheck, Regex.Escape(curBook.title), RegexOptions.IgnoreCase);
                if (match.Success)
                {
                    continue;
                }

                match = Regex.Match(nodeTitleCheck, @"(Series|Reading) Order|Checklist|Edition|eSpecial|\([0-9]+ Book Series\)", RegexOptions.IgnoreCase);
                if (match.Success)
                {
                    continue;
                }

                possibleBooks.Add(new BookInfo(nodeTitleCheck, cleanAuthor, item.SelectSingleNode(".//div")?.GetAttributeValue("data-asin", null))
                {
                    amazonUrl = nodeUrl
                });
            }

            var bookBag = new ConcurrentBag<BookInfo>();
            await possibleBooks.ParallelForEachAsync(async book =>
            {
                if (book == null)
                {
                    return;
                }
                // TODO: Make a separate function for this, duplicate here and AuthorProfile
                try
                {
                    //Gather book desc, image url, etc, if using new format
                    if (_settings.UseNewVersion)
                    {
                        await book.GetAmazonInfo(book.amazonUrl);
                    }
                    bookBag.Add(book);
                }
                catch (Exception ex)
                {
                    // A book whose metadata fails is logged and left out of the results.
                    Logger.Log($"Error: {ex}\r\n{book.amazonUrl}");
                }
            });
            custAlsoBought.AddRange(bookBag);
        }

        //Add sponsored related, if they exist...
        HtmlNode otherItems = bookHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='view_to_purchase-sims-feature']");
        if (otherItems != null)
        {
            recList = otherItems.SelectNodes(".//li[@class='a-spacing-medium p13n-sc-list-item']");
            if (recList != null)
            {
                var possibleBooks = new List<BookInfo>();
                // TODO: This entire foreach is pretty much the exact same as the one above...
                foreach (HtmlNode result in recList.Where(node => node != null))
                {
                    HtmlNode otherBook = result.SelectSingleNode(".//div[@class='a-fixed-left-grid-col a-col-left']/a");
                    if (otherBook == null)
                    {
                        continue;
                    }

                    // Declared per item so that an entry whose link carries no ASIN does not
                    // inherit the ASIN/URL parsed for the previous entry.
                    string sponsAsin = "";
                    string sponsUrl = "";
                    string href = otherBook.GetAttributeValue("href", "");
                    Match match = Regex.Match(href, "dp/(B[A-Z0-9]{9})");
                    if (!match.Success)
                    {
                        match = Regex.Match(href, "gp/product/(B[A-Z0-9]{9})");
                    }
                    if (match.Success)
                    {
                        sponsAsin = match.Groups[1].Value;
                        sponsUrl = String.Format("https://www.amazon.{1}/dp/{0}", sponsAsin, _settings.AmazonTld);
                    }

                    otherBook = otherBook.SelectSingleNode(".//img");
                    if (otherBook == null)
                    {
                        // The title is read from the image's alt text; without an image there is none.
                        continue;
                    }

                    string sponsTitle = otherBook.GetAttributeValue("alt", "");
                    match = Regex.Match(sponsTitle, @"(Series|Reading) Order|Checklist|Edition|eSpecial|\([0-9]+ Book Series\)", RegexOptions.IgnoreCase);
                    if (match.Success)
                    {
                        continue;
                    }

                    //Check for duplicate by title
                    if (custAlsoBought.Any(bk => bk.title.Contains(sponsTitle)) || possibleBooks.Any(bk => bk.title.Contains(sponsTitle)))
                    {
                        continue;
                    }

                    otherBook = result.SelectSingleNode(".//a[@class='a-size-small a-link-child']")
                        ?? result.SelectSingleNode(".//span[@class='a-size-small a-color-base']")
                        ?? throw new DataSource.FormatChangedException("Amazon", "Sponsored book author");
                    // TODO: Throw more format changed exceptions to make it obvious that the site changed
                    var sponsAuthor = otherBook.InnerText.Trim();
                    possibleBooks.Add(new BookInfo(sponsTitle, sponsAuthor, sponsAsin) { amazonUrl = sponsUrl });
                }

                var bookBag = new ConcurrentBag<BookInfo>();
                await possibleBooks.ParallelForEachAsync(async book =>
                {
                    //Gather book desc, image url, etc, if using new format
                    try
                    {
                        if (_settings.UseNewVersion)
                        {
                            await book.GetAmazonInfo(book.amazonUrl);
                        }
                        bookBag.Add(book);
                    }
                    catch (Exception ex)
                    {
                        Logger.Log($"Error: {ex.Message}\r\n{book.amazonUrl}");
                    }
                });
                custAlsoBought.AddRange(bookBag);
            }
        }
    }
    catch (Exception ex)
    {
        Logger.Log("An error occurred parsing the book's amazon page: " + ex.Message + ex.StackTrace);
        return false;
    }

    SetPaths();
    return true;
}
/// <summary>
/// Creates an <see cref="HttpDownloader"/> for <paramref name="url"/> and returns whatever
/// its <c>GetPage</c> call produces.
/// </summary>
/// <param name="url">Address handed to the <see cref="HttpDownloader"/> constructor.</param>
/// <returns>The string returned by <c>GetPage</c> (presumably the page's HTML).</returns>
public static string GetPageHtml(string url) => new HttpDownloader(url).GetPage();
/// <summary>
/// Click handler for the Shelfari search button. Validates the configured paths, extracts the
/// book's metadata, then queries Shelfari once per binding type and, if nothing was found and
/// the author or title contains diacritics, once more with the diacritics removed.
/// When a book page is found its URL is written to <c>txtShelfari</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnSearchShelfari_Click(object sender, EventArgs e)
{
    if (!File.Exists(txtMobi.Text))
    {
        MessageBox.Show("Specified book was not found.", "Book Not Found");
        return;
    }
    if (!File.Exists(settings.mobi_unpack))
    {
        MessageBox.Show("Kindleunpack was not found. Please review the settings page.", "Kindleunpack Not Found");
        return;
    }
    if (!Directory.Exists(settings.outDir))
    {
        MessageBox.Show("Specified output directory does not exist. Please review the settings page.", "Output Directory Not found");
        return;
    }

    //Create temp dir and ensure it exists
    string randomFile = Functions.GetTempDirectory();
    if (!Directory.Exists(randomFile))
    {
        MessageBox.Show("Temporary path not accessible for some reason.", "Temporary Directory Error");
        return;
    }

    // From here on the work runs inside try/finally so the temporary directory is removed on
    // every exit path, including the early returns below.
    try
    {
        //0 = asin, 1 = uniqid, 2 = databasename, 3 = rawML, 4 = author, 5 = title
        //this.TopMost = true;
        List<string> results;
        if (settings.useKindleUnpack)
        {
            Log("Running Kindleunpack to get metadata...");
            results = Functions.GetMetaData(txtMobi.Text, settings.outDir, randomFile, settings.mobi_unpack);
        }
        else
        {
            Log("Extracting metadata...");
            try
            {
                results = Functions.GetMetaDataInternal(txtMobi.Text, settings.outDir, false).getResults();
            }
            catch (Exception ex)
            {
                Log("Error getting metadata: " + ex.Message);
                return;
            }
        }

        if (results.Count != 6)
        {
            // The first entry is logged as-is (presumably an error message from the extractor);
            // guard against an empty list so the failure path itself cannot throw.
            Log(results.Count > 0 ? results[0] : "Error getting metadata: no results were returned.");
            return;
        }

        // Added author name to log output
        Log(String.Format("Got metadata!\r\nDatabase Name: {0}\r\nASIN: {1}\r\nAuthor: {2}\r\nTitle: {3}\r\nUniqueID: {4}",
            results[2], results[0], results[4], results[5], results[1]));

        //Get Shelfari Search URL
        Log("Searching for book on Shelfari...");
        string shelfariSearchUrlBase = @"http://www.shelfari.com/search/books?Author={0}&Title={1}&Binding={2}";
        string[] bindingTypes = { "Hardcover", "Kindle", "Paperback" };

        // Search book on Shelfari
        bool bookFound = false;
        string shelfariBookUrl = "";
        results[4] = Functions.FixAuthor(results[4]);

        try
        {
            HtmlAgilityPack.HtmlDocument shelfariHtmlDoc = new HtmlAgilityPack.HtmlDocument();
            // Pass 0 searches with the original author/title, pass 1 with diacritics removed.
            for (int j = 0; j <= 1; j++)
            {
                for (int i = 0; i < bindingTypes.Length; i++)
                {
                    Log("Searching for " + bindingTypes[i] + " edition...");
                    // Author and title are escaped so characters such as '&', '#' or '+' cannot
                    // break or alter the query string.
                    string searchUrl = String.Format(shelfariSearchUrlBase,
                        Uri.EscapeDataString(results[4] ?? ""),
                        Uri.EscapeDataString(results[5] ?? ""),
                        bindingTypes[i]);
                    shelfariHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(searchUrl));
                    if (!shelfariHtmlDoc.DocumentNode.InnerText.Contains("Your search did not return any results"))
                    {
                        shelfariBookUrl = FindShelfariURL(shelfariHtmlDoc, results[4], results[5]);
                        if (shelfariBookUrl != "")
                        {
                            bookFound = true;
                            if (Properties.Settings.Default.saveHtml)
                            {
                                try
                                {
                                    Log("Saving book's Shelfari webpage...");
                                    shelfariHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(shelfariBookUrl));
                                    File.WriteAllText(Environment.CurrentDirectory + String.Format(@"\dmp\{0}.shelfaripageHtml.txt", results[0]),
                                        shelfariHtmlDoc.DocumentNode.InnerHtml);
                                }
                                catch (Exception ex)
                                {
                                    // Best effort only: the search result is still used.
                                    Log(String.Format("An error ocurred saving shelfaripageHtml.txt: {0}", ex.Message));
                                }
                            }
                            break;
                        }
                    }
                    if (!bookFound)
                    {
                        Log("Unable to find a " + bindingTypes[i] + " edition of this book on Shelfari!");
                    }
                }

                if (bookFound)
                {
                    break;
                }

                // Attempt to remove diacritics (accented characters) from author & title for searching
                string newAuthor = results[4].RemoveDiacritics();
                string newTitle = results[5].RemoveDiacritics();
                if (results[4].Equals(newAuthor) && results[5].Equals(newTitle))
                {
                    // Nothing changed, so another pass would only repeat the same requests.
                    break;
                }
                results[4] = newAuthor;
                results[5] = newTitle;
                Log("Accented characters detected. Attempting to search without them.");
            }
        }
        catch (Exception ex)
        {
            Log("Error: " + ex.Message);
            return;
        }

        if (bookFound)
        {
            Log("Book found on Shelfari!");
            Log(results[5] + " by " + results[4]);
            txtShelfari.Text = shelfariBookUrl;
            txtShelfari.Refresh();
            Log(String.Format("Shelfari URL updated: {0}\r\nYou may want to visit the URL to ensure it is correct and add/modify terms if necessary.", shelfariBookUrl));
        }
        else
        {
            Log("Unable to find this book on Shelfari! You may have to search manually.");
        }
    }
    finally
    {
        try
        {
            Directory.Delete(randomFile, true);
        }
        catch (Exception)
        {
            Log("An error occurred while trying to delete temporary files.\r\nTry deleting these files manually.");
        }
    }
}
/// <summary>
/// Reads the JSON file at <paramref name="inputFile"/> and fills the preview controls from its
/// <c>data</c> object: the next-book title, author and cover, and one list-view item per
/// downloaded cover for the <c>authorRecs</c> and <c>customersWhoBoughtRecs</c> recommendations.
/// Sections that are absent from the JSON are skipped instead of throwing.
/// </summary>
/// <param name="inputFile">Path of the JSON file to load (read as UTF-8).</param>
public async Task Populate(string inputFile)
{
    string input;
    using (StreamReader streamReader = new StreamReader(inputFile, Encoding.UTF8))
    {
        input = streamReader.ReadToEnd();
    }

    // Start from empty lists so the image indices assigned below line up with the items.
    ilauthorRecs.Images.Clear();
    lvAuthorRecs.Items.Clear();
    ilcustomersWhoBoughtRecs.Images.Clear();
    lvCustomersWhoBoughtRecs.Items.Clear();

    JObject ea = JObject.Parse(input);
    // Any level below may be missing from the file; "?[...]" yields null for a missing parent
    // where plain indexing would throw a NullReferenceException.
    var data = ea["data"];

    var tempData = data?["nextBook"];
    if (tempData != null)
    {
        lblNextTitle.Text = tempData["title"]?.ToString();
        // First is null for an empty array, where [0] would throw.
        lblNextAuthor.Text = tempData["authors"]?.First?.ToString();
        string imageUrl = tempData["imageUrl"]?.ToString();
        if (!string.IsNullOrEmpty(imageUrl))
        {
            pbNextCover.Image = Functions.MakeGrayscale3(await HttpDownloader.GetImage(imageUrl));
        }
    }
    else
    {
        pbNextCover.Visible = false;
        lblNextTitle.Visible = false;
        lblNextAuthor.Visible = false;
        lblNotInSeries.Visible = true;
    }

    tempData = data?["authorRecs"]?["recommendations"];
    if (tempData != null)
    {
        foreach (var rec in tempData)
        {
            string imageUrl = rec["imageUrl"]?.ToString();
            if (!string.IsNullOrEmpty(imageUrl))
            {
                ilauthorRecs.Images.Add(Functions.MakeGrayscale3(await HttpDownloader.GetImage(imageUrl)));
            }
        }
        ListViewItem_SetSpacing(lvAuthorRecs, 60 + 7, 90 + 7);
        // One item per image that was actually added; entries without an image URL get no item.
        for (int i = 0; i < ilauthorRecs.Images.Count; i++)
        {
            ListViewItem item = new ListViewItem { ImageIndex = i };
            lvAuthorRecs.Items.Add(item);
        }
    }

    tempData = data?["customersWhoBoughtRecs"]?["recommendations"];
    if (tempData != null)
    {
        foreach (var rec in tempData)
        {
            var imageUrl = rec["imageUrl"]?.ToString();
            if (!string.IsNullOrEmpty(imageUrl))
            {
                ilcustomersWhoBoughtRecs.Images.Add(Functions.MakeGrayscale3(await HttpDownloader.GetImage(imageUrl)));
            }
        }
        ListViewItem_SetSpacing(lvCustomersWhoBoughtRecs, 60 + 7, 90 + 7);
        for (int i = 0; i < ilcustomersWhoBoughtRecs.Images.Count; i++)
        {
            var item = new ListViewItem { ImageIndex = i };
            lvCustomersWhoBoughtRecs.Items.Add(item);
        }
    }
}
/// <summary>
/// Creates an <see cref="HttpDownloader"/> for <paramref name="url"/> and runs its
/// <c>GetPageAsync</c> call through <see cref="Task.Run(Func{Task{string}}, CancellationToken)"/>.
/// </summary>
/// <param name="url">Address handed to the <see cref="HttpDownloader"/> constructor.</param>
/// <param name="cancellationToken">
/// Token passed both to <c>GetPageAsync</c> and to <c>Task.Run</c>.
/// </param>
/// <returns>A task yielding the string produced by <c>GetPageAsync</c>.</returns>
public static Task<string> GetPageHtmlAsync(string url, CancellationToken cancellationToken = default)
{
    HttpDownloader downloader = new HttpDownloader(url);

    async Task<string> FetchAsync()
    {
        string page = await downloader.GetPageAsync(cancellationToken).ConfigureAwait(false);
        return page;
    }

    return Task.Run(() => FetchAsync(), cancellationToken);
}