/// <summary>
/// Retrieves the book's description, image URL, and rating from the book's Amazon URL.
/// The page is downloaded, parsed, and handed to the HtmlDocument overload of GetAmazonInfo.
/// </summary>
/// <param name="amazonUrl">Book's Amazon URL. A null or empty value is ignored.</param>
public void GetAmazonInfo(string amazonUrl)
{
    // Nothing to download without a URL. Null is treated like "" so an unset
    // URL is never passed on to HttpDownloader.
    if (string.IsNullOrEmpty(amazonUrl))
    {
        return;
    }

    HtmlDocument bookDoc = new HtmlDocument() { OptionAutoCloseOnEnd = true };
    bookDoc.LoadHtml(HttpDownloader.GetPageHtml(amazonUrl));
    GetAmazonInfo(bookDoc);
}
/// <summary>
/// Searches Amazon's Kindle store for a book by title and author and returns the first
/// search result, provided that result's text contains the title (case-insensitive).
/// </summary>
/// <param name="title">Book title. Anything from the first " (" onwards is dropped before searching.</param>
/// <param name="author">
/// Author name. Spaces in front of single-letter initials are removed for the search
/// keywords only; the returned BookInfo is built with the author exactly as passed in.
/// </param>
/// <returns>
/// A BookInfo for the first search result with its amazonUrl set from the result link,
/// or null when there is no first result, it does not mention the title, or its link
/// cannot be located.
/// </returns>
public static BookInfo AmazonSearchBook(string title, string author)
{
    BookInfo result = null;

    // Collapse the space in front of every initial ("A. B. C. Smith" -> "A.B.C. Smith").
    // The previous loop restarted from the unmodified author on each match, so only the
    // last distinct initial ended up collapsed.
    string authorTrim = Regex.Replace(author, @" ([A-Z]\.)", "$1");

    // Strip a trailing parenthesised part such as " (Series Name, Book 2)".
    int parenIndex = title.IndexOf(" (");
    if (parenIndex >= 0)
    {
        title = title.Substring(0, parenIndex);
    }

    string searchUrl = @"http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Ddigital-text&field-keywords="
        + Uri.EscapeDataString(title + " " + authorTrim + " kindle edition");
    HAP.HtmlDocument searchDoc = new HAP.HtmlDocument();
    searchDoc.LoadHtml(HttpDownloader.GetPageHtml(searchUrl));
    HAP.HtmlNode node = searchDoc.DocumentNode.SelectSingleNode("//li[@id='result_0']");

    // At least attempt to verify it might be the same book: the first result must
    // mention the title, ignoring case.
    if (node != null && node.InnerText.IndexOf(title, StringComparison.OrdinalIgnoreCase) >= 0)
    {
        string foundASIN = node.GetAttributeValue("data-asin", "");
        node = node.SelectSingleNode(".//div/div/div/div[@class='a-fixed-left-grid-col a-col-right']/div/a");
        if (node != null)
        {
            result = new BookInfo(node.InnerText, author, foundASIN);
            // Grab the true link for good measure
            result.amazonUrl = node.GetAttributeValue("href", "");
        }
    }

    return result;
}
/// <summary>
/// Looks for series info on the book's Shelfari page, downloads the series page it links to,
/// and returns the title of the next book in the series.
/// </summary>
/// <param name="searchHtmlDoc">
/// Book's Shelfari page, pre-downloaded. When a series link is found, this document is
/// reloaded with the series page, so the caller's copy no longer holds the book page.
/// </param>
/// <returns>
/// Title of the next book, or an empty string when the page has no series info, the book
/// is the last one listed, or the expected page elements cannot be located.
/// </returns>
private string GetNextInSeriesTitle2(HtmlAgilityPack.HtmlDocument searchHtmlDoc)
{
    int currentSeriesIndex = 0;
    int currentSeriesCount = 0;

    //Check if book's Shelfari page contains series info
    HtmlAgilityPack.HtmlNode node = searchHtmlDoc.DocumentNode.SelectSingleNode("//span[@class='series']");
    if (node == null)
    {
        return "";
    }

    //Series name and book number; the number is taken from the text after the last space
    string series = node.InnerText.Trim();
    Int32.TryParse(series.Substring(series.LastIndexOf(" ") + 1), out currentSeriesIndex);

    //Parse series Shelfari URL
    HtmlAgilityPack.HtmlNode seriesLink = node.SelectSingleNode("//span[@class='series']/a[@href]");
    if (seriesLink == null)
    {
        return "";
    }
    string seriesURL = seriesLink.GetAttributeValue("href", "");
    if (seriesURL == "")
    {
        return "";
    }
    string seriesShort = node.FirstChild.InnerText.Trim();

    // Pass the URL as-is: running it through String.Format would treat any '{' or '}'
    // in the address as a format placeholder and throw.
    searchHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(seriesURL));

    //Parse number of books in series and convert to integer
    node = searchHtmlDoc.DocumentNode.SelectSingleNode("//h2[@class='f_m']");
    if (node == null || node.FirstChild == null)
    {
        return "";
    }
    Match match = Regex.Match(node.FirstChild.InnerText.Trim(), @"\d+");
    if (match.Success)
    {
        Int32.TryParse(match.Value, out currentSeriesCount);
    }

    //Check if there is a next book
    if (currentSeriesIndex >= currentSeriesCount)
    {
        return "";
    }

    //Add series name and book number to log
    main.Log(String.Format("This is book {0} of {1} in the {2} Series...",
        currentSeriesIndex, currentSeriesCount, seriesShort));

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlAgilityPack.HtmlNodeCollection seriesItems = searchHtmlDoc.DocumentNode.SelectNodes(".//ol/li");
    if (seriesItems == null)
    {
        return "";
    }

    string nextNumber = (currentSeriesIndex + 1).ToString();
    foreach (HtmlAgilityPack.HtmlNode seriesItem in seriesItems)
    {
        HtmlAgilityPack.HtmlNode positionNode = seriesItem.SelectSingleNode(".//div/span[@class='series bold']");
        if (positionNode == null || !positionNode.InnerText.Contains(nextNumber))
        {
            continue;
        }

        HtmlAgilityPack.HtmlNode titleNode = seriesItem.SelectSingleNode(".//h3/a");
        if (titleNode == null)
        {
            // Entry has the right position label but no title link; keep looking.
            continue;
        }

        //Parse title of the next book and add it to the log
        string nextTitle = titleNode.InnerText.Trim();
        main.Log(String.Format("The next book in this series is {0}!", nextTitle));
        return nextTitle;
    }

    return "";
}
/// <summary>
/// Finds the next book in the current book's series and, when a previous title is known,
/// fills in curBook.previousInSeries as well.
/// </summary>
/// <remarks>
/// The next title comes from GetNextInSeriesTitle, run on the book's Shelfari page.
/// NOTE(review): previousTitle, nextShelfariUrl and previousShelfariUrl are presumably
/// populated by that call; confirm against its implementation.
/// </remarks>
/// <returns>
/// The next book, or null when the book has no Shelfari URL, no next title was found,
/// or none of the lookups produced a match.
/// </returns>
private BookInfo GetNextInSeries()
{
    BookInfo nextBook = null;
    if (curBook.shelfariUrl == "")
    {
        return null;
    }

    // Get title of next book
    HtmlAgilityPack.HtmlDocument searchHtmlDoc = new HtmlAgilityPack.HtmlDocument();
    searchHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(curBook.shelfariUrl));
    string nextTitle = GetNextInSeriesTitle(searchHtmlDoc);

    if (nextTitle != "")
    {
        // Search author's other books for the book (assumes next in series was written by the same author...)
        // Returns the first one found, though there should probably not be more than 1 of the same name anyway
        nextBook = authorProfile.otherBooks.FirstOrDefault(bk => bk.title == nextTitle);
        if (nextBook == null)
        {
            nextBook = LookupSeriesBookOnline(nextTitle, nextShelfariUrl);
        }
        if (nextBook == null)
        {
            main.Log("Book was found to be part of a series, but next book could not be found.\r\n"
                + "Please report this book and the Shelfari URL and output log to improve parsing.");
        }
    }
    else if (curBook.seriesPosition != curBook.totalInSeries)
    {
        main.Log("Unable to find next book in series, the book may not be part of a series, or it is the latest release.");
    }

    if (previousTitle != "" && curBook.previousInSeries == null)
    {
        curBook.previousInSeries = LookupSeriesBookOnline(previousTitle, previousShelfariUrl);

        // Report the failure only when the lookup really came back empty. This message
        // used to sit in the else-branch of the null check above, so it was logged when
        // the previous book was already known and never when the search failed.
        if (curBook.previousInSeries == null)
        {
            main.Log("Book was found to be part of a series, but previous book could not be found.\r\n"
                + "Please report this book and the Shelfari URL and output log to improve parsing.");
        }
    }

    return nextBook;
}

/// <summary>
/// Looks a series book up on Amazon by title and the current book's author, falling back
/// to the first ASIN-like token found on the book's Shelfari page.
/// </summary>
/// <param name="title">Title of the book to find.</param>
/// <param name="shelfariUrl">Shelfari page of that book, used only when the Amazon search finds nothing.</param>
/// <returns>The book with its Amazon info loaded, or null when neither source yields a match.</returns>
private BookInfo LookupSeriesBookOnline(string title, string shelfariUrl)
{
    // Attempt to search Amazon for the book
    BookInfo found = Functions.AmazonSearchBook(title, curBook.author);
    if (found != null)
    {
        found.GetAmazonInfo(found.amazonUrl); //fill in desc, imageurl, and ratings
        return found;
    }

    // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
    HtmlDocument bookDoc = new HtmlDocument() { OptionAutoCloseOnEnd = true };
    bookDoc.LoadHtml(HttpDownloader.GetPageHtml(shelfariUrl));
    Match match = Regex.Match(bookDoc.DocumentNode.InnerHtml, "('B[A-Z0-9]{9}')");
    if (match.Success)
    {
        string cleanASIN = match.Value.Replace("'", String.Empty);
        found = new BookInfo(title, curBook.author, cleanASIN);
        found.GetAmazonInfo("http://www.amazon.com/dp/" + cleanASIN);
    }

    return found;
}
// True only when the constructor ran to the end without bailing out early.
public bool complete = false;

/// <summary>
/// Downloads the book's Amazon page, loads the book's details from it, and collects the
/// recommended books listed in the page's carousel into custAlsoBought.
/// Requires an already-built AuthorProfile and the BaseEndActions.txt file.
/// </summary>
/// <param name="ap">Author profile built for the same book.</param>
/// <param name="book">Book the End Actions are generated for; its ASIN determines the page downloaded.</param>
/// <param name="erl">Value stored in _erl.</param>
/// <param name="frm">Main form, used for logging.</param>
public EndActions(AuthorProfile ap, BookInfo book, long erl, frmMain frm)
{
    authorProfile = ap;
    curBook = book;
    _erl = erl;
    main = frm;

    main.Log("Attempting to find book on Amazon...");

    // The product page address is derived directly from the book's ASIN.
    string ebookLocation = @"http://www.amazon.com/dp/" + book.asin;
    main.Log("Book found on Amazon!");
    main.Log("Book's Amazon page URL: " + ebookLocation);

    HtmlDocument bookHtmlDoc = new HtmlDocument { OptionAutoCloseOnEnd = true };
    try
    {
        string pageHtml = HttpDownloader.GetPageHtml(ebookLocation);
        bookHtmlDoc.LoadHtml(pageHtml);
    }
    catch (Exception ex)
    {
        main.Log("An error ocurred while downloading book's Amazon page: " + ex.Message
            + "\r\nYour ASIN may not be correct.");
        return;
    }

    // Optional debugging dump of the downloaded page; a failure here is only logged.
    if (Properties.Settings.Default.saveHtml)
    {
        try
        {
            main.Log("Saving book's Amazon webpage...");
            string dumpPath = String.Format(@"{0}\dmp\{1}.bookpageHtml.txt",
                Environment.CurrentDirectory, curBook.asin);
            File.WriteAllText(dumpPath, bookHtmlDoc.DocumentNode.InnerHtml);
        }
        catch (Exception ex)
        {
            main.Log("An error ocurred saving bookpageHtml.txt: " + ex.Message);
        }
    }

    try
    {
        curBook.GetAmazonInfo(bookHtmlDoc);
    }
    catch (Exception ex)
    {
        main.Log("An error ocurred parsing Amazon info: " + ex.Message);
        return;
    }

    main.Log("Gathering recommended book info...");

    //Parse Recommended Author titles and ASINs
    try
    {
        HtmlNodeCollection carouselItems =
            bookHtmlDoc.DocumentNode.SelectNodes("//li[@class='a-carousel-card a-float-left']");
        if (carouselItems == null)
        {
            main.Log("Could not find related book list page on Amazon.\r\nUnable to create End Actions.");
        }
        else
        {
            foreach (HtmlNode card in carouselItems)
            {
                if (card == null)
                {
                    continue;
                }

                HtmlNode linkNode = card.SelectSingleNode(".//div/a");
                string recTitle = linkNode.GetAttributeValue("title", "");
                string recUrl = linkNode.GetAttributeValue("href", "");
                if (recUrl != "")
                {
                    recUrl = "http://www.amazon.com" + recUrl;
                }

                // Fall back to the link text (CR, LF and TAB removed) when there is no title attribute.
                if (recTitle == "")
                {
                    recTitle = linkNode.InnerText.CleanString();
                }

                string recAuthor = card.SelectSingleNode(".//div/div").InnerText.CleanString();

                // Skip entries whose title matches one of these patterns.
                bool excluded = Regex.IsMatch(recTitle,
                    @"Series Reading Order|Edition|eSpecial|\([0-9]+ Book Series\)",
                    RegexOptions.IgnoreCase);
                if (excluded)
                {
                    continue;
                }

                string recAsin = card.SelectSingleNode(".//div").GetAttributeValue("data-asin", "");
                BookInfo recommended = new BookInfo(recTitle, recAuthor, recAsin);
                try
                {
                    //Gather book desc, image url, etc, if using new format
                    if (settings.useNewVersion)
                    {
                        recommended.GetAmazonInfo(recUrl);
                    }
                    custAlsoBought.Add(recommended);
                }
                catch (Exception ex)
                {
                    main.Log(ex.Message + "\r\n" + recUrl + "\r\nContinuing anyway...");
                }
            }
        }
    }
    catch (Exception ex)
    {
        main.Log(String.Format("An error occurred parsing the book's amazon page: {0}", ex.Message));
        return;
    }

    SetPaths();
    complete = true;
}
// Set once the constructor has succeeded in generating the profile.
public bool complete = false;

/// <summary>
/// Builds the Author Profile for a book and writes it to the .asc output file.
/// Steps, in order: resolve the output path, normalise the book's GUID, find the author's
/// Amazon page, read the biography, download and process the author image, collect the
/// author's other books, then write the profile.
/// On any failure the constructor logs a message and returns early, leaving
/// <see cref="complete"/> false.
/// </summary>
/// <param name="nBook">Book whose author profile is generated.</param>
/// <param name="frm">Main form, used for logging and for enabling the preview controls.</param>
/// <exception cref="NotSupportedException">No JPEG encoder is available.</exception>
public AuthorProfile(BookInfo nBook, frmMain frm)
{
    this.curBook = nBook;
    this.main = frm;

    string outputDir;
    try
    {
        if (settings.android)
        {
            outputDir = settings.outDir + @"\Android\" + curBook.asin;
            Directory.CreateDirectory(outputDir);
        }
        else
        {
            outputDir = settings.useSubDirectories
                ? Functions.GetBookOutputDirectory(curBook.author, curBook.sidecarName)
                : settings.outDir;
        }
    }
    catch (Exception ex)
    {
        main.Log("Failed to create output directory: " + ex.Message + "\r\nFiles will be placed in the default output directory.");
        outputDir = settings.outDir;
    }

    ApPath = outputDir + @"\AuthorProfile.profile." + curBook.asin + ".asc";
    if (!Properties.Settings.Default.overwrite && File.Exists(ApPath))
    {
        main.Log("AuthorProfile file already exists... Skipping!\r\n" +
                 "Please review the settings page if you want to overwite any existing files.");
        return;
    }

    //Process GUID. If in decimal form, convert to hex.
    // The pattern must not carry JavaScript-style slashes: "/[a-zA-Z]/" only matched a
    // letter wrapped in literal '/' characters, so hex GUIDs were parsed as decimal,
    // failed, and aborted the profile with a "0" GUID.
    if (Regex.IsMatch(curBook.guid, "[a-zA-Z]"))
    {
        curBook.guid = curBook.guid.ToUpper();
    }
    else
    {
        long guidDec;
        long.TryParse(curBook.guid, out guidDec);
        curBook.guid = guidDec.ToString("X");
    }
    if (curBook.guid == "0")
    {
        main.Log("Something bad happened while converting the GUID.");
        return;
    }

    //Generate Author search URL from author's name
    string newAuthor = Functions.FixAuthor(curBook.author);
    string plusAuthorName = newAuthor.Replace(" ", "+");
    string amazonAuthorSearchUrl = @"http://www.amazon.com/s/?url=search-alias%3Dstripbooks&field-keywords=" + plusAuthorName;
    main.Log("Searching for author's page on Amazon...");

    // Search Amazon for Author
    HtmlDocument authorHtmlDoc = new HtmlDocument { OptionAutoCloseOnEnd = true };
    string authorsearchHtml = HttpDownloader.GetPageHtml(amazonAuthorSearchUrl);
    authorHtmlDoc.LoadHtml(authorsearchHtml);
    if (Properties.Settings.Default.saveHtml)
    {
        try
        {
            main.Log("Saving Amazon's author search webpage...");
            File.WriteAllText(Environment.CurrentDirectory +
                              String.Format(@"\dmp\{0}.authorsearchHtml.txt", curBook.asin),
                authorHtmlDoc.DocumentNode.InnerHtml);
        }
        catch (Exception ex)
        {
            main.Log(String.Format("An error ocurred saving authorsearchHtml.txt: {0}", ex.Message));
        }
    }

    // Try to find Author's page from Amazon search
    HtmlNode node = authorHtmlDoc.DocumentNode.SelectSingleNode("//*[@id='result_1']");
    if (node == null || !node.OuterHtml.Contains("/e/B"))
    {
        main.Log("Could not find author's page on Amazon.\r\nUnable to create Author Profile.\r\nEnsure the author metadata field matches the author's name exactly.\r\nSearch results can be viewed at " + amazonAuthorSearchUrl);
        return;
    }

    string parseFailureMessage = "Found author's page, but could not parse URL properly. Report this URL on the MobileRead thread: " + amazonAuthorSearchUrl;

    // The author ASIN is the 10 characters following data-asin=" in the result markup.
    // Bail out when the attribute is missing or truncated instead of carrying the whole
    // markup forward as the "ASIN".
    string resultHtml = node.OuterHtml;
    int index1 = resultHtml.IndexOf("data-asin");
    if (index1 < 0 || index1 + 21 > resultHtml.Length)
    {
        main.Log(parseFailureMessage);
        return;
    }
    authorAsin = resultHtml.Substring(index1 + 11, 10);

    node = node.SelectSingleNode("//*[@id='result_1']/div/div/div/div/a");
    string properAuthor = node != null ? node.GetAttributeValue("href", "not found") : "not found";
    // The length check keeps IndexOf('/', 1) from throwing on an empty href.
    if (properAuthor == "not found" || properAuthor.Length < 2 || properAuthor.IndexOf('/', 1) < 3)
    {
        main.Log(parseFailureMessage);
        return;
    }
    properAuthor = properAuthor.Substring(1, properAuthor.IndexOf('/', 1) - 1);
    string authorAmazonWebsiteLocationLog = @"http://www.amazon.com/" + properAuthor + "/e/" + authorAsin;
    string authorAmazonWebsiteLocation = @"http://www.amazon.com/" + properAuthor + "/e/" + authorAsin +
                                         "/ref=la_" + authorAsin +
                                         "_rf_p_n_feature_browse-b_2?fst=as%3Aoff&rh=n%3A283155%2Cp_82%3A" +
                                         authorAsin +
                                         "%2Cp_n_feature_browse-bin%3A618073011&bbn=283155&ie=UTF8&qid=1432378570&rnid=618072011";

    main.Log("Author page found on Amazon!");
    main.Log(String.Format("Author's Amazon Page URL: {0}", authorAmazonWebsiteLocationLog));

    // Load Author's Amazon page
    string authorpageHtml = HttpDownloader.GetPageHtml(authorAmazonWebsiteLocation);
    authorHtmlDoc.LoadHtml(authorpageHtml);
    if (Properties.Settings.Default.saveHtml)
    {
        try
        {
            main.Log("Saving author's Amazon webpage...");
            File.WriteAllText(Environment.CurrentDirectory +
                              String.Format(@"\dmp\{0}.authorpageHtml.txt", curBook.asin),
                authorHtmlDoc.DocumentNode.InnerHtml);
        }
        catch (Exception ex)
        {
            main.Log(String.Format("An error ocurred saving authorpageHtml.txt: {0}", ex.Message));
        }
    }

    // Try to find Author's Biography. A missing node is treated like an empty biography.
    HtmlNode bio = authorHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='ap-bio' and @class='a-row']/div/div/span");
    string bioText = bio != null ? bio.InnerText : "";
    if (bioText.Trim().Length != 0)
    {
        //Trim author biography to at most 1000 characters.
        // The cut point is searched inside the first 1000 characters only; searching the
        // whole text (as before) always found the very end and never shortened anything.
        const int maxBioLength = 1000;
        if (bioText.Length > maxBioLength)
        {
            string head = bioText.Substring(0, maxBioLength);
            int lastPunc = head.LastIndexOfAny(new char[] { '.', '!', '?' });
            int lastSpace = head.LastIndexOf(' ');
            if (lastPunc > lastSpace)
            {
                // End on the last complete sentence.
                BioTrimmed = head.Substring(0, lastPunc + 1);
            }
            else if (lastSpace >= 0)
            {
                // End on the last complete word and mark the cut with an ellipsis.
                BioTrimmed = head.Substring(0, lastSpace) + '\u2026';
            }
            else
            {
                // No sentence or word boundary at all: hard cut.
                BioTrimmed = head.Substring(0, maxBioLength - 1) + '\u2026';
            }
        }
        else
        {
            BioTrimmed = bioText;
        }
        BioTrimmed = Functions.CleanString(BioTrimmed);
        main.Log("Author biography found on Amazon!");
    }
    else
    {
        BioTrimmed = "No author biography found on Amazon!";
        main.Log("No author biography found on Amazon!");
    }

    // Try to download Author image
    HtmlNode imageXpath = authorHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='ap-image']/img");
    if (imageXpath == null)
    {
        main.Log("Could not find author's image on Amazon.\r\nUnable to create Author Profile.");
        return;
    }
    authorImageUrl = imageXpath.GetAttributeValue("src", "");
    string downloadedAuthorImage = curBook.path + @"\DownloadedAuthorImage.jpg";
    try
    {
        main.Log("Downloading author image...");
        using (WebClient webClient = new WebClient())
        {
            webClient.DownloadFile(new Uri(authorImageUrl), downloadedAuthorImage);
        }
    }
    catch (Exception ex)
    {
        main.Log(String.Format("Failed to download author image: {0}", ex.Message));
        return;
    }

    main.Log("Resizing and cropping Author image...");
    //Resize Author image so that it covers 185x278 (the larger scale factor wins).
    int destWidth;
    int destHeight;
    using (Bitmap original = (Bitmap)Image.FromFile(downloadedAuthorImage))
    using (Bitmap originalCopy = new Bitmap(original, original.Width, original.Height))
    {
        int sourceWidth = original.Width;
        int sourceHeight = original.Height;
        float nPercentW = (185 / (float)sourceWidth);
        float nPercentH = (278 / (float)sourceHeight);
        float nPercent = nPercentH > nPercentW ? nPercentH : nPercentW;
        destWidth = (int)(sourceWidth * nPercent);
        destHeight = (int)(sourceHeight * nPercent);

        using (Bitmap resized = new Bitmap(destWidth, destHeight))
        {
            using (Graphics g = Graphics.FromImage(resized))
            {
                g.InterpolationMode = InterpolationMode.HighQualityBicubic;
                g.SmoothingMode = SmoothingMode.HighQuality;
                g.PixelOffsetMode = PixelOffsetMode.HighQuality;
                g.CompositingQuality = CompositingQuality.HighQuality;
                g.CompositingMode = CompositingMode.SourceOver;
                g.DrawImage(originalCopy, 0, 0, destWidth, destHeight);
            }
            resized.Save(curBook.path + @"\ResizedAuthorImage.jpg");
        }
    }

    //Crop a 185px wide strip from the horizontal centre of the resized image.
    using (Bitmap target = new Bitmap(185, destHeight))
    {
        Rectangle cropRect = new Rectangle(((destWidth - 185) / 2), 0, 185, destHeight);
        // Dispose the reloaded image so its handle on ResizedAuthorImage.jpg is released.
        using (Image resizedImage = Image.FromFile(curBook.path + @"\ResizedAuthorImage.jpg"))
        using (Graphics g = Graphics.FromImage(target))
        {
            g.DrawImage(resizedImage, new Rectangle(0, 0, target.Width, target.Height),
                cropRect, GraphicsUnit.Pixel);
        }
        target.Save(curBook.path + @"\CroppedAuthorImage.jpg");
    }

    //Convert Author image to Grayscale, save as jpeg, and encode it as a Base64 string
    int authorImageHeight;
    string base64ImageString;
    using (Bitmap bc = new Bitmap(curBook.path + @"\CroppedAuthorImage.jpg"))
    using (Bitmap bgs = Functions.MakeGrayscale3(bc))
    {
        ImageCodecInfo[] availableCodecs = ImageCodecInfo.GetImageEncoders();
        ImageCodecInfo jpgCodec = availableCodecs.FirstOrDefault(codec => codec.MimeType == "image/jpeg");
        if (jpgCodec == null)
        {
            throw new NotSupportedException("Encoder for JPEG not found.");
        }
        using (EncoderParameters encoderParams = new EncoderParameters(1))
        {
            encoderParams.Param[0] = new EncoderParameter(Encoder.ColorDepth, 8L);
            bgs.Save(curBook.path + @"\FinalImage.jpg", jpgCodec, encoderParams);
        }
        authorImageHeight = bgs.Height;
        base64ImageString = Functions.ImageToBase64(bgs, ImageFormat.Jpeg);
    }
    main.Log("Grayscale Base-64 encoded author image created!");

    main.Log("Gathering author's other books...");
    List<BookInfo> bookList = new List<BookInfo>();
    HtmlNodeCollection resultsNodes = authorHtmlDoc.DocumentNode.SelectNodes("//div[@id='mainResults']/ul/li");
    // SelectNodes returns null when nothing matches; that simply means no other books.
    if (resultsNodes != null)
    {
        foreach (HtmlNode result in resultsNodes)
        {
            if (!result.Id.StartsWith("result_"))
            {
                continue;
            }

            HtmlNode titleNode = result.SelectSingleNode(".//div[@class='a-row a-spacing-small']/a/h2");
            if (titleNode == null)
            {
                continue;
            }
            string name = titleNode.InnerText;
            // Skip entries whose title matches one of these patterns.
            if (name == "" || Regex.IsMatch(name, @"Series Reading Order|Edition|eSpecial|\([0-9]+ Book Series\)", RegexOptions.IgnoreCase))
            {
                continue;
            }

            // Only results with a Kindle Edition link carrying an ASIN are kept.
            HtmlNode kindleNode = result.SelectSingleNode(".//*[@title='Kindle Edition']");
            if (kindleNode == null)
            {
                continue;
            }
            Match asinMatch = Regex.Match(kindleNode.OuterHtml, "dp/(B[A-Z0-9]{9})/");
            if (!asinMatch.Success)
            {
                continue;
            }

            string asin = asinMatch.Groups[1].Value;
            BookInfo newBook = new BookInfo(name, curBook.author, asin);
            newBook.amazonUrl = String.Format("http://www.amazon.com/dp/{0}", asin);
            bookList.Add(newBook);
        }
    }

    main.Log("Gathering metadata for other books...");
    foreach (BookInfo book in bookList)
    {
        try
        {
            //Gather book desc, image url, etc, if using new format
            if (settings.useNewVersion)
            {
                book.GetAmazonInfo(book.amazonUrl);
            }
            otherBooks.Add(book);
        }
        catch (Exception ex)
        {
            main.Log(String.Format("{0}\r\nURL: {1}\r\nBook: {2}\r\nContinuing anyway...", ex.Message, book.amazonUrl, book.title));
        }
    }

    main.Log("Writing Author Profile to file...");
    //Create list of Asin numbers and titles
    List<string> authorsOtherBookList = new List<string>();
    foreach (BookInfo bk in otherBooks)
    {
        authorsOtherBookList.Add(String.Format(@"{{""e"":1,""a"":""{0}"",""t"":""{1}""}}", bk.asin, bk.title));
    }

    //Create finalAuthorProfile.profile.ASIN.asc
    int unixTimestamp = (Int32)(DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1))).TotalSeconds;
    try
    {
        string authorProfileOutput = @"{""u"":[{""y"":" + authorImageHeight + @",""l"":[""" +
                                     string.Join(@""",""", otherBooks.Select(other => other.asin).ToArray()) +
                                     @"""],""n"":""" + curBook.author + @""",""a"":""" + authorAsin +
                                     @""",""b"":""" + BioTrimmed + @""",""i"":""" + base64ImageString +
                                     @"""}],""a"":""" +
                                     String.Format(@"{0}"",""d"":{1},""o"":[", curBook.asin, unixTimestamp) +
                                     string.Join(",", authorsOtherBookList.ToArray()) + "]}";
        File.WriteAllText(ApPath, authorProfileOutput);
        main.btnPreview.Enabled = true;
        main.cmsPreview.Items[0].Enabled = true;
        main.Log("Author Profile file created successfully!\r\nSaved to " + ApPath);
    }
    catch (Exception ex)
    {
        main.Log("An error occurred while writing the Author Profile file: " + ex.Message);
        return;
    }

    ApTitle = "About " + curBook.author;
    ApSubTitle = "Kindle Books By " + curBook.author;
    // Kept open on purpose: the loaded image is stored in ApAuthorImage for later use.
    ApAuthorImage = Image.FromFile(curBook.path + @"\FinalImage.jpg");
    EaSubTitle = "More Books By " + curBook.author;
    complete = true;
}
/// <summary>
/// Click handler for the Shelfari search button: reads the book's metadata, searches
/// Shelfari for the book by author and title across several bindings, and puts the
/// book's Shelfari URL into txtShelfari when one is found.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnSearchShelfari_Click(object sender, EventArgs e)
{
    if (!File.Exists(txtMobi.Text))
    {
        MessageBox.Show("Specified book was not found.", "Book Not Found");
        return;
    }
    if (!File.Exists(settings.mobi_unpack))
    {
        MessageBox.Show("Kindleunpack was not found. Please review the settings page.", "Kindleunpack Not Found");
        return;
    }
    if (!Directory.Exists(settings.outDir))
    {
        MessageBox.Show("Specified output directory does not exist. Please review the settings page.", "Output Directory Not found");
        return;
    }

    //Create temp dir and ensure it exists
    string randomFile = Functions.GetTempDirectory();
    if (!Directory.Exists(randomFile))
    {
        MessageBox.Show("Temporary path not accessible for some reason.", "Temporary Directory Error");
        return;
    }

    // Everything below runs inside try/finally so the temporary directory is removed on
    // every exit path, including the early returns taken when metadata or search fails.
    try
    {
        //0 = asin, 1 = uniqid, 2 = databasename, 3 = rawML, 4 = author, 5 = title
        List<string> results;
        if (settings.useKindleUnpack)
        {
            Log("Running Kindleunpack to get metadata...");
            results = Functions.GetMetaData(txtMobi.Text, settings.outDir, randomFile, settings.mobi_unpack);
        }
        else
        {
            Log("Extracting metadata...");
            try
            {
                results = Functions.GetMetaDataInternal(txtMobi.Text, settings.outDir, false).getResults();
            }
            catch (Exception ex)
            {
                Log("Error getting metadata: " + ex.Message);
                return;
            }
        }

        // Any count other than 6 signals a failure; the first entry, when present,
        // is logged as the failure text.
        if (results.Count != 6)
        {
            if (results.Count > 0)
            {
                Log(results[0]);
            }
            else
            {
                Log("Error getting metadata: no results were returned.");
            }
            return;
        }

        // Added author name to log output
        Log(String.Format("Got metadata!\r\nDatabase Name: {0}\r\nASIN: {1}\r\nAuthor: {2}\r\nTitle: {3}\r\nUniqueID: {4}",
            results[2], results[0], results[4], results[5], results[1]));

        //Get Shelfari Search URL
        Log("Searching for book on Shelfari...");
        string shelfariSearchUrlBase = @"http://www.shelfari.com/search/books?Author={0}&Title={1}&Binding={2}";
        string[] bindingTypes = { "Hardcover", "Kindle", "Paperback" };

        // Search book on Shelfari
        bool bookFound = false;
        string shelfariBookUrl = "";
        results[4] = Functions.FixAuthor(results[4]);

        try
        {
            HtmlAgilityPack.HtmlDocument shelfariHtmlDoc = new HtmlAgilityPack.HtmlDocument();
            // At most two passes: first with the metadata as-is, then with diacritics removed.
            for (int j = 0; j <= 1; j++)
            {
                for (int i = 0; i < bindingTypes.Length; i++)
                {
                    Log("Searching for " + bindingTypes[i] + " edition...");
                    // Insert parameters (mainly for searching with removed diacritics). Seems to work fine without replacing spaces?
                    shelfariHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(
                        String.Format(shelfariSearchUrlBase, results[4], results[5], bindingTypes[i])));
                    if (!shelfariHtmlDoc.DocumentNode.InnerText.Contains("Your search did not return any results"))
                    {
                        shelfariBookUrl = FindShelfariURL(shelfariHtmlDoc, results[4], results[5]);
                        if (shelfariBookUrl != "")
                        {
                            bookFound = true;
                            if (Properties.Settings.Default.saveHtml)
                            {
                                try
                                {
                                    Log("Saving book's Shelfari webpage...");
                                    shelfariHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(shelfariBookUrl));
                                    File.WriteAllText(Environment.CurrentDirectory +
                                                      String.Format(@"\dmp\{0}.shelfaripageHtml.txt", results[0]),
                                        shelfariHtmlDoc.DocumentNode.InnerHtml);
                                }
                                catch (Exception ex)
                                {
                                    Log(String.Format("An error ocurred saving shelfaripageHtml.txt: {0}", ex.Message));
                                }
                            }
                            break;
                        }
                    }
                    if (!bookFound)
                    {
                        Log("Unable to find a " + bindingTypes[i] + " edition of this book on Shelfari!");
                    }
                }
                if (bookFound)
                {
                    break;
                }

                // Attempt to remove diacritics (accented characters) from author & title for searching
                string newAuthor = results[4].RemoveDiacritics();
                string newTitle = results[5].RemoveDiacritics();
                if (results[4].Equals(newAuthor) && results[5].Equals(newTitle))
                {
                    // Nothing changed, so a second pass would only repeat the same requests.
                    break;
                }
                results[4] = newAuthor;
                results[5] = newTitle;
                Log("Accented characters detected. Attempting to search without them.");
            }
        }
        catch (Exception ex)
        {
            Log("Error: " + ex.Message);
            return;
        }

        if (bookFound)
        {
            Log("Book found on Shelfari!");
            Log(results[5] + " by " + results[4]);
            txtShelfari.Text = shelfariBookUrl;
            txtShelfari.Refresh();
            Log(String.Format("Shelfari URL updated: {0}\r\nYou may want to visit the URL to ensure it is correct and add/modify terms if necessary.", shelfariBookUrl));
        }
        else
        {
            Log("Unable to find this book on Shelfari! You may have to search manually.");
        }
    }
    finally
    {
        try
        {
            Directory.Delete(randomFile, true);
        }
        catch (Exception)
        {
            Log("An error occurred while trying to delete temporary files.\r\nTry deleting these files manually.");
        }
    }
}