// Finds the next book in the current book's series and, as a side effect, tries to
// populate curBook.previousInSeries.
//
// Returns the next book, or null when curBook has no Shelfari URL, no next title was
// found, or every lookup for the next title failed.
//
// NOTE(review): previousTitle, nextShelfariUrl and previousShelfariUrl are not assigned
// in this method; presumably GetNextInSeriesTitle fills them in - confirm.
private BookInfo GetNextInSeries()
{
    BookInfo nextBook = null;

    if (curBook.shelfariUrl == "")
    {
        return null;
    }

    // Get title of next book
    HtmlAgilityPack.HtmlDocument searchHtmlDoc = new HtmlAgilityPack.HtmlDocument();
    searchHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(curBook.shelfariUrl));
    string nextTitle = GetNextInSeriesTitle(searchHtmlDoc);
    // If search failed, try other method
    //if (nextTitle == "")
    //    nextTitle = GetNextInSeriesTitle2(searchHtmlDoc);

    if (nextTitle != "")
    {
        // Search author's other books for the book (assumes next in series was written by the same author...)
        // Returns the first one found, though there should probably not be more than 1 of the same name anyway
        nextBook = authorProfile.otherBooks.FirstOrDefault(bk => bk.title == nextTitle);

        if (nextBook == null)
        {
            // Attempt to search Amazon for the book instead
            nextBook = Functions.AmazonSearchBook(nextTitle, curBook.author);
            if (nextBook != null)
            {
                // Fill in desc, imageurl, and ratings
                nextBook.GetAmazonInfo(nextBook.amazonUrl);
            }
        }

        // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
        if (nextBook == null)
        {
            HtmlDocument bookDoc = new HtmlDocument() { OptionAutoCloseOnEnd = true };
            bookDoc.LoadHtml(HttpDownloader.GetPageHtml(nextShelfariUrl));
            // Look for a single-quoted, 10-character token starting with 'B' in the page markup
            Match match = Regex.Match(bookDoc.DocumentNode.InnerHtml, "('B[A-Z0-9]{9}')");
            if (match.Success)
            {
                string cleanASIN = match.Value.Replace("'", String.Empty);
                nextBook = new BookInfo(nextTitle, curBook.author, cleanASIN);
                nextBook.GetAmazonInfo("http://www.amazon.com/dp/" + cleanASIN);
            }
        }

        if (nextBook == null)
        {
            main.Log("Book was found to be part of a series, but next book could not be found.\r\n" +
                     "Please report this book and the Shelfari URL and output log to improve parsing.");
        }
    }
    else if (curBook.seriesPosition != curBook.totalInSeries)
    {
        main.Log("Unable to find next book in series, the book may not be part of a series, or it is the latest release.");
    }

    if (previousTitle != "")
    {
        if (curBook.previousInSeries == null)
        {
            // Attempt to search Amazon for the book
            curBook.previousInSeries = Functions.AmazonSearchBook(previousTitle, curBook.author);
            if (curBook.previousInSeries != null)
            {
                // Fill in desc, imageurl, and ratings
                curBook.previousInSeries.GetAmazonInfo(curBook.previousInSeries.amazonUrl);
            }

            // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
            if (curBook.previousInSeries == null)
            {
                HtmlDocument bookDoc = new HtmlDocument() { OptionAutoCloseOnEnd = true };
                bookDoc.LoadHtml(HttpDownloader.GetPageHtml(previousShelfariUrl));
                Match match = Regex.Match(bookDoc.DocumentNode.InnerHtml, "('B[A-Z0-9]{9}')");
                if (match.Success)
                {
                    string cleanASIN = match.Value.Replace("'", String.Empty);
                    curBook.previousInSeries = new BookInfo(previousTitle, curBook.author, cleanASIN);
                    curBook.previousInSeries.GetAmazonInfo("http://www.amazon.com/dp/" + cleanASIN);
                }
            }
        }

        // Report the failure only when the previous book is still missing. This used to be
        // the else-branch of the null check above, so it fired when a previous book was
        // already set and stayed silent when every lookup had failed.
        if (curBook.previousInSeries == null)
        {
            main.Log("Book was found to be part of a series, but previous book could not be found.\r\n" +
                     "Please report this book and the Shelfari URL and output log to improve parsing.");
        }
    }

    return nextBook;
}
//Set if constructor succeeds in gathering data
public bool complete = false;

//Requires an already-built AuthorProfile and the BaseEndActions.txt file
//
// Downloads the book's Amazon page (built from book.asin), lets curBook parse its own
// details from it, then collects the carousel entries on that page into custAlsoBought.
// complete is set to true only when the constructor reaches its end; it stays false when
// the page download fails, curBook.GetAmazonInfo throws, or the recommendation parsing
// throws.
//
//   ap   - author profile stored in authorProfile
//   book - book being processed, stored in curBook
//   erl  - stored in _erl (not otherwise used here)
//   frm  - main form, used for logging
public EndActions(AuthorProfile ap, BookInfo book, long erl, frmMain frm)
{
    authorProfile = ap;
    curBook = book;
    _erl = erl;
    main = frm;

    main.Log("Attempting to find book on Amazon...");
    //Generate Book search URL from book's ASIN
    string ebookLocation = @"http://www.amazon.com/dp/" + book.asin;

    // Search Amazon for book
    main.Log("Book found on Amazon!");
    main.Log(String.Format("Book's Amazon page URL: {0}", ebookLocation));

    HtmlDocument bookHtmlDoc = new HtmlDocument { OptionAutoCloseOnEnd = true };
    try
    {
        bookHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(ebookLocation));
    }
    catch (Exception ex)
    {
        main.Log(String.Format("An error ocurred while downloading book's Amazon page: {0}\r\nYour ASIN may not be correct.", ex.Message));
        return;
    }

    if (Properties.Settings.Default.saveHtml)
    {
        // Saving the dump is best-effort: a failure is logged and processing continues.
        try
        {
            main.Log("Saving book's Amazon webpage...");
            File.WriteAllText(Environment.CurrentDirectory + String.Format(@"\dmp\{0}.bookpageHtml.txt", curBook.asin),
                bookHtmlDoc.DocumentNode.InnerHtml);
        }
        catch (Exception ex)
        {
            main.Log(String.Format("An error ocurred saving bookpageHtml.txt: {0}", ex.Message));
        }
    }

    try
    {
        curBook.GetAmazonInfo(bookHtmlDoc);
    }
    catch (Exception ex)
    {
        main.Log(String.Format("An error ocurred parsing Amazon info: {0}", ex.Message));
        return;
    }

    main.Log("Gathering recommended book info...");
    //Parse Recommended Author titles and ASINs
    try
    {
        HtmlNodeCollection recList = bookHtmlDoc.DocumentNode.SelectNodes("//li[@class='a-carousel-card a-float-left']");
        if (recList == null)
        {
            // NOTE(review): the constructor still goes on to SetPaths() and complete = true
            // in this case, as it always has - confirm that is intended.
            main.Log("Could not find related book list page on Amazon.\r\nUnable to create End Actions.");
        }
        else
        {
            foreach (HtmlNode item in recList.Where(node => node != null))
            {
                HtmlNode nodeTitle = item.SelectSingleNode(".//div/a");
                HtmlNode nodeAuthor = item.SelectSingleNode(".//div/div");
                HtmlNode nodeAsin = item.SelectSingleNode(".//div");

                // SelectSingleNode yields null when nothing matches. Skip a malformed card
                // rather than letting a NullReferenceException reach the outer catch and
                // abort the whole constructor.
                if (nodeTitle == null || nodeAuthor == null || nodeAsin == null)
                {
                    main.Log("Skipping a recommended book entry with an unexpected layout.");
                    continue;
                }

                string nodeTitleCheck = nodeTitle.GetAttributeValue("title", "");
                string nodeUrl = nodeTitle.GetAttributeValue("href", "");
                if (nodeUrl != "")
                {
                    nodeUrl = "http://www.amazon.com" + nodeUrl;
                }

                if (nodeTitleCheck == "")
                {
                    // No title attribute: fall back to the link text
                    //Remove CR, LF and TAB
                    nodeTitleCheck = nodeTitle.InnerText.CleanString();
                }
                string cleanAuthor = nodeAuthor.InnerText.CleanString();

                // Skip entries whose title matches any of these markers
                Match match = Regex.Match(nodeTitleCheck,
                    @"Series Reading Order|Edition|eSpecial|\([0-9]+ Book Series\)", RegexOptions.IgnoreCase);
                if (match.Success)
                {
                    continue;
                }

                BookInfo newBook = new BookInfo(nodeTitleCheck, cleanAuthor,
                    nodeAsin.GetAttributeValue("data-asin", ""));
                try
                {
                    //Gather book desc, image url, etc, if using new format
                    if (settings.useNewVersion)
                    {
                        newBook.GetAmazonInfo(nodeUrl);
                    }
                    // Only reached when the lookup above did not throw
                    custAlsoBought.Add(newBook);
                }
                catch (Exception ex)
                {
                    main.Log(String.Format("{0}\r\n{1}\r\nContinuing anyway...", ex.Message, nodeUrl));
                }
            }
        }
    }
    catch (Exception ex)
    {
        main.Log("An error occurred parsing the book's amazon page: " + ex.Message);
        return;
    }

    SetPaths();
    complete = true;
}
//Set if constructor succeeds in gathering data
public bool complete = false;

//Requires an already-built AuthorProfile and the BaseEndActions.txt file
//
// Fetches the Amazon page for book.asin, has curBook read its details from that page,
// and fills custAlsoBought from the page's carousel cards. Every early return leaves
// complete at false: a failed download, an exception from curBook.GetAmazonInfo, or an
// exception while parsing the carousel. complete becomes true only after SetPaths() runs.
//
//   ap   - assigned to authorProfile
//   book - assigned to curBook; its asin builds the page URL
//   erl  - assigned to _erl
//   frm  - assigned to main and used for all log output
public EndActions(AuthorProfile ap, BookInfo book, long erl, frmMain frm)
{
    authorProfile = ap;
    curBook = book;
    _erl = erl;
    main = frm;

    main.Log("Attempting to find book on Amazon...");
    //Generate Book search URL from book's ASIN
    string ebookLocation = @"http://www.amazon.com/dp/" + book.asin;

    // Search Amazon for book
    main.Log("Book found on Amazon!");
    main.Log(String.Format("Book's Amazon page URL: {0}", ebookLocation));

    HtmlDocument bookHtmlDoc = new HtmlDocument { OptionAutoCloseOnEnd = true };
    try
    {
        bookHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(ebookLocation));
    }
    catch (Exception ex)
    {
        main.Log(String.Format("An error ocurred while downloading book's Amazon page: {0}\r\nYour ASIN may not be correct.", ex.Message));
        return;
    }

    if (Properties.Settings.Default.saveHtml)
    {
        // A failed dump is only logged; it does not stop the constructor.
        try
        {
            main.Log("Saving book's Amazon webpage...");
            File.WriteAllText(Environment.CurrentDirectory + String.Format(@"\dmp\{0}.bookpageHtml.txt", curBook.asin),
                bookHtmlDoc.DocumentNode.InnerHtml);
        }
        catch (Exception ex)
        {
            main.Log(String.Format("An error ocurred saving bookpageHtml.txt: {0}", ex.Message));
        }
    }

    try
    {
        curBook.GetAmazonInfo(bookHtmlDoc);
    }
    catch (Exception ex)
    {
        main.Log(String.Format("An error ocurred parsing Amazon info: {0}", ex.Message));
        return;
    }

    main.Log("Gathering recommended book info...");
    //Parse Recommended Author titles and ASINs
    try
    {
        HtmlNodeCollection recList = bookHtmlDoc.DocumentNode.SelectNodes("//li[@class='a-carousel-card a-float-left']");
        if (recList == null)
        {
            // NOTE(review): execution still falls through to SetPaths() and
            // complete = true here, unchanged from before - confirm that is wanted.
            main.Log("Could not find related book list page on Amazon.\r\nUnable to create End Actions.");
        }

        if (recList != null)
        {
            foreach (HtmlNode item in recList.Where(card => card != null))
            {
                HtmlNode titleLink = item.SelectSingleNode(".//div/a");
                HtmlNode authorNode = item.SelectSingleNode(".//div/div");
                HtmlNode asinNode = item.SelectSingleNode(".//div");

                // A card missing any of these nodes used to raise a NullReferenceException
                // that the outer catch turned into a full abort. Skip just that card.
                bool cardIsMalformed = titleLink == null || authorNode == null || asinNode == null;
                if (cardIsMalformed)
                {
                    main.Log("Skipping a recommended book entry with an unexpected layout.");
                    continue;
                }

                string nodeUrl = titleLink.GetAttributeValue("href", "");
                if (nodeUrl != "")
                {
                    nodeUrl = "http://www.amazon.com" + nodeUrl;
                }

                // Prefer the title attribute; use the link text when it is empty
                string nodeTitleCheck = titleLink.GetAttributeValue("title", "");
                if (nodeTitleCheck == "")
                {
                    //Remove CR, LF and TAB
                    nodeTitleCheck = titleLink.InnerText.CleanString();
                }

                string cleanAuthor = authorNode.InnerText.CleanString();

                // Entries whose title contains one of these markers are not added
                bool isFilteredTitle = Regex.Match(nodeTitleCheck,
                    @"Series Reading Order|Edition|eSpecial|\([0-9]+ Book Series\)", RegexOptions.IgnoreCase).Success;
                if (isFilteredTitle)
                {
                    continue;
                }

                string asin = asinNode.GetAttributeValue("data-asin", "");
                BookInfo newBook = new BookInfo(nodeTitleCheck, cleanAuthor, asin);
                try
                {
                    //Gather book desc, image url, etc, if using new format
                    if (settings.useNewVersion)
                    {
                        newBook.GetAmazonInfo(nodeUrl);
                    }
                    // Not reached if GetAmazonInfo threw, so that book is left out
                    custAlsoBought.Add(newBook);
                }
                catch (Exception ex)
                {
                    main.Log(String.Format("{0}\r\n{1}\r\nContinuing anyway...", ex.Message, nodeUrl));
                }
            }
        }
    }
    catch (Exception ex)
    {
        main.Log("An error occurred parsing the book's amazon page: " + ex.Message);
        return;
    }

    SetPaths();
    complete = true;
}
// Resolves the book that follows curBook in its series. While doing so it also tries to
// set curBook.previousInSeries when a previous title is known and that field is still null.
//
// Returns null when curBook.shelfariUrl is empty, when no next title is found, or when
// none of the lookups (author's other books, Amazon search, Shelfari page) yields a book.
//
// NOTE(review): previousTitle, nextShelfariUrl and previousShelfariUrl are read here but
// never assigned in this method; presumably GetNextInSeriesTitle sets them - confirm.
private BookInfo GetNextInSeries()
{
    if (curBook.shelfariUrl == "")
    {
        return null;
    }

    // Get title of next book
    HtmlAgilityPack.HtmlDocument searchHtmlDoc = new HtmlAgilityPack.HtmlDocument();
    searchHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(curBook.shelfariUrl));
    string nextTitle = GetNextInSeriesTitle(searchHtmlDoc);
    // If search failed, try other method
    //if (nextTitle == "")
    //    nextTitle = GetNextInSeriesTitle2(searchHtmlDoc);

    BookInfo nextBook = null;
    if (nextTitle == "")
    {
        if (curBook.seriesPosition != curBook.totalInSeries)
        {
            main.Log("Unable to find next book in series, the book may not be part of a series, or it is the latest release.");
        }
    }
    else
    {
        // Search author's other books for the book (assumes next in series was written by the same author...)
        // Returns the first one found, though there should probably not be more than 1 of the same name anyway
        nextBook = authorProfile.otherBooks.FirstOrDefault(bk => bk.title == nextTitle);

        if (nextBook == null)
        {
            // Attempt to search Amazon for the book instead
            nextBook = Functions.AmazonSearchBook(nextTitle, curBook.author);
            if (nextBook != null)
            {
                // Fill in desc, imageurl, and ratings
                nextBook.GetAmazonInfo(nextBook.amazonUrl);
            }
        }

        // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
        if (nextBook == null)
        {
            HtmlDocument bookDoc = new HtmlDocument() { OptionAutoCloseOnEnd = true };
            bookDoc.LoadHtml(HttpDownloader.GetPageHtml(nextShelfariUrl));
            // The pattern captures a quoted 10-character token beginning with 'B'
            Match match = Regex.Match(bookDoc.DocumentNode.InnerHtml, "('B[A-Z0-9]{9}')");
            if (match.Success)
            {
                string cleanASIN = match.Value.Replace("'", String.Empty);
                nextBook = new BookInfo(nextTitle, curBook.author, cleanASIN);
                nextBook.GetAmazonInfo("http://www.amazon.com/dp/" + cleanASIN);
            }
        }

        if (nextBook == null)
        {
            main.Log("Book was found to be part of a series, but next book could not be found.\r\n" +
                     "Please report this book and the Shelfari URL and output log to improve parsing.");
        }
    }

    if (previousTitle != "")
    {
        if (curBook.previousInSeries == null)
        {
            // Attempt to search Amazon for the book
            curBook.previousInSeries = Functions.AmazonSearchBook(previousTitle, curBook.author);
            if (curBook.previousInSeries != null)
            {
                // Fill in desc, imageurl, and ratings
                curBook.previousInSeries.GetAmazonInfo(curBook.previousInSeries.amazonUrl);
            }

            // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
            if (curBook.previousInSeries == null)
            {
                HtmlDocument bookDoc = new HtmlDocument() { OptionAutoCloseOnEnd = true };
                bookDoc.LoadHtml(HttpDownloader.GetPageHtml(previousShelfariUrl));
                Match match = Regex.Match(bookDoc.DocumentNode.InnerHtml, "('B[A-Z0-9]{9}')");
                if (match.Success)
                {
                    string cleanASIN = match.Value.Replace("'", String.Empty);
                    curBook.previousInSeries = new BookInfo(previousTitle, curBook.author, cleanASIN);
                    curBook.previousInSeries.GetAmazonInfo("http://www.amazon.com/dp/" + cleanASIN);
                }
            }
        }

        // The failure message belongs to the case where the previous book is still
        // unresolved. It was previously the else of the null check above, which logged it
        // when a previous book already existed and never when the lookups failed.
        if (curBook.previousInSeries == null)
        {
            main.Log("Book was found to be part of a series, but previous book could not be found.\r\n" +
                     "Please report this book and the Shelfari URL and output log to improve parsing.");
        }
    }

    return nextBook;
}