Exemplo n.º 1
0
        /// <summary>
        /// Parses the author's other books out of the page layout introduced around 2018-07-31
        /// (the <c>searchWidget</c> container). The older layout is still parsed elsewhere for now.
        /// TODO: Switch to Kindle section and grab only those instead
        /// </summary>
        /// <param name="searchResults">Search results whose <c>authorHtmlDoc</c> holds the author page.</param>
        /// <param name="curTitle">Title of the current book; results whose name contains it are skipped.</param>
        /// <param name="curAuthor">Author name passed to every <see cref="BookInfo"/> created here.</param>
        /// <param name="TLD">Amazon top-level domain used to build each book's URL.</param>
        /// <returns>The books found, or <c>null</c> when the page has no <c>searchWidget</c> entries.</returns>
        /// <exception cref="DataSource.FormatChangedException">
        /// A non-pagination entry does not contain the expected title nodes or title link.
        /// </exception>
        public static List<BookInfo> GetAuthorBooksNew(AuthorSearchResults searchResults, string curTitle, string curAuthor, string TLD)
        {
            const string asinPattern = "(dp/(?<asin>B[A-Z0-9]{9})/|/gp/product/(?<asin>B[A-Z0-9]{9}))";

            var entries = searchResults.authorHtmlDoc.DocumentNode.SelectNodes("//div[@id='searchWidget']/div");
            if (entries == null)
            {
                return null;
            }

            var books = new List<BookInfo>(entries.Count);
            foreach (var entry in entries)
            {
                // The pagination strip lives in the same container as the results; it is not a book.
                var isPagination = entry.InnerHtml.Contains("a-pagination");
                if (!isPagination)
                {
                    var candidateNodes = entry.SelectNodes(".//div[@class='a-fixed-right-grid-inner']/div/div");
                    if (candidateNodes == null)
                    {
                        throw new DataSource.FormatChangedException(nameof(Amazon), "book results - title nodes");
                    }

                    var titleLink = candidateNodes.FirstOrDefault()?.SelectSingleNode("./a");
                    if (titleLink == null)
                    {
                        throw new DataSource.FormatChangedException(nameof(Amazon), "book results - title");
                    }
                    var title = titleLink.InnerText.Trim();

                    // Exclude the current book title.
                    // NOTE(review): the second argument looks like a regex pattern; confirm that
                    // ContainsIgnorecase treats it as one rather than as a literal substring.
                    var excluded = title.ContainsIgnorecase(curTitle) ||
                                   title.ContainsIgnorecase(@"(Series|Reading) Order|Checklist|Edition|eSpecial|\([0-9]+ Book Series\)");
                    if (!excluded)
                    {
                        // Get Kindle ASIN: the first candidate node whose markup links to a product page wins.
                        var asinMatch = candidateNodes
                            .Select(candidate => Regex.Match(candidate.OuterHtml, asinPattern, RegexOptions.Compiled))
                            .FirstOrDefault(m => m.Success);

                        // TODO: This should be removable when the Kindle Only page is parsed instead
                        // Entries without a recognisable ASIN are silently skipped rather than treated as a format change.
                        if (asinMatch != null)
                        {
                            var kindleAsin = asinMatch.Groups["asin"].Value;
                            var book = new BookInfo(title, curAuthor, kindleAsin)
                            {
                                amazonUrl = $"https://www.amazon.{TLD}/dp/{kindleAsin}"
                            };
                            books.Add(book);
                        }
                    }
                }
            }

            return books;
        }
Exemplo n.º 2
0
        // TODO: Review this...
        /// <summary>
        /// Builds the Author Profile file for the current book: looks the author up on Amazon,
        /// obtains a biography (saved file, Amazon page, or user-entered), downloads the author
        /// image, collects the author's other books and writes the serialized profile to disk.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the profile file was written; <c>false</c> when the file already exists
        /// and overwriting is disabled, the author could not be found, the biography could not be
        /// read or saved, the author image could not be downloaded, or the profile could not be written.
        /// </returns>
        /// <remarks>
        /// If the search on the configured Amazon site fails and that site is not ".com", the
        /// configured TLD is switched to "com" and the search is retried; an exception from that
        /// retry propagates to the caller. Exceptions from the image-node lookup, the bio-node
        /// lookup and the per-book metadata gathering also propagate.
        /// </remarks>
        public async Task<bool> Generate()
        {
            // Upper bound on the biography text taken from the Amazon page.
            const int maxBioLength = 1000;

            string outputDir;

            try
            {
                if (_settings.Android)
                {
                    outputDir = _settings.OutDir + @"\Android\" + _curBook.asin;
                    Directory.CreateDirectory(outputDir);
                }
                else
                {
                    outputDir = _settings.UseSubDirectories ? Functions.GetBookOutputDirectory(_curBook.author, _curBook.sidecarName, true) : _settings.OutDir;
                }
            }
            catch (Exception ex)
            {
                Logger.Log("An error occurred creating output directory: " + ex.Message + "\r\nFiles will be placed in the default output directory.");
                outputDir = _settings.OutDir;
            }
            string ApPath = outputDir + @"\AuthorProfile.profile." + _curBook.asin + ".asc";

            if (!Properties.Settings.Default.overwrite && File.Exists(ApPath))
            {
                Logger.Log("AuthorProfile file already exists... Skipping!\r\n" +
                           "Please review the settings page if you want to overwite any existing files.");
                return false;
            }

            DataSources.AuthorSearchResults searchResults = null;
            // Attempt to download from the alternate site, if present. If it fails in some way, try .com
            // If the .com search crashes, it will crash back to the caller in frmMain
            try
            {
                searchResults = await DataSources.Amazon.SearchAuthor(_curBook, _settings.AmazonTld);
            }
            catch (Exception ex)
            {
                Logger.Log("Error searching Amazon." + _settings.AmazonTld + ": " + ex.Message + "\r\n" + ex.StackTrace);
            }

            // Reached both when the search threw (logged above) and when it returned null.
            if (searchResults == null)
            {
                Logger.Log($"Failed to find {_curBook.author} on Amazon.{_settings.AmazonTld}");
                if (_settings.AmazonTld != "com")
                {
                    Logger.Log("Trying again with Amazon.com.");
                    _settings.AmazonTld = "com";
                    searchResults = await DataSources.Amazon.SearchAuthor(_curBook, _settings.AmazonTld);
                }
            }
            if (searchResults == null)
            {
                return false; // Already logged error in search function
            }
            authorAsin = searchResults.authorAsin;

            if (Properties.Settings.Default.saveHtml)
            {
                try
                {
                    Logger.Log("Saving author's Amazon webpage...");
                    File.WriteAllText(Environment.CurrentDirectory + String.Format(@"\dmp\{0}.authorpageHtml.txt", _curBook.asin),
                                      searchResults.authorHtmlDoc.DocumentNode.InnerHtml);
                }
                catch (Exception ex)
                {
                    // Saving the debug dump is best-effort; a failure must not abort profile creation.
                    Logger.Log(String.Format("An error occurred saving authorpageHtml.txt: {0}", ex.Message));
                }
            }

            // Try to find author's biography
            string bioFile = Environment.CurrentDirectory + @"\ext\" + authorAsin + ".bio";

            if (_settings.SaveBio && File.Exists(bioFile))
            {
                if (!readBio(bioFile))
                {
                    return false;
                }
            }

            // Only consult Amazon when no biography has been loaded yet.
            if (BioTrimmed == "")
            {
                // TODO: Let users edit bio in same style as chapters and aliases
                HtmlNode bio = DataSources.Amazon.GetBioNode(searchResults, _settings.AmazonTld);
                string bioText = bio?.InnerText;

                // Trim author biography to less than 1000 characters and/or replace more problematic characters.
                if (bioText?.Trim().Length > 0)
                {
                    if (bioText.Length > maxBioLength)
                    {
                        // Look for a break point only inside the first maxBioLength characters,
                        // so the result really is shortened.
                        int lastPunc  = bioText.LastIndexOfAny(new[] { '.', '!', '?' }, maxBioLength - 1);
                        int lastSpace = bioText.LastIndexOf(' ', maxBioLength - 1);
                        if (lastPunc > lastSpace)
                        {
                            // The window ends on a complete sentence: keep it as is.
                            BioTrimmed = bioText.Substring(0, lastPunc + 1);
                        }
                        else if (lastSpace > 0)
                        {
                            // Cut at the last word boundary and mark the truncation with an ellipsis.
                            BioTrimmed = bioText.Substring(0, lastSpace) + '\u2026';
                        }
                        else
                        {
                            // No usable word boundary at all: hard cut.
                            BioTrimmed = bioText.Substring(0, maxBioLength - 1) + '\u2026';
                        }
                    }
                    else
                    {
                        BioTrimmed = bioText;
                    }
                    BioTrimmed = BioTrimmed.Clean();
                    Logger.Log("Author biography found on Amazon!");
                }
                else
                {
                    // Amazon had no usable biography: offer to let the user write one.
                    File.WriteAllText(bioFile, String.Empty);
                    if (System.Windows.Forms.DialogResult.Yes ==
                        System.Windows.Forms.MessageBox.Show(
                            "No author biography found on Amazon!\r\nWould you like to create a biography?", "Biography",
                            System.Windows.Forms.MessageBoxButtons.YesNo, System.Windows.Forms.MessageBoxIcon.Question,
                            System.Windows.Forms.MessageBoxDefaultButton.Button2))
                    {
                        Functions.RunNotepad(bioFile);
                        if (!readBio(bioFile))
                        {
                            return false;
                        }
                    }
                    else
                    {
                        BioTrimmed = "No author biography found on Amazon!";
                        Logger.Log("An error occurred finding the author biography on Amazon.");
                    }
                }
            }

            if (_settings.SaveBio)
            {
                if (!File.Exists(bioFile))
                {
                    try
                    {
                        Logger.Log("Saving biography to " + bioFile);
                        using (var streamWriter = new StreamWriter(bioFile, false, System.Text.Encoding.UTF8))
                        {
                            streamWriter.Write(BioTrimmed);
                        }
                    }
                    catch (Exception ex)
                    {
                        Logger.Log("An error occurred while writing biography.\r\n" + ex.Message + "\r\n" + ex.StackTrace);
                        return false;
                    }
                }
                if (System.Windows.Forms.DialogResult.Yes == System.Windows.Forms.MessageBox.Show("Would you like to open the biography file in notepad for editing?", "Biography",
                                                                                                  System.Windows.Forms.MessageBoxButtons.YesNo, System.Windows.Forms.MessageBoxIcon.Question, System.Windows.Forms.MessageBoxDefaultButton.Button2))
                {
                    Functions.RunNotepad(bioFile);
                    if (!readBio(bioFile))
                    {
                        return false;
                    }
                }
            }

            // Try to download Author image
            HtmlNode imageXpath = DataSources.Amazon.GetAuthorImageNode(searchResults, _settings.AmazonTld);

            // Remove the "_..._." segment from the image URL.
            authorImageUrl = Regex.Replace(imageXpath.GetAttributeValue("src", ""), @"_.*?_\.", string.Empty);

            // cleanup to match retail file image links
            if (authorImageUrl.Contains(@"https://images-na.ssl-images-amazon"))
            {
                authorImageUrl = authorImageUrl.Replace(@"https://images-na.ssl-images-amazon", @"http://ecx.images-amazon");
            }

            _curBook.authorImageUrl = authorImageUrl;

            Bitmap ApAuthorImage;

            try
            {
                Logger.Log("Downloading author image...");
                ApAuthorImage = await HttpDownloader.GetImage(authorImageUrl);

                Logger.Log("Grayscale base64-encoded author image created!");
            }
            catch (Exception ex)
            {
                Logger.Log(String.Format("An error occurred downloading the author image: {0}", ex.Message));
                return false;
            }

            Logger.Log("Gathering author's other books...");
            // Try the older page layout first; fall back to the newer one when it yields null.
            var bookList = DataSources.Amazon.GetAuthorBooks(searchResults, _curBook.title, _curBook.author, _settings.AmazonTld)
                           ?? DataSources.Amazon.GetAuthorBooksNew(searchResults, _curBook.title, _curBook.author, _settings.AmazonTld);

            if (bookList != null)
            {
                Logger.Log("Gathering metadata for other books...");
                // Books are processed concurrently, so results are collected in a thread-safe bag.
                var bookBag = new ConcurrentBag<BookInfo>();
                await bookList.ParallelForEachAsync(async book =>
                {
                    // TODO: retry a couple times if one fails maybe
                    try
                    {
                        //Gather book desc, image url, etc, if using new format
                        if (_settings.UseNewVersion)
                        {
                            await book.GetAmazonInfo(book.amazonUrl);
                        }
                        bookBag.Add(book);
                    }
                    catch (Exception ex)
                    {
                        Logger.Log(String.Format("An error occurred gathering metadata for other books: {0}\r\nURL: {1}\r\nBook: {2}", ex.Message, book.amazonUrl, book.title));
                        throw;
                    }
                });

                otherBooks.AddRange(bookBag);
            }
            else
            {
                Logger.Log("Unable to find other books by this author. If there should be some, check the Amazon URL to ensure it is correct.");
            }

            Logger.Log("Writing Author Profile to file...");

            var authorOtherBooks = otherBooks.Select(book => new Model.AuthorProfile.Book
            {
                E     = 1,
                Asin  = book.asin,
                Title = book.title
            }).ToArray();

            var ap = new Model.AuthorProfile
            {
                Asin         = _curBook.asin,
                CreationDate = Functions.UnixTimestampSeconds(),
                OtherBooks   = authorOtherBooks,
                Authors      = new[]
                {
                    new Model.AuthorProfile.Author
                    {
                        Asin           = authorAsin,
                        Bio            = BioTrimmed,
                        ImageHeight    = ApAuthorImage.Height,
                        Name           = _curBook.author,
                        OtherBookAsins = otherBooks.Select(book => book.asin).ToArray(),
                        Picture        = Functions.ImageToBase64(ApAuthorImage, ImageFormat.Jpeg)
                    }
                }
            };

            string authorProfileOutput = JsonConvert.SerializeObject(ap);

            try
            {
                File.WriteAllText(ApPath, authorProfileOutput);
                Logger.Log("Author Profile file created successfully!\r\nSaved to " + ApPath);
            }
            catch (Exception ex)
            {
                Logger.Log("An error occurred while writing the Author Profile file: " + ex.Message + "\r\n" + ex.StackTrace);
                return false;
            }

            ApTitle    = "About " + _curBook.author;
            ApSubTitle = "Kindle Books By " + _curBook.author;
            EaSubTitle = "More Books By " + _curBook.author;
            return true;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Searches Amazon for the book's author, resolves the author's page URL and ASIN from the
        /// search results, then downloads the author's page.
        /// </summary>
        /// <param name="curBook">Book whose <c>author</c> is searched for; its <c>authorAsin</c> is set on success.</param>
        /// <param name="TLD">Amazon top-level domain to query (the part after "amazon.").</param>
        /// <returns>
        /// Results whose <c>authorHtmlDoc</c> holds the author's page and whose <c>authorAsin</c> is set,
        /// or <c>null</c> when the author page could not be located or its URL could not be parsed
        /// (the reason is logged).
        /// </returns>
        /// <remarks>
        /// Exceptions from the search-page download and from the short-form author-page download
        /// are not caught here and propagate to the caller.
        /// </remarks>
        public static async Task<AuthorSearchResults> SearchAuthor(BookInfo curBook, string TLD)
        {
            AuthorSearchResults results = new AuthorSearchResults();
            //Generate Author search URL from author's name
            string newAuthor = Functions.FixAuthor(curBook.author);
            // Escape the name so characters such as '&' or '#' cannot break the query string;
            // spaces keep the '+' form the search URL used before.
            string plusAuthorName = Uri.EscapeDataString(newAuthor).Replace("%20", "+");
            //Updated to match Search "all" Amazon
            string amazonAuthorSearchUrl = $"https://www.amazon.{TLD}/s/ref=nb_sb_noss_2?url=search-alias%3Dstripbooks&field-keywords={plusAuthorName}";

            Logger.Log($"Searching for author's page on Amazon.{TLD}...");

            // Search Amazon for Author
            results.authorHtmlDoc = new HtmlDocument {
                OptionAutoCloseOnEnd = true
            };
            results.authorHtmlDoc.LoadHtml(await HttpDownloader.GetPageHtmlAsync(amazonAuthorSearchUrl));

            if (Properties.Settings.Default.saveHtml)
            {
                try
                {
                    Logger.Log("Saving Amazon's author search webpage...");
                    File.WriteAllText(Environment.CurrentDirectory + $"\\dmp\\{curBook.asin}.authorsearchHtml.txt",
                                      results.authorHtmlDoc.DocumentNode.InnerHtml);
                }
                catch (Exception ex)
                {
                    // Saving the debug dump is best-effort; a failure must not abort the search.
                    Logger.Log(String.Format("An error ocurred saving authorsearchHtml.txt: {0}", ex.Message));
                }
            }

            // Check for captcha
            // TODO: Try to prompt for captcha and have user complete it to continue
            if (results.authorHtmlDoc.DocumentNode.InnerText.Contains("Robot Check"))
            {
                Logger.Log($"Warning: Amazon.{TLD} is requesting a captcha. "
                           + $"You can try visiting Amazon.{TLD} in a real browser first, try another region, or try again later.");
            }
            // Try to find Author's page from Amazon search
            HtmlNode node = results.authorHtmlDoc.DocumentNode.SelectSingleNode("//*[@id='result_1']");

            // "/e/B" is the author-page URL fragment; without it the result is not an author entry.
            if (node == null || !node.OuterHtml.Contains("/e/B"))
            {
                Logger.Log($"An error occurred finding author's page on Amazon.{TLD}." +
                           "\r\nUnable to create Author Profile." +
                           "\r\nEnsure the author metadata field matches the author's name exactly." +
                           $"\r\nSearch results can be viewed at {amazonAuthorSearchUrl}");
                return null;
            }

            string properAuthor = "";

            // Check for typical search results, second item is the author page
            if ((node = node.SelectSingleNode("//*[@id='result_1']/div/div/div/div/a")) != null)
            {
                properAuthor       = node.GetAttributeValue("href", "");
                results.authorAsin = node.GetAttributeValue("data-asin", null)
                                     ?? AsinFromUrl(properAuthor);
            }
            // otherwise check for "by so-and-so" text beneath the titles for a possible match.
            // A name containing a double quote cannot be embedded in the quoted XPath literal, so it is skipped.
            else if (newAuthor.IndexOf('"') < 0
                     && (node = results.authorHtmlDoc.DocumentNode.SelectSingleNode($"//div[@id='resultsCol']//li[@class='s-result-item celwidget  ']//a[text()=\"{newAuthor}\"]")) != null)
            {
                properAuthor       = node.GetAttributeValue("href", "");
                results.authorAsin = AsinFromUrl(properAuthor);
            }

            // The href is expected to look like "/Author-Name/e/ASIN..."; reject anything whose first
            // path segment is missing or too short, and any result without an ASIN (null or empty).
            if (string.IsNullOrEmpty(properAuthor) || properAuthor.IndexOf('/', 1) < 3 || string.IsNullOrEmpty(results.authorAsin))
            {
                Logger.Log("Unable to parse author's page URL properly. Try again later or report this URL on the MobileRead thread: " + amazonAuthorSearchUrl);
                return null;
            }
            // Keep only the first path segment (the author slug), without the surrounding slashes.
            properAuthor = properAuthor.Substring(1, properAuthor.IndexOf('/', 1) - 1);
            string authorAmazonWebsiteLocationLog = @"https://www.amazon." + TLD + "/" + properAuthor + "/e/" + results.authorAsin;
            string authorAmazonWebsiteLocation    = @"https://www.amazon." + TLD + "/" + properAuthor + "/e/" + results.authorAsin +
                                                    "/ref=la_" + results.authorAsin +
                                                    "_rf_p_n_feature_browse-b_2?fst=as%3Aoff&rh=n%3A283155%2Cp_82%3A" +
                                                    results.authorAsin +
                                                    "%2Cp_n_feature_browse-bin%3A618073011&bbn=283155&ie=UTF8&qid=1432378570&rnid=618072011";

            curBook.authorAsin = results.authorAsin;
            Logger.Log($"Author page found on Amazon!\r\nAuthor's Amazon Page URL: {authorAmazonWebsiteLocationLog}");

            // Load Author's Amazon page
            string authorpageHtml;

            try
            {
                authorpageHtml = await HttpDownloader.GetPageHtmlAsync(authorAmazonWebsiteLocation);
            }
            catch
            {
                // If page not found (on co.uk at least, the long form does not seem to work) fallback to short form
                // and pray the formatting/item display suits our needs. If short form not found, crash back to caller.
                authorpageHtml = await HttpDownloader.GetPageHtmlAsync(authorAmazonWebsiteLocationLog);
            }
            results.authorHtmlDoc.LoadHtml(authorpageHtml);
            return results;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Parses the author's other Kindle books out of the older page layout (the
        /// <c>mainResults</c> list), falling back to the top carousel when that yields nothing.
        /// </summary>
        /// <param name="searchResults">Search results whose <c>authorHtmlDoc</c> holds the author page.</param>
        /// <param name="curTitle">
        /// Title of the current book; results whose title contains it (case-insensitive, literal match)
        /// are skipped. When null or empty, no title-based exclusion is applied.
        /// </param>
        /// <param name="curAuthor">Author name passed to every <see cref="BookInfo"/> created here.</param>
        /// <param name="TLD">Amazon top-level domain used to build each book's URL.</param>
        /// <returns>
        /// The books found (possibly empty), or <c>null</c> when the page has no <c>mainResults</c>
        /// entries, or when none of those yielded a book and there is no carousel either.
        /// </returns>
        public static List<BookInfo> GetAuthorBooks(AuthorSearchResults searchResults, string curTitle, string curAuthor, string TLD)
        {
            // Listings that are not real books (reading orders, checklists, special editions, series bundles).
            const string nonBookPattern = @"(Series|Reading) Order|Checklist|Edition|eSpecial|\([0-9]+ Book Series\)";
            const string asinPattern = "dp/(B[A-Z0-9]{9})/";

            // The title is matched literally: escape it so characters such as '(', '+' or '?'
            // are not interpreted as regex syntax.
            string titlePattern = string.IsNullOrEmpty(curTitle) ? null : Regex.Escape(curTitle);

            HtmlNodeCollection resultsNodes = searchResults.authorHtmlDoc.DocumentNode.SelectNodes("//div[@id='mainResults']/ul/li");

            if (resultsNodes == null)
            {
                return null;
            }
            List<BookInfo> bookList = new List<BookInfo>(resultsNodes.Count);

            foreach (HtmlNode result in resultsNodes)
            {
                if (!result.Id.StartsWith("result_", StringComparison.Ordinal))
                {
                    continue;
                }
                HtmlNode titleNode = result.SelectSingleNode(".//div[@class='a-row a-spacing-small']/a/h2");
                if (titleNode == null)
                {
                    continue;
                }
                //Exclude the current book title from other books search
                if ((titlePattern != null && Regex.IsMatch(titleNode.InnerText, titlePattern, RegexOptions.IgnoreCase)) ||
                    Regex.IsMatch(titleNode.InnerText, nonBookPattern, RegexOptions.IgnoreCase))
                {
                    continue;
                }
                string name = titleNode.InnerText.Trim();

                // Results without a Kindle edition have no such node; skip them instead of dereferencing null.
                HtmlNode kindleNode = result.SelectSingleNode(".//*[@title='Kindle Edition']");
                if (kindleNode == null)
                {
                    continue;
                }
                Match match = Regex.Match(kindleNode.OuterHtml, asinPattern);
                if (!match.Success || name == "")
                {
                    continue;
                }
                string asin = match.Groups[1].Value;
                bookList.Add(new BookInfo(name, curAuthor, asin)
                {
                    amazonUrl = $"https://www.amazon.{TLD}/dp/{asin}"
                });
            }

            // If no kindle books returned, try the top carousel
            if (bookList.Count == 0)
            {
                resultsNodes = searchResults.authorHtmlDoc.DocumentNode.SelectNodes("//ol[@class='a-carousel' and @role ='list']/li");
                if (resultsNodes == null)
                {
                    return null;
                }
                foreach (HtmlNode result in resultsNodes)
                {
                    // In the carousel the book title is taken from the cover image's alt text.
                    HtmlNode imageNode = result.SelectSingleNode(".//a/img");
                    if (imageNode == null)
                    {
                        continue;
                    }
                    string name = imageNode.GetAttributeValue("alt", "");
                    //Exclude the current book title from other books search
                    if ((titlePattern != null && Regex.IsMatch(name, titlePattern, RegexOptions.IgnoreCase)) ||
                        Regex.IsMatch(name, nonBookPattern, RegexOptions.IgnoreCase))
                    {
                        continue;
                    }
                    HtmlNode linkNode = result.SelectSingleNode(".//a");
                    if (linkNode == null)
                    {
                        continue;
                    }
                    Match match = Regex.Match(linkNode.OuterHtml, asinPattern);
                    if (!match.Success || name == "")
                    {
                        continue;
                    }
                    string asin = match.Groups[1].Value;
                    bookList.Add(new BookInfo(name, curAuthor, asin)
                    {
                        amazonUrl = $"https://www.amazon.{TLD}/dp/{asin}"
                    });
                }
            }
            return bookList;
        }
Exemplo n.º 5
0
 /// <summary>
 /// Locates the author's image element on the author page, trying the <c>ap-image</c>
 /// container first and the <c>authorImage</c> container second.
 /// </summary>
 /// <param name="searchResults">Search results whose <c>authorHtmlDoc</c> holds the author page.</param>
 /// <param name="TLD">Amazon top-level domain; not used by this lookup.</param>
 /// <returns>The <c>img</c> node that was found.</returns>
 /// <exception cref="DataSource.FormatChangedException">Neither container holds an image.</exception>
 public static HtmlNode GetAuthorImageNode(AuthorSearchResults searchResults, string TLD)
 {
     var imageNode = searchResults.authorHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='ap-image']/img");
     if (imageNode != null)
     {
         return imageNode;
     }

     imageNode = searchResults.authorHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='authorImage']/img");
     if (imageNode != null)
     {
         return imageNode;
     }

     throw new DataSource.FormatChangedException(nameof(Amazon), "author image");
 }
Exemplo n.º 6
0
 /// <summary>
 /// Locates the author's biography element on the author page, trying the <c>ap-bio</c>
 /// layout first and the <c>author_biography</c> span second.
 /// </summary>
 /// <param name="searchResults">Search results whose <c>authorHtmlDoc</c> holds the author page.</param>
 /// <param name="TLD">
 /// Amazon top-level domain; included in case different Amazon sites have different formatting,
 /// but not used by this lookup.
 /// </param>
 /// <returns>The node containing the biography.</returns>
 /// <exception cref="DataSource.FormatChangedException">Neither layout's biography node is present.</exception>
 public static HtmlNode GetBioNode(AuthorSearchResults searchResults, string TLD)
 {
     var bioNode = searchResults.authorHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='ap-bio' and @class='a-row']/div/div/span");
     if (bioNode != null)
     {
         return bioNode;
     }

     bioNode = searchResults.authorHtmlDoc.DocumentNode.SelectSingleNode("//span[@id='author_biography']");
     if (bioNode != null)
     {
         return bioNode;
     }

     throw new DataSource.FormatChangedException(nameof(Amazon), "author bio");
 }