Exemplo n.º 1
0
        /// <summary>
        /// Retrieves the book's description, image URL, and rating from the book's Amazon URL.
        /// The page is downloaded, parsed, and handed to the document-based GetAmazonInfo overload.
        /// </summary>
        /// <param name="amazonUrl">Book's Amazon URL. A null or empty value makes the call a no-op.</param>
        public void GetAmazonInfo(string amazonUrl)
        {
            // Nothing to download without a URL. A null URL previously slipped past the
            // comparison against "" and was handed to the downloader.
            if (string.IsNullOrEmpty(amazonUrl))
            {
                return;
            }

            HtmlDocument bookDoc = new HtmlDocument
            {
                OptionAutoCloseOnEnd = true
            };
            bookDoc.LoadHtml(HttpDownloader.GetPageHtml(amazonUrl));

            GetAmazonInfo(bookDoc);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Searches Amazon's Kindle store for a book and builds a BookInfo from the first search result.
        /// </summary>
        /// <param name="title">Book title; everything from the first " (" onwards is dropped before searching.</param>
        /// <param name="author">Author name; used (with an initial compacted) in the query and passed unchanged to the result.</param>
        /// <returns>
        /// The book built from the first result, or null when that result is missing, does not
        /// contain the title (case-insensitive), or has no title link.
        /// </returns>
        public static BookInfo AmazonSearchBook(string title, string author)
        {
            // Drop the space in front of an initial (" X." -> "X.") for the search query.
            // Every replacement is applied to the untouched author string, so when several
            // initials match, only the replacement for the last match is retained.
            Regex initialPattern = new Regex(@"( [A-Z]\.)", RegexOptions.Compiled);
            string searchAuthor = author;
            foreach (Match initial in initialPattern.Matches(author))
            {
                searchAuthor = author.Replace(initial.Value, initial.Value.Trim());
            }

            // Strip a trailing parenthesised part such as " (Series Name, #2)".
            int parenIndex = title.IndexOf(" (");
            if (parenIndex >= 0)
            {
                title = title.Substring(0, parenIndex);
            }

            string keywords  = Uri.EscapeDataString(title + " " + searchAuthor + " kindle edition");
            string searchUrl = @"http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Ddigital-text&field-keywords=" + keywords;

            HAP.HtmlDocument searchDoc = new HAP.HtmlDocument();
            searchDoc.LoadHtml(HttpDownloader.GetPageHtml(searchUrl));

            // Only the first result is considered, and only if its text mentions the title
            // (case-insensitive) as a basic sanity check that it is the same book.
            HAP.HtmlNode firstResult = searchDoc.DocumentNode.SelectSingleNode("//li[@id='result_0']");
            if (firstResult == null || firstResult.InnerText.IndexOf(title, StringComparison.OrdinalIgnoreCase) < 0)
            {
                return null;
            }

            string foundASIN = firstResult.GetAttributeValue("data-asin", "");
            HAP.HtmlNode titleLink = firstResult.SelectSingleNode(".//div/div/div/div[@class='a-fixed-left-grid-col a-col-right']/div/a");
            if (titleLink == null)
            {
                return null;
            }

            BookInfo result = new BookInfo(titleLink.InnerText, author, foundASIN);
            result.amazonUrl = titleLink.GetAttributeValue("href", ""); // Grab the true link for good measure
            return result;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Convenience wrapper: creates an HttpDownloader for the given URL and returns the result of its GetPage() call.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>Whatever GetPage() returns for that URL.</returns>
        public static string GetPageHtml(string url)
        {
            return new HttpDownloader(url).GetPage();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reads a UTF-8 JSON file and fills the form's controls from the sections of its "data" object:
        /// series position, popular highlights, book description, author bio, author recommendations,
        /// reading time and the previous book in the series. Cover and author images are downloaded
        /// and converted to grayscale.
        /// </summary>
        /// <param name="inputFile">Path of the JSON file to load.</param>
        /// <returns>A task that completes once all controls have been populated.</returns>
        public async Task Populate(string inputFile)
        {
            string input;

            using (StreamReader streamReader = new StreamReader(inputFile, Encoding.UTF8))
            {
                input = streamReader.ReadToEnd();
            }
            ilOtherBooks.Images.Clear();
            dgvOtherBooks.Rows.Clear();

            JObject sa       = JObject.Parse(input);
            var     tempData = sa["data"]["seriesPosition"];

            if (tempData != null)
            {
                string position = tempData["positionInSeries"].ToString();
                string total    = tempData["totalInSeries"].ToString();
                string name     = tempData["seriesName"].ToString();
                lblSeries.Text = $"This is book {position} of {total} in {name}";
                if (position == "1")
                {
                    // First book: there is no previous book, so the series label takes the full width.
                    pbPreviousCover.Visible    = false;
                    lblPreviousHeading.Visible = false;
                    lblPreviousTitle.Visible   = false;
                    lblSeries.Left             = 12;
                    lblSeries.Width            = 312;
                }
                else
                {
                    lblSeries.Left             = 80;
                    lblSeries.Width            = 244;
                    pbPreviousCover.Visible    = true;
                    lblPreviousHeading.Visible = true;
                    lblPreviousTitle.Visible   = true;
                }
            }
            else
            {
                lblSeries.Text             = "This book is not part of a series...";
                pbPreviousCover.Image      = Resources.missing_image;
                lblPreviousHeading.Visible = false;
                lblPreviousTitle.Visible   = false;
            }

            tempData = sa["data"]["popularHighlightsText"]?["localizedText"]?["en-US"];
            if (tempData != null)
            {
                Match popularHighlightsText = Regex.Match(tempData.ToString(),
                                                          @"((\d+) passages have been highlighted (\d+) times)");
                if (popularHighlightsText.Success)
                {
                    lblHighlights.Text = popularHighlightsText.Groups[1].Value;
                }
            }

            tempData = sa["data"]["bookDescription"];
            if (tempData != null)
            {
                lblTitle.Text  = tempData["title"].ToString();
                lblAuthor.Text = tempData["authors"][0].ToString();
                // NOTE(review): titlePopup is filled from the author label, not the title label - confirm this is intended.
                titlePopup          = lblAuthor.Text;
                lblDescription.Text = tempData["description"].ToString();
                descriptionPopup    = lblDescription.Text;
                // The first run of digits in the rating selects the STAR<n> image resource.
                Match rating = Regex.Match(tempData["amazonRating"].ToString(), @"(\d+)");
                if (rating.Success)
                {
                    pbRating.Image = (Image)Resources.ResourceManager.GetObject($"STAR{rating.Groups[1].Value}");
                }
                lblVotes.Text = $"({tempData["numberOfReviews"]} votes)";
            }

            tempData = sa["data"]["authorBios"]?["authors"]?[0];
            if (tempData != null)
            {
                string imageUrl = tempData["imageUrl"]?.ToString() ?? "";
                if (imageUrl != "")
                {
                    pbAuthorImage.Image = Functions.MakeGrayscale3(await HttpDownloader.GetImage(imageUrl));
                }
                lblBiography.Text = tempData["bio"]?.ToString();
                biographyPopup    = lblBiography.Text;
            }

            tempData = sa["data"]["authorRecs"]?["recommendations"];
            if (tempData != null)
            {
                // TODO: Figure out why otherBooks is here but not used
                //var otherBooks = new List<Tuple<string, string, string, string>>();
                foreach (var rec in tempData)
                {
                    string imageUrl = rec["imageUrl"]?.ToString() ?? "";
                    string author   = rec["authors"][0].ToString();
                    string title    = rec["title"].ToString();
                    //otherBooks.Add(new Tuple<string, string, string, string>(rec["asin"].ToString(), title, author, imageUrl));

                    // Add exactly one image per row. Without the placeholder, a recommendation
                    // lacking an image URL would reuse the previous book's cover, or fail on
                    // index -1 when the image list is still empty.
                    if (imageUrl != "")
                    {
                        ilOtherBooks.Images.Add(Functions.MakeGrayscale3(await HttpDownloader.GetImage(imageUrl)));
                    }
                    else
                    {
                        ilOtherBooks.Images.Add(Resources.missing_image);
                    }
                    dgvOtherBooks.Rows.Add(ilOtherBooks.Images[ilOtherBooks.Images.Count - 1], $"{title}\n{author}");
                }
            }

            tempData = sa["data"]["readingTime"];
            if (tempData != null)
            {
                lblReadingTime.Text = $"{tempData["hours"]} hours and {tempData["minutes"]} minutes to read";
                // The page count is only appended when a reading time is present.
                tempData = sa["data"]["readingPages"];
                if (tempData != null)
                {
                    lblReadingTime.Text = $"{lblReadingTime.Text} ({tempData["pagesInBook"]} pages)";
                }
            }

            tempData = sa["data"]["previousBookInTheSeries"];
            if (tempData != null)
            {
                lblPreviousTitle.Text = tempData["title"].ToString();
                string imageUrl = tempData["imageUrl"]?.ToString() ?? "";
                if (imageUrl != "")
                {
                    pbPreviousCover.Image = Functions.MakeGrayscale3(await HttpDownloader.GetImage(imageUrl));
                }
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Search Shelfari for series info, scrape series page, and return next title in series.
        /// </summary>
        /// <param name="searchHtmlDoc">
        /// Book's Shelfari page, pre-downloaded. When series info is found, this document is
        /// re-loaded with the series page, so its previous content is replaced.
        /// </param>
        /// <returns>
        /// Title of the next book in the series, or an empty string when the page has no series
        /// info, the expected nodes are missing, or no later book is listed.
        /// </returns>
        private string GetNextInSeriesTitle2(HtmlAgilityPack.HtmlDocument searchHtmlDoc)
        {
            //Check if book's Shelfari page contains series info
            HtmlAgilityPack.HtmlNode seriesNode = searchHtmlDoc.DocumentNode.SelectSingleNode("//span[@class='series']");
            if (seriesNode == null || seriesNode.FirstChild == null)
            {
                return("");
            }

            //Series name and book number; the text after the last space is the book number
            string series = seriesNode.InnerText.Trim();
            int    currentSeriesIndex;
            Int32.TryParse(series.Substring(series.LastIndexOf(" ") + 1), out currentSeriesIndex);

            //Parse series Shelfari URL
            HtmlAgilityPack.HtmlNode seriesLink = seriesNode.SelectSingleNode("//span[@class='series']/a[@href]");
            if (seriesLink == null)
            {
                return("");
            }
            string seriesURL   = seriesLink.GetAttributeValue("href", "");
            string seriesShort = seriesNode.FirstChild.InnerText.Trim();

            //Download the series page. The URL is passed as-is: running it through String.Format
            //would throw on any '{' or '}' it contains.
            searchHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(seriesURL));

            //Parse number of books in series and convert to integer
            int currentSeriesCount = 0;
            HtmlAgilityPack.HtmlNode countNode = searchHtmlDoc.DocumentNode.SelectSingleNode("//h2[@class='f_m']");
            if (countNode != null && countNode.FirstChild != null)
            {
                Match match = Regex.Match(countNode.FirstChild.InnerText.Trim(), @"\d+");
                if (match.Success)
                {
                    Int32.TryParse(match.Value, out currentSeriesCount);
                }
            }

            //Check if there is a next book
            if (currentSeriesIndex >= currentSeriesCount)
            {
                return("");
            }

            //Add series name and book number to log
            main.Log(String.Format("This is book {0} of {1} in the {2} Series...",
                                   currentSeriesIndex, currentSeriesCount, seriesShort));

            //SelectNodes yields null rather than an empty collection when nothing matches
            var seriesItems = searchHtmlDoc.DocumentNode.SelectNodes(".//ol/li");
            if (seriesItems == null)
            {
                return("");
            }

            string nextIndexText = (currentSeriesIndex + 1).ToString();
            foreach (HtmlAgilityPack.HtmlNode seriesItem in seriesItems)
            {
                HtmlAgilityPack.HtmlNode positionNode = seriesItem.SelectSingleNode(".//div/span[@class='series bold']");
                if (positionNode == null || !positionNode.InnerText.Contains(nextIndexText))
                {
                    continue;
                }

                HtmlAgilityPack.HtmlNode titleNode = seriesItem.SelectSingleNode(".//h3/a");
                if (titleNode == null)
                {
                    continue;
                }

                //Parse title of the next book
                string nextTitle = titleNode.InnerText.Trim();
                //Add next book in series to log
                main.Log(String.Format("The next book in this series is {0}!", nextTitle));
                return(nextTitle);
            }

            return("");
        }
Exemplo n.º 6
0
        /// <summary>
        /// Resolves the next book in the series and, as a side effect, fills
        /// curBook.previousInSeries when a previous title is known and not yet resolved.
        /// Each book is looked up via an Amazon search first, then via an ASIN scraped
        /// from the book's Shelfari page.
        /// </summary>
        /// <returns>
        /// The next book in the series, or null when the current book has no Shelfari URL,
        /// no next title was found, or none of the lookups produced a book.
        /// </returns>
        private BookInfo GetNextInSeries()
        {
            if (curBook.shelfariUrl == "")
            {
                return(null);
            }

            BookInfo nextBook = null;

            // Get title of next book
            // NOTE(review): previousTitle, nextShelfariUrl and previousShelfariUrl are presumably
            // populated by GetNextInSeriesTitle - confirm against that method.
            HtmlAgilityPack.HtmlDocument searchHtmlDoc = new HtmlAgilityPack.HtmlDocument();
            searchHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(curBook.shelfariUrl));
            string nextTitle = GetNextInSeriesTitle(searchHtmlDoc);

            // If search failed, try other method
            //if (nextTitle == "")
            //    nextTitle = GetNextInSeriesTitle2(searchHtmlDoc);
            if (nextTitle != "")
            {
                // Search author's other books for the book (assumes next in series was written by the same author...)
                // Returns the first one found, though there should probably not be more than 1 of the same name anyway
                nextBook = authorProfile.otherBooks.FirstOrDefault(bk => bk.title == nextTitle);
                if (nextBook == null)
                {
                    // Attempt to search Amazon for the book instead
                    nextBook = Functions.AmazonSearchBook(nextTitle, curBook.author);
                    if (nextBook != null)
                    {
                        nextBook.GetAmazonInfo(nextBook.amazonUrl); //fill in desc, imageurl, and ratings
                    }
                }
                // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
                if (nextBook == null)
                {
                    HtmlDocument bookDoc = new HtmlDocument
                    {
                        OptionAutoCloseOnEnd = true
                    };
                    bookDoc.LoadHtml(HttpDownloader.GetPageHtml(nextShelfariUrl));
                    // The first quoted 10-character token starting with 'B' is taken as the ASIN
                    Match match = Regex.Match(bookDoc.DocumentNode.InnerHtml, "('B[A-Z0-9]{9}')");
                    if (match.Success)
                    {
                        string cleanASIN = match.Value.Replace("'", String.Empty);
                        nextBook = new BookInfo(nextTitle, curBook.author, cleanASIN);
                        nextBook.GetAmazonInfo("http://www.amazon.com/dp/" + cleanASIN);
                    }
                }
                if (nextBook == null)
                {
                    main.Log("Book was found to be part of a series, but next book could not be found.\r\n" +
                             "Please report this book and the Shelfari URL and output log to improve parsing.");
                }
            }
            else if (curBook.seriesPosition != curBook.totalInSeries)
            {
                main.Log("Unable to find next book in series, the book may not be part of a series, or it is the latest release.");
            }

            // Resolve the previous book only when a title is known and it has not been resolved already.
            if (previousTitle != "" && curBook.previousInSeries == null)
            {
                // Attempt to search Amazon for the book
                curBook.previousInSeries = Functions.AmazonSearchBook(previousTitle, curBook.author);
                if (curBook.previousInSeries != null)
                {
                    curBook.previousInSeries.GetAmazonInfo(curBook.previousInSeries.amazonUrl); //fill in desc, imageurl, and ratings
                }
                else
                {
                    // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
                    HtmlDocument bookDoc = new HtmlDocument
                    {
                        OptionAutoCloseOnEnd = true
                    };
                    bookDoc.LoadHtml(HttpDownloader.GetPageHtml(previousShelfariUrl));
                    Match match = Regex.Match(bookDoc.DocumentNode.InnerHtml, "('B[A-Z0-9]{9}')");
                    if (match.Success)
                    {
                        string cleanASIN = match.Value.Replace("'", String.Empty);
                        curBook.previousInSeries = new BookInfo(previousTitle, curBook.author, cleanASIN);
                        curBook.previousInSeries.GetAmazonInfo("http://www.amazon.com/dp/" + cleanASIN);
                    }
                }

                // Report failure only when every lookup came back empty. This message used to be
                // logged when the previous book was already known, and never on actual failure.
                if (curBook.previousInSeries == null)
                {
                    main.Log("Book was found to be part of a series, but previous book could not be found.\r\n" +
                             "Please report this book and the Shelfari URL and output log to improve parsing.");
                }
            }

            return(nextBook);
        }
Exemplo n.º 7
0
        public bool complete = false; //Set if constructor succeeds in gathering data

        /// <summary>
        /// Downloads the book's Amazon page, fills in the book's Amazon info and collects the
        /// books listed in the page's carousel into custAlsoBought. Sets <c>complete</c> to true
        /// only when every step finished without a fatal error.
        /// Requires an already-built AuthorProfile and the BaseEndActions.txt file.
        /// </summary>
        /// <param name="ap">Already-built author profile.</param>
        /// <param name="book">Book to process; its ASIN is used to build the Amazon URL.</param>
        /// <param name="erl">Value stored in _erl.</param>
        /// <param name="frm">Main form, used for logging.</param>
        public EndActions(AuthorProfile ap, BookInfo book, long erl, frmMain frm)
        {
            authorProfile = ap;
            curBook       = book;
            _erl          = erl;
            main          = frm;

            main.Log("Attempting to find book on Amazon...");
            //Generate Book search URL from book's ASIN
            string ebookLocation = @"http://www.amazon.com/dp/" + book.asin;
            main.Log(String.Format("Book's Amazon page URL: {0}", ebookLocation));

            HtmlDocument bookHtmlDoc = new HtmlDocument {
                OptionAutoCloseOnEnd = true
            };

            try
            {
                bookHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(ebookLocation));
            }
            catch (Exception ex)
            {
                main.Log(String.Format("An error ocurred while downloading book's Amazon page: {0}\r\nYour ASIN may not be correct.", ex.Message));
                return;
            }
            // Report success only once the page has actually been downloaded.
            main.Log("Book found on Amazon!");

            if (Properties.Settings.Default.saveHtml)
            {
                try
                {
                    main.Log("Saving book's Amazon webpage...");
                    File.WriteAllText(Environment.CurrentDirectory +
                                      String.Format(@"\dmp\{0}.bookpageHtml.txt", curBook.asin),
                                      bookHtmlDoc.DocumentNode.InnerHtml);
                }
                catch (Exception ex)
                {
                    // Saving the dump is optional; a failure here does not stop processing.
                    main.Log(String.Format("An error ocurred saving bookpageHtml.txt: {0}", ex.Message));
                }
            }

            try
            {
                curBook.GetAmazonInfo(bookHtmlDoc);
            }
            catch (Exception ex)
            {
                main.Log(String.Format("An error ocurred parsing Amazon info: {0}", ex.Message));
                return;
            }

            main.Log("Gathering recommended book info...");
            //Parse Recommended Author titles and ASINs
            try
            {
                HtmlNodeCollection recList = bookHtmlDoc.DocumentNode.SelectNodes("//li[@class='a-carousel-card a-float-left']");
                if (recList == null)
                {
                    main.Log("Could not find related book list page on Amazon.\r\nUnable to create End Actions.");
                }
                else
                {
                    foreach (HtmlNode item in recList.Where(card => card != null))
                    {
                        HtmlNode nodeTitle  = item.SelectSingleNode(".//div/a");
                        HtmlNode nodeAuthor = item.SelectSingleNode(".//div/div");
                        HtmlNode nodeAsin   = item.SelectSingleNode(".//div");
                        // Skip a card that lacks the expected markup instead of letting a
                        // null reference abort the whole constructor.
                        if (nodeTitle == null || nodeAuthor == null || nodeAsin == null)
                        {
                            continue;
                        }

                        string nodeTitleCheck = nodeTitle.GetAttributeValue("title", "");
                        string nodeUrl        = nodeTitle.GetAttributeValue("href", "");
                        if (nodeUrl != "")
                        {
                            nodeUrl = "http://www.amazon.com" + nodeUrl;
                        }
                        if (nodeTitleCheck == "")
                        {
                            //No title attribute: fall back to the link text (CR, LF and TAB removed)
                            nodeTitleCheck = nodeTitle.InnerText.CleanString();
                        }
                        string cleanAuthor = nodeAuthor.InnerText.CleanString();

                        // Ignore entries whose title matches one of these patterns (reading-order lists, editions, bundles).
                        Match match = Regex.Match(nodeTitleCheck, @"Series Reading Order|Edition|eSpecial|\([0-9]+ Book Series\)", RegexOptions.IgnoreCase);
                        if (match.Success)
                        {
                            continue;
                        }

                        BookInfo newBook = new BookInfo(nodeTitleCheck, cleanAuthor,
                                                        nodeAsin.GetAttributeValue("data-asin", ""));
                        try
                        {
                            //Gather book desc, image url, etc, if using new format
                            if (settings.useNewVersion)
                            {
                                newBook.GetAmazonInfo(nodeUrl);
                            }
                            custAlsoBought.Add(newBook);
                        }
                        catch (Exception ex)
                        {
                            // A failure for one book is logged and that book is left out.
                            main.Log(String.Format("{0}\r\n{1}\r\nContinuing anyway...", ex.Message, nodeUrl));
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                main.Log("An error occurred parsing the book's amazon page: " + ex.Message);
                return;
            }

            SetPaths();
            complete = true;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Downloads the book's Shelfari page (up to three attempts) and scrapes it: list items
        /// under the character, organization, setting and glossary wiki sections are added to
        /// Terms, and, when the new version setting is on, quotations are added to
        /// notableShelfariQuotes.
        /// </summary>
        /// <returns>True when the page was downloaded and processed; false when every download attempt failed.</returns>
        public bool GetShelfari()
        {
            //Download HTML of Shelfari URL, try 3 times just in case it fails the first time
            main.Log(String.Format("Downloading Shelfari page... {0}", useSpoilers ? "SHOWING SPOILERS!" : ""));
            main.Log(String.Format("Shelfari URL: {0}", shelfariURL));

            const int maxAttempts = 3;
            string shelfariHtml = "";
            for (int attempt = 1; ; attempt++)
            {
                try
                {
                    //Enable cookies
                    var jar = new CookieContainer();
                    var client = new HttpDownloader(shelfariURL, jar, "", "");

                    if (useSpoilers)
                    {
                        //Grab book ID from url (search for 5 digits between slashes) and create spoiler cookie
                        var bookId = Regex.Match(shelfariURL, @"\/\d{5}").Value.Substring(1, 5);
                        var spoilers = new Cookie("ShelfariBookWikiSession", "", "/", "www.shelfari.com")
                        {
                            Value = "{\"SpoilerShowAll\":true%2C\"SpoilerShowCharacters\":true%2C\"SpoilerBookId\":" +
                                    bookId +
                                    "%2C\"SpoilerShowPSS\":true%2C\"SpoilerShowQuotations\":true%2C\"SpoilerShowParents\":true%2C\"SpoilerShowThemes\":true}"
                        };
                        jar.Add(spoilers);
                    }
                    shelfariHtml = client.GetPage();
                    break;
                }
                catch
                {
                    //Best-effort download: any failure simply triggers another attempt until the limit is hit
                    if (attempt >= maxAttempts)
                    {
                        main.Log("Failed to connect to Shelfari URL.");
                        return false;
                    }
                }
            }

            //Wiki section ids mapped to the kind of term they produce
            Dictionary<string, string> sections = new Dictionary<string, string>
            {
                {"WikiModule_Characters", "character"},
                {"WikiModule_Organizations", "topic"},
                {"WikiModule_Settings", "topic"},
                {"WikiModule_Glossary", "topic"}
            }; //, {"WikiModule_Themes", "topic"} };

            //Parse elements from various headers listed in sections
            HtmlAgilityPack.HtmlDocument shelfariDoc = new HtmlAgilityPack.HtmlDocument();
            shelfariDoc.LoadHtml(shelfariHtml);
            foreach (KeyValuePair<string, string> section in sections)
            {
                string header = section.Key;
                if (!shelfariHtml.Contains(header)) continue; //Skip section if not found on page
                //Select <li> nodes on page from within the <div id=header> tag, under <ul class=li_6>
                HtmlNodeCollection characterNodes =
                    shelfariDoc.DocumentNode.SelectNodes("//div[@id='" + header + "']//ul[@class='li_6']/li");
                //The header text can appear on the page without any matching list items
                if (characterNodes == null) continue;

                foreach (HtmlNode li in characterNodes)
                {
                    string tmpString = li.InnerText;
                    Term newTerm = new Term(section.Value); //Create term as either character/topic
                    //"Name: description" entries are split at the first colon
                    int colon = tmpString.IndexOf(':');
                    if (colon >= 0)
                    {
                        newTerm.TermName = tmpString.Substring(0, colon);
                        newTerm.Desc = tmpString.Substring(colon + 1).Replace("&amp;", "&").Trim();
                    }
                    else
                    {
                        newTerm.TermName = tmpString;
                    }
                    newTerm.DescSrc = "shelfari";
                    //Use either the associated shelfari URL of the term or if none exists, use the book's url
                    //Could use a wikipedia page instead as the xray plugin/site does but I decided not to
                    newTerm.DescUrl = (li.InnerHtml.IndexOf("<a href") == 0
                        ? li.InnerHtml.Substring(9, li.InnerHtml.IndexOf("\"", 9) - 9)
                        : shelfariURL);
                    //Default glossary terms to be case insensitive when searching through book
                    if (header == "WikiModule_Glossary")
                    {
                        newTerm.MatchCase = false;
                    }
                    if (Terms.Any(t => t.TermName == newTerm.TermName))
                    {
                        main.Log("Duplicate term \"" + newTerm.TermName + "\" found. Ignoring this duplicate.");
                    }
                    else
                    {
                        Terms.Add(newTerm);
                    }
                }
            }

            // Scrape quotes to attempt matching in ExpandRawML
            if (Properties.Settings.Default.useNewVersion)
            {
                HtmlNodeCollection quoteNodes = shelfariDoc.DocumentNode.SelectNodes("//div[@id='WikiModule_Quotations']/div/ul[@class='li_6']/li");
                if (quoteNodes != null)
                {
                    foreach (HtmlNode quoteNode in quoteNodes)
                    {
                        HtmlNode node = quoteNode.SelectSingleNode(".//blockquote");
                        if (node == null) continue;
                        string quote = node.InnerText;
                        string character = "";
                        node = quoteNode.SelectSingleNode(".//cite");
                        if (node != null)
                        {
                            character = node.InnerText;
                        }
                        // Remove quotes (sometimes people put unnecessary quotes in the quote as well)
                        quote = Regex.Replace(quote, "^(&ldquo;){1,2}", "");
                        quote = Regex.Replace(quote, "(&rdquo;){1,2}$", "");
                        notableShelfariQuotes.Add(new string[] {quote, character});
                    }
                }
            }
            return true;
        }
Exemplo n.º 9
0
        public bool complete = false; //Set if constructor succeeded in generating profile

        public AuthorProfile(BookInfo nBook, frmMain frm)
        {
            this.curBook = nBook;
            this.main    = frm;
            string outputDir;

            try
            {
                if (settings.android)
                {
                    outputDir = settings.outDir + @"\Android\" + curBook.asin;
                    Directory.CreateDirectory(outputDir);
                }
                else
                {
                    outputDir = settings.useSubDirectories ? Functions.GetBookOutputDirectory(curBook.author, curBook.sidecarName) : settings.outDir;
                }
            }
            catch (Exception ex)
            {
                main.Log("Failed to create output directory: " + ex.Message + "\r\nFiles will be placed in the default output directory.");
                outputDir = settings.outDir;
            }
            ApPath = outputDir + @"\AuthorProfile.profile." + curBook.asin + ".asc";

            if (!Properties.Settings.Default.overwrite && File.Exists(ApPath))
            {
                main.Log("AuthorProfile file already exists... Skipping!\r\n" +
                         "Please review the settings page if you want to overwite any existing files.");
                return;
            }

            //Process GUID. If in decimal form, convert to hex.
            if (Regex.IsMatch(curBook.guid, "/[a-zA-Z]/"))
            {
                curBook.guid = curBook.guid.ToUpper();
            }
            else
            {
                long guidDec;
                long.TryParse(curBook.guid, out guidDec);
                curBook.guid = guidDec.ToString("X");
            }
            if (curBook.guid == "0")
            {
                main.Log("Something bad happened while converting the GUID.");
                return;
            }

            //Generate Author search URL from author's name
            string newAuthor             = Functions.FixAuthor(curBook.author);
            string plusAuthorName        = newAuthor.Replace(" ", "+");
            string amazonAuthorSearchUrl = @"http://www.amazon.com/s/?url=search-alias%3Dstripbooks&field-keywords=" +
                                           plusAuthorName;

            main.Log("Searching for author's page on Amazon...");

            // Search Amazon for Author
            HtmlDocument authorHtmlDoc = new HtmlDocument {
                OptionAutoCloseOnEnd = true
            };
            string authorsearchHtml = HttpDownloader.GetPageHtml(amazonAuthorSearchUrl);

            authorHtmlDoc.LoadHtml(authorsearchHtml);

            if (Properties.Settings.Default.saveHtml)
            {
                try
                {
                    main.Log("Saving Amazon's author search webpage...");
                    File.WriteAllText(Environment.CurrentDirectory +
                                      String.Format(@"\dmp\{0}.authorsearchHtml.txt", curBook.asin),
                                      authorHtmlDoc.DocumentNode.InnerHtml);
                }
                catch (Exception ex)
                {
                    main.Log(String.Format("An error ocurred saving authorsearchHtml.txt: {0}", ex.Message));
                }
            }

            // Try to find Author's page from Amazon search
            HtmlNode node = authorHtmlDoc.DocumentNode.SelectSingleNode("//*[@id='result_1']");

            if (node == null || !node.OuterHtml.Contains("/e/B"))
            {
                main.Log("Could not find author's page on Amazon.\r\nUnable to create Author Profile.\r\nEnsure the author metadata field matches the author's name exactly.\r\nSearch results can be viewed at " + amazonAuthorSearchUrl);
                return;
            }
            authorAsin = node.OuterHtml;
            int index1 = authorAsin.IndexOf("data-asin");

            if (index1 > 0)
            {
                authorAsin = authorAsin.Substring(index1 + 11, 10);
            }

            node = node.SelectSingleNode("//*[@id='result_1']/div/div/div/div/a");
            string properAuthor = node.GetAttributeValue("href", "not found");

            if (properAuthor == "not found" || properAuthor.IndexOf('/', 1) < 3)
            {
                main.Log("Found author's page, but could not parse URL properly. Report this URL on the MobileRead thread: " + amazonAuthorSearchUrl);
                return;
            }
            properAuthor = properAuthor.Substring(1, properAuthor.IndexOf('/', 1) - 1);
            string authorAmazonWebsiteLocationLog = @"http://www.amazon.com/" + properAuthor + "/e/" + authorAsin;
            string authorAmazonWebsiteLocation    = @"http://www.amazon.com/" + properAuthor + "/e/" + authorAsin +
                                                    "/ref=la_" + authorAsin +
                                                    "_rf_p_n_feature_browse-b_2?fst=as%3Aoff&rh=n%3A283155%2Cp_82%3A" +
                                                    authorAsin +
                                                    "%2Cp_n_feature_browse-bin%3A618073011&bbn=283155&ie=UTF8&qid=1432378570&rnid=618072011";

            main.Log("Author page found on Amazon!");
            main.Log(String.Format("Author's Amazon Page URL: {0}", authorAmazonWebsiteLocationLog));

            // Load Author's Amazon page
            string authorpageHtml = HttpDownloader.GetPageHtml(authorAmazonWebsiteLocation);

            authorHtmlDoc.LoadHtml(authorpageHtml);

            if (Properties.Settings.Default.saveHtml)
            {
                try
                {
                    main.Log("Saving author's Amazon webpage...");
                    File.WriteAllText(Environment.CurrentDirectory +
                                      String.Format(@"\dmp\{0}.authorpageHtml.txt", curBook.asin),
                                      authorHtmlDoc.DocumentNode.InnerHtml);
                }
                catch (Exception ex)
                {
                    main.Log(String.Format("An error ocurred saving authorpageHtml.txt: {0}", ex.Message));
                }
            }

            // Try to find Author's Biography
            HtmlNode bio = authorHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='ap-bio' and @class='a-row']/div/div/span");

            //Trim authour biography to less than 1000 characters and/or replace more problematic characters.
            if (bio.InnerText.Trim().Length != 0)
            {
                if (bio.InnerText.Length > 1000)
                {
                    int lastPunc  = bio.InnerText.LastIndexOfAny(new char[] { '.', '!', '?' });
                    int lastSpace = bio.InnerText.LastIndexOf(' ');
                    if (lastPunc > lastSpace)
                    {
                        BioTrimmed = bio.InnerText.Substring(0, lastPunc + 1);
                    }
                    else
                    {
                        BioTrimmed = bio.InnerText.Substring(0, lastSpace) + '\u2026';
                    }
                }
                else
                {
                    BioTrimmed = bio.InnerText;
                }
                BioTrimmed = Functions.CleanString(BioTrimmed);
                main.Log("Author biography found on Amazon!");
            }
            else
            {
                BioTrimmed = "No author biography found on Amazon!";
                main.Log("No author biography found on Amazon!");
            }
            // Try to download Author image
            HtmlNode imageXpath = authorHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='ap-image']/img");

            authorImageUrl = imageXpath.GetAttributeValue("src", "");
            string downloadedAuthorImage = curBook.path + @"\DownloadedAuthorImage.jpg";

            try
            {
                using (WebClient webClient = new WebClient())
                {
                    webClient.DownloadFile(new Uri(authorImageUrl), downloadedAuthorImage);
                    main.Log("Downloading author image...");
                }
            }
            catch (Exception ex)
            {
                main.Log(String.Format("Failed to download author image: {0}", ex.Message));
                return;
            }

            main.Log("Resizing and cropping Author image...");
            //Resize and Crop Author image
            Bitmap o  = (Bitmap)Image.FromFile(downloadedAuthorImage);
            Bitmap nb = new Bitmap(o, o.Width, o.Height);

            int   sourceWidth  = o.Width;
            int   sourceHeight = o.Height;
            float nPercent;
            float nPercentW = (185 / (float)sourceWidth);
            float nPercentH = (278 / (float)sourceHeight);

            nPercent = nPercentH > nPercentW ? nPercentH : nPercentW;

            int      destWidth  = (int)(sourceWidth * nPercent);
            int      destHeight = (int)(sourceHeight * nPercent);
            Bitmap   b          = new Bitmap(destWidth, destHeight);
            Graphics g          = Graphics.FromImage(b);

            g.InterpolationMode  = InterpolationMode.HighQualityBicubic;
            g.SmoothingMode      = SmoothingMode.HighQuality;
            g.PixelOffsetMode    = PixelOffsetMode.HighQuality;
            g.CompositingQuality = CompositingQuality.HighQuality;
            g.CompositingMode    = CompositingMode.SourceOver;

            ImageAttributes ia = new ImageAttributes();

            ia.SetWrapMode(WrapMode.TileFlipXY);

            g.DrawImage(nb, 0, 0, destWidth, destHeight);
            b.Save(curBook.path + @"\ResizedAuthorImage.jpg");
            b.Dispose();
            g.Dispose();
            o.Dispose();
            nb.Dispose();

            Bitmap    target   = new Bitmap(185, destHeight);
            Rectangle cropRect = new Rectangle(((destWidth - 185) / 2), 0, 185, destHeight);

            using (g = Graphics.FromImage(target))
            {
                g.DrawImage(Image.FromFile(curBook.path + @"\ResizedAuthorImage.jpg"),
                            new Rectangle(0, 0, target.Width, target.Height),
                            cropRect, GraphicsUnit.Pixel);
            }
            target.Save(curBook.path + @"\CroppedAuthorImage.jpg");
            target.Dispose();
            Bitmap bc = new Bitmap(curBook.path + @"\CroppedAuthorImage.jpg");

            //Convert Author image to Grayscale and save as jpeg
            Bitmap bgs = Functions.MakeGrayscale3(bc);

            ImageCodecInfo[] availableCodecs = ImageCodecInfo.GetImageEncoders();
            ImageCodecInfo   jpgCodec        = availableCodecs.FirstOrDefault(codec => codec.MimeType == "image/jpeg");

            if (jpgCodec == null)
            {
                throw new NotSupportedException("Encoder for JPEG not found.");
            }
            EncoderParameters encoderParams = new EncoderParameters(1);

            encoderParams.Param[0] = new EncoderParameter(Encoder.ColorDepth, 8L);
            bgs.Save(curBook.path + @"\FinalImage.jpg", jpgCodec, encoderParams);
            int authorImageHeight = bgs.Height;

            bc.Dispose();

            //Convert final grayscale Author image to Base64 Format String
            string base64ImageString = Functions.ImageToBase64(bgs, ImageFormat.Jpeg);

            main.Log("Grayscale Base-64 encoded author image created!");
            bgs.Dispose();

            main.Log("Gathering author's other books...");
            List <BookInfo>    bookList     = new List <BookInfo>();
            HtmlNodeCollection resultsNodes =
                authorHtmlDoc.DocumentNode.SelectNodes("//div[@id='mainResults']/ul/li");

            foreach (HtmlNode result in resultsNodes)
            {
                if (!result.Id.StartsWith("result_"))
                {
                    continue;
                }
                string   name, url, asin = "";
                HtmlNode otherBook = result.SelectSingleNode(".//div[@class='a-row a-spacing-small']/a/h2");
                Match    match     = Regex.Match(otherBook.InnerText, @"Series Reading Order|Edition|eSpecial|\([0-9]+ Book Series\)", RegexOptions.IgnoreCase);
                if (match.Success)
                {
                    continue;
                }
                name      = otherBook.InnerText;
                otherBook = result.SelectSingleNode(".//*[@title='Kindle Edition']");
                match     = Regex.Match(otherBook.OuterHtml, "dp/(B[A-Z0-9]{9})/");
                if (match.Success)
                {
                    asin = match.Groups[1].Value;
                }
                //url = otherBook.GetAttributeValue("href", "");
                //url = otherBook.GetAttributeValue("href", "").
                //    Substring(0, otherBook.GetAttributeValue("href", "").
                //    IndexOf(match.Groups[1].Value) +
                //    match.Groups[1].Length);
                url = String.Format("http://www.amazon.com/dp/{0}", asin);
                if (name != "" && url != "" && asin != "")
                {
                    BookInfo newBook = new BookInfo(name, curBook.author, asin);
                    newBook.amazonUrl = url;
                    bookList.Add(newBook);
                }
            }

            main.Log("Gathering metadata for other books...");
            foreach (BookInfo book in bookList)
            {
                try
                {
                    //Gather book desc, image url, etc, if using new format
                    if (settings.useNewVersion)
                    {
                        book.GetAmazonInfo(book.amazonUrl);
                    }
                    otherBooks.Add(book);
                }
                catch (Exception ex)
                {
                    main.Log(String.Format("{0}\r\nURL: {1}\r\nBook: {2}\r\nContinuing anyway...", ex.Message, book.amazonUrl, book.title));
                }
            }

            main.Log("Writing Author Profile to file...");

            //Create list of Asin numbers and titles
            List <string> authorsOtherBookList = new List <string>();

            foreach (BookInfo bk in otherBooks)
            {
                authorsOtherBookList.Add(String.Format(@"{{""e"":1,""a"":""{0}"",""t"":""{1}""}}",
                                                       bk.asin, bk.title));
            }

            //Create finalAuthorProfile.profile.ASIN.asc
            int unixTimestamp = (Int32)(DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1))).TotalSeconds;

            try
            {
                string authorProfileOutput = @"{""u"":[{""y"":" + authorImageHeight + @",""l"":[""" +
                                             string.Join(@""",""", otherBooks.Select(book => book.asin).ToArray()) + @"""],""n"":""" +
                                             curBook.author + @""",""a"":""" + authorAsin + @""",""b"":""" + BioTrimmed +
                                             @""",""i"":""" + base64ImageString + @"""}],""a"":""" +
                                             String.Format(@"{0}"",""d"":{1},""o"":[", curBook.asin, unixTimestamp) +
                                             string.Join(",", authorsOtherBookList.ToArray()) + "]}";
                File.WriteAllText(ApPath, authorProfileOutput);
                main.btnPreview.Enabled          = true;
                main.cmsPreview.Items[0].Enabled = true;
                main.Log("Author Profile file created successfully!\r\nSaved to " + ApPath);
            }
            catch (Exception ex)
            {
                main.Log("An error occurred while writing the Author Profile file: " + ex.Message);
                return;
            }

            ApTitle       = "About " + curBook.author;
            ApSubTitle    = "Kindle Books By " + curBook.author;
            ApAuthorImage = Image.FromFile(curBook.path + @"\FinalImage.jpg");
            EaSubTitle    = "More Books By " + curBook.author;

            complete = true;
        }
Exemplo n.º 10
0
        // TODO: Review this...
        /// <summary>
        /// Builds the Author Profile file for the current book. In order, it: picks the output directory,
        /// searches Amazon for the author (retrying on Amazon.com when the configured site yields nothing),
        /// settles on a biography, downloads the author image, collects the author's other books and
        /// finally serializes everything to <c>AuthorProfile.profile.{ASIN}.asc</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> once the profile file has been written. <c>false</c> when the file already exists and
        /// overwriting is disabled, when the author search produced no results, or when reading/writing the
        /// biography, locating/downloading the author image, or writing the profile file failed.
        /// </returns>
        /// <remarks>
        /// Exceptions thrown by the Amazon.com retry search and by the per-book metadata gathering are not
        /// handled here and propagate to the caller.
        /// </remarks>
        public async Task<bool> Generate()
        {
            // Upper bound applied to biographies taken from Amazon.
            const int maxBioLength = 1000;

            string outputDir;

            try
            {
                if (_settings.Android)
                {
                    outputDir = _settings.OutDir + @"\Android\" + _curBook.asin;
                    Directory.CreateDirectory(outputDir);
                }
                else
                {
                    outputDir = _settings.UseSubDirectories ? Functions.GetBookOutputDirectory(_curBook.author, _curBook.sidecarName, true) : _settings.OutDir;
                }
            }
            catch (Exception ex)
            {
                Logger.Log("An error occurred creating output directory: " + ex.Message + "\r\nFiles will be placed in the default output directory.");
                outputDir = _settings.OutDir;
            }
            string ApPath = outputDir + @"\AuthorProfile.profile." + _curBook.asin + ".asc";

            if (!Properties.Settings.Default.overwrite && File.Exists(ApPath))
            {
                Logger.Log("AuthorProfile file already exists... Skipping!\r\n" +
                           "Please review the settings page if you want to overwite any existing files.");
                return false;
            }

            DataSources.AuthorSearchResults searchResults = null;
            // Attempt to download from the configured site first. A failure here is only logged so that
            // the Amazon.com fallback below still gets a chance to run.
            try
            {
                searchResults = await DataSources.Amazon.SearchAuthor(_curBook, _settings.AmazonTld);
            }
            catch (Exception ex)
            {
                Logger.Log("Error searching Amazon." + _settings.AmazonTld + ": " + ex.Message + "\r\n" + ex.StackTrace);
            }

            if (searchResults == null)
            {
                Logger.Log(String.Format("Failed to find {0} on Amazon.{1}", _curBook.author, _settings.AmazonTld));
                if (_settings.AmazonTld != "com")
                {
                    Logger.Log("Trying again with Amazon.com.");
                    // The TLD is switched on the settings object itself because every later lookup in
                    // this method (bio node, image node, other books) reads _settings.AmazonTld.
                    _settings.AmazonTld = "com";
                    // Deliberately unguarded: if the .com search crashes, it crashes back to the caller.
                    searchResults = await DataSources.Amazon.SearchAuthor(_curBook, _settings.AmazonTld);
                }
            }
            if (searchResults == null)
            {
                return false; // Already logged error in search function
            }
            authorAsin = searchResults.authorAsin;

            if (Properties.Settings.Default.saveHtml)
            {
                try
                {
                    Logger.Log("Saving author's Amazon webpage...");
                    File.WriteAllText(Environment.CurrentDirectory + String.Format(@"\dmp\{0}.authorpageHtml.txt", _curBook.asin),
                                      searchResults.authorHtmlDoc.DocumentNode.InnerHtml);
                }
                catch (Exception ex)
                {
                    // Dumping the HTML is a debugging aid only, so a failure must not abort generation.
                    Logger.Log(String.Format("An error occurred saving authorpageHtml.txt: {0}", ex.Message));
                }
            }

            // Try to find author's biography
            string bioFile = Environment.CurrentDirectory + @"\ext\" + authorAsin + ".bio";

            // A previously saved biography file takes precedence over whatever Amazon has.
            if (_settings.SaveBio && File.Exists(bioFile))
            {
                if (!readBio(bioFile))
                {
                    return false;
                }
            }
            if (BioTrimmed == "")
            {
                // TODO: Let users edit bio in same style as chapters and aliases
                HtmlNode bio = DataSources.Amazon.GetBioNode(searchResults, _settings.AmazonTld);
                if (bio?.InnerText.Trim().Length > 0)
                {
                    string bioText = bio.InnerText;
                    if (bioText.Length > maxBioLength)
                    {
                        // Cap the biography at maxBioLength characters. Both searches run backwards from
                        // the last index inside the cap, so the cut lands on a sentence end or a word
                        // boundary within the limit rather than near the end of the full text.
                        int lastPunc  = bioText.LastIndexOfAny(new [] { '.', '!', '?' }, maxBioLength - 1);
                        int lastSpace = bioText.LastIndexOf(' ', maxBioLength - 1);
                        if (lastPunc > lastSpace)
                        {
                            BioTrimmed = bioText.Substring(0, lastPunc + 1);
                        }
                        else if (lastSpace > 0)
                        {
                            BioTrimmed = bioText.Substring(0, lastSpace) + '\u2026';
                        }
                        else
                        {
                            // No usable break point inside the cap: hard cut, leaving room for the ellipsis.
                            BioTrimmed = bioText.Substring(0, maxBioLength - 1) + '\u2026';
                        }
                    }
                    else
                    {
                        BioTrimmed = bioText;
                    }
                    BioTrimmed = BioTrimmed.Clean();
                    Logger.Log("Author biography found on Amazon!");
                }
                else
                {
                    // Amazon has no biography. This branch belongs to the Amazon lookup above; it must
                    // not run when a biography was already loaded, or the saved file would be wiped.
                    File.WriteAllText(bioFile, String.Empty);
                    if (System.Windows.Forms.DialogResult.Yes ==
                        System.Windows.Forms.MessageBox.Show(
                            "No author biography found on Amazon!\r\nWould you like to create a biography?", "Biography",
                            System.Windows.Forms.MessageBoxButtons.YesNo, System.Windows.Forms.MessageBoxIcon.Question,
                            System.Windows.Forms.MessageBoxDefaultButton.Button2))
                    {
                        Functions.RunNotepad(bioFile);
                        if (!readBio(bioFile))
                        {
                            return false;
                        }
                    }
                    else
                    {
                        BioTrimmed = "No author biography found on Amazon!";
                        Logger.Log("An error occurred finding the author biography on Amazon.");
                    }
                }
            }
            if (_settings.SaveBio)
            {
                if (!File.Exists(bioFile))
                {
                    try
                    {
                        Logger.Log("Saving biography to " + bioFile);
                        using (var streamWriter = new StreamWriter(bioFile, false, System.Text.Encoding.UTF8))
                        {
                            streamWriter.Write(BioTrimmed);
                        }
                    }
                    catch (Exception ex)
                    {
                        Logger.Log("An error occurred while writing biography.\r\n" + ex.Message + "\r\n" + ex.StackTrace);
                        return false;
                    }
                }
                if (System.Windows.Forms.DialogResult.Yes == System.Windows.Forms.MessageBox.Show("Would you like to open the biography file in notepad for editing?", "Biography",
                                                                                                  System.Windows.Forms.MessageBoxButtons.YesNo, System.Windows.Forms.MessageBoxIcon.Question, System.Windows.Forms.MessageBoxDefaultButton.Button2))
                {
                    Functions.RunNotepad(bioFile);
                    if (!readBio(bioFile))
                    {
                        return false;
                    }
                }
            }

            // Try to download Author image
            HtmlNode imageXpath = DataSources.Amazon.GetAuthorImageNode(searchResults, _settings.AmazonTld);

            if (imageXpath == null)
            {
                // Without an image node there is no URL to download; fail like the other steps do
                // instead of dereferencing null.
                Logger.Log("An error occurred finding the author image on Amazon.");
                return false;
            }

            // Drop the "_..._." segment from the image URL.
            authorImageUrl = Regex.Replace(imageXpath.GetAttributeValue("src", ""), @"_.*?_\.", string.Empty);

            // cleanup to match retail file image links
            if (authorImageUrl.Contains(@"https://images-na.ssl-images-amazon"))
            {
                authorImageUrl = authorImageUrl.Replace(@"https://images-na.ssl-images-amazon", @"http://ecx.images-amazon");
            }

            _curBook.authorImageUrl = authorImageUrl;

            Bitmap ApAuthorImage;

            try
            {
                Logger.Log("Downloading author image...");
                ApAuthorImage = await HttpDownloader.GetImage(authorImageUrl);

                Logger.Log("Grayscale base64-encoded author image created!");
            }
            catch (Exception ex)
            {
                Logger.Log(String.Format("An error occurred downloading the author image: {0}", ex.Message));
                return false;
            }

            Logger.Log("Gathering author's other books...");
            // Fall back to the second extractor only when the first one returns null.
            var bookList = DataSources.Amazon.GetAuthorBooks(searchResults, _curBook.title, _curBook.author, _settings.AmazonTld)
                           ?? DataSources.Amazon.GetAuthorBooksNew(searchResults, _curBook.title, _curBook.author, _settings.AmazonTld);

            if (bookList != null)
            {
                Logger.Log("Gathering metadata for other books...");
                // Results are collected in a ConcurrentBag because the callback below is invoked via
                // ParallelForEachAsync.
                var bookBag = new ConcurrentBag<BookInfo>();
                await bookList.ParallelForEachAsync(async book =>
                {
                    // TODO: retry a couple times if one fails maybe
                    try
                    {
                        //Gather book desc, image url, etc, if using new format
                        if (_settings.UseNewVersion)
                        {
                            await book.GetAmazonInfo(book.amazonUrl);
                        }
                        bookBag.Add(book);
                    }
                    catch (Exception ex)
                    {
                        Logger.Log(String.Format("An error occurred gathering metadata for other books: {0}\r\nURL: {1}\r\nBook: {2}", ex.Message, book.amazonUrl, book.title));
                        // Log for context, then let the failure surface to the caller.
                        throw;
                    }
                });

                otherBooks.AddRange(bookBag);
            }
            else
            {
                Logger.Log("Unable to find other books by this author. If there should be some, check the Amazon URL to ensure it is correct.");
            }

            Logger.Log("Writing Author Profile to file...");

            var authorOtherBooks = otherBooks.Select(book => new Model.AuthorProfile.Book
            {
                E     = 1,
                Asin  = book.asin,
                Title = book.title
            }).ToArray();

            var ap = new Model.AuthorProfile
            {
                Asin         = _curBook.asin,
                CreationDate = Functions.UnixTimestampSeconds(),
                OtherBooks   = authorOtherBooks,
                Authors      = new []
                {
                    new Model.AuthorProfile.Author
                    {
                        Asin           = authorAsin,
                        Bio            = BioTrimmed,
                        ImageHeight    = ApAuthorImage.Height,
                        Name           = _curBook.author,
                        OtherBookAsins = otherBooks.Select(book => book.asin).ToArray(),
                        Picture        = Functions.ImageToBase64(ApAuthorImage, ImageFormat.Jpeg)
                    }
                }
            };

            string authorProfileOutput = JsonConvert.SerializeObject(ap);

            try
            {
                File.WriteAllText(ApPath, authorProfileOutput);
                Logger.Log("Author Profile file created successfully!\r\nSaved to " + ApPath);
            }
            catch (Exception ex)
            {
                Logger.Log("An error occurred while writing the Author Profile file: " + ex.Message + "\r\n" + ex.StackTrace);
                return false;
            }

            ApTitle    = "About " + _curBook.author;
            ApSubTitle = "Kindle Books By " + _curBook.author;
            EaSubTitle = "More Books By " + _curBook.author;
            return true;
        }
Exemplo n.º 11
0
        /// <summary>
        /// Generate the necessities for both old and new formats
        /// </summary>
        public async Task <bool> Generate()
        {
            Logger.Log("Attempting to find book on Amazon...");
            //Generate Book search URL from book's ASIN
            string ebookLocation = String.Format(@"https://www.amazon.{0}/dp/{1}", _settings.AmazonTld, curBook.asin);

            // Search Amazon for book
            //Logger.Log(String.Format("Book's Amazon page URL: {0}", ebookLocation));

            HtmlDocument bookHtmlDoc = new HtmlDocument {
                OptionAutoCloseOnEnd = true
            };

            try
            {
                bookHtmlDoc.LoadHtml(await HttpDownloader.GetPageHtmlAsync(ebookLocation));
            }
            catch (Exception ex)
            {
                Logger.Log(String.Format("An error ocurred while downloading book's Amazon page: {0}\r\nYour ASIN may not be correct.", ex.Message));
                return(false);
            }
            Logger.Log("Book found on Amazon!");
            if (Properties.Settings.Default.saveHtml)
            {
                try
                {
                    Logger.Log("Saving book's Amazon webpage...");
                    File.WriteAllText(Environment.CurrentDirectory +
                                      String.Format(@"\dmp\{0}.bookpageHtml.txt", curBook.asin),
                                      bookHtmlDoc.DocumentNode.InnerHtml);
                }
                catch (Exception ex)
                {
                    Logger.Log(String.Format("An error ocurred saving bookpageHtml.txt: {0}", ex.Message));
                }
            }

            try
            {
                curBook.GetAmazonInfo(bookHtmlDoc);
            }
            catch (Exception ex)
            {
                Logger.Log(String.Format("An error ocurred parsing Amazon info: {0}", ex.Message));
                return(false);
            }

            Logger.Log("Gathering recommended book metadata...");
            //Parse Recommended Author titles and ASINs
            try
            {
                var recList = bookHtmlDoc.DocumentNode.SelectNodes("//ol[@class='a-carousel' and @role='list']/li[@class='a-carousel-card a-float-left']");
                if (recList != null)
                {
                    var possibleBooks = new List <BookInfo>();
                    foreach (HtmlNode item in recList.Where(item => item != null))
                    {
                        HtmlNode nodeTitle      = item.SelectSingleNode(".//div/a");
                        var      nodeTitleCheck = nodeTitle.GetAttributeValue("title", "");
                        var      nodeUrl        = nodeTitle.GetAttributeValue("href", "");
                        if (nodeUrl != "")
                        {
                            nodeUrl = "https://www.amazon." + _settings.AmazonTld + nodeUrl;
                        }
                        if (nodeTitleCheck == "")
                        {
                            nodeTitle = item.SelectSingleNode(".//div/a");
                            //Remove CR, LF and TAB
                            nodeTitleCheck = nodeTitle.InnerText.Clean();
                        }
                        //Check for duplicate by title
                        if (possibleBooks.Any(bk => bk.title.Contains(nodeTitleCheck)))
                        {
                            continue;
                        }

                        var cleanAuthor = item.SelectSingleNode(".//div/div").InnerText.Clean();
                        //Exclude the current book title from other books search
                        Match match = Regex.Match(nodeTitleCheck, curBook.title, RegexOptions.IgnoreCase);
                        if (match.Success)
                        {
                            continue;
                        }
                        match = Regex.Match(nodeTitleCheck,
                                            @"(Series|Reading) Order|Checklist|Edition|eSpecial|\([0-9]+ Book Series\)",
                                            RegexOptions.IgnoreCase);
                        if (match.Success)
                        {
                            continue;
                        }
                        possibleBooks.Add(new BookInfo(nodeTitleCheck, cleanAuthor,
                                                       item.SelectSingleNode(".//div")?.GetAttributeValue("data-asin", null))
                        {
                            amazonUrl = nodeUrl
                        });
                    }
                    var bookBag = new ConcurrentBag <BookInfo>();
                    await possibleBooks.ParallelForEachAsync(async book =>
                    {
                        if (book == null)
                        {
                            return;
                        }
                        // TODO: Make a separate function for this, duplicate here and AuthorProfile
                        try
                        {
                            //Gather book desc, image url, etc, if using new format
                            if (_settings.UseNewVersion)
                            {
                                await book.GetAmazonInfo(book.amazonUrl);
                            }
                            bookBag.Add(book);
                        }
                        catch (Exception ex)
                        {
                            Logger.Log($"Error: {ex}\r\n{book.amazonUrl}");
                        }
                    });

                    custAlsoBought.AddRange(bookBag);
                }
                //Add sponsored related, if they exist...
                HtmlNode otherItems =
                    bookHtmlDoc.DocumentNode.SelectSingleNode("//div[@id='view_to_purchase-sims-feature']");
                if (otherItems != null)
                {
                    recList = otherItems.SelectNodes(".//li[@class='a-spacing-medium p13n-sc-list-item']");
                    if (recList != null)
                    {
                        string sponsTitle, sponsAsin = "", sponsUrl = "";
                        var    possibleBooks = new List <BookInfo>();
                        // TODO: This entire foreach is pretty much the exact same as the one above...
                        foreach (HtmlNode result in recList.Where(result => result != null))
                        {
                            HtmlNode otherBook =
                                result.SelectSingleNode(".//div[@class='a-fixed-left-grid-col a-col-left']/a");
                            if (otherBook == null)
                            {
                                continue;
                            }
                            Match match = Regex.Match(otherBook.GetAttributeValue("href", ""),
                                                      "dp/(B[A-Z0-9]{9})");
                            if (!match.Success)
                            {
                                match = Regex.Match(otherBook.GetAttributeValue("href", ""),
                                                    "gp/product/(B[A-Z0-9]{9})");
                            }
                            if (match.Success)
                            {
                                sponsAsin = match.Groups[1].Value;
                                sponsUrl  = String.Format("https://www.amazon.{1}/dp/{0}", sponsAsin,
                                                          _settings.AmazonTld);
                            }

                            otherBook = otherBook.SelectSingleNode(".//img");
                            match     = Regex.Match(otherBook.GetAttributeValue("alt", ""),
                                                    @"(Series|Reading) Order|Checklist|Edition|eSpecial|\([0-9]+ Book Series\)",
                                                    RegexOptions.IgnoreCase);
                            if (match.Success)
                            {
                                continue;
                            }
                            sponsTitle = otherBook.GetAttributeValue("alt", "");
                            //Check for duplicate by title
                            if (custAlsoBought.Any(bk => bk.title.Contains(sponsTitle)) || possibleBooks.Any(bk => bk.title.Contains(sponsTitle)))
                            {
                                continue;
                            }
                            otherBook = result.SelectSingleNode(".//a[@class='a-size-small a-link-child']")
                                        ?? result.SelectSingleNode(".//span[@class='a-size-small a-color-base']")
                                        ?? throw new DataSource.FormatChangedException("Amazon", "Sponsored book author");
                            // TODO: Throw more format changed exceptions to make it obvious that the site changed
                            var sponsAuthor = otherBook.InnerText.Trim();
                            possibleBooks.Add(new BookInfo(sponsTitle, sponsAuthor, sponsAsin)
                            {
                                amazonUrl = sponsUrl
                            });
                        }

                        var bookBag = new ConcurrentBag <BookInfo>();
                        await possibleBooks.ParallelForEachAsync(async book =>
                        {
                            //Gather book desc, image url, etc, if using new format
                            try
                            {
                                if (_settings.UseNewVersion)
                                {
                                    await book.GetAmazonInfo(book.amazonUrl);
                                }
                                bookBag.Add(book);
                            }
                            catch (Exception ex)
                            {
                                Logger.Log($"Error: {ex.Message}\r\n{book.amazonUrl}");
                            }
                        });

                        custAlsoBought.AddRange(bookBag);
                    }
                }
            }
            catch (Exception ex)
            {
                Logger.Log("An error occurred parsing the book's amazon page: " + ex.Message + ex.StackTrace);
                return(false);
            }
            SetPaths();
            return(true);
        }
Exemplo n.º 12
0
 /// <summary>
 /// Convenience wrapper that builds an <see cref="HttpDownloader"/> for
 /// <paramref name="url"/> and returns the result of its <c>GetPage()</c> call.
 /// </summary>
 /// <param name="url">Address handed to the <see cref="HttpDownloader"/> constructor.</param>
 /// <returns>The string produced by <c>GetPage()</c>.</returns>
 public static string GetPageHtml(string url)
 {
     // One-shot downloader: nothing else needs the instance, so it is not kept in a local.
     return new HttpDownloader(url).GetPage();
 }
Exemplo n.º 13
0
        /// <summary>
        /// Click handler for the "search Shelfari" button. Validates the configured paths,
        /// extracts the book's metadata, then queries Shelfari once per binding type
        /// (Hardcover, Kindle, Paperback) and writes the first matching book URL into
        /// <c>txtShelfari</c>. If nothing is found and the author/title contain accented
        /// characters, the search is retried once with the diacritics removed.
        /// </summary>
        /// <param name="sender">Control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        /// <remarks>
        /// The temporary directory obtained from <c>Functions.GetTempDirectory()</c> is removed
        /// on every exit path once it has been confirmed to exist, including early returns
        /// caused by metadata or network errors.
        /// </remarks>
        private void btnSearchShelfari_Click(object sender, EventArgs e)
        {
            if (!File.Exists(txtMobi.Text))
            {
                MessageBox.Show("Specified book was not found.", "Book Not Found");
                return;
            }
            if (!File.Exists(settings.mobi_unpack))
            {
                MessageBox.Show("Kindleunpack was not found. Please review the settings page.", "Kindleunpack Not Found");
                return;
            }
            if (!Directory.Exists(settings.outDir))
            {
                MessageBox.Show("Specified output directory does not exist. Please review the settings page.",
                                "Output Directory Not found");
                return;
            }
            //Create temp dir and ensure it exists
            string randomFile = Functions.GetTempDirectory();

            if (!Directory.Exists(randomFile))
            {
                MessageBox.Show("Temporary path not accessible for some reason.", "Temporary Directory Error");
                return;
            }

            // Everything below may bail out early; the finally block guarantees the
            // temporary directory is cleaned up regardless of how we leave.
            try
            {
                //0 = asin, 1 = uniqid, 2 = databasename, 3 = rawML, 4 = author, 5 = title
                List <string> results;

                if (settings.useKindleUnpack)
                {
                    Log("Running Kindleunpack to get metadata...");
                    results = Functions.GetMetaData(txtMobi.Text, settings.outDir, randomFile, settings.mobi_unpack);
                }
                else
                {
                    Log("Extracting metadata...");
                    try
                    {
                        results = Functions.GetMetaDataInternal(txtMobi.Text, settings.outDir, false).getResults();
                    }
                    catch (Exception ex)
                    {
                        Log("Error getting metadata: " + ex.Message);
                        return;
                    }
                }
                if (results == null || results.Count != 6)
                {
                    // On failure the first entry is logged as the error text; guard against
                    // a null or empty list so the handler does not crash on the index.
                    Log(results != null && results.Count > 0
                            ? results[0]
                            : "Error getting metadata: no results were returned.");
                    return;
                }

                // Added author name to log output
                Log(String.Format("Got metadata!\r\nDatabase Name: {0}\r\nASIN: {1}\r\nAuthor: {2}\r\nTitle: {3}\r\nUniqueID: {4}",
                                  results[2], results[0], results[4], results[5], results[1]));

                //Get Shelfari Search URL
                Log("Searching for book on Shelfari...");
                string shelfariSearchUrlBase = @"http://www.shelfari.com/search/books?Author={0}&Title={1}&Binding={2}";

                string[] bindingTypes = { "Hardcover", "Kindle", "Paperback" };

                // Search book on Shelfari
                bool   bookFound       = false;
                string shelfariBookUrl = "";

                results[4] = Functions.FixAuthor(results[4]);

                try
                {
                    HtmlAgilityPack.HtmlDocument shelfariHtmlDoc = new HtmlAgilityPack.HtmlDocument();
                    // Pass 0 searches with the original author/title, pass 1 with diacritics removed.
                    for (int j = 0; j <= 1; j++)
                    {
                        for (int i = 0; i < bindingTypes.Length; i++)
                        {
                            Log("Searching for " + bindingTypes[i] + " edition...");
                            // Escape author and title so characters such as '&', '#' or '+'
                            // cannot break or truncate the query string.
                            string searchUrl = String.Format(shelfariSearchUrlBase,
                                                             Uri.EscapeDataString(results[4]),
                                                             Uri.EscapeDataString(results[5]),
                                                             bindingTypes[i]);
                            shelfariHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(searchUrl));
                            if (!shelfariHtmlDoc.DocumentNode.InnerText.Contains("Your search did not return any results"))
                            {
                                shelfariBookUrl = FindShelfariURL(shelfariHtmlDoc, results[4], results[5]);
                                if (shelfariBookUrl != "")
                                {
                                    bookFound = true;
                                    if (Properties.Settings.Default.saveHtml)
                                    {
                                        try
                                        {
                                            Log("Saving book's Shelfari webpage...");
                                            shelfariHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(shelfariBookUrl));
                                            File.WriteAllText(Environment.CurrentDirectory +
                                                              String.Format(@"\dmp\{0}.shelfaripageHtml.txt", results[0]),
                                                              shelfariHtmlDoc.DocumentNode.InnerHtml);
                                        }
                                        catch (Exception ex)
                                        {
                                            // Saving the page is optional; a failure here must not abort the search.
                                            Log(String.Format("An error ocurred saving shelfaripageHtml.txt: {0}", ex.Message));
                                        }
                                    }
                                    break;
                                }
                            }
                            if (!bookFound)
                            {
                                Log("Unable to find a " + bindingTypes[i] + " edition of this book on Shelfari!");
                            }
                        }
                        if (bookFound || j == 1)
                        {
                            break;
                        }
                        // Attempt to remove diacritics (accented characters) from author & title for searching
                        string newAuthor = results[4].RemoveDiacritics();
                        string newTitle  = results[5].RemoveDiacritics();
                        if (results[4].Equals(newAuthor) && results[5].Equals(newTitle))
                        {
                            // Nothing changed, so a second pass would only repeat the
                            // exact same requests with the exact same outcome.
                            break;
                        }
                        results[4] = newAuthor;
                        results[5] = newTitle;
                        Log("Accented characters detected. Attempting to search without them.");
                    }
                }
                catch (Exception ex)
                {
                    Log("Error: " + ex.Message);
                    return;
                }

                if (bookFound)
                {
                    Log("Book found on Shelfari!");
                    Log(results[5] + " by " + results[4]);

                    txtShelfari.Text = shelfariBookUrl;
                    txtShelfari.Refresh();
                    Log(String.Format("Shelfari URL updated: {0}\r\nYou may want to visit the URL to ensure it is correct and add/modify terms if necessary.", shelfariBookUrl));
                }
                else
                {
                    Log("Unable to find this book on Shelfari! You may have to search manually.");
                }
            }
            finally
            {
                try
                {
                    Directory.Delete(randomFile, true);
                }
                catch (Exception)
                {
                    // Best effort: a locked or already-removed directory is reported, not fatal.
                    Log("An error occurred while trying to delete temporary files.\r\nTry deleting these files manually.");
                }
            }
        }
Exemplo n.º 14
0
        /// <summary>
        /// Reads the JSON file at <paramref name="inputFile"/> and fills the form from it:
        /// the "next book" title/author/cover, and the cover lists for the
        /// <c>authorRecs</c> and <c>customersWhoBoughtRecs</c> recommendation sections.
        /// Each cover is downloaded via <c>HttpDownloader.GetImage</c> and passed through
        /// <c>Functions.MakeGrayscale3</c> before being displayed.
        /// </summary>
        /// <param name="inputFile">Path of the UTF-8 encoded JSON file to load.</param>
        /// <returns>A task that completes once all covers have been downloaded and added.</returns>
        /// <remarks>
        /// Sections missing from the JSON (including a missing <c>data</c> node) are skipped
        /// instead of raising a <see cref="NullReferenceException"/>.
        /// </remarks>
        public async Task Populate(string inputFile)
        {
            string input;

            using (StreamReader streamReader = new StreamReader(inputFile, Encoding.UTF8))
            {
                input = streamReader.ReadToEnd();
            }

            // Start from a clean slate so repeated calls do not accumulate entries.
            ilauthorRecs.Images.Clear();
            lvAuthorRecs.Items.Clear();
            ilcustomersWhoBoughtRecs.Images.Clear();
            lvCustomersWhoBoughtRecs.Items.Clear();

            JObject ea       = JObject.Parse(input);
            var     data     = ea["data"];
            // Null-conditional indexing: any absent intermediate node yields null
            // rather than throwing, which the checks below already expect.
            var     tempData = data?["nextBook"];

            if (tempData != null)
            {
                // Undo the "not in series" state a previous call may have left behind.
                pbNextCover.Visible    = true;
                lblNextTitle.Visible   = true;
                lblNextAuthor.Visible  = true;
                lblNotInSeries.Visible = false;

                lblNextTitle.Text  = tempData["title"]?.ToString() ?? "";
                // First is null for an empty author array, avoiding an out-of-range index.
                lblNextAuthor.Text = tempData["authors"]?.First?.ToString() ?? "";
                string imageUrl = tempData["imageUrl"]?.ToString();
                if (!string.IsNullOrEmpty(imageUrl))
                {
                    pbNextCover.Image = Functions.MakeGrayscale3(await HttpDownloader.GetImage(imageUrl));
                }
            }
            else
            {
                pbNextCover.Visible    = false;
                lblNextTitle.Visible   = false;
                lblNextAuthor.Visible  = false;
                lblNotInSeries.Visible = true;
            }

            tempData = data?["authorRecs"]?["recommendations"];
            if (tempData != null)
            {
                foreach (var rec in tempData)
                {
                    string imageUrl = rec["imageUrl"]?.ToString();
                    if (!string.IsNullOrEmpty(imageUrl))
                    {
                        ilauthorRecs.Images.Add(Functions.MakeGrayscale3(await HttpDownloader.GetImage(imageUrl)));
                    }
                }
                ListViewItem_SetSpacing(lvAuthorRecs, 60 + 7, 90 + 7);
                // One list item per downloaded cover; recommendations without an image get no item.
                for (int i = 0; i < ilauthorRecs.Images.Count; i++)
                {
                    ListViewItem item = new ListViewItem {
                        ImageIndex = i
                    };
                    lvAuthorRecs.Items.Add(item);
                }
            }

            tempData = data?["customersWhoBoughtRecs"]?["recommendations"];
            if (tempData != null)
            {
                foreach (var rec in tempData)
                {
                    var imageUrl = rec["imageUrl"]?.ToString();
                    if (!string.IsNullOrEmpty(imageUrl))
                    {
                        ilcustomersWhoBoughtRecs.Images.Add(Functions.MakeGrayscale3(await HttpDownloader.GetImage(imageUrl)));
                    }
                }
                ListViewItem_SetSpacing(lvCustomersWhoBoughtRecs, 60 + 7, 90 + 7);
                // One list item per downloaded cover; recommendations without an image get no item.
                for (int i = 0; i < ilcustomersWhoBoughtRecs.Images.Count; i++)
                {
                    var item = new ListViewItem {
                        ImageIndex = i
                    };
                    lvCustomersWhoBoughtRecs.Items.Add(item);
                }
            }
        }
Exemplo n.º 15
0
        /// <summary>
        /// Creates an <see cref="HttpDownloader"/> for <paramref name="url"/> and schedules its
        /// <c>GetPageAsync</c> call through <see cref="Task.Run(Func{Task}, CancellationToken)"/>,
        /// returning the resulting task.
        /// </summary>
        /// <param name="url">Address handed to the <see cref="HttpDownloader"/> constructor.</param>
        /// <param name="cancellationToken">
        /// Token forwarded both to <c>GetPageAsync</c> and to <c>Task.Run</c>.
        /// </param>
        /// <returns>A task whose result is the string produced by <c>GetPageAsync</c>.</returns>
        public static Task<string> GetPageHtmlAsync(string url, CancellationToken cancellationToken = default)
        {
            var downloader = new HttpDownloader(url);

            return Task.Run(async () =>
            {
                // ConfigureAwait(false): the continuation does not need the caller's context.
                string html = await downloader.GetPageAsync(cancellationToken).ConfigureAwait(false);
                return html;
            }, cancellationToken);
        }