Exemple #1
0
        /// <summary>
        /// Resolves the next book in the current book's series and, as a side effect,
        /// tries to populate <c>curBook.previousInSeries</c>.
        /// </summary>
        /// <remarks>
        /// Lookup order for the next book: the author's other books, then an Amazon search,
        /// then the first quoted ASIN-like token found on the page at <c>nextShelfariUrl</c>.
        /// The previous book uses the Amazon search and then the page at
        /// <c>previousShelfariUrl</c>.
        /// NOTE(review): <c>nextShelfariUrl</c>, <c>previousTitle</c> and
        /// <c>previousShelfariUrl</c> are not assigned in this method; presumably
        /// <c>GetNextInSeriesTitle</c> sets them - confirm.
        /// </remarks>
        /// <returns>
        /// The next book, or <c>null</c> when the current book has no Shelfari URL or no
        /// next book could be resolved.
        /// </returns>
        private BookInfo GetNextInSeries()
        {
            BookInfo nextBook = null;

            // Without a Shelfari URL nothing can be looked up (this also skips the previous-book lookup).
            if (curBook.shelfariUrl == "")
            {
                return null;
            }

            // Get title of next book
            HtmlAgilityPack.HtmlDocument searchHtmlDoc = new HtmlAgilityPack.HtmlDocument();
            searchHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(curBook.shelfariUrl));
            string nextTitle = GetNextInSeriesTitle(searchHtmlDoc);

            if (nextTitle != "")
            {
                // Search author's other books for the book (assumes next in series was written by the same author...)
                // Returns the first one found, though there should probably not be more than 1 of the same name anyway
                nextBook = authorProfile.otherBooks.FirstOrDefault(bk => bk.title == nextTitle);
                if (nextBook == null)
                {
                    // Attempt to search Amazon for the book instead
                    nextBook = Functions.AmazonSearchBook(nextTitle, curBook.author);
                    if (nextBook != null)
                    {
                        nextBook.GetAmazonInfo(nextBook.amazonUrl); //fill in desc, imageurl, and ratings
                    }
                }
                // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
                if (nextBook == null)
                {
                    HtmlDocument bookDoc = new HtmlDocument()
                    {
                        OptionAutoCloseOnEnd = true
                    };
                    bookDoc.LoadHtml(HttpDownloader.GetPageHtml(nextShelfariUrl));
                    // The pattern captures the surrounding single quotes, which are stripped below.
                    Match match = Regex.Match(bookDoc.DocumentNode.InnerHtml, "('B[A-Z0-9]{9}')");
                    if (match.Success)
                    {
                        string cleanASIN = match.Value.Replace("'", String.Empty);
                        nextBook = new BookInfo(nextTitle, curBook.author, cleanASIN);
                        nextBook.GetAmazonInfo("http://www.amazon.com/dp/" + cleanASIN);
                    }
                }
                if (nextBook == null)
                {
                    main.Log("Book was found to be part of a series, but next book could not be found.\r\n" +
                             "Please report this book and the Shelfari URL and output log to improve parsing.");
                }
            }
            else if (curBook.seriesPosition != curBook.totalInSeries)
            {
                main.Log("Unable to find next book in series, the book may not be part of a series, or it is the latest release.");
            }

            if (previousTitle != "")
            {
                // Only look the previous book up when it has not been resolved already.
                if (curBook.previousInSeries == null)
                {
                    // Attempt to search Amazon for the book
                    curBook.previousInSeries = Functions.AmazonSearchBook(previousTitle, curBook.author);
                    if (curBook.previousInSeries != null)
                    {
                        curBook.previousInSeries.GetAmazonInfo(curBook.previousInSeries.amazonUrl); //fill in desc, imageurl, and ratings
                    }
                    // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
                    if (curBook.previousInSeries == null)
                    {
                        HtmlDocument bookDoc = new HtmlDocument()
                        {
                            OptionAutoCloseOnEnd = true
                        };
                        bookDoc.LoadHtml(HttpDownloader.GetPageHtml(previousShelfariUrl));
                        Match match = Regex.Match(bookDoc.DocumentNode.InnerHtml, "('B[A-Z0-9]{9}')");
                        if (match.Success)
                        {
                            string cleanASIN = match.Value.Replace("'", String.Empty);
                            curBook.previousInSeries = new BookInfo(previousTitle, curBook.author, cleanASIN);
                            curBook.previousInSeries.GetAmazonInfo("http://www.amazon.com/dp/" + cleanASIN);
                        }
                    }
                }

                // Report failure only when the previous book is still unresolved after every
                // lookup, mirroring the next-book handling; an already-known previous book
                // is not an error.
                if (curBook.previousInSeries == null)
                {
                    main.Log("Book was found to be part of a series, but previous book could not be found.\r\n" +
                             "Please report this book and the Shelfari URL and output log to improve parsing.");
                }
            }

            return nextBook;
        }
        public bool complete = false; //Set if constructor succeeds in gathering data
        
        /// <summary>
        /// Gathers the data needed for end actions: loads the book's Amazon page, fills in the
        /// book's Amazon info and collects the recommended titles into <c>custAlsoBought</c>.
        /// Requires an already-built AuthorProfile and the BaseEndActions.txt file.
        /// </summary>
        /// <param name="ap">Author profile, stored in <c>authorProfile</c>.</param>
        /// <param name="book">Book being processed; its ASIN is used to build the Amazon page URL.</param>
        /// <param name="erl">Stored in <c>_erl</c>; its meaning is not visible in this block.</param>
        /// <param name="frm">Main form, used for logging.</param>
        /// <remarks>
        /// <c>complete</c> is set to true only when the end of the constructor is reached. A failed
        /// page load, a failure in <c>curBook.GetAmazonInfo</c> or an unhandled error while walking
        /// the recommendation list logs a message and returns early. Failing to save the HTML dump
        /// or to fetch a single recommended book is logged and does not abort.
        /// </remarks>
        public EndActions(AuthorProfile ap, BookInfo book, long erl, frmMain frm)
        {
            authorProfile = ap;
            curBook = book;
            _erl = erl;
            main = frm;

            main.Log("Attempting to find book on Amazon...");
            // The product page address is derived directly from the book's ASIN.
            string productUrl = @"http://www.amazon.com/dp/" + book.asin;

            main.Log("Book found on Amazon!");
            main.Log(String.Format("Book's Amazon page URL: {0}", productUrl));

            HtmlDocument amazonPage = new HtmlDocument();
            amazonPage.OptionAutoCloseOnEnd = true;
            try
            {
                string pageHtml = HttpDownloader.GetPageHtml(productUrl);
                amazonPage.LoadHtml(pageHtml);
            }
            catch (Exception ex)
            {
                main.Log(String.Format("An error ocurred while downloading book's Amazon page: {0}\r\nYour ASIN may not be correct.", ex.Message));
                return;
            }

            // Optional dump of the fetched page; a failure here is logged and processing continues.
            if (Properties.Settings.Default.saveHtml)
            {
                try
                {
                    main.Log("Saving book's Amazon webpage...");
                    string dumpPath = Environment.CurrentDirectory +
                                      String.Format(@"\dmp\{0}.bookpageHtml.txt", curBook.asin);
                    File.WriteAllText(dumpPath, amazonPage.DocumentNode.InnerHtml);
                }
                catch (Exception ex)
                {
                    main.Log(String.Format("An error ocurred saving bookpageHtml.txt: {0}", ex.Message));
                }
            }

            try
            {
                curBook.GetAmazonInfo(amazonPage);
            }
            catch (Exception ex)
            {
                main.Log(String.Format("An error ocurred parsing Amazon info: {0}", ex.Message));
                return;
            }

            main.Log("Gathering recommended book info...");
            // Each carousel card carries one recommended title, its author and its ASIN.
            try
            {
                HtmlNodeCollection carouselCards = amazonPage.DocumentNode.SelectNodes("//li[@class='a-carousel-card a-float-left']");
                if (carouselCards == null)
                {
                    main.Log("Could not find related book list page on Amazon.\r\nUnable to create End Actions.");
                }
                else
                {
                    foreach (HtmlNode card in carouselCards)
                    {
                        if (card == null)
                        {
                            continue;
                        }

                        HtmlNode titleLink = card.SelectSingleNode(".//div/a");
                        string title = titleLink.GetAttributeValue("title", "");
                        string bookUrl = titleLink.GetAttributeValue("href", "");
                        if (bookUrl != "")
                        {
                            bookUrl = "http://www.amazon.com" + bookUrl;
                        }

                        // No title attribute: fall back to the link text (CR, LF and TAB removed).
                        if (title == "")
                        {
                            title = titleLink.InnerText.CleanString();
                        }

                        string author = card.SelectSingleNode(".//div/div").InnerText.CleanString();

                        // Skip entries that are not individual books (reading orders, editions, series bundles).
                        bool excluded = Regex.IsMatch(title, @"Series Reading Order|Edition|eSpecial|\([0-9]+ Book Series\)", RegexOptions.IgnoreCase);
                        if (excluded)
                        {
                            continue;
                        }

                        string asin = card.SelectSingleNode(".//div").GetAttributeValue("data-asin", "");
                        BookInfo recommended = new BookInfo(title, author, asin);
                        try
                        {
                            //Gather book desc, image url, etc, if using new format
                            if (settings.useNewVersion)
                            {
                                recommended.GetAmazonInfo(bookUrl);
                            }
                            custAlsoBought.Add(recommended);
                        }
                        catch (Exception ex)
                        {
                            main.Log(String.Format("{0}\r\n{1}\r\nContinuing anyway...", ex.Message, bookUrl));
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                main.Log("An error occurred parsing the book's amazon page: " + ex.Message);
                return;
            }

            SetPaths();
            complete = true;
        }
Exemple #3
0
        public bool complete = false; //Set if constructor succeeds in gathering data

        /// <summary>
        /// Gathers the data needed for end actions: loads the book's Amazon page, fills in the
        /// book's Amazon info and collects the recommended titles into <c>custAlsoBought</c>.
        /// Requires an already-built AuthorProfile and the BaseEndActions.txt file.
        /// </summary>
        /// <param name="ap">Author profile, stored in <c>authorProfile</c>.</param>
        /// <param name="book">Book being processed; its ASIN is used to build the Amazon page URL.</param>
        /// <param name="erl">Stored in <c>_erl</c>; its meaning is not visible in this block.</param>
        /// <param name="frm">Main form, used for logging.</param>
        /// <remarks>
        /// <c>complete</c> is set to true only when the end of the constructor is reached. A failed
        /// page load, a failure in <c>curBook.GetAmazonInfo</c> or an unhandled error while walking
        /// the recommendation list logs a message and returns early. Failing to save the HTML dump
        /// or to fetch a single recommended book is logged and does not abort.
        /// </remarks>
        public EndActions(AuthorProfile ap, BookInfo book, long erl, frmMain frm)
        {
            authorProfile = ap;
            curBook = book;
            _erl = erl;
            main = frm;

            main.Log("Attempting to find book on Amazon...");
            // The product page address is derived directly from the book's ASIN.
            string amazonUrl = @"http://www.amazon.com/dp/" + book.asin;

            main.Log("Book found on Amazon!");
            main.Log(String.Format("Book's Amazon page URL: {0}", amazonUrl));

            HtmlDocument productPage = new HtmlDocument();
            productPage.OptionAutoCloseOnEnd = true;

            try
            {
                string html = HttpDownloader.GetPageHtml(amazonUrl);
                productPage.LoadHtml(html);
            }
            catch (Exception ex)
            {
                main.Log(String.Format("An error ocurred while downloading book's Amazon page: {0}\r\nYour ASIN may not be correct.", ex.Message));
                return;
            }

            // Optional dump of the fetched page; a failure here is logged and processing continues.
            if (Properties.Settings.Default.saveHtml)
            {
                try
                {
                    main.Log("Saving book's Amazon webpage...");
                    string relativeDump = String.Format(@"\dmp\{0}.bookpageHtml.txt", curBook.asin);
                    File.WriteAllText(Environment.CurrentDirectory + relativeDump,
                                      productPage.DocumentNode.InnerHtml);
                }
                catch (Exception ex)
                {
                    main.Log(String.Format("An error ocurred saving bookpageHtml.txt: {0}", ex.Message));
                }
            }

            try
            {
                curBook.GetAmazonInfo(productPage);
            }
            catch (Exception ex)
            {
                main.Log(String.Format("An error ocurred parsing Amazon info: {0}", ex.Message));
                return;
            }

            main.Log("Gathering recommended book info...");
            // Each carousel card carries one recommended title, its author and its ASIN.
            try
            {
                HtmlNodeCollection cards = productPage.DocumentNode.SelectNodes("//li[@class='a-carousel-card a-float-left']");
                if (cards == null)
                {
                    main.Log("Could not find related book list page on Amazon.\r\nUnable to create End Actions.");
                }
                else
                {
                    foreach (HtmlNode card in cards)
                    {
                        if (card == null)
                        {
                            continue;
                        }

                        HtmlNode link = card.SelectSingleNode(".//div/a");
                        string bookTitle = link.GetAttributeValue("title", "");
                        string bookUrl = link.GetAttributeValue("href", "");
                        if (bookUrl != "")
                        {
                            bookUrl = "http://www.amazon.com" + bookUrl;
                        }

                        // No title attribute: fall back to the link text (CR, LF and TAB removed).
                        if (bookTitle == "")
                        {
                            bookTitle = link.InnerText.CleanString();
                        }

                        string bookAuthor = card.SelectSingleNode(".//div/div").InnerText.CleanString();

                        // Skip entries that are not individual books (reading orders, editions, series bundles).
                        if (Regex.IsMatch(bookTitle, @"Series Reading Order|Edition|eSpecial|\([0-9]+ Book Series\)", RegexOptions.IgnoreCase))
                        {
                            continue;
                        }

                        string bookAsin = card.SelectSingleNode(".//div").GetAttributeValue("data-asin", "");
                        BookInfo alsoBought = new BookInfo(bookTitle, bookAuthor, bookAsin);
                        try
                        {
                            //Gather book desc, image url, etc, if using new format
                            if (settings.useNewVersion)
                            {
                                alsoBought.GetAmazonInfo(bookUrl);
                            }
                            custAlsoBought.Add(alsoBought);
                        }
                        catch (Exception ex)
                        {
                            main.Log(String.Format("{0}\r\n{1}\r\nContinuing anyway...", ex.Message, bookUrl));
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                main.Log("An error occurred parsing the book's amazon page: " + ex.Message);
                return;
            }

            SetPaths();
            complete = true;
        }
        /// <summary>
        /// Resolves the next book in the current book's series and, as a side effect,
        /// tries to populate <c>curBook.previousInSeries</c>.
        /// </summary>
        /// <remarks>
        /// Lookup order for the next book: the author's other books, then an Amazon search,
        /// then the first quoted ASIN-like token found on the page at <c>nextShelfariUrl</c>.
        /// The previous book uses the Amazon search and then the page at
        /// <c>previousShelfariUrl</c>.
        /// NOTE(review): <c>nextShelfariUrl</c>, <c>previousTitle</c> and
        /// <c>previousShelfariUrl</c> are not assigned in this method; presumably
        /// <c>GetNextInSeriesTitle</c> sets them - confirm.
        /// </remarks>
        /// <returns>
        /// The next book, or <c>null</c> when the current book has no Shelfari URL or no
        /// next book could be resolved.
        /// </returns>
        private BookInfo GetNextInSeries()
        {
            BookInfo nextBook = null;

            // Without a Shelfari URL nothing can be looked up (this also skips the previous-book lookup).
            if (curBook.shelfariUrl == "")
            {
                return null;
            }

            // Get title of next book
            HtmlAgilityPack.HtmlDocument searchHtmlDoc = new HtmlAgilityPack.HtmlDocument();
            searchHtmlDoc.LoadHtml(HttpDownloader.GetPageHtml(curBook.shelfariUrl));
            string nextTitle = GetNextInSeriesTitle(searchHtmlDoc);

            if (nextTitle != "")
            {
                // Search author's other books for the book (assumes next in series was written by the same author...)
                // Returns the first one found, though there should probably not be more than 1 of the same name anyway
                nextBook = authorProfile.otherBooks.FirstOrDefault(bk => bk.title == nextTitle);
                if (nextBook == null)
                {
                    // Attempt to search Amazon for the book instead
                    nextBook = Functions.AmazonSearchBook(nextTitle, curBook.author);
                    if (nextBook != null)
                    {
                        nextBook.GetAmazonInfo(nextBook.amazonUrl); //fill in desc, imageurl, and ratings
                    }
                }
                // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
                if (nextBook == null)
                {
                    HtmlDocument bookDoc = new HtmlDocument() { OptionAutoCloseOnEnd = true };
                    bookDoc.LoadHtml(HttpDownloader.GetPageHtml(nextShelfariUrl));
                    // The pattern captures the surrounding single quotes, which are stripped below.
                    Match match = Regex.Match(bookDoc.DocumentNode.InnerHtml, "('B[A-Z0-9]{9}')");
                    if (match.Success)
                    {
                        string cleanASIN = match.Value.Replace("'", String.Empty);
                        nextBook = new BookInfo(nextTitle, curBook.author, cleanASIN);
                        nextBook.GetAmazonInfo("http://www.amazon.com/dp/" + cleanASIN);
                    }
                }
                if (nextBook == null)
                {
                    main.Log("Book was found to be part of a series, but next book could not be found.\r\n" +
                        "Please report this book and the Shelfari URL and output log to improve parsing.");
                }
            }
            else if (curBook.seriesPosition != curBook.totalInSeries)
            {
                main.Log("Unable to find next book in series, the book may not be part of a series, or it is the latest release.");
            }

            if (previousTitle != "")
            {
                // Only look the previous book up when it has not been resolved already.
                if (curBook.previousInSeries == null)
                {
                    // Attempt to search Amazon for the book
                    curBook.previousInSeries = Functions.AmazonSearchBook(previousTitle, curBook.author);
                    if (curBook.previousInSeries != null)
                    {
                        curBook.previousInSeries.GetAmazonInfo(curBook.previousInSeries.amazonUrl); //fill in desc, imageurl, and ratings
                    }

                    // Try to fill in desc, imageurl, and ratings using Shelfari Kindle edition link instead
                    if (curBook.previousInSeries == null)
                    {
                        HtmlDocument bookDoc = new HtmlDocument() { OptionAutoCloseOnEnd = true };
                        bookDoc.LoadHtml(HttpDownloader.GetPageHtml(previousShelfariUrl));
                        Match match = Regex.Match(bookDoc.DocumentNode.InnerHtml, "('B[A-Z0-9]{9}')");
                        if (match.Success)
                        {
                            string cleanASIN = match.Value.Replace("'", String.Empty);
                            curBook.previousInSeries = new BookInfo(previousTitle, curBook.author, cleanASIN);
                            curBook.previousInSeries.GetAmazonInfo("http://www.amazon.com/dp/" + cleanASIN);
                        }
                    }
                }

                // Report failure only when the previous book is still unresolved after every
                // lookup, mirroring the next-book handling; an already-known previous book
                // is not an error.
                if (curBook.previousInSeries == null)
                {
                    main.Log("Book was found to be part of a series, but previous book could not be found.\r\n" +
                             "Please report this book and the Shelfari URL and output log to improve parsing.");
                }
            }

            return nextBook;
        }