Inheritance: BrowserSessionBase
        /// <summary>
        /// Reads the "collections-element" tiles of the page at the supplied url and turns each one
        /// into a category whose sub categories are still to be discovered.
        /// </summary>
        /// <param name="url">Page address. Every "{RANDOMNUMBER}" placeholder is replaced with a new random number on each attempt.</param>
        /// <param name="parent">Assigned as <c>ParentCategory</c> of every created category.</param>
        /// <param name="session">Browser session used to load the page.</param>
        /// <returns>The categories that were found; empty when no tile could be found within the allowed attempts.</returns>
        public static List<Category> LoadAmazonPrimeCategoriesFromUrl(this string url, Category parent, AmazonBrowserSession session)
        {
            // Attempt the URL up to 15 times as amazon wants us to use the api!
            const int maxAttempts = 15;
            const int retryDelayMs = 400;

            var result = new List<Category>();
            List<HtmlNode> nodes = null;
            // One generator for all attempts instead of a freshly seeded one per iteration
            var random = new Random();

            for (int attempt = 1; attempt <= maxAttempts; attempt++)
            {
                var doc = session.Load(url.Replace("{RANDOMNUMBER}", random.Next(1000000, 2000000).ToString()));
                nodes = doc.DocumentNode.GetNodesByClass("collections-element");
                if (nodes != null)
                    break;

                // No point in waiting once the last attempt has failed
                if (attempt < maxAttempts)
                    Thread.Sleep(retryDelayMs);
            }

            if (nodes == null)
                return result;

            foreach (var node in nodes)
            {
                // The tile image provides the thumbnail and, when there is no description, the name
                var imageNode = node.NavigatePath(new[] { 0, 0, 0 });
                var imageSrc = imageNode == null ? null : imageNode.Attributes["src"];
                var thumb = imageSrc == null ? string.Empty : imageSrc.Value;

                HtmlNode linkNode;
                string name;
                var descNode = node.NavigatePath(new[] { 0, 1, 0, 0 });
                if (descNode != null)
                {
                    linkNode = descNode;
                    name = descNode.InnerText.Replace("&amp;", "&");
                }
                else
                {
                    // In Editor's picks some categories have no description, we use the image name
                    linkNode = node.NavigatePath(new[] { 0, 0 });
                    var matchName = Regex.Match(thumb, "^.*/([a-zA-Z_-]+)([^/]*)$", RegexOptions.None);
                    // The name comes from a file name, so upper-case it independently of the UI culture
                    name = matchName.Success
                        ? matchName.Groups[1].Value.Replace("_", " ").ToUpperInvariant()
                        : "(No Description)";
                }

                // A tile without a link target cannot be opened later on, so it is skipped instead of throwing
                var href = linkNode == null ? null : linkNode.Attributes["href"];
                if (href == null)
                    continue;

                var target = "V~" + href.Value.Replace("&amp;", "&");
                // Ugly hack, if not included some pages have a different html layout
                if (!target.Contains("sort="))
                    target += "&sort=popularity-rank";

                var tmpCateg = new Category();
                tmpCateg.Other = target;
                tmpCateg.Name = StringUtils.PlainTextFromHtml(name.Replace("\n", String.Empty).Trim());
                tmpCateg.Thumb = thumb;
                tmpCateg.ParentCategory = parent;
                tmpCateg.HasSubCategories = true;
                tmpCateg.SubCategoriesDiscovered = false;
                result.Add(tmpCateg);
            }

            return result;
        }
 /// <summary>
 /// Creates the connector: a new <c>AmazonBrowserSession</c> is created for it and the supplied
 /// site utility is stored for later use.
 /// </summary>
 /// <param name="siteUtil">Site utility kept in <c>_siteUtil</c>.</param>
 public AmazonPrimeInformationConnector(SiteUtilBase siteUtil)
 {
     // The two assignments are independent of each other
     _browserSession = new AmazonBrowserSession();
     _siteUtil = siteUtil;
 }
Exemplo n.º 3
0
 /// <summary>
 /// Initialises the connector with the given site utility and a newly created
 /// <c>AmazonBrowserSession</c>.
 /// </summary>
 /// <param name="siteUtil">Site utility kept in <c>_siteUtil</c>.</param>
 public AmazonPrimeInformationConnector(SiteUtilBase siteUtil)
 {
     // Session first, then the site utility; neither assignment depends on the other
     _browserSession = new AmazonBrowserSession();
     _siteUtil = siteUtil;
 }
        /// <summary>
        /// Loads the video summaries from the detail page at the specified url. A page without a populated
        /// "dv-episode-list" element is read as a single movie, otherwise one video is returned per episode.
        /// </summary>
        /// <param name="url">Address of the detail page.</param>
        /// <param name="session">Browser session used to load the page.</param>
        /// <returns>The videos that were found; empty when the main content or the episode nodes could not be located.</returns>
        public static List<VideoInfo> LoadVideosFromUrl(this string url, AmazonBrowserSession session)
        {
            // Attempt the URL up to 15 times as amazon wants us to use the api!
            const int maxAttempts = 15;
            const int retryDelayMs = 400;
            const string productMarker = "/product/";

            var results = new List<VideoInfo>();
            HtmlDocument doc = null;
            HtmlNode detailNode = null;

            for (int attempt = 1; attempt <= maxAttempts; attempt++)
            {
                doc = session.Load(url);
                detailNode = doc.GetElementbyId("aiv-main-content");
                if (detailNode != null)
                    break;

                // No point in waiting once the last attempt has failed
                if (attempt < maxAttempts)
                    Thread.Sleep(retryDelayMs);
            }

            if (detailNode == null)
                return results;

            // Page level artwork: thumbnail of a movie and fallback for episodes without their own image.
            // Looked up once here instead of once per episode.
            var leftContent = doc.GetElementbyId("dv-dp-left-content");
            var posterContainer = leftContent == null ? null : leftContent.GetNodeByClass("dp-meta-icon-container");
            var posterImage = posterContainer == null ? null : posterContainer.SelectSingleNode(".//img");
            var posterSrc = posterImage == null ? null : posterImage.Attributes["src"];
            var posterThumb = posterSrc == null ? string.Empty : posterSrc.Value;

            var episodeContainer = doc.GetElementbyId("dv-episode-list");
            if (episodeContainer == null || episodeContainer.FindFirstChildElement() == null)
            {
                // Movie, load this video
                var video = new VideoInfo();

                video.Title = detailNode.SelectSingleNode(".//h1[@id = 'aiv-content-title']").FirstChild.GetInnerTextTrim();
                var infoNode = detailNode.GetNodeByClass("dv-info");

                var dvMetaInfo = infoNode.GetNodeByClass("dv-meta-info");
                var altTitle = detailNode.NavigatePath(new[] { 0, 0 }).FirstChild.GetInnerTextTrim();

                // The rating is optional: tolerate a missing container, child element or title attribute
                var starsContainer = doc.GetElementbyId("summaryStars");
                var starsNode = starsContainer == null ? null : starsContainer.FindFirstChildElement();
                var starsTitle = starsNode == null ? null : starsNode.Attributes["title"];
                var rating = starsTitle == null ? string.Empty : starsTitle.Value;

                video.Description = string.Format("({0}amazon {1})\r\n{2}\r\n{3} {4}",
                    video.Title == altTitle ? "" : altTitle + ", ",
                    rating,
                    infoNode.GetNodeByClass("synopsis").GetInnerTextTrim(),
                    dvMetaInfo.NavigatePath(new[] { 0 }).GetInnerTextTrim(),
                    dvMetaInfo.NavigatePath(new[] { 1 }).GetInnerTextTrim());

                video.Thumb = posterThumb;
                video.Airdate = detailNode.GetNodeByClass("release-year").GetInnerTextTrim();
                video.Length = dvMetaInfo.NavigatePath(new[] { 3 }).GetInnerTextTrim();
                video.Other = doc.GetElementbyId("ASIN").Attributes["value"].Value;
                results.Add(video);
                return results;
            }

            // TV Series, load all videos
            var episodeList = episodeContainer.GetNodesByClass("episode-list-link");
            var usesAltLayout = false;

            if (episodeList == null)
            {
                usesAltLayout = true;
                episodeList = episodeContainer.GetNodesByClass("episode-list-item-inner");
            }

            // Neither known layout matched: return what we have rather than enumerating null
            if (episodeList == null)
                return results;

            foreach (var item in episodeList)
            {
                var video = new VideoInfo();
                var titleNode = usesAltLayout ? item.GetNodeByClass("dv-extender").NavigatePath(new[] { 0, 0 }) : item.GetNodeByClass("episode-title");

                // Pad a leading episode number to two digits
                video.Title = Regex.Replace(titleNode.GetInnerTextTrim(), @"^\d+", m => m.Value.PadLeft(2, '0'));

                video.Description = titleNode.NextSibling.GetInnerTextTrim();
                video.Airdate = item.GetNodeByClass("release-date").GetInnerTextTrim();

                var imageUrlNode = item.GetNodeByClass("episode-list-image");
                var imageSrc = imageUrlNode == null ? null : imageUrlNode.Attributes["src"];
                video.Thumb = imageSrc == null ? posterThumb : imageSrc.Value;

                // The id is the path segment that follows "/product/". When the marker is missing the
                // link is kept unchanged, and a missing trailing slash no longer throws.
                var videoUrl = usesAltLayout ? titleNode.GetAttribute("href") : item.GetAttribute("href");
                if (videoUrl != null)
                {
                    int markerAt = videoUrl.IndexOf(productMarker, StringComparison.Ordinal);
                    if (markerAt >= 0)
                    {
                        videoUrl = videoUrl.Substring(markerAt + productMarker.Length);
                        int slashAt = videoUrl.IndexOf('/');
                        if (slashAt >= 0)
                            videoUrl = videoUrl.Substring(0, slashAt);
                    }
                }
                video.Other = videoUrl;
                video.Length = item.GetNodeByClass("runtime").GetInnerTextTrim();
                video.CleanDescriptionAndTitle();
                results.Add(video);
            }

            return results;
        }
        /// <summary>
        /// Load the prime videos as categories because we can't get the description until we drill in to the video itself.
        /// When the page has a "pagnNextLink" element with a link, a <c>NextPageCategory</c> is appended as last entry.
        /// </summary>
        /// <param name="url">Address of the result page; a null or empty value yields an empty list.</param>
        /// <param name="parent">Assigned as <c>ParentCategory</c> of every created category.</param>
        /// <param name="session">Browser session used to load the page.</param>
        /// <returns>The categories that were found; empty when no "result-item" node could be found within the allowed attempts.</returns>
        public static List<Category> LoadAmazonPrimeVideosAsCategoriesFromUrl(this string url, Category parent, AmazonBrowserSession session)
        {
            // Attempt the URL up to 10 times as amazon wants us to use the api!
            const int maxAttempts = 10;
            const int retryDelayMs = 200;

            var results = new List<Category>();
            if (string.IsNullOrEmpty(url))
                return results;

            HtmlDocument doc = null;
            List<HtmlNode> listItems = null;

            for (int attempt = 1; attempt <= maxAttempts; attempt++)
            {
                doc = session.Load(url);
                listItems = doc.DocumentNode.GetNodesByClass("result-item");
                if (listItems != null)
                    break;

                // No point in waiting once the last attempt has failed
                if (attempt < maxAttempts)
                    Thread.Sleep(retryDelayMs);
            }

            if (listItems == null)
                return results;

            // These are the movies - parse them into categories
            foreach (var item in listItems)
            {
                // Only <li id="result_..."> elements are results. Ordinal comparisons keep the
                // tag-name test working in cultures with special casing rules (e.g. tr-TR).
                if (!string.Equals(item.OriginalName, "li", StringComparison.OrdinalIgnoreCase))
                    continue;
                if (item.Id == null || !item.Id.StartsWith("result_", StringComparison.Ordinal))
                    continue;

                // Without the detail link there is neither a name nor a target, so skip the entry
                var link = item.GetNodeByClass("s-access-detail-page");
                if (link == null)
                    continue;

                var tmpCateg = new Category();
                tmpCateg.ParentCategory = parent;
                tmpCateg.HasSubCategories = false;
                var title = link.GetAttribute("title") ?? string.Empty;
                tmpCateg.Name = StringUtils.PlainTextFromHtml(title.Replace("\n", String.Empty).Trim());
                tmpCateg.Other = link.GetAttribute("href");
                tmpCateg.Thumb = item.GetNodeByClass("s-access-image").GetAttribute("src");
                var released = link.ParentNode.NavigatePath(new int[] { 3 }).GetInnerText();
                var starNode = item.GetNodeByClass("a-icon-star");
                var score = starNode == null ? String.Empty : starNode.FirstChild.GetInnerText();
                tmpCateg.Description = StringUtils.PlainTextFromHtml("Released: " + released + "\r\nReview Score: " + score);
                results.Add(tmpCateg);
            }

            var nextPageCtrl = doc.GetElementById("pagnNextLink");
            if (nextPageCtrl != null)
            {
                // Test the link itself: once the root url is prepended the result is never empty
                var nextHref = nextPageCtrl.Attributes["href"];
                if (nextHref != null && !string.IsNullOrEmpty(nextHref.Value))
                {
                    var nextPage = Properties.Resources.AmazonRootUrl + nextHref.Value.Replace("&amp;", "&");
                    results.Add(new NextPageCategory() { ParentCategory = parent, Url = nextPage, SubCategories = new List<Category>() });
                }
            }

            return results;
        }
 /// <summary>
 /// Replaces the "{QUERY}" placeholder of the url with the escaped query, loads the resulting page
 /// through <c>LoadAmazonPrimeVideosAsCategoriesFromUrl</c> (without a parent category) and returns
 /// the entries cast to <c>SearchResultItem</c>.
 /// </summary>
 /// <param name="url">Search address containing the "{QUERY}" placeholder.</param>
 /// <param name="query">Search text; it is escaped with <c>Uri.EscapeDataString</c>.</param>
 /// <param name="session">Browser session used to load the page.</param>
 /// <returns>The loaded entries as search result items.</returns>
 public static List<SearchResultItem> LoadAmazonPrimeSearchAsCategoriesFromUrl(this string url, string query, AmazonBrowserSession session)
 {
     var escapedQuery = Uri.EscapeDataString(query);
     var searchUrl = url.Replace("{QUERY}", escapedQuery);

     var categories = searchUrl.LoadAmazonPrimeVideosAsCategoriesFromUrl(null, session);
     var searchResults = categories.Cast<SearchResultItem>();
     return searchResults.ToList();
 }
        /// <summary>
        /// Load the watchlist entries (the "dv-packshot" nodes of the page) as categories.
        /// When the page has a "pagnNextLink" element with a link, a <c>NextPageCategory</c> is appended as last entry.
        /// </summary>
        /// <param name="url">Address of the watchlist page; a null or empty value yields an empty list.</param>
        /// <param name="parent">Assigned as <c>ParentCategory</c> of every created category.</param>
        /// <param name="session">Browser session used to load the page.</param>
        /// <returns>The categories that were found; empty when no "dv-packshot" node could be found within the allowed attempts.</returns>
        public static List<Category> LoadAmazonPrimeWatchlistAsCategoriesFromUrl(this string url, Category parent, AmazonBrowserSession session)
        {
            // Attempt the URL up to 10 times as amazon wants us to use the api!
            const int maxAttempts = 10;
            const int retryDelayMs = 200;

            var results = new List<Category>();
            // Same guard as LoadAmazonPrimeVideosAsCategoriesFromUrl: nothing to load without an address
            if (string.IsNullOrEmpty(url))
                return results;

            HtmlDocument doc = null;
            List<HtmlNode> listItems = null;

            for (int attempt = 1; attempt <= maxAttempts; attempt++)
            {
                doc = session.Load(url);
                listItems = doc.DocumentNode.GetNodesByClass("dv-packshot");
                if (listItems != null)
                    break;

                // No point in waiting once the last attempt has failed
                if (attempt < maxAttempts)
                    Thread.Sleep(retryDelayMs);
            }

            if (listItems == null)
                return results;

            foreach (var item in listItems)
            {
                // An entry without a link target cannot be opened later on, so it is skipped instead of throwing
                var linkNode = item.NavigatePath(new[] { 0 });
                var href = linkNode == null ? null : linkNode.Attributes["href"];
                if (href == null)
                    continue;

                var titleNode = item.GetNodeByClass("dv-core-title");
                var rawTitle = titleNode == null ? string.Empty : titleNode.InnerText;

                var imageNode = item.NavigatePath(new[] { 0, 0 });
                var imageSrc = imageNode == null ? null : imageNode.Attributes["src"];

                var tmpCateg = new Category();
                tmpCateg.ParentCategory = parent;
                tmpCateg.HasSubCategories = false;
                tmpCateg.Name = StringUtils.PlainTextFromHtml(rawTitle.Replace("\n", String.Empty).Trim());
                tmpCateg.Other = href.Value.Replace("&amp;", "&");
                tmpCateg.Thumb = imageSrc == null ? string.Empty : imageSrc.Value;
                // Release date and review score are not read from this page; the name doubles as description
                tmpCateg.Description = tmpCateg.Name;
                results.Add(tmpCateg);
            }

            var nextPageCtrl = doc.GetElementById("pagnNextLink");
            if (nextPageCtrl != null)
            {
                // Test the link itself: once the root url is prepended the result is never empty
                var nextHref = nextPageCtrl.Attributes["href"];
                if (nextHref != null && !string.IsNullOrEmpty(nextHref.Value))
                {
                    var nextPage = Properties.Resources.AmazonRootUrl + nextHref.Value.Replace("&amp;", "&");
                    results.Add(new NextPageCategory() { ParentCategory = parent, Url = nextPage, SubCategories = new List<Category>() });
                }
            }

            return results;
        }
        /// <summary>
        /// Loads the video summaries from the detail page at the specified url. A page without a populated
        /// "dv-episode-list" element is read as a single movie, otherwise one video is returned per episode.
        /// Three episode list layouts are recognised, tried in this order: "episode-list-link",
        /// "episode-list-item-inner" and "dv-episode-container".
        /// </summary>
        /// <param name="url">Address of the detail page.</param>
        /// <param name="session">Browser session used to load the page.</param>
        /// <returns>The videos that were found; empty when the main content or the episode nodes could not be located.</returns>
        public static List<VideoInfo> LoadVideosFromUrl(this string url, AmazonBrowserSession session)
        {
            // Attempt the URL up to 5 times as amazon wants us to use the api!
            const int maxAttempts = 5;
            const int retryDelayMs = 400;
            const string productMarker = "/product/";

            var results = new List<VideoInfo>();
            HtmlDocument doc = null;
            HtmlNode detailNode = null;

            for (int attempt = 1; attempt <= maxAttempts; attempt++)
            {
                doc = session.Load(url);
                detailNode = doc.GetElementbyId("aiv-main-content");
                if (detailNode != null)
                    break;

                // No point in waiting once the last attempt has failed
                if (attempt < maxAttempts)
                    Thread.Sleep(retryDelayMs);
            }

            if (detailNode == null)
                return results;

            // Page level artwork: thumbnail of a movie and fallback for episodes without their own image.
            // Looked up once here instead of once per episode.
            var leftContent = doc.GetElementbyId("dv-dp-left-content");
            var posterContainer = leftContent == null ? null : leftContent.GetNodeByClass("dp-meta-icon-container");
            var posterImage = posterContainer == null ? null : posterContainer.SelectSingleNode(".//img");
            var posterThumb = posterImage == null ? string.Empty : posterImage.GetAttributeValue("src", string.Empty);

            var episodeContainer = doc.GetElementbyId("dv-episode-list");
            if (episodeContainer == null || episodeContainer.FindFirstChildElement() == null)
            {
                // Movie, load this video
                var video = new VideoInfo();

                video.Title = detailNode.SelectSingleNode(".//h1[@id = 'aiv-content-title']").FirstChild.GetInnerTextTrim();
                var infoNode = detailNode.GetNodeByClass("dv-info");

                var dvMetaInfo = infoNode.GetNodeByClass("dv-meta-info");
                var altTitle = detailNode.NavigatePath(new[] { 0, 0 }).FirstChild.GetInnerTextTrim();

                // The rating is optional: tolerate a missing container, child element or title attribute
                var starsContainer = doc.GetElementbyId("summaryStars");
                var starsNode = starsContainer == null ? null : starsContainer.FindFirstChildElement();
                var rating = starsNode == null ? string.Empty : starsNode.GetAttributeValue("title", string.Empty);

                video.Description = string.Format("({0}amazon {1})\r\n{2}\r\n{3} {4}",
                    video.Title == altTitle ? "" : altTitle + ", ",
                    rating,
                    infoNode.GetNodeByClass("synopsis").GetInnerTextTrim(),
                    dvMetaInfo.NavigatePath(new[] { 0 }).GetInnerTextTrim(),
                    dvMetaInfo.NavigatePath(new[] { 1 }).GetInnerTextTrim());

                video.Thumb = posterThumb;
                video.Airdate = detailNode.GetNodeByClass("release-year").GetInnerTextTrim();
                video.Length = dvMetaInfo.NavigatePath(new[] { 3 }).GetInnerTextTrim();
                video.Other = doc.GetElementbyId("ASIN").Attributes["value"].Value;
                results.Add(video);
                return results;
            }

            // TV Series, load all videos
            var episodeList = episodeContainer.GetNodesByClass("episode-list-link");
            int layoutType = 0;

            if (episodeList == null)
            {
                layoutType = 1;
                episodeList = episodeContainer.GetNodesByClass("episode-list-item-inner");
            }
            if (episodeList == null)
            {
                layoutType = 2;
                episodeList = episodeContainer.GetNodesByClass("dv-episode-container");
            }
            if (episodeList == null)
            {
                Log.Error("Could not load episode list!");
                return results;
            }

            // Extracts the address from a style such as "background-image: url(http://...jpg);"
            var styleUrlPattern = new Regex("\\((.*?)\\)");

            foreach (var item in episodeList)
            {
                var extendedProperties = new ExtendedProperties();
                var video = new VideoInfo { Other = extendedProperties };

                // Layout 0 items are the episode links themselves: the title is nested inside and the
                // href sits on the item. Layout 1 items are containers whose "dv-extender" child leads
                // to the title link. The previous mapping had these two layouts swapped.
                HtmlNode titleNode;
                switch (layoutType)
                {
                    case 0:
                        titleNode = item.GetNodeByClass("episode-title");
                        break;
                    case 1:
                        titleNode = item.GetNodeByClass("dv-extender").NavigatePath(new[] { 0, 0 });
                        break;
                    default:
                        titleNode = item.GetNodeByClass("dv-el-title");
                        break;
                }

                // Pad a leading episode number to two digits
                video.Title = Regex.Replace(titleNode.GetInnerTextTrim(), @"^\d+", m => m.Value.PadLeft(2, '0'));

                string videoUrl = null;
                if (layoutType == 2)
                {
                    var synopsis = item.GetNodeByClass("dv-el-synopsis-content");
                    if (synopsis != null && synopsis.FirstChild != null)
                    {
                        video.Description = synopsis.FirstChild.NextSibling.GetInnerTextTrim();
                    }

                    // <div class="dv-el-packshot-image" style="background-image: url(http://ecx.images-amazon.com/images/I/....jpg);"></div>
                    // NOTE(review): the other layouts fill video.Thumb, this one video.ThumbnailImage - confirm this is intended
                    var packshotNode = item.GetNodeByClass("dv-el-packshot-image");
                    if (packshotNode != null)
                    {
                        var style = packshotNode.GetAttributeValue("style", null);
                        if (style != null)
                        {
                            // Groups.Count is 2 for this pattern even when nothing matched, so test Success
                            var match = styleUrlPattern.Match(style);
                            if (match.Success)
                                video.ThumbnailImage = match.Groups[1].Value;
                        }
                    }

                    // Certification, can be different classes,i.e. dv-ages_16_and_over
                    var certificationClasses = item.GetNodesByClass("dv-el-badge dv-ages_", true);
                    if (certificationClasses != null && certificationClasses.Count > 0)
                    {
                        extendedProperties.VideoProperties["Certificate"] = certificationClasses[0].GetInnerTextTrim();
                    }

                    // Playback progress, 0% unless a width can be read from the progress bar
                    extendedProperties.VideoProperties["Progress"] = string.Format("{0:0}%", 0);
                    var progress = item.GetNodesByClass("dv-linear-progress");
                    if (progress != null && progress.Count > 0)
                    {
                        var progressSpan = progress[0].ChildNodes.FirstOrDefault(n => n.Name == "span");
                        if (progressSpan != null)
                        {
                            var width = progressSpan.GetAttributeValue("style", string.Empty);
                            double percent;
                            if (double.TryParse(width.Replace("width:", "").Replace("%", ""), NumberStyles.Any, CultureInfo.InvariantCulture, out percent))
                            {
                                extendedProperties.VideoProperties["Progress"] = string.Format("{0:0}%", percent);
                            }
                        }
                    }

                    // Length and air date are only read when exactly three attribute values are present
                    var tagValues = item.GetNodesByClass("dv-el-attr-value");
                    if (tagValues != null && tagValues.Count == 3)
                    {
                        video.Airdate = tagValues[2].GetInnerTextTrim();
                        video.Length = tagValues[1].GetInnerTextTrim();
                    }

                    var urlNode = item.GetNodeByClass("dv-playback-container");
                    if (urlNode != null)
                        videoUrl = urlNode.GetAttributeValue("data-asin", null);
                }
                else
                {
                    video.Description = titleNode.NextSibling.GetInnerTextTrim();
                    video.Airdate = item.GetNodeByClass("release-date").GetInnerTextTrim();
                    var imageUrlNode = item.GetNodeByClass("episode-list-image");
                    video.Length = item.GetNodeByClass("runtime").GetInnerTextTrim();

                    // The id is the path segment that follows "/product/". When the marker is missing the
                    // link is kept unchanged, and a missing trailing slash no longer throws.
                    videoUrl = layoutType == 1 ? titleNode.GetAttribute("href") : item.GetAttribute("href");
                    if (videoUrl != null)
                    {
                        int markerAt = videoUrl.IndexOf(productMarker, StringComparison.Ordinal);
                        if (markerAt >= 0)
                        {
                            videoUrl = videoUrl.Substring(markerAt + productMarker.Length);
                            int slashAt = videoUrl.IndexOf('/');
                            if (slashAt >= 0)
                                videoUrl = videoUrl.Substring(0, slashAt);
                        }
                    }

                    video.Thumb = imageUrlNode == null
                        ? posterThumb
                        : imageUrlNode.GetAttributeValue("src", string.Empty);
                }

                extendedProperties.Other = videoUrl;
                video.CleanDescriptionAndTitle();
                results.Add(video);
            }

            return results;
        }