/// <summary>
/// Loads the page at <paramref name="url"/> and turns every node carrying the
/// "collections-element" class into a <see cref="Category"/>.
/// </summary>
/// <param name="url">
/// Address of the page. Each "{RANDOMNUMBER}" placeholder is replaced by a random
/// number from 1000000 to 1999999 on every attempt.
/// </param>
/// <param name="parent">Category stored as the parent of every category created here.</param>
/// <param name="session">Browser session used to load the page.</param>
/// <returns>
/// The categories found; an empty list when no matching node turned up in any attempt.
/// </returns>
public static List<Category> LoadAmazonPrimeCategoriesFromUrl(this string url, Category parent, AmazonBrowserSession session)
{
    // Attempt the URL up to 15 times as amazon wants us to use the api!
    const int maxAttempts = 15;

    var result = new List<Category>();
    List<HtmlNode> nodes = null;

    // A single generator for all attempts: time-seeded instances created in quick
    // succession can hand out the same number.
    var random = new Random();

    for (int attempt = 0; attempt < maxAttempts; attempt++)
    {
        var doc = session.Load(url.Replace("{RANDOMNUMBER}", random.Next(1000000, 2000000).ToString()));
        nodes = doc.DocumentNode.GetNodesByClass("collections-element");
        if (nodes != null)
        {
            break;
        }

        Thread.Sleep(400);
    }

    if (nodes == null)
    {
        return result;
    }

    foreach (var node in nodes)
    {
        // The tile image doubles as thumbnail and, when there is no description, as name source.
        var imageSource = node.NavigatePath(new[] { 0, 0, 0 }).Attributes["src"].Value;
        var descNode = node.NavigatePath(new[] { 0, 1, 0, 0 });

        string target;
        string name;
        if (descNode != null)
        {
            // Attribute values and inner text still carry the "&amp;" entity.
            target = descNode.Attributes["href"].Value.Replace("&amp;", "&");
            name = descNode.InnerText.Replace("&amp;", "&");
        }
        else
        {
            // In Editor's picks some categories have no description, we use the image name
            target = node.NavigatePath(new[] { 0, 0 }).Attributes["href"].Value.Replace("&amp;", "&");
            var matchName = Regex.Matches(imageSource, "^.*/([a-zA-Z_-]+)([^/]*)$", RegexOptions.None);
            name = matchName.Count > 0
                ? matchName[0].Groups[1].Value.Replace("_", " ").ToUpper()
                : "(No Description)";
        }

        target = "V~" + target;

        // Ugly hack, if not included some pages have a different html layout
        if (!target.Contains("sort="))
        {
            target += "&sort=popularity-rank";
        }

        var category = new Category();
        category.Other = target;
        category.Name = StringUtils.PlainTextFromHtml(name.Replace("\n", String.Empty).Trim());
        category.Thumb = imageSource;
        category.ParentCategory = parent;
        category.HasSubCategories = true;
        category.SubCategoriesDiscovered = false;
        result.Add(category);
    }

    return result;
}
/// <summary>
/// Creates the connector, keeping the supplied site utility and creating a fresh
/// <see cref="AmazonBrowserSession"/> for it.
/// </summary>
/// <param name="siteUtil">Site utility stored by the connector.</param>
public AmazonPrimeInformationConnector(SiteUtilBase siteUtil)
{
    this._browserSession = new AmazonBrowserSession();
    this._siteUtil = siteUtil;
}
/// <summary>
/// Loads the detail page at <paramref name="url"/> and returns its videos: a single entry
/// for a movie, or one entry per episode when the page has a populated "dv-episode-list" element.
/// </summary>
/// <param name="url">Address of the detail page.</param>
/// <param name="session">Browser session used to load the page.</param>
/// <returns>
/// The videos found; an empty list when the "aiv-main-content" element never appeared
/// or no episode nodes could be located.
/// </returns>
public static List<VideoInfo> LoadVideosFromUrl(this string url, AmazonBrowserSession session)
{
    // Attempt the URL up to 15 times as amazon wants us to use the api!
    const int maxAttempts = 15;

    var results = new List<VideoInfo>();
    HtmlDocument doc = null;
    HtmlNode detailNode = null;

    for (int attempt = 0; attempt < maxAttempts; attempt++)
    {
        doc = session.Load(url);
        detailNode = doc.GetElementbyId("aiv-main-content");
        if (detailNode != null)
        {
            break;
        }

        Thread.Sleep(400);
    }

    if (detailNode == null)
    {
        return results;
    }

    var episodeContainer = doc.GetElementbyId("dv-episode-list");
    if (episodeContainer == null || episodeContainer.FindFirstChildElement() == null)
    {
        // Movie, load this video
        var video = new VideoInfo();
        video.Title = detailNode.SelectSingleNode(".//h1[@id = 'aiv-content-title']").FirstChild.GetInnerTextTrim();

        var infoNode = detailNode.GetNodeByClass("dv-info");
        var dvMetaInfo = infoNode.GetNodeByClass("dv-meta-info");
        var altTitle = detailNode.NavigatePath(new[] { 0, 0 }).FirstChild.GetInnerTextTrim();

        // Look the rating element up once instead of once for the check and once for the value.
        var starsNode = doc.GetElementbyId("summaryStars").FindFirstChildElement();
        var rating = starsNode == null ? string.Empty : starsNode.Attributes["title"].Value;

        video.Description = string.Format("({0}amazon {1})\r\n{2}\r\n{3} {4}",
            video.Title == altTitle ? "" : altTitle + ", ",
            rating,
            infoNode.GetNodeByClass("synopsis").GetInnerTextTrim(),
            dvMetaInfo.NavigatePath(new[] { 0 }).GetInnerTextTrim(),
            dvMetaInfo.NavigatePath(new[] { 1 }).GetInnerTextTrim());

        var imageUrlNode = doc.GetElementbyId("dv-dp-left-content").GetNodeByClass("dp-meta-icon-container");
        video.Thumb = imageUrlNode == null
            ? string.Empty
            : imageUrlNode.SelectSingleNode(".//img").Attributes["src"].Value;
        video.Airdate = detailNode.GetNodeByClass("release-year").GetInnerTextTrim();
        video.Length = dvMetaInfo.NavigatePath(new[] { 3 }).GetInnerTextTrim();
        video.Other = doc.GetElementbyId("ASIN").Attributes["value"].Value;
        results.Add(video);
        return results;
    }

    // TV Series, load all videos
    var episodeList = episodeContainer.GetNodesByClass("episode-list-link");
    var usesAltLayout = false;
    if (episodeList == null)
    {
        usesAltLayout = true;
        episodeList = episodeContainer.GetNodesByClass("episode-list-item-inner");
    }

    if (episodeList == null)
    {
        // Neither known layout matched; report no videos rather than failing in the loop below.
        return results;
    }

    foreach (var item in episodeList)
    {
        var video = new VideoInfo();
        var titleNode = usesAltLayout
            ? item.GetNodeByClass("dv-extender").NavigatePath(new[] { 0, 0 })
            : item.GetNodeByClass("episode-title");

        // Left-pad a leading episode number to two digits.
        video.Title = Regex.Replace(titleNode.GetInnerTextTrim(), @"^\d+", m => m.Value.PadLeft(2, '0'));
        video.Description = titleNode.NextSibling.GetInnerTextTrim();
        video.Airdate = item.GetNodeByClass("release-date").GetInnerTextTrim();

        var imageUrlNode = item.GetNodeByClass("episode-list-image");
        if (imageUrlNode != null)
        {
            video.Thumb = imageUrlNode.Attributes["src"].Value;
        }
        else
        {
            // No per-episode image: fall back to the image in the page's left-hand content area.
            imageUrlNode = doc.GetElementbyId("dv-dp-left-content").GetNodeByClass("dp-meta-icon-container");
            video.Thumb = imageUrlNode == null
                ? string.Empty
                : imageUrlNode.SelectSingleNode(".//img").Attributes["src"].Value;
        }

        // Keep only the path segment that follows "/product/" in the episode link.
        var videoUrl = usesAltLayout ? titleNode.GetAttribute("href") : item.GetAttribute("href");
        videoUrl = videoUrl.Substring(videoUrl.IndexOf("/product/") + 9);
        videoUrl = videoUrl.Substring(0, videoUrl.IndexOf("/"));
        video.Other = videoUrl;

        video.Length = item.GetNodeByClass("runtime").GetInnerTextTrim();
        video.CleanDescriptionAndTitle();
        results.Add(video);
    }

    return results;
}
/// <summary>
/// Load the prime videos as categories because we can't get the description until we drill
/// in to the video itself. When the page has a "pagnNextLink" element, a
/// <see cref="NextPageCategory"/> pointing at it is appended to the results.
/// </summary>
/// <param name="url">Address of the result page; a null or empty value yields an empty list.</param>
/// <param name="parent">Category stored as the parent of every category created here.</param>
/// <param name="session">Browser session used to load the page.</param>
/// <returns>
/// One category per matching result item, optionally followed by a next-page entry;
/// an empty list when no "result-item" node turned up in any attempt.
/// </returns>
public static List<Category> LoadAmazonPrimeVideosAsCategoriesFromUrl(this string url, Category parent, AmazonBrowserSession session)
{
    // Attempt the URL up to 10 times as amazon wants us to use the api!
    const int maxAttempts = 10;

    var results = new List<Category>();
    if (string.IsNullOrEmpty(url))
    {
        return results;
    }

    HtmlDocument doc = null;
    List<HtmlNode> listItems = null;

    for (int attempt = 0; attempt < maxAttempts; attempt++)
    {
        doc = session.Load(url);
        listItems = doc.DocumentNode.GetNodesByClass("result-item");
        if (listItems != null)
        {
            break;
        }

        Thread.Sleep(200);
    }

    if (listItems == null)
    {
        return results;
    }

    // Only <li id="result_..."> elements are movies. Ordinal comparisons keep the tag and
    // id checks independent of the current culture (e.g. Turkish casing of "I").
    var movieItems = listItems.Where(x =>
        string.Equals(x.OriginalName, "li", StringComparison.OrdinalIgnoreCase)
        && x.Id.StartsWith("result_", StringComparison.Ordinal));

    // These are the movies - parse them into categories
    foreach (var item in movieItems)
    {
        var link = item.GetNodeByClass("s-access-detail-page");

        var category = new Category();
        category.ParentCategory = parent;
        category.HasSubCategories = false;
        category.Name = StringUtils.PlainTextFromHtml(link.GetAttribute("title").Replace("\n", String.Empty).Trim());
        category.Other = link.GetAttribute("href");
        category.Thumb = item.GetNodeByClass("s-access-image").GetAttribute("src");

        var released = link.ParentNode.NavigatePath(new int[] { 3 }).GetInnerText();
        var starNode = item.GetNodeByClass("a-icon-star");
        var score = starNode == null ? String.Empty : starNode.FirstChild.GetInnerText();
        category.Description = StringUtils.PlainTextFromHtml("Released: " + released + "\r\nReview Score: " + score);

        results.Add(category);
    }

    var nextPageCtrl = doc.GetElementById("pagnNextLink");
    if (nextPageCtrl != null)
    {
        // The href attribute still carries the "&amp;" entity.
        var nextPage = Properties.Resources.AmazonRootUrl + nextPageCtrl.Attributes["href"].Value.Replace("&amp;", "&");
        if (!string.IsNullOrEmpty(nextPage))
        {
            results.Add(new NextPageCategory() { ParentCategory = parent, Url = nextPage, SubCategories = new List<Category>() });
        }
    }

    return results;
}
/// <summary>
/// Substitutes the URL-escaped <paramref name="query"/> for the "{QUERY}" placeholder in
/// <paramref name="url"/>, loads that page through
/// <c>LoadAmazonPrimeVideosAsCategoriesFromUrl</c> with no parent category, and returns the
/// categories cast to <see cref="SearchResultItem"/>.
/// </summary>
/// <param name="url">Search address containing the "{QUERY}" placeholder.</param>
/// <param name="query">Search text entered by the user.</param>
/// <param name="session">Browser session used to load the page.</param>
/// <returns>The loaded categories as search result items.</returns>
public static List<SearchResultItem> LoadAmazonPrimeSearchAsCategoriesFromUrl(this string url, string query, AmazonBrowserSession session)
{
    var escapedQuery = Uri.EscapeDataString(query);
    var searchUrl = url.Replace("{QUERY}", escapedQuery);

    List<Category> categories = searchUrl.LoadAmazonPrimeVideosAsCategoriesFromUrl(null, session);

    var items = new List<SearchResultItem>();
    foreach (var item in categories.Cast<SearchResultItem>())
    {
        items.Add(item);
    }

    return items;
}
/// <summary>
/// Load the watchlist videos as categories because we can't get the description until we
/// drill in to the video itself. When the page has a "pagnNextLink" element, a
/// <see cref="NextPageCategory"/> pointing at it is appended to the results.
/// </summary>
/// <param name="url">Address of the watchlist page; a null or empty value yields an empty list.</param>
/// <param name="parent">Category stored as the parent of every category created here.</param>
/// <param name="session">Browser session used to load the page.</param>
/// <returns>
/// One category per "dv-packshot" node, optionally followed by a next-page entry;
/// an empty list when no such node turned up in any attempt.
/// </returns>
public static List<Category> LoadAmazonPrimeWatchlistAsCategoriesFromUrl(this string url, Category parent, AmazonBrowserSession session)
{
    // Attempt the URL up to 10 times as amazon wants us to use the api!
    const int maxAttempts = 10;

    var results = new List<Category>();
    if (string.IsNullOrEmpty(url))
    {
        return results;
    }

    HtmlDocument doc = null;
    List<HtmlNode> listItems = null;

    for (int attempt = 0; attempt < maxAttempts; attempt++)
    {
        doc = session.Load(url);
        listItems = doc.DocumentNode.GetNodesByClass("dv-packshot");
        if (listItems != null)
        {
            break;
        }

        Thread.Sleep(200);
    }

    if (listItems == null)
    {
        return results;
    }

    foreach (var item in listItems)
    {
        var category = new Category();
        category.ParentCategory = parent;
        category.HasSubCategories = false;
        category.Name = StringUtils.PlainTextFromHtml(
            item.GetNodeByClass("dv-core-title").InnerText.Replace("\n", String.Empty).Trim());

        // The href attribute still carries the "&amp;" entity.
        category.Other = item.NavigatePath(new[] { 0 }).Attributes["href"].Value.Replace("&amp;", "&");
        category.Thumb = item.NavigatePath(new[] { 0, 0 }).Attributes["src"].Value;

        // Release date and review score extraction is disabled here; the description mirrors the name.
        category.Description = category.Name;

        results.Add(category);
    }

    var nextPageCtrl = doc.GetElementById("pagnNextLink");
    if (nextPageCtrl != null)
    {
        var nextPage = Properties.Resources.AmazonRootUrl + nextPageCtrl.Attributes["href"].Value.Replace("&amp;", "&");
        if (!string.IsNullOrEmpty(nextPage))
        {
            results.Add(new NextPageCategory() { ParentCategory = parent, Url = nextPage, SubCategories = new List<Category>() });
        }
    }

    return results;
}
/// <summary>
/// Loads the detail page at <paramref name="url"/> and returns its videos: a single entry
/// for a movie, or one entry per episode when the page has a populated "dv-episode-list" element.
/// Three episode list layouts are recognised, tried in order by the class of the episode nodes:
/// "episode-list-link" (0), "episode-list-item-inner" (1) and "dv-episode-container" (2).
/// </summary>
/// <param name="url">Address of the detail page.</param>
/// <param name="session">Browser session used to load the page.</param>
/// <returns>
/// The videos found; an empty list when the "aiv-main-content" element never appeared
/// or none of the episode layouts matched.
/// </returns>
public static List<VideoInfo> LoadVideosFromUrl(this string url, AmazonBrowserSession session)
{
    // Attempt the URL up to 5 times as amazon wants us to use the api!
    const int maxAttempts = 5;

    var results = new List<VideoInfo>();
    HtmlDocument doc = null;
    HtmlNode detailNode = null;

    for (int attempt = 0; attempt < maxAttempts; attempt++)
    {
        doc = session.Load(url);
        detailNode = doc.GetElementbyId("aiv-main-content");
        if (detailNode != null)
        {
            break;
        }

        Thread.Sleep(400);
    }

    if (detailNode == null)
    {
        return results;
    }

    var episodeContainer = doc.GetElementbyId("dv-episode-list");
    if (episodeContainer == null || episodeContainer.FindFirstChildElement() == null)
    {
        // Movie, load this video
        var movie = new VideoInfo();
        movie.Title = detailNode.SelectSingleNode(".//h1[@id = 'aiv-content-title']").FirstChild.GetInnerTextTrim();

        var infoNode = detailNode.GetNodeByClass("dv-info");
        var dvMetaInfo = infoNode.GetNodeByClass("dv-meta-info");
        var altTitle = detailNode.NavigatePath(new[] { 0, 0 }).FirstChild.GetInnerTextTrim();

        // Look the rating element up once instead of once for the check and once for the value.
        var starsNode = doc.GetElementbyId("summaryStars").FindFirstChildElement();
        var rating = starsNode == null ? string.Empty : starsNode.Attributes["title"].Value;

        movie.Description = string.Format("({0}amazon {1})\r\n{2}\r\n{3} {4}",
            movie.Title == altTitle ? "" : altTitle + ", ",
            rating,
            infoNode.GetNodeByClass("synopsis").GetInnerTextTrim(),
            dvMetaInfo.NavigatePath(new[] { 0 }).GetInnerTextTrim(),
            dvMetaInfo.NavigatePath(new[] { 1 }).GetInnerTextTrim());

        var posterNode = doc.GetElementbyId("dv-dp-left-content").GetNodeByClass("dp-meta-icon-container");
        movie.Thumb = posterNode == null
            ? string.Empty
            : posterNode.SelectSingleNode(".//img").Attributes["src"].Value;
        movie.Airdate = detailNode.GetNodeByClass("release-year").GetInnerTextTrim();
        movie.Length = dvMetaInfo.NavigatePath(new[] { 3 }).GetInnerTextTrim();
        movie.Other = doc.GetElementbyId("ASIN").Attributes["value"].Value;
        results.Add(movie);
        return results;
    }

    // TV Series, load all videos
    var episodeList = episodeContainer.GetNodesByClass("episode-list-link");
    int layoutType = 0;
    if (episodeList == null)
    {
        layoutType = 1;
        episodeList = episodeContainer.GetNodesByClass("episode-list-item-inner");
    }
    if (episodeList == null)
    {
        layoutType = 2;
        episodeList = episodeContainer.GetNodesByClass("dv-episode-container");
    }
    if (episodeList == null)
    {
        Log.Error("Could not load episode list!");
        return results;
    }

    // Captures the text between the first pair of parentheses, i.e. the address inside
    // style="background-image: url(...)". Built once for all episodes.
    var parenthesesPattern = new Regex("\\((.*?)\\)");

    foreach (var item in episodeList)
    {
        var extendedProperties = new ExtendedProperties();
        var video = new VideoInfo { Other = extendedProperties };

        // NOTE(review): layout 0 items are selected by class "episode-list-link", yet their title and
        // href are read from a "dv-extender" child, while layout 1 items ("episode-list-item-inner")
        // have the href read from the item itself. This looks swapped - confirm against real markup.
        var titleNode =
            layoutType == 0 ? item.GetNodeByClass("dv-extender").NavigatePath(new[] { 0, 0 }) :
            layoutType == 1 ? item.GetNodeByClass("episode-title") :
            item.GetNodeByClass("dv-el-title");

        // Left-pad a leading episode number to two digits.
        video.Title = Regex.Replace(titleNode.GetInnerTextTrim(), @"^\d+", m => m.Value.PadLeft(2, '0'));

        string videoUrl = null;
        HtmlNode imageUrlNode;
        if (layoutType == 2)
        {
            var synopsis = item.GetNodeByClass("dv-el-synopsis-content");
            if (synopsis != null)
            {
                video.Description = synopsis.FirstChild.NextSibling.GetInnerTextTrim();
            }

            // <div class="dv-el-packshot-image" style="background-image: url(http://ecx.images-amazon.com/images/I/....jpg);"></div>
            imageUrlNode = item.GetNodeByClass("dv-el-packshot-image");
            if (imageUrlNode != null)
            {
                var style = imageUrlNode.GetAttributeValue("style", null);
                if (style != null)
                {
                    // Groups.Count is fixed by the pattern, so only Success tells whether a url was found.
                    var match = parenthesesPattern.Match(style);
                    if (match.Success)
                    {
                        video.ThumbnailImage = match.Groups[1].Value;
                    }
                }
            }

            // Certification, can be different classes,i.e. dv-ages_16_and_over
            var certificationClasses = item.GetNodesByClass("dv-el-badge dv-ages_", true);
            if (certificationClasses != null)
            {
                var certification = certificationClasses.FirstOrDefault();
                if (certification != null)
                {
                    extendedProperties.VideoProperties["Certificate"] = certification.GetInnerTextTrim();
                }
            }

            // Playback progress: defaults to 0% and is overwritten when a progress bar width can be parsed.
            extendedProperties.VideoProperties["Progress"] = string.Format("{0:0}%", 0);
            var progress = item.GetNodesByClass("dv-linear-progress");
            if (progress != null && progress.Count > 0)
            {
                var progressSpan = progress[0].ChildNodes.FirstOrDefault(n => n.Name == "span");
                if (progressSpan != null)
                {
                    var width = progressSpan.GetAttributeValue("style", null);
                    double percent;
                    if (width != null
                        && double.TryParse(width.Replace("width:", "").Replace("%", ""), NumberStyles.Any, CultureInfo.InvariantCulture, out percent))
                    {
                        extendedProperties.VideoProperties["Progress"] = string.Format("{0:0}%", percent);
                    }
                }
            }

            // Air date and length are only taken when exactly three attribute values are present.
            var tagValues = item.GetNodesByClass("dv-el-attr-value");
            if (tagValues != null && tagValues.Count == 3)
            {
                video.Airdate = tagValues[2].GetInnerTextTrim();
                video.Length = tagValues[1].GetInnerTextTrim();
            }

            var urlNode = item.GetNodeByClass("dv-playback-container");
            if (urlNode != null)
            {
                videoUrl = urlNode.GetAttributeValue("data-asin", null);
            }
        }
        else
        {
            video.Description = titleNode.NextSibling.GetInnerTextTrim();
            video.Airdate = item.GetNodeByClass("release-date").GetInnerTextTrim();
            imageUrlNode = item.GetNodeByClass("episode-list-image");
            video.Length = item.GetNodeByClass("runtime").GetInnerTextTrim();

            // Keep only the path segment that follows "/product/" in the episode link.
            videoUrl = layoutType == 0 ? titleNode.GetAttribute("href") : item.GetAttribute("href");
            videoUrl = videoUrl.Substring(videoUrl.IndexOf("/product/") + 9);
            videoUrl = videoUrl.Substring(0, videoUrl.IndexOf("/"));

            if (imageUrlNode != null)
            {
                video.Thumb = imageUrlNode.Attributes["src"].Value;
            }
            else
            {
                // No per-episode image: fall back to the image in the page's left-hand content area.
                imageUrlNode = doc.GetElementbyId("dv-dp-left-content").GetNodeByClass("dp-meta-icon-container");
                video.Thumb = imageUrlNode == null
                    ? string.Empty
                    : imageUrlNode.SelectSingleNode(".//img").Attributes["src"].Value;
            }
        }

        extendedProperties.Other = videoUrl;
        video.CleanDescriptionAndTitle();
        results.Add(video);
    }

    return results;
}