/// <summary>
/// Serializes the supplied videos as indented JSON and writes the text to a per-show file.
/// </summary>
/// <param name="show">Show whose <c>DisplayName</c> supplies the file name (after <c>RemoveBadNameChars</c>).</param>
/// <param name="videos">Videos to serialize into the file.</param>
private static void WriteVideosJson(MsdnShow show, List<MsdnVideo> videos)
{
    // The complete path (output folder included) is passed through the ASCII conversion and then trimmed.
    string rawPath = $"{outputFolder}\\{RemoveBadNameChars(show.DisplayName)}.json";
    string targetPath = ConvertStringToPureAscii(rawPath).Trim();

    JsonSerializerSettings serializerSettings = new JsonSerializerSettings();
    serializerSettings.Formatting = Formatting.Indented;

    string json = JsonConvert.SerializeObject(videos, serializerSettings);
    File.WriteAllText(targetPath, json);
}
/// <summary>
/// Gathers the videos from every listing page of a show.
/// </summary>
/// <param name="show">Show whose listing pages are fetched.</param>
/// <returns>
/// The videos of page 1 followed by those of pages 2 up to the maximum page number reported
/// by the page 1 request, in page order.
/// </returns>
private static List<MsdnVideo> GetAllVideosForShow(MsdnShow show)
{
    // Only the page count reported by the first request drives the loop below.
    int lastPage;
    List<MsdnVideo> allVideos = new List<MsdnVideo>(GetVideosForShow(show, 1, out lastPage));

    int page = 2;
    while (page <= lastPage)
    {
        // The page count reported by follow-up pages is intentionally ignored.
        int ignoredMaxPage;
        allVideos.AddRange(GetVideosForShow(show, page, out ignoredMaxPage));
        page++;
    }

    return allVideos;
}
/// <summary>
/// Downloads one listing page of a show and builds a <see cref="MsdnVideo"/> for each video entry on it,
/// downloading every entry's own video page to collect its details.
/// </summary>
/// <param name="show">Show to read; its <c>Href</c> is appended to the site root to form the listing URL.</param>
/// <param name="pageNumber">Value sent as the <c>page</c> query-string parameter.</param>
/// <param name="maxPage">Receives the result of <c>GetMaxPageNumber</c> for the downloaded listing page.</param>
/// <returns>
/// The videos found on the page. Entries marked <c>liveFuture</c>, and entries whose video page has no
/// usable <c>application/ld+json</c> script, are left out.
/// </returns>
private static List<MsdnVideo> GetVideosForShow(MsdnShow show, int pageNumber, out int maxPage)
{
    List<MsdnVideo> results = new List<MsdnVideo>();

    string pageLink = $"{siteRoot}{show.Href}?page={pageNumber}";
    string sectionPage = client.DownloadString(pageLink);

    HtmlParser parser = new HtmlParser();
    IHtmlDocument document = parser.ParseDocument(sectionPage);
    maxPage = GetMaxPageNumber(document);

    var videoEntries = document.All.Where(el =>
        el.LocalName == "article" &&
        el.ClassList.Contains("abstract") &&
        el.ClassList.Contains("small"));

    foreach (var entry in videoEntries)
    {
        MsdnVideo newVideo = new MsdnVideo();
        newVideo.DataApi = entry.Attributes["data-api"].Value;
        newVideo.VideoId = new Guid(newVideo.DataApi.Replace("/Entries(guid'", "").Replace("')/", ""));

        var testDiv = entry.FirstElementChild.FirstElementChild;
        if (testDiv.ClassList.Contains("liveFuture"))
        {
            // Video is a future live stream and does not yet have a video page, so we skip it.
            continue;
        }

        newVideo.VideoPageLink = testDiv.FirstElementChild.Attributes["href"].Value;

        string videoPageLink = $"{siteRoot}{newVideo.VideoPageLink}";
        string videoPage = client.DownloadString(videoPageLink);
        IHtmlDocument videoPageDocument = parser.ParseDocument(videoPage);

        // Preferred description source: <meta name="description" content="...">.
        string descriptionTemp = "";
        var metaDescription = videoPageDocument.All.FirstOrDefault(el =>
            el.LocalName == "meta" &&
            el.HasAttribute("name") &&
            el.Attributes["name"].Value == "description");
        if (metaDescription != null)
        {
            // A meta tag without a content attribute falls back to the JSON description below.
            descriptionTemp = metaDescription.Attributes["content"]?.Value ?? "";
        }

        // Author name: text of the first child of <div class="authors">, when there is one.
        string authorTemp = "";
        var authorsDiv = videoPageDocument.All.FirstOrDefault(el =>
            el.LocalName == "div" &&
            el.ClassList.Contains("authors"));
        if (authorsDiv != null)
        {
            var authorName = authorsDiv.FirstElementChild?.TextContent;
            if (!string.IsNullOrEmpty(authorName))
            {
                authorTemp = authorName;
            }
        }

        var jsonDescription = videoPageDocument.All.FirstOrDefault(el =>
            el.LocalName == "script" &&
            el.HasAttribute("type") &&
            el.Attributes["type"].Value == "application/ld+json");

        // To reduce the likelihood of writing JSON entries with no usable details, we ONLY save
        // the entry IF we actually got a JSON description tag.
        if (jsonDescription == null)
        {
            continue;
        }

        var videoDescription = JsonConvert.DeserializeObject<VideoDescription>(jsonDescription.TextContent);
        if (videoDescription == null)
        {
            // The script tag held nothing that deserializes to an object (e.g. empty or "null").
            continue;
        }

        newVideo.VideoPageThumb = videoDescription.ThumbnailUrl;
        newVideo.ShowName = show.DisplayName;
        newVideo.VideoLength = ParseIso8601Duration(videoDescription.Duration);
        newVideo.VideoTitle = videoDescription.Name;
        newVideo.Author = string.IsNullOrEmpty(authorTemp) ? "UNKNOWN" : authorTemp;
        newVideo.UtcDateTimePublished = DateTime.Parse(videoDescription.UploadDate).ToUniversalTime();
        newVideo.ActualVideoFileLink = videoDescription.ContentUrl;
        newVideo.Description = string.IsNullOrEmpty(descriptionTemp) ? videoDescription.Description : descriptionTemp;

        results.Add(newVideo);
    }

    return results;
}

/// <summary>
/// Converts an ISO 8601 duration such as <c>PT1H2M3S</c> or <c>PT5M30S</c> into a <see cref="TimeSpan"/>.
/// </summary>
/// <param name="isoDuration">Duration text; may be null or blank.</param>
/// <returns>The parsed duration, or <see cref="TimeSpan.Zero"/> when the text is null or blank.</returns>
/// <exception cref="FormatException">The text is not a valid duration.</exception>
private static TimeSpan ParseIso8601Duration(string isoDuration)
{
    if (string.IsNullOrWhiteSpace(isoDuration))
    {
        return TimeSpan.Zero;
    }

    // Stripping the designators and calling TimeSpan.Parse is not safe: "PT5M30S" would become "5:30",
    // which TimeSpan.Parse reads as 5 hours 30 minutes, and "PT45S" would become 45 days.
    return System.Xml.XmlConvert.ToTimeSpan(isoDuration.Trim());
}