/// <summary>
/// Creates a result that pairs a video's extra metadata with its recommendations.
/// </summary>
/// <param name="extra">Value stored in <c>Extra</c>.</param>
/// <param name="recs">Value stored in <c>Recs</c>.</param>
public RecsAndExtra(VideoExtraStored2 extra, Rec[] recs) => (Extra, Recs) = (extra, recs);
//ytInitialPlayerResponse.responseContext.serviceTrackingParams.filter(p => p.service == "CSI")[0].params
/// <summary>
/// Loads the watch page for a video and returns its extra metadata together with its recommendations.
/// </summary>
/// <param name="videoId">Id of the video to scrape.</param>
/// <param name="log">Logger handed to the page loaders; also receives a warning when no recommendations are found.</param>
/// <returns>
/// The extra metadata and the recommendations. When an error is detected (player response error,
/// age restriction, or an "unavailable" sub-message on the page) <c>Error</c>/<c>SubError</c> are populated
/// and the recommendations are empty. The recommendations are never null.
/// </returns>
public async Task<RecsAndExtra> GetRecsAndExtra(string videoId, ILogger log)
{
    var watchPage = await GetVideoWatchPageHtmlAsync(videoId, log);
    var (html, raw, url) = watchPage;
    var infoDic = await GetVideoInfoDicAsync(videoId, log);
    var videoItem = GetVideo(videoId, infoDic, watchPage);

    var extra = new VideoExtraStored2
    {
        VideoId = videoId,
        Updated = DateTime.UtcNow,
        ChannelId = videoItem?.ChannelId,
        ChannelTitle = videoItem?.ChannelTitle,
        Description = videoItem?.Description,
        Duration = videoItem?.Duration,
        Keywords = videoItem?.Keywords,
        Title = videoItem?.Title,
        UploadDate = videoItem?.UploadDate,
        AddedDate = videoItem?.AddedDate,
        Statistics = videoItem?.Statistics,
        Source = ScrapeSource.Web
    };

    // First source of errors: the player response object embedded in the page.
    var ytInitPr = GetClientObjectFromWatchPage(html, "ytInitialPlayerResponse");
    if (ytInitPr != null && ytInitPr.Value<string>("status") != "OK")
    {
        var playerError = ytInitPr.SelectToken("playabilityStatus.errorScreen.playerErrorMessageRenderer");
        extra.Error = playerError?.SelectToken("reason.simpleText")?.Value<string>();
        // The sub-reason is either a simple text or a list of runs; fall back to the first run.
        extra.SubError = (playerError?.SelectToken("subreason.simpleText") ??
                          playerError?.SelectToken("subreason.runs[0].text"))?.Value<string>();
    }

    // Second source: the age restriction meta tag in the page head.
    if (extra.Error == null)
    {
        var restrictedMode = html.QueryElements("head > meta[property=\"og:restrictions:age\"]")
            .FirstOrDefault()?.GetAttribute("content")?.Value == "18+";
        if (restrictedMode)
        {
            extra.Error = RestrictedVideoError;
            extra.SubError = "Unable to find recommended video because it is age restricted and requires to log in";
        }
    }

    // Third source: the "unavailable" elements of the page.
    if (extra.Error == null)
    {
        extra.SubError = html.QueryElements("#unavailable-submessage").FirstOrDefault()?.GetInnerText();
        if (extra.SubError == "")
        {
            extra.SubError = null;
        }

        if (extra.SubError.HasValue()) // all pages have the error, but not a sub-error
        {
            extra.Error = html.QueryElements("#unavailable-message").FirstOrDefault()?.GetInnerText();
        }
    }

    if (extra.Error != null)
    {
        return new RecsAndExtra(extra, new Rec[] { });
    }

    var (recs, recEx) = Def.New(() => GetRecs2(html)).Try();
    if (recs?.Any() != true || recEx != null)
    {
        // Keep a copy of the raw page so the missing recommendations can be investigated later.
        var uri = new Uri(url);
        // Invariant culture keeps the dated folder name stable regardless of the current culture's calendar.
        var day = DateTime.UtcNow.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
        var path = StringPath.Relative(day, $"{uri.PathAndQuery}.html");
        var logUrl = LogStore.Url(path);
        await LogStore.Save(path, raw.AsStream(), log);
        log.Warning("WebScraper - Unable to find recs at ({Url}). error: {Error}", logUrl, recEx?.ToString());
    }

    // recs can be null here (see the null-conditional check above); return an empty array
    // so the result matches the error path and callers never see a null collection.
    return new RecsAndExtra(extra, recs ?? new Rec[] { });
}
//ytInitialPlayerResponse.responseContext.serviceTrackingParams.filter(p => p.service == "CSI")[0].params
/// <summary>
/// Loads the watch page for a video and returns its extra metadata together with its recommendations.
/// </summary>
/// <param name="log">Logger used for this call; a <c>VideoId</c> context property is added to it.</param>
/// <param name="videoId">Id of the video to scrape.</param>
/// <param name="channelId">When supplied, recorded as the channel id instead of the one found for the video.</param>
/// <param name="channelTitle">When supplied, recorded as the channel title instead of the one found for the video.</param>
/// <returns>
/// The extra metadata and the recommendations. When an error is detected (player response error,
/// age restriction, an "unavailable" sub-message, or an "Unlisted" badge) <c>Error</c> is populated
/// and the recommendations are empty.
/// </returns>
public async Task<RecsAndExtra> GetRecsAndExtra(ILogger log, string videoId, string channelId = null, string channelTitle = null)
{
    log = log.ForContext("VideoId", videoId);
    var watchPage = await GetVideoWatchPageHtmlAsync(videoId, log);
    var (html, _, _) = watchPage; // only the parsed html is needed here
    var infoDic = await GetVideoInfoDicAsync(videoId, log);
    var videoItem = GetVideo(videoId, infoDic, watchPage);

    var extra = new VideoExtraStored2
    {
        VideoId = videoId,
        Updated = DateTime.UtcNow,
        // Some videos are listed under a channel's playlist, but when you click on the video its channel
        // is another one (e.g. _iYT8eg1F8s). Record them with the channelId of the playlist.
        ChannelId = channelId ?? videoItem?.ChannelId,
        ChannelTitle = channelTitle ?? videoItem?.ChannelTitle,
        Description = videoItem?.Description,
        Duration = videoItem?.Duration,
        Keywords = videoItem?.Keywords,
        Title = videoItem?.Title,
        UploadDate = videoItem?.UploadDate,
        AddedDate = videoItem?.AddedDate,
        Statistics = videoItem?.Statistics,
        Source = ScrapeSource.Web
    };

    // First source of errors: the player response object embedded in the page.
    var ytInitPr = await GetClientObjectFromWatchPage(log, html, videoId, "ytInitialPlayerResponse");
    if (ytInitPr != null && ytInitPr.Value<string>("status") != "OK")
    {
        var playerError = ytInitPr.SelectToken("playabilityStatus.errorScreen.playerErrorMessageRenderer");
        extra.Error = playerError?.SelectToken("reason.simpleText")?.Value<string>();
        // The sub-reason is either a simple text or a list of runs whose texts are concatenated.
        extra.SubError = (playerError?.SelectToken("subreason.simpleText") ??
                          playerError?.SelectTokens("subreason.runs[*].text").Join(""))?.Value<string>();
    }

    // Second source: the age restriction meta tag in the page head.
    if (extra.Error == null)
    {
        var restrictedMode = html.QueryElements("head > meta[property=\"og:restrictions:age\"]")
            .FirstOrDefault()?.GetAttribute("content")?.Value == "18+";
        if (restrictedMode)
        {
            extra.Error = RestrictedVideoError;
            extra.SubError = "Unable to find recommended video because it is age restricted and requires to log in";
        }
    }

    // Third source: the "unavailable" elements of the page.
    if (extra.Error == null)
    {
        extra.SubError = html.QueryElements("#unavailable-submessage").FirstOrDefault()?.GetInnerText();
        if (extra.SubError == "")
        {
            extra.SubError = null;
        }

        if (extra.SubError.HasValue()) // all pages have the error, but not a sub-error
        {
            extra.Error = html.QueryElements("#unavailable-message").FirstOrDefault()?.GetInnerText();
        }
    }

    // Fourth source: an "Unlisted" badge in the primary info of the initial data object.
    if (extra.Error == null)
    {
        var ytInitialData = await GetClientObjectFromWatchPage(log, html, videoId, "ytInitialData");
        // The client object can be missing (the player response above is null-checked for the same reason),
        // so treat a null result as "no badges" instead of throwing.
        var badgeLabels = ytInitialData?.SelectTokens(
            "contents.twoColumnWatchNextResults.results.results.contents[*].videoPrimaryInfoRenderer.badges[*].metadataBadgeRenderer.label");
        if (badgeLabels?.Any(b => b.Value<string>() == "Unlisted") == true)
        {
            extra.Error = "Unlisted";
        }
    }

    if (extra.Error != null)
    {
        return new RecsAndExtra(extra, new Rec[] { });
    }

    var recs = await GetRecs2(log, html, videoId);
    return new RecsAndExtra(extra, recs);
}
//ytInitialPlayerResponse.responseContext.serviceTrackingParams.filter(p => p.service == "CSI")[0].params
/// <summary>
/// Loads the watch page for a video and returns its extra metadata (including whether an ad marker
/// was found in the raw page) together with its recommendations.
/// </summary>
/// <param name="videoId">Id of the video to scrape.</param>
/// <param name="log">Logger handed to the page loaders; also receives a warning when no recommendations are found.</param>
/// <returns>
/// The extra metadata and the recommendations. When the video is age restricted or the page carries an
/// "unavailable" sub-message, <c>Error</c>/<c>SubError</c> are populated and the recommendations are empty.
/// The recommendations are never null.
/// </returns>
public async Task<RecsAndExtra> GetRecsAndExtra(string videoId, ILogger log)
{
    var watchPage = await GetVideoWatchPageHtmlAsync(videoId, log);
    var (html, raw, url) = watchPage;
    var infoDic = await GetVideoInfoDicAsync(videoId, log);
    var videoItem = GetVideo(videoId, infoDic, watchPage);

    var extra = new VideoExtraStored2
    {
        VideoId = videoId,
        Updated = DateTime.UtcNow,
        ChannelId = videoItem.ChannelId,
        ChannelTitle = videoItem.ChannelTitle,
        Description = videoItem.Description,
        Duration = videoItem.Duration,
        Keywords = videoItem.Keywords,
        Title = videoItem.Title,
        UploadDate = videoItem.UploadDate.UtcDateTime,
        Statistics = videoItem.Statistics,
        Source = ScrapeSource.Web,
        Thumbnail = VideoThumbnail.FromVideoId(videoId)
    };

    // Age restriction is read from a meta tag in the page head.
    var restrictedMode = html.QueryElements("head > meta[property=\"og:restrictions:age\"]")
        .FirstOrDefault()?.GetAttribute("content")?.Value == "18+";
    if (restrictedMode)
    {
        extra.Error = RestrictedVideoError;
        extra.SubError = "Unable to find recommended video because it is age restricted and requires to log in";
    }
    else
    {
        extra.SubError = html.QueryElements("#unavailable-submessage").FirstOrDefault()?.GetInnerText();
        if (extra.SubError == "")
        {
            extra.SubError = null;
        }

        if (extra.SubError.HasValue()) // all pages have the error, but not a sub-error
        {
            extra.Error = html.QueryElements("#unavailable-message").FirstOrDefault()?.GetInnerText();
        }
    }

    if (extra.Error != null)
    {
        return new RecsAndExtra(extra, new Rec[] { });
    }

    var (recs, recEx) = Def.New(() => GetRecs2(html)).Try();
    if (recs?.Any() != true || recEx != null)
    {
        // Keep a copy of the raw page so the missing recommendations can be investigated later.
        var uri = new Uri(url);
        // Invariant culture keeps the dated folder name stable regardless of the current culture's calendar.
        var day = DateTime.UtcNow.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
        var path = StringPath.Relative(day, $"{uri.PathAndQuery}.html");
        var logUrl = LogStore.Url(path);
        await LogStore.Save(path, raw.AsStream(), log);
        log.Warning("WebScraper - Unable to find recs at ({Url}). error: {Error}", logUrl, recEx?.ToString());
    }

    // An ad is flagged only when the regex matches and its first capture group is "1".
    var match = _ytAdRegex.Match(raw);
    extra.HasAd = match.Success && match.Groups[1].Value == "1";

    // recs can be null here (see the null-conditional check above); return an empty array
    // so the result matches the error path and callers never see a null collection.
    return new RecsAndExtra(extra, recs ?? new Rec[] { });
}