Пример #1
0
 /// <summary>
 /// Creates a result that pairs the extra details of a video with its recommendations.
 /// </summary>
 /// <param name="extra">Value stored in <c>Extra</c>.</param>
 /// <param name="recs">Value stored in <c>Recs</c>.</param>
 public RecsAndExtra(VideoExtraStored2 extra, Rec[] recs) => (Extra, Recs) = (extra, recs);
Пример #2
0
        //ytInitialPlayerResponse.responseContext.serviceTrackingParams.filter(p => p.service == "CSI")[0].params
        /// <summary>
        /// Loads the watch page of a video and builds its extra details, any error shown for the video,
        /// and the recommendations found on the page.
        /// </summary>
        /// <param name="videoId">Id of the video to load.</param>
        /// <param name="log">Logger handed to the page loaders and used to warn when no recommendations are found.</param>
        /// <returns>
        /// The video details paired with its recommendations. The recommendations are an empty array when an
        /// error was detected for the video or when they could not be read from the page.
        /// </returns>
        public async Task<RecsAndExtra> GetRecsAndExtra(string videoId, ILogger log)
        {
            var watchPage = await GetVideoWatchPageHtmlAsync(videoId, log);

            var (html, raw, url) = watchPage;
            var infoDic = await GetVideoInfoDicAsync(videoId, log);

            var videoItem = GetVideo(videoId, infoDic, watchPage);

            // videoItem is treated as optional: each copied field is null when it is missing
            var extra = new VideoExtraStored2 {
                VideoId      = videoId,
                Updated      = DateTime.UtcNow,
                ChannelId    = videoItem?.ChannelId,
                ChannelTitle = videoItem?.ChannelTitle,
                Description  = videoItem?.Description,
                Duration     = videoItem?.Duration,
                Keywords     = videoItem?.Keywords,
                Title        = videoItem?.Title,
                UploadDate   = videoItem?.UploadDate,
                AddedDate    = videoItem?.AddedDate,
                Statistics   = videoItem?.Statistics,
                Source       = ScrapeSource.Web
            };

            // First source of errors: the player response object embedded in the page
            var ytInitPr = GetClientObjectFromWatchPage(html, "ytInitialPlayerResponse");

            if (ytInitPr != null && ytInitPr.Value<string>("status") != "OK")
            {
                var playerError = ytInitPr.SelectToken("playabilityStatus.errorScreen.playerErrorMessageRenderer");
                extra.Error    = playerError?.SelectToken("reason.simpleText")?.Value<string>();
                extra.SubError = (playerError?.SelectToken("subreason.simpleText") ??
                                  playerError?.SelectToken("subreason.runs[0].text"))
                                 ?.Value<string>();
            }

            // Second source: the age restriction meta tag
            if (extra.Error == null)
            {
                var restrictedMode = html.QueryElements("head > meta[property=\"og:restrictions:age\"]").FirstOrDefault()?.GetAttribute("content")?.Value == "18+";
                if (restrictedMode)
                {
                    extra.Error    = RestrictedVideoError;
                    extra.SubError = "Unable to find recommended video because it is age restricted and requires to log in";
                }
            }

            // Third source: the "unavailable" elements of the page
            if (extra.Error == null)
            {
                extra.SubError = html.QueryElements("#unavailable-submessage").FirstOrDefault()?.GetInnerText();
                if (extra.SubError == "")
                {
                    extra.SubError = null;
                }
                if (extra.SubError.HasValue()) // all pages have the error, but not a sub-error
                {
                    extra.Error = html.QueryElements("#unavailable-message").FirstOrDefault()?.GetInnerText();
                }
            }

            if (extra.Error != null)
            {
                return new RecsAndExtra(extra, new Rec[] { });
            }

            var (recs, recEx) = Def.New(() => GetRecs2(html)).Try();
            if (recs?.Any() != true || recEx != null)
            {
                // Keep a copy of the raw page so the missing recommendations can be investigated.
                // The date folder is formatted with the invariant culture so it does not depend on the machine's culture/calendar.
                var uri    = new Uri(url);
                var day    = DateTime.UtcNow.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
                var path   = StringPath.Relative(day, $"{uri.PathAndQuery}.html");
                var logUrl = LogStore.Url(path);
                await LogStore.Save(path, raw.AsStream(), log);

                log.Warning("WebScraper - Unable to find recs at ({Url}). error: {Error}", logUrl, recEx?.ToString());
            }

            // recs can be null at this point (the check above guards it with ?.); hand back an empty
            // array like the error path does instead of leaking null to callers.
            if (recs == null)
            {
                return new RecsAndExtra(extra, new Rec[] { });
            }

            return new RecsAndExtra(extra, recs);
        }
Пример #3
0
        //ytInitialPlayerResponse.responseContext.serviceTrackingParams.filter(p => p.service == "CSI")[0].params
        /// <summary>
        /// Loads the watch page of a video and builds its extra details, any error shown for the video,
        /// and the recommendations found on the page.
        /// </summary>
        /// <param name="log">Logger; it is enriched with the video id before being passed on.</param>
        /// <param name="videoId">Id of the video to load.</param>
        /// <param name="channelId">Optional channel id that takes precedence over the one read from the video.</param>
        /// <param name="channelTitle">Optional channel title that takes precedence over the one read from the video.</param>
        /// <returns>
        /// The video details paired with its recommendations. The recommendations are an empty array when an
        /// error (including "Unlisted") was detected for the video.
        /// </returns>
        public async Task<RecsAndExtra> GetRecsAndExtra(ILogger log, string videoId, string channelId = null, string channelTitle = null)
        {
            log = log.ForContext("VideoId", videoId);
            var watchPage = await GetVideoWatchPageHtmlAsync(videoId, log);

            var (html, raw, url) = watchPage;
            var infoDic = await GetVideoInfoDicAsync(videoId, log);

            var videoItem = GetVideo(videoId, infoDic, watchPage);

            var extra = new VideoExtraStored2 {
                VideoId = videoId,
                Updated = DateTime.UtcNow,
                // Some videos are listed under a channel's playlist, but when you click on the video, its channel is another one (e.g. _iYT8eg1F8s).
                // Record them with the channelId of the playlist.
                ChannelId    = channelId ?? videoItem?.ChannelId,
                ChannelTitle = channelTitle ?? videoItem?.ChannelTitle,
                Description  = videoItem?.Description,
                Duration     = videoItem?.Duration,
                Keywords     = videoItem?.Keywords,
                Title        = videoItem?.Title,
                UploadDate   = videoItem?.UploadDate,
                AddedDate    = videoItem?.AddedDate,
                Statistics   = videoItem?.Statistics,
                Source       = ScrapeSource.Web
            };

            // First source of errors: the player response object embedded in the page
            var ytInitPr = await GetClientObjectFromWatchPage(log, html, videoId, "ytInitialPlayerResponse");

            if (ytInitPr != null && ytInitPr.Value<string>("status") != "OK")
            {
                var playerError = ytInitPr.SelectToken("playabilityStatus.errorScreen.playerErrorMessageRenderer");
                extra.Error    = playerError?.SelectToken("reason.simpleText")?.Value<string>();
                extra.SubError = (playerError?.SelectToken("subreason.simpleText") ??
                                  playerError?.SelectTokens("subreason.runs[*].text").Join(""))
                                 ?.Value<string>();
            }

            // Second source: the age restriction meta tag
            if (extra.Error == null)
            {
                var restrictedMode = html.QueryElements("head > meta[property=\"og:restrictions:age\"]").FirstOrDefault()?.GetAttribute("content")?.Value == "18+";
                if (restrictedMode)
                {
                    extra.Error    = RestrictedVideoError;
                    extra.SubError = "Unable to find recommended video because it is age restricted and requires to log in";
                }
            }

            // Third source: the "unavailable" elements of the page
            if (extra.Error == null)
            {
                extra.SubError = html.QueryElements("#unavailable-submessage").FirstOrDefault()?.GetInnerText();
                if (extra.SubError == "")
                {
                    extra.SubError = null;
                }
                if (extra.SubError.HasValue()) // all pages have the error, but not a sub-error
                {
                    extra.Error = html.QueryElements("#unavailable-message").FirstOrDefault()?.GetInnerText();
                }
            }

            // Fourth source: an "Unlisted" badge in the initial data object
            if (extra.Error == null)
            {
                var ytInitialData = await GetClientObjectFromWatchPage(log, html, videoId, "ytInitialData");

                // The client object is null-checked for the player response above, so guard it here as well
                // rather than throwing a NullReferenceException when it is missing.
                var badgeLabels =
                    ytInitialData?.SelectTokens(
                        "contents.twoColumnWatchNextResults.results.results.contents[*].videoPrimaryInfoRenderer.badges[*].metadataBadgeRenderer.label");
                if (badgeLabels?.Any(b => b.Value<string>() == "Unlisted") == true)
                {
                    extra.Error = "Unlisted";
                }
            }

            if (extra.Error != null)
            {
                return new RecsAndExtra(extra, new Rec[] { });
            }

            var recs = await GetRecs2(log, html, videoId);

            return new RecsAndExtra(extra, recs);
        }
Пример #4
0
        //ytInitialPlayerResponse.responseContext.serviceTrackingParams.filter(p => p.service == "CSI")[0].params
        /// <summary>
        /// Loads the watch page of a video and builds its extra details, any error shown for the video,
        /// the recommendations found on the page and whether the page matched the ad pattern.
        /// </summary>
        /// <param name="videoId">Id of the video to load.</param>
        /// <param name="log">Logger handed to the page loaders and used to warn when no recommendations are found.</param>
        /// <returns>
        /// The video details paired with its recommendations. The recommendations are an empty array when an
        /// error was detected for the video or when they could not be read from the page.
        /// </returns>
        public async Task<RecsAndExtra> GetRecsAndExtra(string videoId, ILogger log)
        {
            var watchPage = await GetVideoWatchPageHtmlAsync(videoId, log);

            var (html, raw, url) = watchPage;
            var infoDic = await GetVideoInfoDicAsync(videoId, log);

            var videoItem = GetVideo(videoId, infoDic, watchPage);

            var extra = new VideoExtraStored2 {
                VideoId      = videoId,
                Updated      = DateTime.UtcNow,
                ChannelId    = videoItem.ChannelId,
                ChannelTitle = videoItem.ChannelTitle,
                Description  = videoItem.Description,
                Duration     = videoItem.Duration,
                Keywords     = videoItem.Keywords,
                Title        = videoItem.Title,
                UploadDate   = videoItem.UploadDate.UtcDateTime,
                Statistics   = videoItem.Statistics,
                Source       = ScrapeSource.Web,
                Thumbnail    = VideoThumbnail.FromVideoId(videoId)
            };

            var restrictedMode = html.QueryElements("head > meta[property=\"og:restrictions:age\"]").FirstOrDefault()?.GetAttribute("content")?.Value == "18+";

            if (restrictedMode)
            {
                extra.Error    = RestrictedVideoError;
                extra.SubError = "Unable to find recommended video because it is age restricted and requires to log in";
            }
            else
            {
                extra.SubError = html.QueryElements("#unavailable-submessage").FirstOrDefault()?.GetInnerText();
                if (extra.SubError == "")
                {
                    extra.SubError = null;
                }
                if (extra.SubError.HasValue()) // all pages have the error, but not a sub-error
                {
                    extra.Error = html.QueryElements("#unavailable-message").FirstOrDefault()?.GetInnerText();
                }
            }

            if (extra.Error != null)
            {
                return new RecsAndExtra(extra, new Rec[] { });
            }

            var (recs, recEx) = Def.New(() => GetRecs2(html)).Try();
            if (recs?.Any() != true || recEx != null)
            {
                // Keep a copy of the raw page so the missing recommendations can be investigated.
                // The date folder is formatted with the invariant culture so it does not depend on the machine's culture/calendar.
                var uri    = new Uri(url);
                var day    = DateTime.UtcNow.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
                var path   = StringPath.Relative(day, $"{uri.PathAndQuery}.html");
                var logUrl = LogStore.Url(path);
                await LogStore.Save(path, raw.AsStream(), log);

                log.Warning("WebScraper - Unable to find recs at ({Url}). error: {Error}", logUrl, recEx?.ToString());
            }

            // The ad flag is the first capture group of the ad pattern matched against the raw page
            var match = _ytAdRegex.Match(raw);

            extra.HasAd = match.Success && match.Groups[1].Value == "1";

            // recs can be null at this point (the check above guards it with ?.); hand back an empty
            // array like the error path does instead of leaking null to callers.
            if (recs == null)
            {
                return new RecsAndExtra(extra, new Rec[] { });
            }

            return new RecsAndExtra(extra, recs);
        }