Example #1
0
        /// <summary>
        /// Reads the recommendation list for a video out of the <c>ytInitialData</c> client object of its watch page.
        /// </summary>
        /// <param name="log">Logger forwarded to <c>GetClientObjectFromWatchPage</c>.</param>
        /// <param name="html">Watch page document forwarded to <c>GetClientObjectFromWatchPage</c>.</param>
        /// <param name="videoId">Id of the watched video, forwarded to <c>GetClientObjectFromWatchPage</c>.</param>
        /// <returns>
        /// One <c>Rec</c> per compact video renderer found in the secondary results. <c>Rank</c> is the 1-based
        /// position among the renderers that were found (entries without a renderer are skipped before ranking).
        /// </returns>
        /// <exception cref="InvalidOperationException">The secondary results path is missing from the client object.</exception>
        public async Task <Rec[]> GetRecs2(ILogger log, HtmlDocument html, string videoId)
        {
            const string resultsPath = "$.contents.twoColumnWatchNextResults.secondaryResults.secondaryResults.results";

            var initialData = await GetClientObjectFromWatchPage(log, html, videoId, "ytInitialData");

            var resultItems = (JArray)initialData.SelectToken(resultsPath);
            if (resultItems == null)
            {
                throw new InvalidOperationException($"can't find {resultsPath}");
            }

            // Maps one compactVideoRenderer token to a Rec; position is the zero-based index among found renderers.
            Rec ToRec(JToken renderer, int position)
            {
                // The view count is either a plain simpleText or the first run of a runs array.
                var viewCountToken = renderer.SelectToken("viewCountText.simpleText")
                                     ?? renderer.SelectToken("viewCountText.runs[0].text");
                var viewCountText = viewCountToken?.Value<string>();

                return new Rec
                {
                    ToVideoId      = renderer.Value<string>("videoId"),
                    ToVideoTitle   = renderer["title"]?.Value<string>("simpleText")
                                     ?? renderer.SelectToken("title.runs[0].text")?.Value<string>(),
                    // Fall back to the byline's browse endpoint when the renderer has no direct channelId.
                    ToChannelId    = renderer.Value<string>("channelId")
                                     ?? renderer.SelectToken("longBylineText.runs[0].navigationEndpoint.browseEndpoint.browseId")?.Value<string>(),
                    ToChannelTitle = renderer.SelectToken("longBylineText.runs[0].text")?.Value<string>(),
                    Rank           = position + 1,
                    Source         = ScrapeSource.Web,
                    ToViews        = ChromeScraper.ParseViews(viewCountText),
                    ToUploadDate   = ChromeScraper.ParseAgo(DateTime.UtcNow, renderer.SelectToken("publishedTimeText.simpleText")?.Value<string>()),
                    ForYou         = ChromeScraper.ParseForYou(viewCountText)
                };
            }

            // A result is either an autoplay wrapper around a compactVideoRenderer or the renderer itself.
            var renderers = resultItems
                            .OfType<JObject>()
                            .Select(item => item.SelectToken("compactAutoplayRenderer.contents[0].compactVideoRenderer")
                                            ?? item.SelectToken("compactVideoRenderer"))
                            .Where(renderer => renderer != null);

            return renderers.Select(ToRec).ToArray();
        }
Example #2
0
        /// <summary>
        /// Extracts the recommended videos from a watch page by parsing the <c>ytInitialData</c> JSON
        /// found in the text of its <c>script</c> elements.
        /// </summary>
        /// <param name="html">Watch page document whose script elements are searched.</param>
        /// <returns>
        /// One <c>Rec</c> per compact video renderer found in the secondary results. <c>Rank</c> is the 1-based
        /// position among the renderers that were found (entries without a renderer are skipped before ranking).
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// No script assigns <c>ytInitialData</c>, or the secondary results path is missing from it.
        /// </exception>
        public Rec[] GetRecs2(HtmlDocument html)
        {
            var scripts = html.QueryElements("script")
                          .SelectMany(s => s.Children.OfType<HtmlText>()).Select(h => h.Content);

            // Pick the first ytInitialData match directly instead of building a dictionary of every window
            // object: ToDictionary throws ArgumentException as soon as any object name is matched twice,
            // even when that object is unrelated to the data needed here.
            var initData = scripts
                           .SelectMany(t => WindowObjectsRe.Matches(t))
                           .Where(m => m.Groups["name"].Value == "ytInitialData")
                           .Select(m => m.Groups["json"].Value)
                           .FirstOrDefault()
                           ?? throw new InvalidOperationException("can't find ytInitialData data script");

            var jInit      = JObject.Parse(initData);
            var resultsSel = "$.contents.twoColumnWatchNextResults.secondaryResults.secondaryResults.results";
            var jResults   = (JArray)jInit.SelectToken(resultsSel) ?? throw new InvalidOperationException($"can't find {resultsSel}");
            var recs       = jResults
                             .OfType<JObject>()
                             // A result is either an autoplay wrapper around a compactVideoRenderer or the renderer itself.
                             .Select(j => j.SelectToken("compactAutoplayRenderer.contents[0].compactVideoRenderer") ?? j.SelectToken("compactVideoRenderer"))
                             .Where(j => j != null)
                             .Select((j, i) => {
                // The view count is either a plain simpleText or the first run of a runs array.
                var viewText = (j.SelectToken("viewCountText.simpleText") ?? j.SelectToken("viewCountText.runs[0].text"))?.Value<string>();
                return new Rec {
                    ToVideoId = j.Value<string>("videoId"),
                    ToVideoTitle = j["title"]?.Value<string>("simpleText") ?? j.SelectToken("title.runs[0].text")?.Value<string>(),
                    // Fall back to the byline's browse endpoint when the renderer has no direct channelId,
                    // matching the async GetRecs2 overload.
                    ToChannelId = j.Value<string>("channelId") ?? j.SelectToken("longBylineText.runs[0].navigationEndpoint.browseEndpoint.browseId")?.Value<string>(),
                    ToChannelTitle = j.SelectToken("longBylineText.runs[0].text")?.Value<string>(),
                    Rank = i + 1,
                    Source = ScrapeSource.Web,
                    ToViews = ChromeScraper.ParseViews(viewText),
                    ToUploadDate = ChromeScraper.ParseAgo(DateTime.UtcNow, j.SelectToken("publishedTimeText.simpleText")?.Value<string>()),
                    ForYou = ChromeScraper.ParseForYou(viewText)
                };
            }).ToArray();

            return recs;
        }