Example #1
0
        /// <summary>
        /// Authorizes against Google with the client secrets stored on disk, creates a
        /// <c>YouTubeService</c> from the resulting credential and hands it to
        /// <c>OutputCaptionsToDirectory</c> for two hard-coded ids (presumably channel ids).
        /// </summary>
        /// <param name="options">Not read by this method.</param>
        /// <returns>Always 0 once both calls have completed.</returns>
        private static async Task <int> GetCaptions(GetCaptionsOptions options)
        {
            const string secretsPath = "/home/pknopf/git/qanon-delta/src/QAnon.Delta.Tool/client_secrets.json";

            // The scope requested here is YoutubeForceSsl.
            // NOTE(review): an earlier comment described the upload-only scope, which is not
            // what is requested; presumably force-ssl is what caption access needs - confirm.
            var scopes = new[] { YouTubeService.Scope.YoutubeForceSsl };

            UserCredential userCredential;
            using (var secretsFile = new FileStream(secretsPath, FileMode.Open, FileAccess.Read))
            {
                var clientSecrets = GoogleClientSecrets.Load(secretsFile).Secrets;
                userCredential = await GoogleWebAuthorizationBroker.AuthorizeAsync(
                    clientSecrets,
                    scopes,
                    "user",
                    CancellationToken.None);
            }

            var initializer = new BaseClientService.Initializer
            {
                ApplicationName       = "test",
                HttpClientInitializer = userCredential
            };
            var youtube = new YouTubeService(initializer);

            await OutputCaptionsToDirectory(youtube, "UCMVTRzCXvIbdK0Y1ZxD-BlA", Path.GetFullPath("./captions/"));

            // Disabled calls, kept for reference as "id -> directory passed to Path.GetFullPath":
            //   UCQ7VgW7XgJQjDEPnOR-Q0Qw -> ./captions/
            //   UCm5CkXzGXb-A2XX0nuTctMQ -> ./captions
            //   UCSio3E7kYvPeHKhfuYZWriA -> ./captions/
            //   UCpwXjOAwWDuWlmA2gTjjBwg -> ./captions/
            //   UCQ1h0i1ksKlvPI7zI6t9XoA -> ./captions/DanielLee
            //   UC98Zwfvjq12M1oi99Yqd78w -> ./captions/Tracy
            //   UCRVpj-n5kyVfDNtcuwN6KkA -> ./captions/BillSmith
            //   UC8VYbOH2Z_swlgSSQ-RwaUg -> ./captions/CitizensInvestigativeReport
            //   UCAyrKoW31y5UcsRjh2ItvxQ -> ./captions/IPOT
            //   UCAHCehFYe02Ihviho8D_ZcQ -> ./captions/TruthandArtTV

            await OutputCaptionsToDirectory(
                youtube,
                "UCB1o7_gbFp2PLsamWxFenBg",
                Path.GetFullPath("./captions/X22Report"));

            return 0;
        }
        /// <summary>
        /// Downloads the first caption track of every video listed in <c>index.json</c> and
        /// writes each one as <c>captions/{videoId}.json</c> beneath the index directory.
        /// Videos whose caption file already exists are skipped.
        /// </summary>
        /// <param name="options">
        /// Command options. <c>Init()</c> is invoked on it, and its <c>IndexDirectory</c> is
        /// where <c>index.json</c> is read from and the <c>captions</c> folder is created.
        /// </param>
        /// <returns>
        /// 1 when <c>index.json</c> is missing or does not deserialize to an index with videos;
        /// otherwise 0. Failures for individual videos are logged and do not change the result.
        /// </returns>
        private static async Task <int> GetCaptions(GetCaptionsOptions options)
        {
            options.Init();
            var indexPath = Path.Combine(options.IndexDirectory, "index.json");

            if (!File.Exists(indexPath))
            {
                Log.Logger.Error("The index.json file doesn't exist. Run \"index\" first.");
                return(1);
            }

            var index = JsonConvert.DeserializeObject <YouTubeDumpChannelIndex>(File.ReadAllText(indexPath));

            // An empty or literal "null" index.json deserializes to null; report that instead of
            // failing with a NullReferenceException in the loop below.
            if (index?.Videos == null)
            {
                Log.Logger.Error("The index.json file is empty or invalid. Run \"index\" again.");
                return(1);
            }

            var captionDirectory = Path.Combine(options.IndexDirectory, "captions");

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(captionDirectory);

            foreach (var video in index.Videos)
            {
                Log.Logger.Information("Downloading captions for {videoId}...", video.Id);

                var captionPath = Path.Combine(captionDirectory, $"{video.Id}.json");
                if (File.Exists(captionPath))
                {
                    Log.Logger.Information("Already downloaded, skipping...");
                    continue;
                }

                try
                {
                    var getVideoResponse =
                        GetRequestBody($"https://www.youtube.com/get_video_info?html5=1&video_id={video.Id}");

                    // The response body is treated as a URL-encoded query string. ParseQueryString
                    // decodes it and tolerates pairs without '=' and repeated keys.
                    var keys = HttpUtility.ParseQueryString(getVideoResponse);

                    var playerResponseJson = keys["player_response"];
                    if (string.IsNullOrEmpty(playerResponseJson))
                    {
                        Log.Logger.Error(
                            "Couldn't get timed text for {videoId}. The response has no player_response.",
                            video.Id);
                        continue;
                    }

                    var playerResponse = JsonConvert.DeserializeObject <GetVideoPlayerObject>(playerResponseJson);

                    // Every link is null-conditional so that a missing track list ends up in the
                    // "No caption present." branch rather than throwing from FirstOrDefault.
                    var caption = playerResponse?.Captions?.PlayerCaptionsTracklistRenderer?.CaptionTracks
                                  ?.FirstOrDefault();

                    if (caption == null)
                    {
                        Log.Logger.Warning("No caption present.");
                        continue;
                    }

                    var captionXml = GetRequestBody(caption.BaseUrl);

                    var xmlDoc = new XmlDocument();
                    xmlDoc.LoadXml(captionXml);

                    // The attribute values are machine-formatted numbers, so parse them with the
                    // invariant culture instead of the culture of the current thread.
                    var invariant = System.Globalization.CultureInfo.InvariantCulture;

                    var captions = new List <YouTubeDumpCaption>();
                    foreach (XmlElement item in xmlDoc.GetElementsByTagName("text"))
                    {
                        var innerText = item.InnerText;
                        if (!string.IsNullOrEmpty(innerText))
                        {
                            innerText = HttpUtility.HtmlDecode(innerText);
                        }

                        // Remove anything that looks like a markup tag after decoding.
                        innerText = Regex.Replace(innerText, @"<[^>]*>", "");

                        captions.Add(new YouTubeDumpCaption
                        {
                            Start    = double.Parse(item.GetAttribute("start"), invariant),
                            Duration = double.Parse(item.GetAttribute("dur"), invariant),
                            Value    = innerText
                        });
                    }

                    File.WriteAllText(captionPath, JsonConvert.SerializeObject(captions, Formatting.Indented));

                    Log.Logger.Information("Saved!");
                }
                catch (Exception ex)
                {
                    // Best effort per video: log and move on. The exception text is passed as a
                    // property so braces in it are never interpreted as template placeholders.
                    Log.Logger.Error(ex, "Couldn't get timed text for {videoId}. {errorMessage:l}", video.Id, ex.Message);
                }
            }

            return(0);
        }