Пример #1
0
        /// <summary>
        /// Downloads the English ("en") closed-caption track of a video and writes it
        /// as a SubRip (.srt) file into the given directory.
        /// </summary>
        /// <param name="id">Video identifier handed to the caption client.</param>
        /// <param name="Path">Directory in which the .srt file is created.</param>
        /// <param name="Name">Base file name; it is passed through <c>ValidNameForWindows()</c> before use.</param>
        /// <returns>A status message that contains the full path of the written file.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown by <c>First</c> when no caption track with language code "en" exists.
        /// </exception>
        private async Task <string> GenrateSubTitleAsync(string id, string Path, string Name)
        {
            // The parameter named Path hides System.IO.Path, hence the full qualification.
            // Path.Combine inserts exactly one separator between directory and file name.
            string FullPath = System.IO.Path.Combine(Path, $"{Name.ValidNameForWindows()}.srt");

            var trackInfos = await client.GetVideoClosedCaptionTrackInfosAsync(id);

            var trackInfo = trackInfos.First(t => t.Language.Code == "en");
            var track     = await client.GetClosedCaptionTrackAsync(trackInfo);

            // SubRip timestamps look like 00:01:02,345 (comma before the milliseconds).
            string ToSrtTime(TimeSpan value) => value.ToString(@"hh\:mm\:ss\,fff");

            // The file is opened only after the download succeeded, so a failed
            // request does not leave an empty .srt behind.
            using StreamWriter file = new StreamWriter(FullPath);

            int line = 1;
            foreach (var item in track.Captions)
            {
                TimeSpan end = item.Offset.Add(item.Duration);

                // One SubRip cue: running number, time range, text, blank separator line.
                file.WriteLine(line);
                file.WriteLine($"{ToSrtTime(item.Offset)} --> {ToSrtTime(end)}");
                file.WriteLine(item.Text);
                file.WriteLine();
                line++;
            }

            // No catch block: exceptions propagate with their original stack trace.
            return($"subtitle Write success to file : {FullPath}");
        }
Пример #2
0
        /// <summary>
        /// Fetches the closed-caption track found at index <paramref name="n"/> in the
        /// list of caption track infos reported for a video.
        /// </summary>
        /// <param name="video">Video identifier handed to the client.</param>
        /// <param name="n">Zero-based index into the returned track info list.</param>
        /// <returns>The caption track that belongs to the selected track info.</returns>
        public async Task <ClosedCaptionTrack> Subtitles(string video, int n)
        {
            var youtube    = new YoutubeClient();
            var trackInfos = await youtube.GetVideoClosedCaptionTrackInfosAsync(video);

            // The index is used as given; an out-of-range value surfaces from the indexer.
            var selected = trackInfos[n];
            var track    = await youtube.GetClosedCaptionTrackAsync(selected);

            return track;
        }
Пример #3
0
        /// <summary>
        /// Reads a video URL from <c>this.Data</c> and collects YouTube details for it
        /// (video metadata, media stream infos, caption track infos) into a nested JSON object.
        /// </summary>
        /// <param name="urlField">Name of the field that holds the video URL.</param>
        /// <param name="detailField">Name of the field that receives the detail object.</param>
        /// <param name="videoField">Sub-field for the video metadata; skipped when null or empty.</param>
        /// <param name="streamField">Sub-field for the media stream infos; skipped when null or empty.</param>
        /// <param name="captionField">Sub-field for the caption track infos; skipped when null or empty.</param>
        /// <remarks>
        /// Each request is awaited synchronously via <c>.Result</c>.
        /// NOTE(review): the enriched JObject is a local that is neither returned nor written
        /// back inside this method - confirm whether that is intended.
        /// </remarks>
        public void RunYoutubeDetailPipeline(string urlField, string detailField, string videoField, string streamField, string captionField)
        {
            JObject record   = JObject.FromObject(this.Data);
            string  videoUrl = record[urlField].ToString();

            var youtube = new YoutubeClient();
            var videoId = YoutubeClient.ParseVideoId(videoUrl);

            // Start with an empty detail container; the sections below fill it in.
            record[detailField] = new JObject();
            var details = record[detailField];

            bool wantsVideo    = !string.IsNullOrEmpty(videoField);
            bool wantsStreams  = !string.IsNullOrEmpty(streamField);
            bool wantsCaptions = !string.IsNullOrEmpty(captionField);

            if (wantsVideo)
            {
                var metadata     = youtube.GetVideoAsync(videoId).Result;
                var metadataJson = JsonConvert.SerializeObject(metadata, new StringEnumConverter());
                details[videoField] = JObject.Parse(metadataJson);
            }

            if (wantsStreams)
            {
                var streams     = youtube.GetVideoMediaStreamInfosAsync(videoId).Result;
                var streamsJson = JsonConvert.SerializeObject(streams, new StringEnumConverter());
                details[streamField] = JObject.Parse(streamsJson);
            }

            if (wantsCaptions)
            {
                // Caption track infos serialize to a JSON array, hence JArray here.
                var trackInfos     = youtube.GetVideoClosedCaptionTrackInfosAsync(videoId).Result;
                var trackInfosJson = JsonConvert.SerializeObject(trackInfos, new StringEnumConverter());
                details[captionField] = JArray.Parse(trackInfosJson);
            }
        }
Пример #4
0
        /*public bool GetVideoThumbnail(string path, string saveThumbnailTo, int seconds)
         * {
         *  string parameters = $"-ss {seconds} -i {path} -f image2 -vframes 1 -y {saveThumbnailTo}";
         *
         *  var processInfo = new ProcessStartInfo();
         *  processInfo.FileName = pathToConvertor;
         *  processInfo.Arguments = parameters;
         *  processInfo.CreateNoWindow = true;
         *  processInfo.UseShellExecute = false;
         *
         *  File.Delete(saveThumbnailTo);
         *
         *  using (var process = new Process())
         *  {
         *      process.StartInfo = processInfo;
         *      process.Start();
         *      process.WaitForExit();
         *  }
         *
         *  return File.Exists(saveThumbnailTo);
         * }*/

        /// <summary>
        /// Ad-hoc check: looks for an upload of a fixed channel whose title contains
        /// "listening practice test" and prints its English captions to the console.
        /// </summary>
        /// <remarks>
        /// The work is chained with <c>ContinueWith</c>, so this method returns before any
        /// request has completed and nothing is reported back to the caller.
        /// </remarks>
        private static void Test()
        {
            var client = new YoutubeClient();

            client.GetChannelUploadsAsync("UCZqh6VE-OYFz2RCWDbZQOqw").ContinueWith(task =>
            {
                var video = task.Result.FirstOrDefault(v => v.Title.Contains("listening practice test"));

                // FirstOrDefault returns null when no title matches; stop instead of
                // dereferencing null inside a continuation nobody observes.
                if (video == null)
                {
                    return;
                }

                client.GetVideoClosedCaptionTrackInfosAsync(video.Id).ContinueWith(task1 =>
                {
                    var captionTrackInfo = task1.Result.FirstOrDefault(info => info.Language.Code == "en");

                    // Same guard: the video may have no English caption track.
                    if (captionTrackInfo == null)
                    {
                        return;
                    }

                    client.GetClosedCaptionTrackAsync(captionTrackInfo).ContinueWith(task2 =>
                    {
                        var captionTrack = task2.Result;
                        foreach (var captionTrackCaption in captionTrack.Captions)
                        {
                            Console.WriteLine($"{captionTrackCaption.Offset}: {captionTrackCaption.Text}");
                        }
                    });
                });
            });
        }
Пример #5
0
        /// <summary>
        /// Imports the uploads of a fixed channel: for every video that is not stored yet and
        /// has at least one caption track, a <c>SubVideo</c> record is inserted together with
        /// <c>VideoText</c> rows for every window of one, two and three consecutive captions.
        /// </summary>
        /// <remarks>
        /// Only the first caption track and the first muxed stream are used.
        /// Videos without caption tracks or without muxed streams are skipped.
        /// </remarks>
        static async Task Load()
        {
            var client = new YoutubeClient();
            var list   = await client.GetChannelUploadsAsync("UC4V3oCikXeSqYQr0hBMARwg");

            foreach (var video in list)
            {
                var tubeUrl = video.GetUrl();

                // Already imported on an earlier run.
                if (await(await videoCollection.FindAsync(x => x.YoutubeUrl == tubeUrl)).AnyAsync())
                {
                    continue;
                }

                var info = await client.GetVideoClosedCaptionTrackInfosAsync(video.Id);

                if (info.Count <= 0)
                {
                    continue;
                }

                var minfo = await client.GetVideoMediaStreamInfosAsync(video.Id);

                // Without a muxed stream there is no source URL to store; indexing [0]
                // would throw and abort the import of all remaining videos.
                if (!minfo.Muxed.Any())
                {
                    continue;
                }

                var subVideo = new SubVideo
                {
                    Title      = video.Title,
                    YoutubeUrl = tubeUrl,
                    Thumbnail  = video.Thumbnails.MediumResUrl,
                    SourceUrl  = minfo.Muxed[0].Url,
                    Published  = video.UploadDate.DateTime
                };

                await videoCollection.InsertOneAsync(subVideo);

                var captions = await client.GetClosedCaptionTrackAsync(info.First());
                var source   = captions.Captions;

                var mergedCaptions = new List <VideoText>();

                for (int i = 0; i < source.Count; i++)
                {
                    ClosedCaption cap = null;

                    // Grow the window one caption at a time. The bound i + size <= Count keeps
                    // windows complete, so the tail of the list no longer yields the same
                    // truncated window (and therefore duplicate rows) several times.
                    for (int size = 1; size < 4 && i + size <= source.Count; size++)
                    {
                        var next = source[i + size - 1];

                        cap = cap == null
                            ? next
                            : new ClosedCaption($"{cap.Text} {next.Text}", cap.Offset, cap.Duration + next.Duration);

                        mergedCaptions.Add(new VideoText
                        {
                            VideoId  = subVideo.Id,
                            OffSet   = cap.Offset,
                            Text     = cap.Text,
                            Duration = cap.Duration
                        });
                    }
                }

                // NOTE(review): textCollection looks like a MongoDB collection, whose bulk insert
                // rejects an empty batch - confirm. Skipping the call is safe either way.
                if (mergedCaptions.Count > 0)
                {
                    await textCollection.InsertManyAsync(mergedCaptions);
                }
            }
        }
Пример #6
0
        /// <summary>
        /// Verifies that requesting the caption track infos of a video yields a non-null result.
        /// </summary>
        /// <param name="videoId">Identifier of the video under test.</param>
        public async Task YoutubeClient_GetVideoClosedCaptionTrackInfosAsync_Test(string videoId)
        {
            // Arrange
            var youtube = new YoutubeClient();

            // Act
            var result = await youtube.GetVideoClosedCaptionTrackInfosAsync(videoId);

            // Assert
            Assert.That(result, Is.Not.Null);
        }
Пример #7
0
        /// <summary>
        /// Loads the captions of the first caption track whose language code equals
        /// <paramref name="lang"/>.
        /// </summary>
        /// <param name="videoId">Identifier of the video.</param>
        /// <param name="lang">Language code to look for, compared with <c>==</c>.</param>
        /// <returns>The captions of the selected track.</returns>
        private async Task <IReadOnlyList <ClosedCaption> > LoadCaptions(string videoId, string lang)
        {
            var youtube    = new YoutubeClient();
            var trackInfos = await youtube.GetVideoClosedCaptionTrackInfosAsync(videoId);

            // When nothing matches, FirstOrDefault yields the default value and that
            // value is handed to the client unchanged.
            var match = trackInfos.FirstOrDefault(candidate => candidate.Language.Code == lang);

            var track = await youtube.GetClosedCaptionTrackAsync(match);
            return track.Captions;
        }
Пример #8
0
        /// <summary>
        /// Downloads the English ("en") caption track of a video and reads its captions.
        /// The captions are not returned or stored.
        /// </summary>
        /// <param name="id">Identifier of the video.</param>
        private async Task GetClosedCaptionTrackAsync(string id)
        {
            var youtube = new YoutubeClient();

            var available = await youtube.GetVideoClosedCaptionTrackInfosAsync(id);

            // First throws when no English track is present.
            var english    = available.First(info => info.Language.Code == "en");
            var downloaded = await youtube.GetClosedCaptionTrackAsync(english);

            // Kept as a local only; nothing consumes it afterwards.
            var captions = downloaded.Captions;
        }
        /// <summary>
        /// Verifies that a video exposes at least one caption track info and that every
        /// listed track can be downloaded to a non-null result.
        /// </summary>
        /// <param name="videoId">Identifier of the video under test.</param>
        public async Task YoutubeClient_GetClosedCaptionTrackAsync_Test(string videoId)
        {
            var youtube = new YoutubeClient();

            var infos = await youtube.GetVideoClosedCaptionTrackInfosAsync(videoId);
            Assert.That(infos, Is.Not.Empty);

            // Tracks are downloaded one after another, in listing order.
            foreach (var info in infos)
            {
                var downloaded = await youtube.GetClosedCaptionTrackAsync(info);
                Assert.That(downloaded, Is.Not.Null);
            }
        }
Пример #10
0
        /// <summary>
        /// Download closed captions for a youtube video.
        /// The English ("en") track is preferred; otherwise the first listed track is used.
        /// </summary>
        /// <param name="videoUrl">
        /// Youtube url (e.g. https://www.youtube.com/watch?v=VIDEO_ID).
        /// When null, the <c>youtubeUrl</c> member is used instead.
        /// </param>
        /// <returns>
        /// A ClosedCaptionTrack object, or null when the video has no caption tracks.
        /// </returns>
        public async Task <ClosedCaptionTrack> DownloadClosedCaptions(string videoUrl = null)
        {
            videoUrl = videoUrl ?? youtubeUrl;
            var videoId    = GetVideoId(videoUrl);
            var trackInfos = await youtubeClient.GetVideoClosedCaptionTrackInfosAsync(videoId);

            // "trackInfos?.Count == 0" is false for a null list, which would then fail in
            // FirstOrDefault below; treat a null list and an empty list the same way.
            if (trackInfos == null || trackInfos.Count == 0)
            {
                return(null);
            }

            var trackInfo = trackInfos.FirstOrDefault(t => t.Language.Code == "en") ?? trackInfos.First();

            return(await youtubeClient.GetClosedCaptionTrackAsync(trackInfo));
        }
Пример #11
0
        /// <summary>
        /// Verifies that downloading the first caption track of a video to a file
        /// produces an existing, non-empty file.
        /// </summary>
        /// <param name="videoId">Identifier of the video under test.</param>
        public async Task YoutubeClient_DownloadClosedCaptionTrackAsync_Test(string videoId)
        {
            // Arrange
            var youtube = new YoutubeClient();
            var infos   = await youtube.GetVideoClosedCaptionTrackInfosAsync(videoId);

            var firstInfo  = infos.First();
            var targetPath = Path.Combine(_tempDirPath, Guid.NewGuid().ToString());

            // Act: the temp directory is created right before the download writes into it.
            Directory.CreateDirectory(_tempDirPath);
            await youtube.DownloadClosedCaptionTrackAsync(firstInfo, targetPath);

            // Assert
            var written = new FileInfo(targetPath);
            Assert.That(written.Exists, Is.True);
            Assert.That(written.Length, Is.GreaterThan(0));
        }
Пример #12
0
        /// <summary>
        /// Fetches the English ("en") captions of a video, joins the caption texts with
        /// newlines, saves the text to the store and returns it.
        /// </summary>
        /// <param name="channelId">Channel id; used as a segment of the store path.</param>
        /// <param name="videoId">Video id; used for the lookup and as the stored file name.</param>
        /// <param name="log">Logger that receives a warning for every failure.</param>
        /// <returns>
        /// The joined caption text, or null when the track list or the track could not be
        /// loaded or when no English track exists. A failed save is only logged.
        /// </returns>
        public async Task <string> GetAndUpdateVideoCaptions(string channelId, string videoId, ILogger log)
        {
            IReadOnlyList <ClosedCaptionTrackInfo> available;

            try {
                available = await ytScaper.GetVideoClosedCaptionTrackInfosAsync(videoId);
            }
            catch (Exception listError) {
                log.Warning(listError, "Unable to get captions for {VideoID}: {Error}", videoId, listError.Message);
                return null;
            }

            var english = available.FirstOrDefault(info => info.Language.Code == "en");
            if (english == null)
            {
                return null;
            }

            ClosedCaptionTrack downloaded;

            try {
                // Only HttpRequestException is retried; anything else goes to the catch below.
                var retryPolicy = Policy.Handle <HttpRequestException>().RetryWithBackoff();
                downloaded = await retryPolicy.ExecuteAsync(() => ytScaper.GetClosedCaptionTrackAsync(english));
            }
            catch (Exception trackError) {
                log.Warning(trackError, "Unable to get captions for {VideoID}: {Error}", videoId, trackError.Message);
                return null;
            }

            var joined = downloaded.Captions.Select(caption => caption.Text).Join("\n");
            if (joined == null)
            {
                return joined;
            }

            var storePath = StringPath.Relative("VideoCaptions", channelId, $"{videoId}.txt");
            try {
                await Store.Save(storePath, joined.AsStream());
            }
            catch (Exception saveError) {
                // Saving is best effort: the text is still returned to the caller.
                log.Warning(saveError, "Error when saving captions {Path}", storePath);
            }

            return joined;
        }
        /// <summary>
        /// Download closed captions for a youtube video.
        /// The English ("en") track is preferred; otherwise the first listed track is used.
        /// </summary>
        /// <param name="videoUrl">
        /// Youtube url (e.g. https://www.youtube.com/watch?v=VIDEO_ID).
        /// When null, the <c>youtubeUrl</c> member is used instead.
        /// </param>
        /// <returns>
        /// A ClosedCaptionTrack object, or null when the video has no caption tracks.
        /// </returns>
        /// <remarks>
        /// When <c>overrideURLWithInputField</c> is set and the input field is not empty,
        /// <c>youtubeUrl</c> is first overwritten with the input field text.
        /// </remarks>
        public async Task <ClosedCaptionTrack> DownloadClosedCaptions(string videoUrl = null)
        {
            if (overrideURLWithInputField && !string.IsNullOrEmpty(m_inputField.text))
            {
                youtubeUrl = m_inputField.text;
            }

            videoUrl = videoUrl ?? youtubeUrl;
            var videoId    = GetVideoId(videoUrl);
            var trackInfos = await youtubeClient.GetVideoClosedCaptionTrackInfosAsync(videoId);

            // "trackInfos?.Count == 0" is false for a null list, which would then fail in
            // FirstOrDefault below; treat a null list and an empty list the same way.
            if (trackInfos == null || trackInfos.Count == 0)
            {
                return(null);
            }

            var trackInfo = trackInfos.FirstOrDefault(t => t.Language.Code == "en") ?? trackInfos.First();

            return(await youtubeClient.GetClosedCaptionTrackAsync(trackInfo));
        }
        /// <summary>
        /// Registers a pipeline step that reads a video URL from each item and attaches
        /// YouTube details (video metadata, media stream infos, caption track infos) to it.
        /// </summary>
        /// <param name="urlField">Name of the field that holds the video URL.</param>
        /// <param name="detailField">Name of the field that receives the detail object.</param>
        /// <param name="videoField">Sub-field for the video metadata; skipped when null or empty.</param>
        /// <param name="streamField">Sub-field for the media stream infos; skipped when null or empty.</param>
        /// <param name="captionField">Sub-field for the caption track infos; skipped when null or empty.</param>
        /// <remarks>Each request inside the step is awaited synchronously via <c>.Result</c>.</remarks>
        public void AddYoutubeDetailPipeline(string urlField, string detailField, string videoField, string streamField, string captionField)
        {
            this.AddPipeline(it =>
            {
                JObject record   = JObject.FromObject(it);
                string  videoUrl = record[urlField].ToString();

                var youtube = new YoutubeClient();
                var videoId = YoutubeClient.ParseVideoId(videoUrl);

                // Start with an empty detail container; the sections below fill it in.
                record[detailField] = new JObject();
                var details = record[detailField];

                bool wantsVideo    = !string.IsNullOrEmpty(videoField);
                bool wantsStreams  = !string.IsNullOrEmpty(streamField);
                bool wantsCaptions = !string.IsNullOrEmpty(captionField);

                if (wantsVideo)
                {
                    var metadata     = youtube.GetVideoAsync(videoId).Result;
                    var metadataJson = JsonConvert.SerializeObject(metadata, new StringEnumConverter());
                    details[videoField] = JObject.Parse(metadataJson);
                }

                if (wantsStreams)
                {
                    var streams     = youtube.GetVideoMediaStreamInfosAsync(videoId).Result;
                    var streamsJson = JsonConvert.SerializeObject(streams, new StringEnumConverter());
                    details[streamField] = JObject.Parse(streamsJson);
                }

                if (wantsCaptions)
                {
                    // Caption track infos serialize to a JSON array, hence JArray here.
                    var trackInfos     = youtube.GetVideoClosedCaptionTrackInfosAsync(videoId).Result;
                    var trackInfosJson = JsonConvert.SerializeObject(trackInfos, new StringEnumConverter());
                    details[captionField] = JArray.Parse(trackInfosJson);
                }

                // Disabled experiment kept from the original: download of the largest muxed stream.
                //var streamInfo = (YoutubeExplode.Models.MediaStreams.MuxedStreamInfo)streamInfoSet.Muxed.OrderByDescending(o => o.Size).FirstOrDefault();

                //var tmp = $"{Path.GetTempFileName()}.{streamInfo.Container.ToString().ToLower().Trim()}";
                //var tas = client.DownloadMediaStreamAsync(streamInfo, tmp);
                //tas.Wait();

                return(record);
            });
        }
Пример #15
0
        /// <summary>
        /// Builds a <c>SubtitledVideo</c> for a video: metadata, the URL of the first muxed
        /// stream, the plain caption text and an HTML rendering of the captions.
        /// </summary>
        /// <param name="videoId">Identifier of the video.</param>
        /// <returns>
        /// The populated record. <c>TextBody</c> holds all caption texts joined by spaces;
        /// <c>HtmlBody</c> holds one clickable span per caption and is empty when the track
        /// contains no captions.
        /// </returns>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown by <c>First</c> when the video has no caption track.
        /// </exception>
        public async Task <SubtitledVideo> GetSubtitledVideoAsync(string videoId)
        {
            var client = new YoutubeClient();

            var video = await client.GetVideoAsync(videoId);

            var captionTrackInfos = await client.GetVideoClosedCaptionTrackInfosAsync(videoId);

            var mediaStreamInfoSet = await client.GetVideoMediaStreamInfosAsync(videoId);

            var captions = await client.GetClosedCaptionTrackAsync(captionTrackInfos.First());
            var lines    = captions.Captions;

            var subVideo = new SubtitledVideo
            {
                Title          = video.Title,
                YoutubeUrl     = video.GetUrl(),
                YoutubeId      = videoId,
                Thumbnail      = video.Thumbnails.MediumResUrl,
                VideoSourceUrl = mediaStreamInfoSet.Muxed[0].Url,
                Published      = video.UploadDate.DateTime,
                TextBody       = string.Join(" ", lines.Select(x => x.Text))
            };

            // A StringBuilder avoids re-copying the whole markup for every caption, and
            // looping from index 0 means an empty track gives an empty body, not an exception.
            var html         = new System.Text.StringBuilder();
            var pauseToBreak = TimeSpan.FromMilliseconds(150);

            for (int i = 0; i < lines.Count; i++)
            {
                var current = lines[i];

                if (i == 0)
                {
                    // The first caption always seeks to the start of the video.
                    html.Append($"<span onclick=\"goto(0)\">{current.Text}</span>");
                    continue;
                }

                // Silence between the end of the previous caption and the start of this one.
                var previous = lines[i - 1];
                var diff     = current.Offset - (previous.Offset + previous.Duration);

                if (diff >= pauseToBreak)
                {
                    // A long enough pause ends the sentence and starts a new line.
                    html.Append(".</br>");
                }

                html.Append($"<span onclick=\"goto({(int)current.Offset.TotalSeconds})\">{current.Text}</span>");
            }

            subVideo.HtmlBody = html.ToString();

            return(subVideo);
        }
Пример #16
0
        /// <summary>
        /// Verifies that requesting the caption track infos of an unavailable video
        /// raises <c>VideoUnavailableException</c>.
        /// </summary>
        /// <param name="videoId">Identifier of a video expected to be unavailable.</param>
        public void YoutubeClient_GetVideoClosedCaptionTrackInfosAsync_Unavailable_Test(string videoId)
        {
            var youtube = new YoutubeClient();

            // The request runs only when the assertion invokes it.
            Task RequestTrackInfos() => youtube.GetVideoClosedCaptionTrackInfosAsync(videoId);

            Assert.ThrowsAsync <VideoUnavailableException>(RequestTrackInfos);
        }
Пример #17
0
        /// <summary>
        /// Downloads the English captions of the top videos of every channel listed in the
        /// channel data JSON and stores them as one gzip-compressed JSON file per video at
        /// ../Data/Captions/{channel}/{videoId}.jsonl.gz.
        /// </summary>
        /// <param name="args">Command line arguments; not used.</param>
        /// <remarks>
        /// Videos whose output file already exists are skipped before any request is made.
        /// When captions cannot be loaded, a file is still written with "Captions" set to null.
        /// </remarks>
        static async Task Main(string[] args)
        {
            var client = new YoutubeClient();

            // The full json with all relevant data is loaded. At this point in the pipeline it contains
            // the information about each channel plus the ids of the top videos from each channel.
            using (StreamReader file = File.OpenText("../../output/unlabeled_data/U_channelDataWithScrapedTopVideos.json"))
            using (JsonTextReader reader = new JsonTextReader(file))
            {
                JObject channelData = (JObject)JToken.ReadFrom(reader);

                foreach (KeyValuePair <string, JToken> entry in channelData)
                {
                    Console.WriteLine("Now processing " + entry.Key);
                    foreach (JToken topVideo in entry.Value["top3videos"])
                    {
                        JToken videoId  = topVideo;
                        var    filePath = "../Data/Captions/" + entry.Key + "/" + (string)videoId + ".jsonl";
                        var    gzPath   = filePath + ".gz";

                        // Check for existing output first so that already scraped videos
                        // cost no network requests.
                        if (File.Exists(gzPath))
                        {
                            Console.WriteLine("already scraped: " + videoId);
                            continue;
                        }

                        string captions = null;
                        try {
                            var trackInfos = await client.GetVideoClosedCaptionTrackInfosAsync((string)videoId);

                            var trackInfo = trackInfos.First(t => t.Language.Code == "en");
                            var track     = await client.GetClosedCaptionTrackAsync(trackInfo);

                            // Every caption is prefixed with one space, exactly as the former
                            // string concatenation did; an empty track leaves captions at null.
                            var joined = new System.Text.StringBuilder();
                            foreach (var line in track.Captions)
                            {
                                joined.Append(' ').Append(line);
                            }
                            if (joined.Length > 0)
                            {
                                captions = joined.ToString();
                            }
                        } catch (System.InvalidOperationException) {
                            // First() found no English track.
                            Console.WriteLine("no captions found for: " + videoId);
                        } catch (YoutubeExplode.Exceptions.VideoUnavailableException) {
                            Console.WriteLine("Video deleted: " + videoId + " Channel: " + entry.Key);
                        } catch (System.ArgumentNullException) {
                            Console.WriteLine("Caption could not be correctly loaded: " + videoId);
                        }

                        var captionData = new Dictionary <string, string>()
                        {
                            { "VideoId", (string)videoId },
                            { "Captions", captions },
                            { "Info", "downloaded" }
                        };

                        Console.WriteLine("################ " + filePath);
                        System.IO.FileInfo dirCheck = new System.IO.FileInfo(filePath);
                        dirCheck.Directory.Create(); // If the directory already exists, this method does nothing.

                        // Serialize straight into the gzip stream; no uncompressed
                        // intermediate file has to be written, re-read and deleted.
                        using (FileStream fs = new FileStream(gzPath, FileMode.CreateNew))
                        using (GZipStream zipStream = new GZipStream(fs, CompressionMode.Compress, false))
                        using (StreamWriter jsonWriter = new StreamWriter(zipStream))
                        {
                            JsonSerializer serializer = new JsonSerializer();
                            serializer.Serialize(jsonWriter, captionData);
                        }
                    }
                }
            }
        }
Пример #18
0
        /// <summary>
        /// Lists the closed-caption track infos available for a video.
        /// </summary>
        /// <param name="video">Video identifier handed to the client.</param>
        /// <returns>The track infos exactly as reported by the client.</returns>
        public async Task <IEnumerable <ClosedCaptionTrackInfo> > Subtitles(string video)
        {
            var youtube    = new YoutubeClient();
            var trackInfos = await youtube.GetVideoClosedCaptionTrackInfosAsync(video);

            return trackInfos;
        }