/// <summary>
/// Downloads the English closed-caption track of a video and writes it to disk
/// as a SubRip (.srt) file.
/// </summary>
/// <param name="id">Video id handed to the YouTube client.</param>
/// <param name="Path">Directory that receives the subtitle file.</param>
/// <param name="Name">
/// Base file name; it is passed through <c>ValidNameForWindows()</c> and ".srt" is appended.
/// </param>
/// <returns>A status message that contains the full path of the written file.</returns>
/// <exception cref="InvalidOperationException">
/// No caption track with language code "en" exists (thrown by <c>First</c>).
/// </exception>
private async Task<string> GenrateSubTitleAsync(string id, string Path, string Name)
{
    // The parameter "Path" hides System.IO.Path inside this method, so the class
    // has to be fully qualified. Path.Combine inserts exactly one separator.
    string fullPath = System.IO.Path.Combine(Path, $"{Name.ValidNameForWindows()}.srt");

    var trackInfos = await client.GetVideoClosedCaptionTrackInfosAsync(id);
    var trackInfo = trackInfos.First(t => t.Language.Code == "en");
    var track = await client.GetClosedCaptionTrackAsync(trackInfo);

    // SubRip timestamps look like 00:01:02,345 (comma before the milliseconds).
    string ToSrtTime(TimeSpan value) =>
        value.ToString(@"hh\:mm\:ss\,fff", System.Globalization.CultureInfo.InvariantCulture);

    using (var file = new StreamWriter(fullPath))
    {
        int cueNumber = 1;
        foreach (var caption in track.Captions)
        {
            TimeSpan end = caption.Offset.Add(caption.Duration);

            // One cue = sequence number, time range, text, blank separator line.
            file.WriteLine(cueNumber);
            file.WriteLine($"{ToSrtTime(caption.Offset)} --> {ToSrtTime(end)}");
            file.WriteLine(caption.Text);
            file.WriteLine();
            cueNumber++;
        }
    }

    // No try/catch here: failures propagate to the caller with their original stack trace.
    return $"subtitle Write success to file : {fullPath}";
}
/*public bool GetVideoThumbnail(string path, string saveThumbnailTo, int seconds)
 * {
 *     string parameters = $"-ss {seconds} -i {path} -f image2 -vframes 1 -y {saveThumbnailTo}";
 *
 *     var processInfo = new ProcessStartInfo();
 *     processInfo.FileName = pathToConvertor;
 *     processInfo.Arguments = parameters;
 *     processInfo.CreateNoWindow = true;
 *     processInfo.UseShellExecute = false;
 *
 *     File.Delete(saveThumbnailTo);
 *
 *     using (var process = new Process())
 *     {
 *         process.StartInfo = processInfo;
 *         process.Start();
 *         process.WaitForExit();
 *     }
 *
 *     return File.Exists(saveThumbnailTo);
 * }*/

/// <summary>
/// Scratch routine: looks through the uploads of a fixed channel for the first video whose
/// title contains "listening practice test" and prints its English captions to the console.
/// </summary>
/// <remarks>
/// The work is started and not awaited; this method returns immediately and does not
/// observe the outcome. Nothing is printed when no matching video or no English track exists.
/// </remarks>
private static void Test()
{
    async Task PrintEnglishCaptionsAsync()
    {
        var client = new YoutubeClient();

        var uploads = await client.GetChannelUploadsAsync("UCZqh6VE-OYFz2RCWDbZQOqw").ConfigureAwait(false);
        var video = uploads.FirstOrDefault(v => v.Title.Contains("listening practice test"));
        if (video == null)
        {
            return;
        }

        var trackInfos = await client.GetVideoClosedCaptionTrackInfosAsync(video.Id).ConfigureAwait(false);
        var captionTrackInfo = trackInfos.FirstOrDefault(info => info.Language.Code == "en");
        if (captionTrackInfo == null)
        {
            return;
        }

        var captionTrack = await client.GetClosedCaptionTrackAsync(captionTrackInfo).ConfigureAwait(false);
        foreach (var caption in captionTrack.Captions)
        {
            Console.WriteLine($"{caption.Offset}: {caption.Text}");
        }
    }

    // Fire-and-forget: the signature is void, so the task is deliberately discarded.
    _ = PrintEnglishCaptionsAsync();
}
/// <summary>
/// Fetches one closed-caption track of a video, selected by its position in the
/// track list returned for that video.
/// </summary>
/// <param name="video">Video id handed to the YouTube client.</param>
/// <param name="n">Zero-based index into the list of available caption tracks.</param>
/// <returns>The caption track at index <paramref name="n"/>.</returns>
public async Task<ClosedCaptionTrack> Subtitles(string video, int n)
{
    var youtube = new YoutubeClient();

    var availableTracks = await youtube.GetVideoClosedCaptionTrackInfosAsync(video);
    var selectedTrack = availableTracks[n];

    var captionTrack = await youtube.GetClosedCaptionTrackAsync(selectedTrack);
    return captionTrack;
}
/// <summary>
/// Imports the uploads of a fixed channel: for every video that is not stored yet and has
/// captions, a <c>SubVideo</c> document is inserted into <c>videoCollection</c>, followed by
/// searchable caption windows (1, 2 and 3 consecutive captions) in <c>textCollection</c>.
/// </summary>
/// <remarks>
/// Videos are skipped when they are already stored (matched by URL), have no caption tracks,
/// or expose no muxed stream to use as the source URL.
/// </remarks>
static async Task Load()
{
    var client = new YoutubeClient();
    var uploads = await client.GetChannelUploadsAsync("UC4V3oCikXeSqYQr0hBMARwg");

    foreach (var video in uploads)
    {
        var tubeUrl = video.GetUrl();
        if (await (await videoCollection.FindAsync(x => x.YoutubeUrl == tubeUrl)).AnyAsync())
        {
            continue;
        }

        var info = await client.GetVideoClosedCaptionTrackInfosAsync(video.Id);
        if (info.Count <= 0)
        {
            continue;
        }

        var minfo = await client.GetVideoMediaStreamInfosAsync(video.Id);
        if (!minfo.Muxed.Any())
        {
            // Without a muxed stream there is no source URL; skip this video
            // instead of failing the whole import on Muxed[0].
            continue;
        }

        var subVideo = new SubVideo
        {
            Title = video.Title,
            YoutubeUrl = tubeUrl,
            Thumbnail = video.Thumbnails.MediumResUrl,
            SourceUrl = minfo.Muxed[0].Url,
            Published = video.UploadDate.DateTime
        };
        await videoCollection.InsertOneAsync(subVideo);

        var track = await client.GetClosedCaptionTrackAsync(info.First());
        var captions = track.Captions;
        var mergedCaptions = new List<VideoText>();

        for (int i = 0; i < captions.Count; i++)
        {
            var first = captions[i];
            var text = first.Text;
            var duration = first.Duration;

            // Window of size 1.
            mergedCaptions.Add(new VideoText
            {
                VideoId = subVideo.Id,
                OffSet = first.Offset,
                Text = text,
                Duration = duration
            });

            // Windows of size 2 and 3, built incrementally. The bound check stops at the end
            // of the list so the tail does not produce repeated, identical windows.
            for (int extra = 1; extra < 3 && i + extra < captions.Count; extra++)
            {
                var next = captions[i + extra];
                text = $"{text} {next.Text}";
                duration += next.Duration;

                mergedCaptions.Add(new VideoText
                {
                    VideoId = subVideo.Id,
                    OffSet = first.Offset,
                    Text = text,
                    Duration = duration
                });
            }
        }

        // Nothing to insert for an empty caption track.
        if (mergedCaptions.Count > 0)
        {
            await textCollection.InsertManyAsync(mergedCaptions);
        }
    }
}
/// <summary>
/// Fetches the first closed-caption track listed for the fixed video "_QdPW8JrYzQ"
/// and asserts on the returned track with <c>Assert.That.IsSet</c>.
/// </summary>
public async Task YoutubeClient_GetClosedCaptionTrackAsync_Normal_Test()
{
    // Arrange
    var youtube = new YoutubeClient();
    var info = await youtube.GetVideoInfoAsync("_QdPW8JrYzQ");
    var firstTrackInfo = info.ClosedCaptionTracks.First();

    // Act
    var captionTrack = await youtube.GetClosedCaptionTrackAsync(firstTrackInfo);

    // Assert
    Assert.That.IsSet(captionTrack);
}
/// <summary>
/// Loads the captions of a video for one language.
/// </summary>
/// <param name="videoId">Video id handed to the YouTube client.</param>
/// <param name="lang">Language code to match against each track's <c>Language.Code</c>.</param>
/// <returns>
/// The captions of the first track with the requested language code, or an empty list when
/// the video has no track in that language.
/// </returns>
private async Task<IReadOnlyList<ClosedCaption>> LoadCaptions(string videoId, string lang)
{
    var client = new YoutubeClient();

    var trackInfos = await client.GetVideoClosedCaptionTrackInfosAsync(videoId);
    var captionTrackInfo = trackInfos.FirstOrDefault(info => info.Language.Code == lang);
    if (captionTrackInfo == null)
    {
        // No track in the requested language: report "no captions" rather than
        // passing null on to the client.
        return System.Array.Empty<ClosedCaption>();
    }

    var closedCaptionsTrack = await client.GetClosedCaptionTrackAsync(captionTrackInfo);
    return closedCaptionsTrack.Captions;
}
/// <summary>
/// Fetches the first closed-caption track listed for <paramref name="videoId"/>
/// and asserts that the returned track is not null.
/// </summary>
/// <param name="videoId">Id of the video under test.</param>
public async Task YoutubeClient_GetClosedCaptionTrackAsync_Test(string videoId)
{
    // Arrange
    var youtube = new YoutubeClient();
    var availableTracks = await youtube.GetVideoClosedCaptionTrackInfosAsync(videoId);
    var firstTrackInfo = availableTracks.First();

    // Act
    var captionTrack = await youtube.GetClosedCaptionTrackAsync(firstTrackInfo);

    // Assert
    Assert.That(captionTrack, Is.Not.Null);
}
/// <summary>
/// Downloads the English closed-caption track of a video and reads its caption list.
/// The captions are not returned or stored.
/// </summary>
/// <param name="id">Video id handed to the YouTube client.</param>
private async Task GetClosedCaptionTrackAsync(string id)
{
    var youtube = new YoutubeClient();

    var availableTracks = await youtube.GetVideoClosedCaptionTrackInfosAsync(id);
    var englishInfo = availableTracks.First(t => t.Language.Code == "en");

    var englishTrack = await youtube.GetClosedCaptionTrackAsync(englishInfo);
    var captions = englishTrack.Captions;

    //var text = caption.Text; // "And the game was afoot."
}
/// <summary>
/// Data-driven test: reads the video id from the "Id" column of the current data row,
/// fetches the first closed-caption track listed for that video and asserts on the
/// returned track with <c>Assert.That.IsSet</c>.
/// </summary>
public async Task YoutubeClient_GetClosedCaptionTrackAsync_Test()
{
    // Arrange
    var videoId = (string)TestContext.DataRow["Id"];
    var youtube = new YoutubeClient();
    var info = await youtube.GetVideoInfoAsync(videoId);
    var firstTrackInfo = info.ClosedCaptionTracks.First();

    // Act
    var captionTrack = await youtube.GetClosedCaptionTrackAsync(firstTrackInfo);

    // Assert
    Assert.That.IsSet(captionTrack);
}
/// <summary>
/// Download closed captions for a youtube video
/// </summary>
/// <param name="videoUrl">
/// Youtube url (e.g. https://www.youtube.com/watch?v=VIDEO_ID).
/// When null, the <c>youtubeUrl</c> field is used instead.
/// </param>
/// <returns>
/// A ClosedCaptionTrack object: the English track when one exists, otherwise the first
/// listed track. Null when the video has no caption tracks.
/// </returns>
public async Task<ClosedCaptionTrack> DownloadClosedCaptions(string videoUrl = null)
{
    videoUrl = videoUrl ?? youtubeUrl;
    var videoId = GetVideoId(videoUrl);

    var trackInfos = await youtubeClient.GetVideoClosedCaptionTrackInfosAsync(videoId);

    // Check null explicitly: "trackInfos?.Count == 0" evaluates to false for a null
    // list, which would let the null reach the lookup below.
    if (trackInfos == null || trackInfos.Count == 0)
    {
        return null;
    }

    var trackInfo = trackInfos.FirstOrDefault(t => t.Language.Code == "en") ?? trackInfos.First();
    return await youtubeClient.GetClosedCaptionTrackAsync(trackInfo);
}
/// <summary>
/// Fetches the English captions of a video, stores them as a text file under
/// "VideoCaptions/{channelId}/{videoId}.txt" and returns the caption text.
/// </summary>
/// <param name="channelId">Channel id, used as a folder segment of the storage path.</param>
/// <param name="videoId">Video whose captions are fetched; also the stored file's name.</param>
/// <param name="log">Logger that receives a warning for every handled failure.</param>
/// <returns>
/// The caption lines joined with "\n"; null when the track list or the track cannot be
/// fetched, or when no track with language code "en" exists. A failure while saving is
/// logged and does not affect the returned text.
/// </returns>
public async Task<string> GetAndUpdateVideoCaptions(string channelId, string videoId, ILogger log)
{
    // Step 1: list the caption tracks available for the video.
    IReadOnlyList<ClosedCaptionTrackInfo> availableTracks;
    try
    {
        availableTracks = await ytScaper.GetVideoClosedCaptionTrackInfosAsync(videoId);
    }
    catch (Exception ex)
    {
        log.Warning(ex, "Unable to get captions for {VideoID}: {Error}", videoId, ex.Message);
        return null;
    }

    // Step 2: only the English track is of interest.
    var englishInfo = availableTracks.FirstOrDefault(t => t.Language.Code == "en");
    if (englishInfo == null)
    {
        return null;
    }

    // Step 3: download the track; HttpRequestException is retried with backoff.
    ClosedCaptionTrack englishTrack;
    try
    {
        var retryPolicy = Policy.Handle<HttpRequestException>().RetryWithBackoff();
        englishTrack = await retryPolicy.ExecuteAsync(() => ytScaper.GetClosedCaptionTrackAsync(englishInfo));
    }
    catch (Exception ex)
    {
        log.Warning(ex, "Unable to get captions for {VideoID}: {Error}", videoId, ex.Message);
        return null;
    }

    // Step 4: flatten to text and persist; saving is best-effort.
    var captionLines = englishTrack.Captions.Select(c => c.Text);
    var text = captionLines.Join("\n");
    if (text == null)
    {
        return text;
    }

    var path = StringPath.Relative("VideoCaptions", channelId, $"{videoId}.txt");
    try
    {
        await Store.Save(path, text.AsStream());
    }
    catch (Exception ex)
    {
        log.Warning(ex, "Error when saving captions {Path}", path);
    }

    return text;
}
/// <summary>
/// Download closed captions for a youtube video
/// </summary>
/// <param name="videoUrl">
/// Youtube url (e.g. https://www.youtube.com/watch?v=VIDEO_ID).
/// When null, the <c>youtubeUrl</c> field is used instead; that field is first overwritten
/// with the input field's text if <c>overrideURLWithInputField</c> is set and the text is
/// not empty.
/// </param>
/// <returns>
/// A ClosedCaptionTrack object: the English track when one exists, otherwise the first
/// listed track. Null when the video has no caption tracks.
/// </returns>
public async Task<ClosedCaptionTrack> DownloadClosedCaptions(string videoUrl = null)
{
    if (overrideURLWithInputField && !string.IsNullOrEmpty(m_inputField.text))
    {
        youtubeUrl = m_inputField.text;
    }

    videoUrl = videoUrl ?? youtubeUrl;
    var videoId = GetVideoId(videoUrl);

    var trackInfos = await youtubeClient.GetVideoClosedCaptionTrackInfosAsync(videoId);

    // Check null explicitly: "trackInfos?.Count == 0" evaluates to false for a null
    // list, which would let the null reach the lookup below.
    if (trackInfos == null || trackInfos.Count == 0)
    {
        return null;
    }

    var trackInfo = trackInfos.FirstOrDefault(t => t.Language.Code == "en") ?? trackInfos.First();
    return await youtubeClient.GetClosedCaptionTrackAsync(trackInfo);
}
/// <summary>
/// Builds a <c>SubtitledVideo</c> for a video: metadata, the first muxed stream URL, the
/// plain caption text, and an HTML body in which every caption is a clickable span.
/// </summary>
/// <param name="videoId">Video id handed to the YouTube client.</param>
/// <returns>
/// The populated <c>SubtitledVideo</c>. <c>HtmlBody</c> is an empty string when the first
/// caption track contains no captions.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The video has no caption tracks (thrown by <c>First</c>).
/// </exception>
public async Task<SubtitledVideo> GetSubtitledVideoAsync(string videoId)
{
    var client = new YoutubeClient();

    var video = await client.GetVideoAsync(videoId);
    var captionTrackInfos = await client.GetVideoClosedCaptionTrackInfosAsync(videoId);
    var mediaStreamInfoSet = await client.GetVideoMediaStreamInfosAsync(videoId);
    var track = await client.GetClosedCaptionTrackAsync(captionTrackInfos.First());
    var captions = track.Captions;

    var subVideo = new SubtitledVideo
    {
        Title = video.Title,
        YoutubeUrl = video.GetUrl(),
        YoutubeId = videoId,
        Thumbnail = video.Thumbnails.MediumResUrl,
        VideoSourceUrl = mediaStreamInfoSet.Muxed[0].Url,
        Published = video.UploadDate.DateTime,
        TextBody = string.Join(" ", captions.Select(x => x.Text))
    };

    var html = new System.Text.StringBuilder();
    for (int i = 0; i < captions.Count; i++)
    {
        var current = captions[i];

        if (i > 0)
        {
            // A pause of at least 150 ms between two captions ends the sentence/line.
            var previous = captions[i - 1];
            var gap = current.Offset - (previous.Offset + previous.Duration);
            if (gap >= TimeSpan.FromMilliseconds(150))
            {
                html.Append(".</br>");
            }
        }

        // The first caption always seeks to the start of the video.
        int seekSeconds = i == 0 ? 0 : (int)current.Offset.TotalSeconds;

        // Caption text is external input: encode it so it cannot break or inject markup.
        html.Append("<span onclick=\"goto(")
            .Append(seekSeconds)
            .Append(")\">")
            .Append(System.Net.WebUtility.HtmlEncode(current.Text))
            .Append("</span>");
    }

    subVideo.HtmlBody = html.ToString();
    return subVideo;
}
/// <summary>
/// Downloads the English captions for the top videos of every channel listed in the
/// channel-data JSON file and stores each result as a gzip-compressed JSON file at
/// "../Data/Captions/{channelId}/{videoId}.jsonl.gz".
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static async Task Main(string[] args)
{
    var client = new YoutubeClient();

    // The full json with all relevant data is loaded. At this point in the pipeline it
    // contains the information about each channel plus the id of the top three videos
    // from each channel.
    // The task of this script is to download the captions for each video
    using (StreamReader file = File.OpenText("../../output/unlabeled_data/U_channelDataWithScrapedTopVideos.json"))
    using (JsonTextReader reader = new JsonTextReader(file))
    {
        JObject channelData = (JObject)JToken.ReadFrom(reader);

        foreach (KeyValuePair<string, JToken> entry in channelData)
        {
            Console.WriteLine("Now processing " + entry.Key);

            foreach (JToken topVideo in entry.Value["top3videos"])
            {
                string videoId = (string)topVideo;
                var filePath = "../Data/Captions/" + entry.Key + "/" + videoId + ".jsonl";
                var zippedPath = filePath + ".gz";

                // Check for an existing result first so that already-scraped videos
                // cost no network requests.
                if (File.Exists(zippedPath))
                {
                    Console.WriteLine("already scraped: " + videoId);
                    continue;
                }

                // Stays null when the captions cannot be downloaded; the record is
                // still written so the video is not retried on the next run.
                string captions = null;
                try
                {
                    var trackInfos = await client.GetVideoClosedCaptionTrackInfosAsync(videoId);
                    var trackInfo = trackInfos.First(t => t.Language.Code == "en");
                    var track = await client.GetClosedCaptionTrackAsync(trackInfo);
                    var captionList = track.Captions;

                    if (captionList.Count > 0)
                    {
                        // Every caption is preceded by a single space, including the first one.
                        captions = " " + string.Join(" ", captionList.Select(line => line.Text));
                    }
                }
                catch (System.InvalidOperationException)
                {
                    Console.WriteLine("no captions found for: " + videoId);
                }
                catch (YoutubeExplode.Exceptions.VideoUnavailableException)
                {
                    Console.WriteLine("Video deleted: " + videoId + " Channel: " + entry.Key);
                }
                catch (System.ArgumentNullException)
                {
                    Console.WriteLine("Caption could not be correctly loaded: " + videoId);
                }

                var captionData = new Dictionary<string, string>()
                {
                    { "VideoId", videoId },
                    { "Captions", captions },
                    { "Info", "downloaded" }
                };

                Console.WriteLine("################ " + filePath);
                System.IO.FileInfo dirCheck = new System.IO.FileInfo(filePath);
                dirCheck.Directory.Create(); // If the directory already exists, this method does nothing.

                // Serialize straight into the gzip stream; no uncompressed temporary file.
                using (FileStream fs = new FileStream(zippedPath, FileMode.CreateNew))
                using (GZipStream zipStream = new GZipStream(fs, CompressionMode.Compress, false))
                using (StreamWriter jsonWriter = new StreamWriter(zipStream))
                {
                    JsonSerializer serializer = new JsonSerializer();
                    serializer.Serialize(jsonWriter, captionData);
                }
            }
        }
    }
}