/// <summary>
/// Starts a youtube-dl audio download when the record's MP3 file is missing,
/// optionally requests a voice-to-text task when no transcript file exists,
/// and otherwise loads the transcript file and returns it as timestamped blocks.
/// </summary>
/// <param name="startV2T">When true and neither transcript file exists, a POST is sent to the Voice2TextNewTask endpoint.</param>
/// <param name="datasetid">Dataset id; used as the sub-folder under <c>Mp3Path</c> and as part of the task URL.</param>
/// <param name="recordid">Record id; used as the base file name and as part of the task URL.</param>
/// <param name="videourl">URL appended to the youtube-dl command line when the MP3 file is missing.</param>
/// <returns>
/// The formatted transcript blocks read from the <c>.mp3.raw_s2t</c> file (checked first)
/// or the <c>.ctm</c> file; <c>null</c> when no transcript file is found.
/// </returns>
private List<Devmasters.SpeechToText.VoiceToTextFormatter.TextWithTimestamp> _checkDownloadAndStartV2TOrGet(bool startV2T, string datasetid, string recordid, string videourl)
{
    string basePath = $"{Mp3Path}\\{datasetid}\\{recordid}";
    string audioPath = $"{basePath}.mp3";
    string newtonPath = $"{basePath}.mp3.raw_s2t";
    string kaldiPath = $"{basePath}.ctm";

    // Step 1: fetch the audio track if it is not on disk yet.
    if (!System.IO.File.Exists(audioPath))
    {
        string downloadArgs =
            $"--no-progress --extract-audio --audio-format mp3 --postprocessor-args \" -ac 1 -ar 16000\" -o \"{basePath}.%(ext)s\" "
            + videourl;
        var downloadInfo = new System.Diagnostics.ProcessStartInfo("youtube-dl.exe", downloadArgs);
        var downloader = new Devmasters.ProcessExecutor(downloadInfo, 60 * 6 * 24);
        downloader.StandardOutputDataReceived += (sender, args) =>
        {
            Devmasters.Logging.Logger.Root.Debug(args.Data);
        };

        Devmasters.Logging.Logger.Root.Info($"Starting Youtube-dl for {videourl} ");
        // NOTE(review): whether Start() blocks until youtube-dl exits is not visible from here.
        downloader.Start();
    }

    // Step 2: without any transcript file there is nothing to return;
    // at most a transcription task is requested.
    bool transcriptOnDisk = System.IO.File.Exists(newtonPath) || System.IO.File.Exists(kaldiPath);
    if (!transcriptOnDisk)
    {
        if (startV2T)
        {
            string taskUrl = $"https://www.hlidacstatu.cz/api/v2/internalq/Voice2TextNewTask/{datasetid}/{recordid}";
            using (var request = new Devmasters.Net.HttpClient.URLContent(taskUrl))
            {
                request.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
                request.RequestParams.Headers.Add("Authorization", Apikey);
                request.GetContent();
            }
        }

        return null;
    }

    // Step 3: load whichever transcript file is present; the Newton output is checked first.
    if (System.IO.File.Exists(newtonPath))
    {
        var newtonTerms = new Newton.SpeechToText.Cloud.FileAPI.VoiceToTerms(System.IO.File.ReadAllText(newtonPath));
        return new Devmasters.SpeechToText.VoiceToTextFormatter(newtonTerms.Terms)
            .TextWithTimestamps(TimeSpan.FromSeconds(10), true);
    }

    if (System.IO.File.Exists(kaldiPath))
    {
        var kaldiTerms = new KaldiASR.SpeechToText.VoiceToTerms(System.IO.File.ReadAllText(kaldiPath));
        return new Devmasters.SpeechToText.VoiceToTextFormatter(kaldiTerms.Terms)
            .TextWithTimestamps(TimeSpan.FromSeconds(10), true);
    }

    // Both files disappeared between the combined check and the individual checks.
    return null;
}
/// <summary>
/// Processes a set of YouTube videos for one person: creates or loads the dataset
/// record of each video, starts an audio download when the MP3 file is missing,
/// requests a voice-to-text task when no transcript exists, and stores the
/// transcript into the record once a <c>.ctm</c> transcript file is available.
/// </summary>
/// <param name="o">Person being processed; <c>NameId</c> is written to <c>osobaid</c> of newly created records.</param>
/// <param name="playlist">Playlist passed to youtube-dl to list video ids; used only when <paramref name="vids"/> is null or empty.</param>
/// <param name="threads">Passed to the batch manager as <c>maxDegreeOfParallelism</c>.</param>
/// <param name="max">Passed to youtube-dl as <c>--playlist-end</c> when the playlist is listed.</param>
/// <param name="vids">Optional explicit video ids; when non-empty the playlist is not queried.</param>
/// <param name="mp3path">Root folder under which <c>{DataSetId}\{recordId}.*</c> files are looked up and downloaded.</param>
public static void Process(osoba o, string playlist, int threads, int max, string[] vids, string mp3path)
{
    logger.Info($"Starting {o.Jmeno} {o.Prijmeni} {o.NameId} for {playlist} ");

    List<string> videos = null;
    if (vids?.Length > 0)
    {
        // Explicit ids win over the playlist.
        videos = vids
            .Select(m => "https://www.youtube.com/watch?v=" + m)
            .ToList();
    }
    else
    {
        System.Diagnostics.ProcessStartInfo pi = new System.Diagnostics.ProcessStartInfo(
            "youtube-dl",
            $"--flat-playlist --get-id --playlist-end {max} " + playlist
        );
        Devmasters.ProcessExecutor pe = new Devmasters.ProcessExecutor(pi, 60 * 6 * 24);
        logger.Info($"Starting Youtube-dl playlist video list ");
        pe.Start();

        // youtube-dl prints one video id per line.
        videos = pe.StandardOutput
            .Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
            .Select(m => "https://www.youtube.com/watch?v=" + m)
            .ToList();
    }

    Console.WriteLine();
    Console.WriteLine($"Processing {videos.Count} videos");
    Console.WriteLine();
    Console.WriteLine();
    Console.WriteLine();
    Console.WriteLine();

    Devmasters.Batch.Manager.DoActionForAll(videos,
        vid =>
        {
            string uniqId = record.UniqueID(vid);
            record rec = null;
            bool changed = false;

            if (Program.api2.ItemExists(uniqId))
            {
                rec = Program.api2.GetItem(uniqId);
            }
            else
            {
                rec = YTDL.GetVideoInfo(vid);
                if (rec == null)
                {
                    // No video info available; nothing to store for this video.
                    return new Devmasters.Batch.ActionOutputData();
                }

                rec.osobaid = o.NameId;
                changed = true; // a new record always has to be saved
            }

            string fnFile = $"{mp3path}\\{DataSetId}\\{uniqId}";
            var MP3Fn = $"{fnFile}.mp3";
            var newtonFn = $"{fnFile}.mp3.raw_s2t";
            var dockerFn = $"{fnFile}.ctm";

            if (System.IO.File.Exists(MP3Fn) == false)
            {
                System.Diagnostics.ProcessStartInfo piv = new System.Diagnostics.ProcessStartInfo(
                    "youtube-dl.exe",
                    $"--no-progress --extract-audio --audio-format mp3 --postprocessor-args \" -ac 1 -ar 16000\" -o \"{fnFile}.%(ext)s\" " + vid
                );
                Devmasters.ProcessExecutor pev = new Devmasters.ProcessExecutor(piv, 60 * 6 * 24);
                pev.StandardOutputDataReceived += (ox, e) => { logger.Debug(e.Data); };

                // Single-line literal: a raw newline inside a non-verbatim interpolated string does not compile.
                logger.Info($"Starting Youtube-dl for {vid} ");
                pev.Start();
            }

            bool exists_S2T = System.IO.File.Exists(newtonFn) || System.IO.File.Exists(dockerFn);

            // NOTE(review): the task is requested only when prepisAudia is null, while the
            // fill-in step below also treats an empty prepisAudia as missing - confirm intended.
            if (exists_S2T == false && rec.prepisAudia == null)
            {
                using (Devmasters.Net.HttpClient.URLContent net = new Devmasters.Net.HttpClient.URLContent(
                    $"https://www.hlidacstatu.cz/api/v2/internalq/Voice2TextNewTask/{DataSetId}/{uniqId}?priority=2"))
                {
                    net.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
                    net.RequestParams.Headers.Add("Authorization", System.Configuration.ConfigurationManager.AppSettings["apikey"]);
                    net.GetContent();
                }
            }

            if (exists_S2T && !(rec.prepisAudia?.Count() > 0))
            {
                // Only the .ctm transcript is consumed here; a .mp3.raw_s2t file alone leaves the record untouched.
                if (System.IO.File.Exists(dockerFn))
                {
                    var tt = new KaldiASR.SpeechToText.VoiceToTerms(System.IO.File.ReadAllText(dockerFn));
                    var blocks = new Devmasters.SpeechToText.VoiceToTextFormatter(tt.Terms)
                        .TextWithTimestamps(TimeSpan.FromSeconds(10), true)
                        .Select(t => new record.Blok() { sekundOdZacatku = (long)t.Start.TotalSeconds, text = t.Text })
                        .ToArray();

                    // TODO: fix later - the video info is fetched again to rebuild the record text.
                    var tmpRec = YTDL.GetVideoInfo(vid);
                    if (tmpRec != null)
                    {
                        rec.text = tmpRec.text + "\n\n" + new Devmasters.SpeechToText.VoiceToTextFormatter(tt.Terms).Text(true);
                    }

                    rec.prepisAudia = blocks;
                    changed = true;
                }
            }

            if (changed)
            {
                api2.AddOrUpdateItem(rec, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
            }

            return new Devmasters.Batch.ActionOutputData();
        },
        Devmasters.Batch.Manager.DefaultOutputWriter,
        Devmasters.Batch.Manager.DefaultProgressWriter,
        !System.Diagnostics.Debugger.IsAttached,
        maxDegreeOfParallelism: threads
    );
}