static void Main(string[] args)
{
    using (IDeepSpeech sttClient = new DeepSpeech(DEEPSPEECH_MODEL))
    {
        sttClient.EnableExternalScorer(DEEPSPEECH_SCORER);

        string AudioFilePath = @"audio\2830-3980-0043.wav";
        WaveBuffer waveBuffer = new WaveBuffer(File.ReadAllBytes(AudioFilePath));

        string speechResult = sttClient.SpeechToText(
            waveBuffer.ShortBuffer,
            Convert.ToUInt32(waveBuffer.MaxSize / 2));

        Console.WriteLine(speechResult);
        waveBuffer.Clear();
    }
}
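// NOTE (assumption): DEEPSPEECH_MODEL and DEEPSPEECH_SCORER are not defined in the
// snippet above. They are presumably class-level constants holding the paths to the
// acoustic model and the external scorer; a minimal sketch with placeholder paths:
const string DEEPSPEECH_MODEL = "deepspeech-0.9.3-models.pbmm";
const string DEEPSPEECH_SCORER = "deepspeech-0.9.3-models.scorer";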
static void Main(string[] args)
{
    string model = null;
    string lm = null;
    string trie = null;
    string audio = null;
    bool extended = false;
    if (args.Length > 0)
    {
        model = GetArgument(args, "--model");
        lm = GetArgument(args, "--lm");
        trie = GetArgument(args, "--trie");
        audio = GetArgument(args, "--audio");
        extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
    }

    const uint BEAM_WIDTH = 500;
    const float LM_ALPHA = 0.75f;
    const float LM_BETA = 1.85f;

    Stopwatch stopwatch = new Stopwatch();

    using (IDeepSpeech sttClient = new DeepSpeech())
    {
        try
        {
            Console.WriteLine("Loading model...");
            stopwatch.Start();
            sttClient.CreateModel(model ?? "output_graph.pbmm", BEAM_WIDTH);
            stopwatch.Stop();
            Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
            stopwatch.Reset();

            if (lm != null)
            {
                Console.WriteLine("Loading LM...");
                sttClient.EnableDecoderWithLM(lm ?? "lm.binary", trie ?? "trie", LM_ALPHA, LM_BETA);
            }

            string audioFile = audio ?? "arctic_a0024.wav";
            var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
            using (var waveInfo = new WaveFileReader(audioFile))
            {
                Console.WriteLine("Running inference....");
                stopwatch.Start();

                string speechResult;
                if (extended)
                {
                    Metadata metaResult = sttClient.SpeechToTextWithMetadata(
                        waveBuffer.ShortBuffer,
                        Convert.ToUInt32(waveBuffer.MaxSize / 2));
                    speechResult = MetadataToString(metaResult);
                }
                else
                {
                    speechResult = sttClient.SpeechToText(
                        waveBuffer.ShortBuffer,
                        Convert.ToUInt32(waveBuffer.MaxSize / 2));
                }

                stopwatch.Stop();
                Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
            }
            waveBuffer.Clear();
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }
    }
}
static void Main(string[] args)
{
    string model = null;
    string alphabet = null;
    string lm = null;
    string trie = null;
    string audio = null;
    if (args.Length > 0)
    {
        model = GetArgument(args, "--model");
        alphabet = GetArgument(args, "--alphabet");
        lm = GetArgument(args, "--lm");
        trie = GetArgument(args, "--trie");
        audio = GetArgument(args, "--audio");
    }

    const uint N_CEP = 26;
    const uint N_CONTEXT = 9;
    const uint BEAM_WIDTH = 200;
    const float LM_ALPHA = 0.75f;
    const float LM_BETA = 1.85f;

    Stopwatch stopwatch = new Stopwatch();

    using (IDeepSpeech sttClient = new DeepSpeech())
    {
        var result = 1;
        Console.WriteLine("Loading model...");
        stopwatch.Start();
        try
        {
            result = sttClient.CreateModel(
                model ?? "output_graph.pbmm",
                N_CEP, N_CONTEXT,
                alphabet ?? "alphabet.txt",
                BEAM_WIDTH);
        }
        catch (IOException ex)
        {
            Console.WriteLine("Error loading the model.");
            Console.WriteLine(ex.Message);
        }
        stopwatch.Stop();

        if (result == 0)
        {
            Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
            stopwatch.Reset();

            if (lm != null)
            {
                Console.WriteLine("Loading LM...");
                try
                {
                    result = sttClient.EnableDecoderWithLM(
                        alphabet ?? "alphabet.txt",
                        lm ?? "lm.binary",
                        trie ?? "trie",
                        LM_ALPHA, LM_BETA);
                }
                catch (IOException ex)
                {
                    Console.WriteLine("Error loading LM.");
                    Console.WriteLine(ex.Message);
                }
            }

            string audioFile = audio ?? "arctic_a0024.wav";
            var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
            using (var waveInfo = new WaveFileReader(audioFile))
            {
                Console.WriteLine("Running inference....");
                stopwatch.Start();
                string speechResult = sttClient.SpeechToText(
                    waveBuffer.ShortBuffer,
                    Convert.ToUInt32(waveBuffer.MaxSize / 2),
                    16000);
                stopwatch.Stop();
                Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                Console.WriteLine($"Recognized text: {speechResult}");
            }
            waveBuffer.Clear();
        }
        else
        {
            Console.WriteLine("Error loading the model.");
        }
    }
}
static void Main(string[] args)
{
    string model = null;
    string scorer = null;
    string audio = null;
    bool extended = false;
    if (args.Length > 0)
    {
        model = GetArgument(args, "--model");
        scorer = GetArgument(args, "--scorer");
        audio = GetArgument(args, "--audio");
        extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
    }

    Stopwatch stopwatch = new Stopwatch();
    try
    {
        Console.WriteLine("Loading model...");
        stopwatch.Start();
        // sphinx-doc: csharp_ref_model_start
        using IDeepSpeech sttClient = new DeepSpeech(model ?? "output_graph.pbmm");
        // sphinx-doc: csharp_ref_model_stop
        stopwatch.Stop();

        Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
        stopwatch.Reset();

        if (scorer != null)
        {
            Console.WriteLine("Loading scorer...");
            sttClient.EnableExternalScorer(scorer ?? "kenlm.scorer");
        }

        string audioFile = audio ?? "new-home-in-the-stars-16k.wav";
        var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
        using (var waveInfo = new WaveFileReader(audioFile))
        {
            Console.WriteLine("Running inference....");
            stopwatch.Start();

            string speechResult;
            // sphinx-doc: csharp_ref_inference_start
            if (extended)
            {
                Metadata metaResult = sttClient.SpeechToTextWithMetadata(
                    waveBuffer.ShortBuffer,
                    Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                speechResult = MetadataToString(metaResult.Transcripts[0]);
            }
            else
            {
                speechResult = sttClient.SpeechToText(
                    waveBuffer.ShortBuffer,
                    Convert.ToUInt32(waveBuffer.MaxSize / 2));
            }
            // sphinx-doc: csharp_ref_inference_stop
            stopwatch.Stop();

            Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
            Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
            Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
        }
        waveBuffer.Clear();
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
}
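// NOTE (assumption): GetArgument and MetadataToString are referenced above but not shown.
// The helpers below are a minimal sketch of what they might look like for the 0.9.x
// bindings; the originals in the DeepSpeech .NET example client may differ.
static string GetArgument(string[] args, string option)
{
    // Return the value following an option flag, or the flag itself for valueless
    // switches such as "--extended"; null when the flag is absent.
    for (int i = 0; i < args.Length; i++)
    {
        if (args[i] == option)
        {
            bool hasValue = i + 1 < args.Length && !args[i + 1].StartsWith("--");
            return hasValue ? args[i + 1] : option;
        }
    }
    return null;
}

static string MetadataToString(CandidateTranscript transcript)
{
    // Concatenate the per-character tokens of one candidate transcript and append its
    // confidence, so the extended result prints as a single line. (The older snippet
    // above that passes a whole Metadata object would need a matching overload.)
    var text = new System.Text.StringBuilder();
    foreach (var token in transcript.Tokens)
    {
        text.Append(token.Text);
    }
    return $"{text} (confidence: {transcript.Confidence})";
}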
public static string Main1(string s)
{
    string model = null;
    string audio = null;
    string hotwords = null;
    bool extended = false;
    model = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.pbmm";
    var scorer = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.scorer";
    audio = "C:/Users/Nour El-Din/Documents/deepspeech/audio";
    //hotwords = GetArgument(args, "--hot_words");
    extended = true;

    Stopwatch stopwatch = new Stopwatch();
    try
    {
        Console.WriteLine("Loading model...");
        stopwatch.Start();
        // sphinx-doc: csharp_ref_model_start
        using (IDeepSpeech sttClient = new DeepSpeech("C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.pbmm"))
        {
            // sphinx-doc: csharp_ref_model_stop
            stopwatch.Stop();
            Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
            stopwatch.Reset();

            if (scorer != null)
            {
                Console.WriteLine("Loading scorer...");
                sttClient.EnableExternalScorer("C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.scorer");
            }

            if (hotwords != null)
            {
                Console.WriteLine($"Adding hot-words {hotwords}");
                char[] sep = { ',' };
                string[] word_boosts = hotwords.Split(sep);
                foreach (string word_boost in word_boosts)
                {
                    char[] sep1 = { ':' };
                    string[] word = word_boost.Split(sep1);
                    sttClient.AddHotWord(word[0], float.Parse(word[1]));
                }
            }

            Directory.SetCurrentDirectory("C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video");
            string v_Name = s.Split('.')[0];
            var enviroment = System.Environment.CurrentDirectory;
            string projectDirectory = Directory.GetParent(enviroment).Parent.FullName;
            Console.WriteLine(projectDirectory);

            string m_Path = $"C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video/{v_Name}.mp3";
            bool mp3_Found = File.Exists(m_Path);
            string w_Path = $"C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video/{v_Name}.wav";
            bool wav_Found = File.Exists(w_Path);

            string strCmdText;
            if (!mp3_Found)
            {
                // Extract the audio track from the .mp4 first.
                strCmdText = $"/c ffmpeg -i {v_Name}.mp4 {v_Name}.mp3";
                System.Diagnostics.Process.Start("CMD.exe", strCmdText);
            }
            else if (!wav_Found)
            {
                // Convert the .mp3 to 16 kHz mono 16-bit PCM, the format DeepSpeech expects.
                strCmdText = $"/c ffmpeg -i {v_Name}.mp3 -acodec pcm_s16le -ac 1 -ar 16000 {v_Name}.wav";
                System.Diagnostics.Process.Start("CMD.exe", strCmdText);
            }
            else
            {
                string audioFile = $"C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video/{v_Name}.wav";
                var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
                using (var waveInfo = new WaveFileReader(audioFile))
                {
                    Console.WriteLine("Running inference....");
                    stopwatch.Start();
                    string w = "";
                    string speechResult;
                    // sphinx-doc: csharp_ref_inference_start
                    if (extended)
                    {
                        Metadata metaResult = sttClient.SpeechToTextWithMetadata(
                            waveBuffer.ShortBuffer,
                            Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                        speechResult = MetadataToString(metaResult.Transcripts[0]);

                        data new_Data = new data();
                        List<data> full_Text = new List<data>();
                        double word_Start = 0;
                        double word_End;
                        string temp = "";
                        int counter = 0;
                        int counter2 = 0;
                        foreach (var c in metaResult.Transcripts[0].Tokens)
                        {
                            counter2++;
                            if (counter == 0)
                            {
                                word_Start = c.StartTime;
                                counter++;
                            }
                            temp += c.Text;
                            if (c.Text == " ")
                            {
                                // A space ends the current word: record its start/end times.
                                counter = 0;
                                word_End = c.StartTime;
                                new_Data.start = word_Start;
                                new_Data.end = word_End;
                                new_Data.word = temp;
                                full_Text.Add(new_Data);
                                temp = "";
                                new_Data = new data();
                                continue;
                            }
                            w += c.Text;
                            if (counter2 == metaResult.Transcripts[0].Tokens.Length)
                            {
                                // Last token: flush the final word.
                                word_End = c.StartTime;
                                new_Data.start = word_Start;
                                new_Data.end = word_End;
                                new_Data.word = temp;
                                full_Text.Add(new_Data);
                                temp = "";
                            }
                        }
                    }
                    else
                    {
                        speechResult = sttClient.SpeechToText(
                            waveBuffer.ShortBuffer,
                            Convert.ToUInt32(waveBuffer.MaxSize / 2));
                    }
                    // sphinx-doc: csharp_ref_inference_stop
                    stopwatch.Stop();
                    Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                    Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                    Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
                    waveBuffer.Clear();
                    return w;
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    return " ";
}
public static string Main1(string s)
{
    string model = null;
    bool extended = false;
    model = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.pbmm";
    var scorer = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.scorer";
    extended = true;

    string wwwPath = HomeController.Environment.WebRootPath;
    string path = Path.Combine(HomeController.Environment.WebRootPath, "Video");

    Stopwatch stopwatch = new Stopwatch();
    try
    {
        Console.WriteLine("Loading model...");
        stopwatch.Start();
        // sphinx-doc: csharp_ref_model_start
        using (IDeepSpeech sttClient = new DeepSpeech(model))
        {
            // sphinx-doc: csharp_ref_model_stop
            stopwatch.Stop();
            Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
            stopwatch.Reset();

            if (scorer != null)
            {
                Console.WriteLine("Loading scorer...");
                sttClient.EnableExternalScorer(scorer);
            }

            Directory.SetCurrentDirectory(path);
            string v_Name = s.Split('.')[0];
            string x = path + $"/{v_Name}.mp4";
            string m_Path = path + $"/{v_Name}.mp3";
            bool mp3_Found = File.Exists(m_Path);
            string w_Path = path + $"/{v_Name}.wav";
            bool wav_Found = File.Exists(w_Path);

            string strCmdText;
            if (!mp3_Found)
            {
                // Extract the audio track from the .mp4 and wait for ffmpeg to finish.
                strCmdText = $"/c ffmpeg -i {v_Name}.mp4 {v_Name}.mp3";
                Process process = new Process();
                process.StartInfo.FileName = "cmd.exe";
                process.StartInfo.WindowStyle = ProcessWindowStyle.Hidden;
                process.StartInfo.Arguments = strCmdText;
                process.Start();
                process.WaitForExit();
            }
            if (!wav_Found)
            {
                // Convert the .mp3 to 16 kHz mono 16-bit PCM, the format DeepSpeech expects.
                strCmdText = $"/c ffmpeg -i {v_Name}.mp3 -acodec pcm_s16le -ac 1 -ar 16000 {v_Name}.wav";
                Process process = new Process();
                process.StartInfo.FileName = "cmd.exe";
                process.StartInfo.WindowStyle = ProcessWindowStyle.Hidden;
                process.StartInfo.Arguments = strCmdText;
                process.Start();
                process.WaitForExit();
            }

            string audioFile = path + $"/{v_Name}.wav";
            var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
            using (var waveInfo = new WaveFileReader(audioFile))
            {
                Console.WriteLine("Running inference....");
                stopwatch.Start();
                string w = "";
                string speechResult;
                // sphinx-doc: csharp_ref_inference_start
                if (extended)
                {
                    Metadata metaResult = sttClient.SpeechToTextWithMetadata(
                        waveBuffer.ShortBuffer,
                        Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                    speechResult = MetadataToString(metaResult.Transcripts[0]);

                    data new_Data = new data();
                    List<data> full_Text = new List<data>();
                    double word_Start = 0;
                    double word_End;
                    string temp = "";
                    int counter = 0;
                    int counter2 = 0;
                    int count = 0;
                    foreach (var c in metaResult.Transcripts[0].Tokens)
                    {
                        counter2++;
                        if (counter == 0)
                        {
                            word_Start = c.StartTime;
                            counter++;
                        }
                        temp += c.Text;
                        if (c.Text == " ")
                        {
                            // A space ends the current word: emit it as "word,start,end/".
                            counter = 0;
                            word_End = c.StartTime;
                            new_Data.start = word_Start;
                            new_Data.end = word_End;
                            new_Data.word = temp;
                            full_Text.Add(new_Data);
                            w += temp + ',';
                            w += (double)word_Start + ",";
                            w += (double)word_End + "/";
                            temp = "";
                            new_Data = new data();
                            continue;
                        }
                        //w += c.Text;
                        if (counter2 == metaResult.Transcripts[0].Tokens.Length)
                        {
                            // Last token: flush the final word.
                            word_End = c.StartTime;
                            new_Data.start = word_Start;
                            new_Data.end = word_End;
                            new_Data.word = temp;
                            full_Text.Add(new_Data);
                            w += temp + ',';
                            w += (double)word_Start + ",";
                            w += (double)word_End + "/";
                            temp = "";
                        }
                        count++;
                    }
                }
                else
                {
                    speechResult = sttClient.SpeechToText(
                        waveBuffer.ShortBuffer,
                        Convert.ToUInt32(waveBuffer.MaxSize / 2));
                }
                // sphinx-doc: csharp_ref_inference_stop
                stopwatch.Stop();
                Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
                waveBuffer.Clear();
                return w;
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    return " ";
}
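// NOTE (assumption): the "data" type used by the two Main1 variants above is not defined
// in these snippets. Below is a minimal sketch consistent with the fields they assign
// (start, end, word); the original project's definition may differ.
public class data
{
    public double start { get; set; } // StartTime of the first token of the word, in seconds
    public double end { get; set; }   // StartTime of the token that closes the word
    public string word { get; set; }  // accumulated characters of the word
}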