/// <summary>
/// Mixes one buffer of audio: clears all per-track buffers, renders the
/// DirectSound channels, applies reverb to them, renders the GB (PSG)
/// channels, then sums unmuted tracks (scaled by MasterVolume) into the
/// interleaved stereo output and hands it to the playback buffer.
/// </summary>
public void Process()
{
    // Not initialized yet
    if (numTracks == 0)
    {
        return;
    }
    // Start from silence: zero every per-track buffer and the mix target.
    for (int t = 0; t < trackBuffers.Length; t++)
    {
        Array.Clear(trackBuffers[t], 0, trackBuffers[t].Length);
    }
    audio.Clear();
    // Render DirectSound channels; OwnerIdx 0xFF marks an unowned channel.
    foreach (var channel in dsChannels)
    {
        if (channel.OwnerIdx == 0xFF)
        {
            continue;
        }
        channel.Process(trackBuffers[channel.OwnerIdx]);
    }
    // Reverb only applies to DirectSound
    for (int t = 0; t < numTracks; t++)
    {
        reverbs[t]?.Process(trackBuffers[t], SamplesPerBuffer);
    }
    // GB channels are rendered after reverb so it never touches them.
    foreach (var channel in gbChannels)
    {
        if (channel.OwnerIdx == 0xFF)
        {
            continue;
        }
        channel.Process(trackBuffers[channel.OwnerIdx]);
    }
    // Mix unmuted tracks into interleaved stereo (even = left, odd = right).
    for (int t = 0; t < numTracks; t++)
    {
        if (mutes[t])
        {
            continue;
        }
        float[] trackBuf = trackBuffers[t];
        for (int s = 0; s < SamplesPerBuffer; s++)
        {
            int left = s * 2;
            audio.FloatBuffer[left] += trackBuf[left] * MasterVolume;
            audio.FloatBuffer[left + 1] += trackBuf[left + 1] * MasterVolume;
        }
    }
    buffer.AddSamples(audio, 0, audio.ByteBufferCount);
}
/// <summary>
/// Mixes one buffer of audio from the given tracks into the interleaved
/// stereo output, applying a per-sample fade ramp, and optionally forwards
/// the result to the playback buffer and/or the recording writer.
/// </summary>
/// <param name="tracks">Sequencer tracks to render.</param>
/// <param name="output">When true, queue the mixed buffer for playback.</param>
/// <param name="recording">When true, also write the mixed buffer to the wave file.</param>
public void Process(Track[] tracks, bool output, bool recording)
{
    _audio.Clear();
    // Master volume at the first sample of this buffer plus a per-sample
    // increment, so a fade ramps smoothly across the buffer.
    float masterLevel;
    float masterStep;
    if (_isFading && _fadeMicroFramesLeft == 0)
    {
        // Fade finished: output silence.
        masterLevel = 0;
        masterStep = 0;
    }
    else
    {
        float fromMaster = 1f;
        float toMaster = 1f;
        if (_fadeMicroFramesLeft > 0)
        {
            // Fade curve exponent; negative fade positions clamp to silence.
            const float scale = 10f / 6f;
            fromMaster *= (_fadePos < 0f) ? 0f : (float)Math.Pow(_fadePos, scale);
            _fadePos += _fadeStepPerMicroframe;
            toMaster *= (_fadePos < 0f) ? 0f : (float)Math.Pow(_fadePos, scale);
            _fadeMicroFramesLeft--;
        }
        masterLevel = fromMaster;
        masterStep = (toMaster - fromMaster) * _samplesReciprocal;
    }
    for (int i = 0; i < Player.NumTracks; i++)
    {
        Track track = tracks[i];
        // Skip tracks that cannot currently produce sound.
        bool audible = track.Enabled && track.NoteDuration != 0 && !track.Channel.Stopped && !Mutes[i];
        if (!audible)
        {
            continue;
        }
        float[] trackBuf = _trackBuffers[i];
        Array.Clear(trackBuf, 0, trackBuf.Length);
        track.Channel.Process(trackBuf);
        float level = masterLevel;
        for (int j = 0; j < SamplesPerBuffer; j++)
        {
            int left = j * 2;
            _audio.FloatBuffer[left] += trackBuf[left] * level;
            _audio.FloatBuffer[left + 1] += trackBuf[left + 1] * level;
            level += masterStep;
        }
    }
    if (output)
    {
        _buffer.AddSamples(_audio.ByteBuffer, 0, _audio.ByteBufferCount);
    }
    if (recording)
    {
        _waveWriter.Write(_audio.ByteBuffer, 0, _audio.ByteBufferCount);
    }
}
/// <summary>
/// Mixes one buffer of audio: renders every owned channel into its owner
/// track's buffer, applies reverb to the DirectSound output, then sums the
/// unmuted tracks (scaled by MasterVolume) into the interleaved stereo
/// output and hands it to the playback buffer.
/// </summary>
internal static void Process()
{
    // Start from silence: zero each per-track buffer and the mix target.
    foreach (var buf in trackBuffers)
    {
        Array.Clear(buf, 0, buf.Length);
    }
    audio.Clear();
    // Render DirectSound channels; OwnerIdx 0xFF marks an unowned channel.
    foreach (var c in dsChannels)
    {
        if (c.OwnerIdx != 0xFF)
        {
            c.Process(trackBuffers[c.OwnerIdx]);
        }
    }
    // Reverb only applies to DirectSound
    for (int i = 0; i < trackBuffers.Length; i++)
    {
        reverbs[i]?.Process(trackBuffers[i], (int)SamplesPerBuffer);
    }
    // GB channels are rendered after reverb so it never touches them.
    foreach (var c in gbChannels)
    {
        if (c.OwnerIdx != 0xFF)
        {
            c.Process(trackBuffers[c.OwnerIdx]);
        }
    }
    // Mix unmuted tracks into interleaved stereo (even = left, odd = right).
    // NOTE(review): this loop runs to MAX_TRACKS while the reverb loop above
    // runs to trackBuffers.Length — presumably the same value; confirm.
    for (int i = 0; i < MAX_TRACKS; i++)
    {
        if (mutes[i])
        {
            continue;
        }
        var buf = trackBuffers[i];
        for (int j = 0; j < SamplesPerBuffer; j++)
        {
            audio.FloatBuffer[j * 2] += buf[j * 2] * MasterVolume;
            audio.FloatBuffer[j * 2 + 1] += buf[j * 2 + 1] * MasterVolume;
        }
    }
    buffer.AddSamples(audio, 0, audio.ByteBufferCount);
}
/// <summary>
/// Transcribes a bundled WAV sample with DeepSpeech (model + external
/// scorer) and prints the recognized text.
/// </summary>
static void Main(string[] args)
{
    using (IDeepSpeech client = new DeepSpeech(DEEPSPEECH_MODEL))
    {
        client.EnableExternalScorer(DEEPSPEECH_SCORER);
        string audioPath = @"audio\2830-3980-0043.wav";
        var pcm = new WaveBuffer(File.ReadAllBytes(audioPath));
        // ShortBuffer views the bytes as 16-bit samples, so the sample
        // count is half the byte count.
        uint sampleCount = Convert.ToUInt32(pcm.MaxSize / 2);
        string transcript = client.SpeechToText(pcm.ShortBuffer, sampleCount);
        Console.WriteLine(transcript);
        pcm.Clear();
    }
}
/// <summary>
/// Runs offline speech-to-text over the temporary WAV file and returns one
/// string per candidate transcript, best (highest confidence) first.
/// </summary>
/// <returns>Candidate transcriptions ordered by descending confidence.</returns>
private List<String> Transcribe_Offline()
{
    List<String> result = new List<string>();
    var waveBuffer = new WaveBuffer(File.ReadAllBytes(tmpWavFilePath));
    using (var waveInfo = new WaveFileReader(tmpWavFilePath))
    {
        // ShortBuffer views the bytes as 16-bit samples (count = bytes / 2);
        // 16000 is the expected sample rate.
        Metadata metaResult = _sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer,
            Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000);
        // BUG FIX: OrderByDescending is a pure LINQ operator — the original
        // code discarded its return value, so the transcripts were never
        // actually sorted. Capture the ordered sequence instead.
        List<CandidateTranscript> candidateTranscriptions = metaResult.Transcripts
            .OrderByDescending(x => x.Confidence)
            .ToList();
        foreach (CandidateTranscript ct in candidateTranscriptions)
        {
            result.Add(MetadataToString(ct));
        }
    }
    waveBuffer.Clear();
    return (result);
}
/// <summary>
/// Mixes one buffer of audio from the given tracks into the interleaved
/// stereo output, applying a per-sample fade ramp, and optionally forwards
/// the result to the playback buffer and/or the recording writer.
/// </summary>
/// <param name="tracks">Sequencer tracks to render.</param>
/// <param name="output">When true, queue the mixed buffer for playback.</param>
/// <param name="recording">When true, also write the mixed buffer to the wave file.</param>
public void Process(Track[] tracks, bool output, bool recording)
{
    audio.Clear();
    // Evaluate the fade curve at the start and end of this microframe; the
    // mix loop then ramps linearly between the two across the buffer.
    float fromMaster = 1f;
    float toMaster = 1f;
    if (fadeMicroFramesLeft > 0)
    {
        // Fade curve exponent; negative fade positions clamp to silence.
        const float scale = 10f / 6f;
        fromMaster *= (fadePos < 0f) ? 0f : (float)Math.Pow(fadePos, scale);
        fadePos += fadeStepPerMicroframe;
        toMaster *= (fadePos < 0f) ? 0f : (float)Math.Pow(fadePos, scale);
        fadeMicroFramesLeft--;
    }
    float masterStep = (toMaster - fromMaster) * samplesReciprocal;
    // 0x10 tracks — presumably the sequencer's fixed track count.
    for (int trackIdx = 0; trackIdx < 0x10; trackIdx++)
    {
        Track track = tracks[trackIdx];
        // Skip tracks that cannot currently produce sound.
        bool audible = track.Enabled && track.NoteDuration != 0 && !track.Channel.Stopped && !Mutes[trackIdx];
        if (!audible)
        {
            continue;
        }
        float[] trackBuf = trackBuffers[trackIdx];
        Array.Clear(trackBuf, 0, trackBuf.Length);
        track.Channel.Process(trackBuf);
        float level = fromMaster;
        for (int j = 0; j < SamplesPerBuffer; j++)
        {
            int left = j * 2;
            audio.FloatBuffer[left] += trackBuf[left] * level;
            audio.FloatBuffer[left + 1] += trackBuf[left + 1] * level;
            level += masterStep;
        }
    }
    if (output)
    {
        buffer.AddSamples(audio.ByteBuffer, 0, audio.ByteBufferCount);
    }
    if (recording)
    {
        waveWriter.Write(audio.ByteBuffer, 0, audio.ByteBufferCount);
    }
}
// Called by the soundcard driver to pull the next chunk of audio.
// Mixes every non-null source into the supplied byte buffer (viewed as
// floats) and always reports the full count so playback keeps running.
public int Read(byte[] buffer, int offset, int count)
{
    // 4 bytes per float sample.
    int samplesNeeded = count / 4;
    var outBuffer = new WaveBuffer(buffer);
    // Ensure the intermediate mix buffer is large enough, then start the
    // output from silence.
    FMixerBuffer = BufferHelpers.Ensure(FMixerBuffer, samplesNeeded);
    outBuffer.Clear();
    lock (source)
    {
        int inputCount = source.Count;
        //var invCount = 1.0f/inputCount;
        for (int i = 0; i < inputCount; i++)
        {
            if (source[i] == null)
            {
                continue;
            }
            // Pulling from an input starts the evaluation of the audio graph.
            source[i].Read(FMixerBuffer, offset / 4, samplesNeeded);
            // Accumulate into the output, zeroing the scratch buffer as we go.
            for (int j = 0; j < samplesNeeded; j++)
            {
                outBuffer.FloatBuffer[j] += FMixerBuffer[j];
                FMixerBuffer[j] = 0;
            }
        }
        // Tell the engine that reading has finished.
        FReadingFinished();
    }
    return count; // always run
}
/// <summary>
/// Console entry point: loads a DeepSpeech model (optionally with a language
/// model), runs inference on a WAV file, and prints the transcription plus
/// timings. Arguments: --model, --lm, --trie, --audio, --extended.
/// </summary>
static void Main(string[] args)
{
    string model = null;
    string lm = null;
    string trie = null;
    string audio = null;
    bool extended = false;
    if (args.Length > 0)
    {
        model = GetArgument(args, "--model");
        lm = GetArgument(args, "--lm");
        trie = GetArgument(args, "--trie");
        audio = GetArgument(args, "--audio");
        // Any non-blank value enables extended (metadata) output.
        extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
    }
    const uint BEAM_WIDTH = 500;
    const float LM_ALPHA = 0.75f;
    const float LM_BETA = 1.85f;
    Stopwatch stopwatch = new Stopwatch();
    using (IDeepSpeech sttClient = new DeepSpeech())
    {
        try
        {
            Console.WriteLine("Loading model...");
            stopwatch.Start();
            sttClient.CreateModel(model ?? "output_graph.pbmm", BEAM_WIDTH);
            stopwatch.Stop();
            Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
            stopwatch.Reset();
            if (lm != null)
            {
                // FIX: message typo ("Loadin" -> "Loading").
                Console.WriteLine("Loading LM...");
                sttClient.EnableDecoderWithLM(lm ?? "lm.binary", trie ?? "trie", LM_ALPHA, LM_BETA);
            }
            string audioFile = audio ?? "arctic_a0024.wav";
            var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
            using (var waveInfo = new WaveFileReader(audioFile))
            {
                Console.WriteLine("Running inference....");
                stopwatch.Start();
                string speechResult;
                if (extended)
                {
                    // Extended mode returns per-token metadata.
                    Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer,
                        Convert.ToUInt32(waveBuffer.MaxSize / 2));
                    speechResult = MetadataToString(metaResult);
                }
                else
                {
                    // 16-bit PCM: sample count is half the byte count.
                    speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer,
                        Convert.ToUInt32(waveBuffer.MaxSize / 2));
                }
                stopwatch.Stop();
                Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                Console.WriteLine((extended ? $"Extended result: " : "Recognized text: ") + speechResult);
            }
            waveBuffer.Clear();
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }
    }
}
/// <summary>
/// Renders one audio buffer: clears every per-track buffer, lets each owned
/// PCM8/PSG channel render into its owner track's buffer, computes the fade
/// ramp for this microframe, then mixes the unmuted tracks into interleaved
/// stereo and forwards the result to playback and/or the recording writer.
/// </summary>
/// <param name="output">When true, queue the mixed buffer for playback.</param>
/// <param name="recording">When true, also write the mixed buffer to the wave file.</param>
public void Process(bool output, bool recording)
{
    // Start from silence.
    for (int i = 0; i < _trackBuffers.Length; i++)
    {
        float[] buf = _trackBuffers[i];
        Array.Clear(buf, 0, buf.Length);
    }
    _audio.Clear();
    // A channel with no Owner is idle; owned channels render into the
    // buffer of the track that owns them.
    for (int i = 0; i < _pcm8Channels.Length; i++)
    {
        PCM8Channel c = _pcm8Channels[i];
        if (c.Owner != null)
        {
            c.Process(_trackBuffers[c.Owner.Index]);
        }
    }
    for (int i = 0; i < _psgChannels.Length; i++)
    {
        PSGChannel c = _psgChannels[i];
        if (c.Owner != null)
        {
            c.Process(_trackBuffers[c.Owner.Index]);
        }
    }
    // Master volume at the first sample plus a per-sample increment, so a
    // fade ramps smoothly across the buffer.
    float masterStep;
    float masterLevel;
    if (_isFading && _fadeMicroFramesLeft == 0)
    {
        // Fade finished: output silence.
        masterStep = 0;
        masterLevel = 0;
    }
    else
    {
        float fromMaster = 1f;
        float toMaster = 1f;
        if (_fadeMicroFramesLeft > 0)
        {
            // Fade curve exponent; negative fade positions clamp to silence.
            const float scale = 10f / 6f;
            fromMaster *= (_fadePos < 0f) ? 0f : (float)Math.Pow(_fadePos, scale);
            _fadePos += _fadeStepPerMicroframe;
            toMaster *= (_fadePos < 0f) ? 0f : (float)Math.Pow(_fadePos, scale);
            _fadeMicroFramesLeft--;
        }
        masterStep = (toMaster - fromMaster) * _samplesReciprocal;
        masterLevel = fromMaster;
    }
    // Mix unmuted tracks into interleaved stereo (even = left, odd = right).
    for (int i = 0; i < _trackBuffers.Length; i++)
    {
        if (!Mutes[i])
        {
            float level = masterLevel;
            float[] buf = _trackBuffers[i];
            for (int j = 0; j < SamplesPerBuffer; j++)
            {
                _audio.FloatBuffer[j * 2] += buf[j * 2] * level;
                _audio.FloatBuffer[(j * 2) + 1] += buf[(j * 2) + 1] * level;
                level += masterStep;
            }
        }
    }
    if (output)
    {
        _buffer.AddSamples(_audio.ByteBuffer, 0, _audio.ByteBufferCount);
    }
    if (recording)
    {
        _waveWriter.Write(_audio.ByteBuffer, 0, _audio.ByteBufferCount);
    }
}
/// <summary>
/// Console entry point: loads a DeepSpeech model (and optional LM), runs
/// inference on a WAV file, and prints the transcription plus timings.
/// Arguments: --model, --alphabet, --lm, --trie, --audio.
/// </summary>
static void Main(string[] args)
{
    string model = null;
    string alphabet = null;
    string lm = null;
    string trie = null;
    string audio = null;
    if (args.Length > 0)
    {
        model = GetArgument(args, "--model");
        alphabet = GetArgument(args, "--alphabet");
        lm = GetArgument(args, "--lm");
        trie = GetArgument(args, "--trie");
        audio = GetArgument(args, "--audio");
    }
    const uint N_CEP = 26;
    const uint N_CONTEXT = 9;
    const uint BEAM_WIDTH = 200;
    const float LM_ALPHA = 0.75f;
    const float LM_BETA = 1.85f;
    Stopwatch stopwatch = new Stopwatch();
    using (IDeepSpeech sttClient = new DeepSpeech())
    {
        // 0 = success for the native DeepSpeech calls below.
        var result = 1;
        Console.WriteLine("Loading model...");
        stopwatch.Start();
        try
        {
            result = sttClient.CreateModel(model ?? "output_graph.pbmm", N_CEP, N_CONTEXT,
                alphabet ?? "alphabet.txt", BEAM_WIDTH);
        }
        catch (IOException ex)
        {
            // FIX: this catch guards model creation, not the LM.
            Console.WriteLine("Error loading model.");
            Console.WriteLine(ex.Message);
        }
        stopwatch.Stop();
        if (result == 0)
        {
            Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
            stopwatch.Reset();
            if (lm != null)
            {
                // FIX: message typo ("Loadin" -> "Loading").
                Console.WriteLine("Loading LM...");
                try
                {
                    result = sttClient.EnableDecoderWithLM(alphabet ?? "alphabet.txt",
                        lm ?? "lm.binary", trie ?? "trie", LM_ALPHA, LM_BETA);
                }
                catch (IOException ex)
                {
                    Console.WriteLine("Error loading lm.");
                    Console.WriteLine(ex.Message);
                }
            }
            string audioFile = audio ?? "arctic_a0024.wav";
            var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
            using (var waveInfo = new WaveFileReader(audioFile))
            {
                Console.WriteLine("Running inference....");
                stopwatch.Start();
                // 16-bit PCM: sample count is half the byte count; 16000 = sample rate.
                string speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer,
                    Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000);
                stopwatch.Stop();
                Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                Console.WriteLine($"Recognized text: {speechResult}");
            }
            waveBuffer.Clear();
        }
        else
        {
            // FIX: message typo ("loding" -> "loading").
            Console.WriteLine("Error loading the model.");
        }
    }
}
/// <summary>
/// Console entry point: loads a DeepSpeech model (and optional external
/// scorer), transcribes a WAV file, and prints the result plus timings.
/// Arguments: --model, --scorer, --audio, --extended.
/// The "sphinx-doc" comments are extraction markers for the documentation
/// build — do not remove or move them.
/// </summary>
static void Main(string[] args)
{
    string model = null;
    string scorer = null;
    string audio = null;
    bool extended = false;
    if (args.Length > 0)
    {
        model = GetArgument(args, "--model");
        scorer = GetArgument(args, "--scorer");
        audio = GetArgument(args, "--audio");
        // Any non-blank value enables extended (per-token metadata) output.
        extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
    }
    Stopwatch stopwatch = new Stopwatch();
    try
    {
        Console.WriteLine("Loading model...");
        stopwatch.Start();
        // sphinx-doc: csharp_ref_model_start
        using IDeepSpeech sttClient = new DeepSpeech(model ?? "output_graph.pbmm");
        // sphinx-doc: csharp_ref_model_stop
        stopwatch.Stop();
        Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
        stopwatch.Reset();
        if (scorer != null)
        {
            Console.WriteLine("Loading scorer...");
            sttClient.EnableExternalScorer(scorer ?? "kenlm.scorer");
        }
        string audioFile = audio ?? "new-home-in-the-stars-16k.wav";
        var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
        using (var waveInfo = new WaveFileReader(audioFile))
        {
            Console.WriteLine("Running inference....");
            stopwatch.Start();
            string speechResult;
            // sphinx-doc: csharp_ref_inference_start
            if (extended)
            {
                // Request a single candidate transcript with token metadata.
                Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer,
                    Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                speechResult = MetadataToString(metaResult.Transcripts[0]);
            }
            else
            {
                // 16-bit PCM: sample count is half the byte count.
                speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer,
                    Convert.ToUInt32(waveBuffer.MaxSize / 2));
            }
            // sphinx-doc: csharp_ref_inference_stop
            stopwatch.Stop();
            Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
            Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
            Console.WriteLine((extended ? $"Extended result: " : "Recognized text: ") + speechResult);
        }
        waveBuffer.Clear();
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
}
/// <summary>
/// Benchmarks DeepSpeech against the LibriSpeech test-clean set: builds a
/// map of audio file -> reference transcript, transcribes each clip,
/// computes per-sentence WER via a Python helper, and writes the aggregate
/// WER to result.txt.
/// </summary>
static void Main(string[] args)
{
    FFMPEG ffmpeg = new FFMPEG("ffmpeg.exe");
    // Shared start-info for the Python WER helper.
    pythonProcess = new ProcessStartInfo
    {
        FileName = "python.exe",
        CreateNoWindow = true, // No window
        UseShellExecute = false,
        RedirectStandardOutput = true
    };
    // Each transcript line is "<utterance-id> <words...>"; the FLAC file
    // with that id sits next to the transcript file.
    var dirs = Directory.GetFileSystemEntries("test-clean/LibriSpeech/test-clean", "*.txt", SearchOption.AllDirectories);
    IDictionary<string, string> dataset = new Dictionary<string, string>();
    foreach (var transcriptionFile in dirs)
    {
        FileInfo fileInf = new FileInfo(transcriptionFile);
        foreach (var sentenceLine in File.ReadAllLines(transcriptionFile))
        {
            var sentenceSplit = sentenceLine.Split(' ');
            string audioName = fileInf.FullName.Replace(fileInf.Name, $"{sentenceSplit[0]}.flac");
            string sentence = string.Join(" ", sentenceSplit.ToList().Skip(1).ToArray()).ToLower();
            dataset.Add(audioName, sentence);
        }
    }
    const uint N_CEP = 26;
    const uint N_CONTEXT = 9;
    const uint BEAM_WIDTH = 200;
    const float LM_ALPHA = 0.75f;
    const float LM_BETA = 1.85f;
    const string modelVersion = "0.4.1";
    List<Sentence> samples = new List<Sentence>();
    using (var sttClient = new DeepSpeechClient.DeepSpeech())
    {
        // 0 = success for the native DeepSpeech calls below.
        var result = 1;
        Console.WriteLine("Loading model...");
        try
        {
            result = sttClient.CreateModel($"{modelVersion}/output_graph.pbmm", N_CEP, N_CONTEXT,
                $"{modelVersion}/alphabet.txt", BEAM_WIDTH);
        }
        catch (IOException ex)
        {
            // FIX: this catch guards model creation, not the LM.
            Console.WriteLine("Error loading model.");
            Console.WriteLine(ex.Message);
        }
        if (result == 0)
        {
            // FIX: message typo ("Loadin" -> "Loading").
            Console.WriteLine("Loading LM...");
            try
            {
                result = sttClient.EnableDecoderWithLM($"{modelVersion}/alphabet.txt",
                    $"{modelVersion}/lm.binary", $"{modelVersion}/trie", LM_ALPHA, LM_BETA);
            }
            catch (IOException ex)
            {
                Console.WriteLine("Error loading lm.");
                Console.WriteLine(ex.Message);
            }
            foreach (var sentencePair in dataset)
            {
                // FLAC -> 16-bit WAV at TempAudioFile, then transcribe.
                ConvertFileToWav(sentencePair.Key, ffmpeg);
                var waveBuffer = new WaveBuffer(File.ReadAllBytes(TempAudioFile));
                Console.WriteLine("Running inference....");
                string speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer,
                    Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000);
                Sentence sentenceResult = RunPythonWER(sentencePair.Value, speechResult);
                Console.WriteLine("================================================================================");
                Console.WriteLine($"Recognized text: {speechResult}");
                Console.WriteLine($"Correct text: {sentencePair.Value}");
                Console.WriteLine($"WER {Math.Round(sentenceResult.Wer,2)*100} %");
                Console.WriteLine("================================================================================");
                Console.WriteLine();
                samples.Add(sentenceResult);
                waveBuffer.Clear();
            }
        }
        else
        {
            // FIX: message typo ("loding" -> "loading").
            Console.WriteLine("Error loading the model.");
        }
    }
    // Aggregate WER = total edit distance / total reference length.
    double totalLevenshtein = samples.Select(x => x.Levenshtein).Sum();
    int totalLabelLength = samples.Select(x => x.Length).Sum();
    double finalWer = totalLevenshtein / totalLabelLength;
    File.WriteAllText("result.txt", finalWer.ToString(), Encoding.UTF8);
    // NOTE(review): finalWer is a fraction but is printed with "%" —
    // presumably it should be multiplied by 100 for display; confirm.
    Console.WriteLine($"Final WER: {finalWer} %");
    Console.ReadKey();
}
// This gets called from the soundcard driver to pull the next audio chunk.
// Mixes every connected source into its assigned output channel of the
// interleaved float buffer; always reports the full count so the audio
// callback keeps running even when a node fails.
public int Read(byte[] buffer, int offset, int count)
{
    var channels = WaveFormat.Channels;
    // 4 bytes per float sample, interleaved across `channels` channels.
    int samplesNeeded = count / (4 * channels);
    WaveBuffer wb = new WaveBuffer(buffer);
    // Fix buffer size.
    FMixerBuffer = BufferHelpers.Ensure(FMixerBuffer, samplesNeeded);
    // Empty the output buffer before accumulation.
    wb.Clear();
    lock (FSourceLock)
    {
        // First notify listeners so they can prepare for this buffer. Each
        // stage below logs and swallows exceptions so one faulty node
        // cannot stall the whole callback.
        foreach (var notify in FNotifys)
        {
            try
            {
                notify.NotifyProcess(samplesNeeded);
            }
            catch (Exception e)
            {
                System.Diagnostics.Debug.WriteLine(e.Message);
                System.Diagnostics.Debug.WriteLine(e.Source);
                System.Diagnostics.Debug.WriteLine(e.StackTrace);
            }
        }
        // Evaluate the sinks, e.g. buffer writers should write first to
        // have the latest data in the buffer storage.
        foreach (var sink in FSinks)
        {
            try
            {
                sink.Read(offset / 4, samplesNeeded);
            }
            catch (Exception e)
            {
                System.Diagnostics.Debug.WriteLine(e.Message);
                System.Diagnostics.Debug.WriteLine(e.Source);
                System.Diagnostics.Debug.WriteLine(e.StackTrace);
            }
        }
        // Evaluate the inputs.
        var inputCount = FSources.Count;
        for (int i = 0; i < inputCount; i++)
        {
            try
            {
                if (FSources[i].Signal != null)
                {
                    // Starts the calculation of the audio graph.
                    FSources[i].Signal.Read(FMixerBuffer, offset / 4, samplesNeeded);
                    // Wrap the source's channel index into the valid range.
                    var chan = FSources[i].Channel % channels;
                    // Add to output buffer, zeroing the scratch buffer as we go.
                    for (int j = 0; j < samplesNeeded; j++)
                    {
                        wb.FloatBuffer[j * channels + chan] += FMixerBuffer[j];
                        FMixerBuffer[j] = 0;
                    }
                }
            }
            catch (Exception e)
            {
                System.Diagnostics.Debug.WriteLine(e.Message);
                System.Diagnostics.Debug.WriteLine(e.Source);
                System.Diagnostics.Debug.WriteLine(e.StackTrace);
            }
        }
        // Tell the engine that reading has finished.
        FReadingFinished(samplesNeeded);
    }
    return (count); // always run
}
/// <summary>
/// Extracts audio from wwwroot/Video/{s}.mp4 via ffmpeg and runs DeepSpeech
/// on the resulting 16 kHz WAV, returning the recognized text. Returns " "
/// when the audio is not ready yet or an error occurs. NOTE(review): the
/// mp4->mp3, mp3->wav and transcription steps are mutually exclusive
/// branches and ffmpeg is started without waiting, so the method must be
/// invoked again after each conversion completes — confirm callers do this.
/// The "sphinx-doc" comments are documentation extraction markers.
/// </summary>
public static string Main1(string s)
{
    string model = null;
    string audio = null;
    string hotwords = null;
    bool extended = false;
    model = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.pbmm";
    var scorer = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.scorer";
    audio = "C:/Users/Nour El-Din/Documents/deepspeech/audio";
    //hotwords = GetArgument(args, "--hot_words");
    extended = true;
    Stopwatch stopwatch = new Stopwatch();
    try
    {
        Console.WriteLine("Loading model...");
        stopwatch.Start();
        // sphinx-doc: csharp_ref_model_start
        using (IDeepSpeech sttClient = new DeepSpeech("C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.pbmm"))
        {
            // sphinx-doc: csharp_ref_model_stop
            stopwatch.Stop();
            Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
            stopwatch.Reset();
            if (scorer != null)
            {
                Console.WriteLine("Loading scorer...");
                sttClient.EnableExternalScorer("C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.scorer");
            }
            if (hotwords != null)
            {
                // Hot-words come as "word:boost,word:boost,...".
                Console.WriteLine($"Adding hot-words {hotwords}");
                char[] sep = { ',' };
                string[] word_boosts = hotwords.Split(sep);
                foreach (string word_boost in word_boosts)
                {
                    char[] sep1 = { ':' };
                    string[] word = word_boost.Split(sep1);
                    sttClient.AddHotWord(word[0], float.Parse(word[1]));
                }
            }
            Directory.SetCurrentDirectory("C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video");
            // File name without extension; sibling .mp3/.wav paths.
            string v_Name = s.Split('.')[0];
            var enviroment = System.Environment.CurrentDirectory;
            string projectDirectory = Directory.GetParent(enviroment).Parent.FullName;
            Console.WriteLine(projectDirectory);
            // FIX: `File.Exists(...) ? true : false` simplified to the call itself.
            string m_Path = @$"C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video/{v_Name}.mp3";
            bool mp3_Found = File.Exists(m_Path);
            string w_Path = @$"C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video/{v_Name}.wav";
            bool wav_Found = File.Exists(w_Path);
            string strCmdText;
            if (!mp3_Found)
            {
                // NOTE(review): ffmpeg is started asynchronously; the file
                // will not exist until the spawned process finishes.
                strCmdText = $"/c ffmpeg -i {v_Name}.mp4 {v_Name}.mp3";
                System.Diagnostics.Process.Start("CMD.exe", strCmdText);
            }
            else if (!wav_Found)
            {
                // Convert to 16-bit, mono, 16 kHz PCM as DeepSpeech expects.
                strCmdText = $"/c ffmpeg -i {v_Name}.mp3 -acodec pcm_s16le -ac 1 -ar 16000 {v_Name}.wav";
                System.Diagnostics.Process.Start("CMD.exe", strCmdText);
            }
            else
            {
                string audioFile = $"C:/Users/Nour El-Din/Downloads/Graduation-Project-Dragons-master/Graduation Project Dragons/wwwroot/Video/{v_Name}.wav";
                var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
                using (var waveInfo = new WaveFileReader(audioFile))
                {
                    Console.WriteLine("Running inference....");
                    stopwatch.Start();
                    string w = "";
                    string speechResult;
                    // sphinx-doc: csharp_ref_inference_start
                    if (extended)
                    {
                        // Single candidate transcript with per-token timing metadata.
                        Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer,
                            Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                        speechResult = MetadataToString(metaResult.Transcripts[0]);
                        data new_Data = new data();
                        List<data> full_Text = new List<data>();
                        double word_Start = 0;
                        double word_End;
                        string temp = "";
                        // counter == 0 marks the start of a new word;
                        // counter2 counts tokens so the last word can be flushed.
                        int counter = 0;
                        int counter2 = 0;
                        foreach (var c in metaResult.Transcripts[0].Tokens)
                        {
                            counter2++;
                            if (counter == 0)
                            {
                                word_Start = c.StartTime;
                                counter++;
                            }
                            temp += c.Text;
                            if (c.Text == " ")
                            {
                                // A space token closes the current word.
                                counter = 0;
                                word_End = c.StartTime;
                                new_Data.start = word_Start;
                                new_Data.end = word_End;
                                new_Data.word = temp;
                                full_Text.Add(new_Data);
                                temp = "";
                                new_Data = new data();
                                continue;
                            }
                            w += c.Text;
                            // FIX: flush the final word on the LAST token
                            // (Tokens.Length), not one token early
                            // (Tokens.Length - 1), which dropped the last
                            // word — matches the sibling implementation.
                            if (counter2 == metaResult.Transcripts[0].Tokens.Length)
                            {
                                word_End = c.StartTime;
                                new_Data.start = word_Start;
                                new_Data.end = word_End;
                                new_Data.word = temp;
                                full_Text.Add(new_Data);
                                temp = "";
                            }
                        }
                    }
                    else
                    {
                        speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer,
                            Convert.ToUInt32(waveBuffer.MaxSize / 2));
                    }
                    // sphinx-doc: csharp_ref_inference_stop
                    stopwatch.Stop();
                    Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                    Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                    Console.WriteLine((extended ? $"Extended result: " : "Recognized text: ") + speechResult);
                    // FIX: Clear() was placed after the return and was unreachable.
                    waveBuffer.Clear();
                    return (w);
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    return (" ");
}
/// <summary>
/// Transcribes the audio of wwwroot/Video/{s} with DeepSpeech. Ensures an
/// MP3 and a 16 kHz mono WAV exist (converting with ffmpeg and waiting for
/// each conversion to finish), then returns the recognized words encoded as
/// "word,start,end/word,start,end/...". Returns " " on error.
/// The "sphinx-doc" comments are documentation extraction markers.
/// </summary>
public static string Main1(string s)
{
    string model = null;
    bool extended = false;
    model = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.pbmm";
    var scorer = "C:/Users/Nour El-Din/Documents/deepspeech/deepspeech-0.9.3-models.scorer";
    extended = true;
    string wwwPath = HomeController.Environment.WebRootPath;
    string path = Path.Combine(HomeController.Environment.WebRootPath, "Video");
    Stopwatch stopwatch = new Stopwatch();
    try
    {
        Console.WriteLine("Loading model...");
        stopwatch.Start();
        // sphinx-doc: csharp_ref_model_start
        using (IDeepSpeech sttClient = new DeepSpeech(model))
        {
            // sphinx-doc: csharp_ref_model_stop
            stopwatch.Stop();
            Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
            stopwatch.Reset();
            if (scorer != null)
            {
                Console.WriteLine("Loading scorer...");
                sttClient.EnableExternalScorer(scorer);
            }
            Directory.SetCurrentDirectory(path);
            // File name without extension; sibling .mp4/.mp3/.wav paths.
            string v_Name = s.Split('.')[0];
            string x = path + @$ "/{v_Name}.mp4";
            string m_Path = path + @$ "/{v_Name}.mp3";
            bool mp3_Found = File.Exists(m_Path) ? true : false;
            string w_Path = path + @$ "/{v_Name}.wav";
            bool wav_Found = File.Exists(w_Path) ? true : false;
            string strCmdText;
            if (!mp3_Found)
            {
                // Extract the audio track from the MP4; block until done.
                strCmdText = $"/c ffmpeg -i {v_Name}.mp4 {v_Name}.mp3";
                Process process = new Process();
                process.StartInfo.FileName = "cmd.exe";
                process.StartInfo.WindowStyle = ProcessWindowStyle.Hidden;
                process.StartInfo.Arguments = strCmdText;
                process.Start();
                process.WaitForExit();
            }
            if (!wav_Found)
            {
                // Convert to 16-bit, mono, 16 kHz PCM as DeepSpeech expects.
                strCmdText = $"/c ffmpeg -i {v_Name}.mp3 -acodec pcm_s16le -ac 1 -ar 16000 {v_Name}.wav";
                Process process = new Process();
                process.StartInfo.FileName = "cmd.exe";
                process.StartInfo.WindowStyle = ProcessWindowStyle.Hidden;
                process.StartInfo.Arguments = strCmdText;
                process.Start();
                process.WaitForExit();
            }
            string audioFile = path + $"/{v_Name}.wav";
            var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
            using (var waveInfo = new WaveFileReader(audioFile))
            {
                Console.WriteLine("Running inference....");
                stopwatch.Start();
                string w = "";
                string speechResult;
                // sphinx-doc: csharp_ref_inference_start
                if (extended)
                {
                    // Single candidate transcript with per-token timing metadata.
                    Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2), 1);
                    speechResult = MetadataToString(metaResult.Transcripts[0]);
                    data new_Data = new data();
                    List<data> full_Text = new List<data>();
                    double word_Start = 0;;
                    double word_End;
                    string temp = "";
                    // counter == 0 marks the start of a new word;
                    // counter2 counts tokens so the last word can be flushed.
                    int counter = 0;
                    int counter2 = 0;
                    int count = 0;
                    foreach (var c in metaResult.Transcripts[0].Tokens)
                    {
                        counter2++;
                        if (counter == 0)
                        {
                            word_Start = c.StartTime;
                            counter++;
                        }
                        temp += c.Text;
                        if (c.Text == " ")
                        {
                            // A space token closes the current word: record it
                            // and append "word,start,end/" to the result string.
                            counter = 0;
                            word_End = c.StartTime;
                            new_Data.start = word_Start;
                            new_Data.end = word_End;
                            new_Data.word = temp;
                            full_Text.Add(new_Data);
                            w += temp + ',';
                            w += (double)word_Start + ",";
                            w += (double)word_End + "/";
                            temp = "";
                            new_Data = new data();
                            continue;
                        }
                        //w += c.Text;
                        // Flush the final word when the last token is reached.
                        if (counter2 == metaResult.Transcripts[0].Tokens.Length)
                        {
                            word_End = c.StartTime;
                            new_Data.start = word_Start;
                            new_Data.end = word_End;
                            new_Data.word = temp;
                            full_Text.Add(new_Data);
                            w += temp + ',';
                            w += (double)word_Start + ",";
                            w += (double)word_End + "/";
                            temp = "";
                        }
                        count++;
                    }
                }
                else
                {
                    speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2));
                }
                // sphinx-doc: csharp_ref_inference_stop
                stopwatch.Stop();
                Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                Console.WriteLine((extended ? $"Extended result: " : "Recognized text: ") + speechResult);
                waveBuffer.Clear();
                return (w);
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    return (" ");
}
/// <summary>
/// Renders one buffer of audio: clears the per-track buffers, lets every
/// owned PCM8/PSG channel render into its owner track's buffer, then mixes
/// the unmuted tracks into interleaved stereo with a per-sample fade ramp,
/// forwarding the result to playback and/or the recording writer.
/// </summary>
/// <param name="output">When true, queue the mixed buffer for playback.</param>
/// <param name="recording">When true, also write the mixed buffer to the wave file.</param>
public void Process(bool output, bool recording)
{
    // Silence all scratch buffers and the mix target.
    foreach (float[] scratch in trackBuffers)
    {
        Array.Clear(scratch, 0, scratch.Length);
    }
    audio.Clear();
    // Channels with no owning track are idle and skipped.
    foreach (PCM8Channel channel in pcm8Channels)
    {
        if (channel.Owner != null)
        {
            channel.Process(trackBuffers[channel.Owner.Index]);
        }
    }
    foreach (PSGChannel channel in psgChannels)
    {
        if (channel.Owner != null)
        {
            channel.Process(trackBuffers[channel.Owner.Index]);
        }
    }
    // Evaluate the fade curve at the start and end of this microframe; the
    // mix loop ramps linearly between the two across the buffer.
    float fromMaster = 1f;
    float toMaster = 1f;
    if (fadeMicroFramesLeft > 0)
    {
        // Fade curve exponent; negative fade positions clamp to silence.
        const float scale = 10f / 6f;
        fromMaster *= (fadePos < 0f) ? 0f : (float)Math.Pow(fadePos, scale);
        fadePos += fadeStepPerMicroframe;
        toMaster *= (fadePos < 0f) ? 0f : (float)Math.Pow(fadePos, scale);
        fadeMicroFramesLeft--;
    }
    float masterStep = (toMaster - fromMaster) * samplesReciprocal;
    // Mix unmuted tracks into interleaved stereo (even = left, odd = right).
    for (int track = 0; track < trackBuffers.Length; track++)
    {
        if (Mutes[track])
        {
            continue;
        }
        float level = fromMaster;
        float[] trackBuf = trackBuffers[track];
        for (int sample = 0; sample < SamplesPerBuffer; sample++)
        {
            int left = sample * 2;
            audio.FloatBuffer[left] += trackBuf[left] * level;
            audio.FloatBuffer[left + 1] += trackBuf[left + 1] * level;
            level += masterStep;
        }
    }
    if (output)
    {
        buffer.AddSamples(audio.ByteBuffer, 0, audio.ByteBufferCount);
    }
    if (recording)
    {
        waveWriter.Write(audio.ByteBuffer, 0, audio.ByteBufferCount);
    }
}