/// <summary>
/// Writes one numbered WAV clip per usable text part in <paramref name="snip"/> and
/// builds the matching transcription index text (one tab-separated line per clip).
/// </summary>
/// <param name="ExportFolder">Destination folder for the generated .wav files.</param>
/// <param name="snip">Snippet whose text parts are exported.</param>
/// <param name="yesIfVoice">Yes = Custom Voice format (file number only in the index);
/// anything else = Acoustic Dataset format (file name including the .wav extension).</param>
/// <param name="index">Running clip number, incremented for every clip written.</param>
/// <returns>The accumulated transcription index text.</returns>
private string CreateTextPartFilesFromSnippet(string ExportFolder, TrackSnippetViewModel snip, MessageBoxResult yesIfVoice, ref int index)
{
    var transcriptionsFileText = "";
    foreach (var tp in snip.TextParts)
    {
        // Skip parts flagged as bad or that carry no usable text.
        if (!tp.IsOK || string.IsNullOrWhiteSpace(tp.Text))
        {
            continue;
        }

        var fileNumberString = index.ToString("D4"); // zero-padded, e.g. "0001"
        var fileName = Path.Combine(ExportFolder, $"{fileNumberString}.wav");

        if (yesIfVoice == MessageBoxResult.Yes)
        {
            // Custom Voice dataset format: file number only, no extension.
            transcriptionsFileText += $"{fileNumberString}\t{tp.Text.Trim()}{Environment.NewLine}";
        }
        else
        {
            // Acoustic dataset format: file name including the .wav extension.
            transcriptionsFileText += $"{fileNumberString}.wav\t{tp.Text.Trim()}{Environment.NewLine}";
        }

        // Cut this text part out of the snippet audio, padded with 800 ms of leading silence.
        WavFileUtils.TakeClipAddSilence(snip.FilePath, TimeSpan.FromMilliseconds(800), TimeSpan.FromMilliseconds(tp.StartMills), TimeSpan.FromMilliseconds(tp.TextWidth), fileName);
        index++;
    }

    return transcriptionsFileText;
}
/// <summary>
/// Creates a timestamped sub-folder under the current project and extracts this
/// snippet's audio span from the source file into its own WAV file.
/// Side effect: <paramref name="snippet"/>.FilePath is updated to the new clip path.
/// </summary>
/// <param name="snippet">Snippet whose audio clip is written.</param>
/// <param name="ix">Sequence number embedded in the generated file name.</param>
private void MakeSnippetAudioFiles(TrackSnippetViewModel snippet, int ix)
{
    var newProjectFolder = Path.Combine(Settings.Default.ProjectsFolder, SelectedProject.Name, DateTime.Now.ToString("ddMMyyyyHHmmss"));
    Directory.CreateDirectory(newProjectFolder);

    // Fix: the original used "ix++" here; the post-increment on a by-value parameter
    // was dead (the incremented value was never read) and only misled readers.
    snippet.FilePath = Path.Combine(newProjectFolder, $"{Path.GetFileNameWithoutExtension(AudioFilePath)}_{ix}.wav");

    var startTime = TimeSpan.FromTicks(snippet.AudioSnippet.OffsetInTicks);
    WavFileUtils.TakeClipAddSilence(AudioFilePath, TimeSpan.Zero, startTime, snippet.AudioSnippet.Duration, snippet.FilePath);
}
/// <summary>
/// Runs Azure Cognitive Services continuous speech recognition over the current audio
/// file (extracting a WAV first if needed), building one TrackSnippetViewModel per
/// recognized utterance and adding it to <paramref name="newCollection"/> on the UI thread.
/// Interim "Recognizing" hypotheses are reconciled word-by-word against the running
/// textPartsAll list to estimate per-word start times and widths.
/// </summary>
/// <param name="stopRecognition">Completed when the session stops or is cancelled.</param>
/// <param name="newCollection">Receives one snippet view-model per recognized phrase.</param>
private async Task GetTranscriptionFromAzure(TaskCompletionSource<int> stopRecognition, List<TrackSnippetViewModel> newCollection)
{
    // Creates an instance of a speech config with the selected subscription key and
    // service region (e.g. "westus"); a custom endpoint id is applied when configured.
    var config = SpeechConfig.FromSubscription(TranscribeEndpointsVM.SelectedTranscribeEndpoint.Key, TranscribeEndpointsVM.SelectedTranscribeEndpoint.Region);
    if (!string.IsNullOrWhiteSpace(TranscribeEndpointsVM.SelectedTranscribeEndpoint.Endpoint))
    {
        config.EndpointId = TranscribeEndpointsVM.SelectedTranscribeEndpoint.Endpoint;
    }

    config.OutputFormat = OutputFormat.Detailed;

    var log = "";
    // Ticks-per-percent scale used for the progress read-out.
    double OnePercent = 100D / CurrentAudioVM.Duration.TimeSpan.Ticks;
    var tLog = new transLog { Started = DateTime.Now, FilePath = AudioFilePath };
    try
    {
        if (Path.GetExtension(AudioFilePath).ToLower() != ".wav")
        {
            // Source is not WAV: extract the audio track to a sibling .wav first.
            var wavFileName = Path.Combine(Path.GetDirectoryName(AudioFilePath), Path.GetFileNameWithoutExtension(AudioFilePath) + ".wav");
            WavFileUtils.ExtractWavFromMedia(AudioFilePath, wavFileName);
            AudioFilePath = wavFileName;
        }

        using (var audioInput = AudioConfig.FromWavFileInput(AudioFilePath))
        {
            using (var recognizer = new SpeechRecognizer(config, audioInput))
            {
                // Interim hypothesis: merge the partial text into textPartsAll,
                // logging every change, and estimate timing for any new word(s).
                recognizer.Recognizing += (s, e) =>
                {
                    transLogRow dyn = new transLogRow();
                    var txtCnt = textPartsAll.Count;
                    var dur = e.Result.Duration;
                    var totDone = TimeSpan.FromTicks(e.Result.OffsetInTicks + dur.Ticks);
                    var perc = OnePercent * (dur.Ticks + e.Result.OffsetInTicks);
                    TranscribeInfo = $"{Math.Floor(perc)}%: {newCollection.Count + 1}: {totDone.Hours}:{totDone.Minutes}:{totDone.Seconds}: {txtCnt}: {e.Result.Text}";
                    dyn.Args = e;
                    var textBit = e.Result.Text;
                    var ary = textBit.Split(' ');
                    var lastGoodIx = 0;
                    var goodTextParts = new List<TextPart>();
                    var isChange = false;
                    for (var a = 0; a < ary.Length; a++)
                    {
                        if (a > txtCnt - 1)
                        {
                            // More words in this hypothesis than we have parts for.
                            dyn.Notes += $"[new={txtCnt - a}]";
                            break;
                        }

                        if (ary[a].Trim() != textPartsAll[a].Text.Trim())
                        {
                            // The recognizer revised an earlier word; adopt the new text.
                            dyn.Notes += $"[chg={a}:{textPartsAll[a].Text}={ary[a]}]";
                            textPartsAll[a].Text = ary[a];
                            isChange = true;
                        }

                        goodTextParts.Add(textPartsAll[a]);
                        lastGoodIx = a;
                    }

                    if (lastGoodIx < textPartsAll.Count - 1)
                    {
                        dyn.Notes += $"[lth chg]";
                    }

                    double detectedEstimate;
                    // Note: the original wrapped this in "catch (Exception ex) { throw; }",
                    // which only rethrew; try/finally alone has identical behavior.
                    try
                    {
                        if (goodTextParts.Count >= ary.Length)
                        {
                            return; // nothing new
                        }

                        var newBit = "";
                        for (var x = goodTextParts.Count; x < ary.Length; x++)
                        {
                            newBit += ary[x] + " ";
                        }

                        // 200 magic number - identification time about 200 mills
                        detectedEstimate = e.Result.Duration.TotalMilliseconds - 200;
                        var sinceLastEvent = detectedEstimate - lastMillisecondsTotal;
                        if (sinceLastEvent < 1)
                        {
                            sinceLastEvent = 1;
                        }

                        var cnt = goodTextParts.Count;
                        if (cnt > 1 && !isChange)
                        {
                            // Events arrived faster than the max word length: shrink the
                            // previous word's width so the parts stay contiguous.
                            if (sinceLastEvent < ADJUSTED_SIZE_MAX_WORD_LENGTH)
                            {
                                dyn.Notes += $"[adj={cnt - 1}, snc={sinceLastEvent}, lgt={goodTextParts[cnt - 1].TextWidth} - {ADJUSTED_SIZE_MAX_WORD_LENGTH - sinceLastEvent}]";
                                goodTextParts[cnt - 1].TextWidth -= (ADJUSTED_SIZE_MAX_WORD_LENGTH - sinceLastEvent);
                            }
                        }

                        // Finally, save any new bit(s)
                        goodTextParts.Add(new TextPart { Text = newBit.Trim(), StartMills = detectedEstimate, TextWidth = ADJUSTED_SIZE_MAX_WORD_LENGTH });
                    }
                    finally
                    {
                        log += JsonConvert.SerializeObject(dyn) + Environment.NewLine;
                    }

                    lastMillisecondsTotal = detectedEstimate; // start point for next text
                    textPartsAll = goodTextParts;
                };

                // Finalized phrase: snapshot the accumulated parts into a snippet
                // view-model and reset the per-phrase state.
                recognizer.Recognized += (s, e) =>
                {
                    if (e.Result.Reason == ResultReason.RecognizedSpeech)
                    {
                        var snippet = new TrackSnippetViewModel()
                        {
                            AudioSnippet = e.Result,
                            Scale = scale,
                            Gain = gain,
                            RawText = e.Result.Text,
                            OffsetInTicks = e.Result.OffsetInTicks
                        };
                        snippet.DurationMilliseconds = e.Result.Duration.TotalMilliseconds;
                        snippet.TextParts = new ObservableCollection<TextPart>(textPartsAll);
                        // Collection is bound to the UI, so mutate it on the dispatcher thread.
                        parentControl.Dispatcher.Invoke(() => newCollection.Add(snippet));
                        textPartsAll = new List<TextPart>();
                        lastMillisecondsTotal = 0;
                    }
                    else if (e.Result.Reason == ResultReason.NoMatch)
                    {
                        Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                    }
                };

                recognizer.Canceled += (s, e) =>
                {
                    Console.WriteLine($"CANCELED: Reason={e.Reason}");
                    if (e.Reason == CancellationReason.Error)
                    {
                        MessageBox.Show($"FAILED: Are you using a valid subscription key and region? ErrorCode = {e.ErrorCode}: ErrorDetails = {e.ErrorDetails}");
                    }

                    stopRecognition.TrySetResult(0);
                };

                recognizer.SessionStarted += (s, e) => Console.WriteLine("\n Session started event.");

                recognizer.SessionStopped += (s, e) =>
                {
                    Console.WriteLine("\n Session stopped event.");
                    Console.WriteLine("\nStop recognition.");
                    stopRecognition.TrySetResult(0);
                };

                // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
                await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                // Fix: the original blocked the thread with Task.WaitAny inside an async
                // method; awaiting the completion source is non-blocking and equivalent.
                await stopRecognition.Task.ConfigureAwait(false);

                // Stops recognition.
                await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
            }
        }
    }
    catch (Exception excp)
    {
        Debug.WriteLine($"{excp}");
    }
    finally
    {
        if (Settings.Default.TranscriptionLogging)
        {
            // Fix: the original double-formatted the timestamp
            // (DateTime.Now.ToString($"{DateTime.Now.ToString("ddMMyyyyHHmmss")}"))
            // which only produced the right text because digits pass through custom
            // DateTime format strings literally. It also wrapped this in a useless
            // "catch (Exception ex) { throw; }".
            var logFile = $"TranscriptionLog_{DateTime.Now.ToString("ddMMyyyyHHmmss")}.txt";
            logFile = Path.Combine(Path.GetFullPath(SettingsVM.ProjectsFolder), logFile);
            File.WriteAllText(logFile, log);
            tLog.LogPath = logFile;
            SelectedProject.TranscribeLogs.Add(tLog);
            Debug.WriteLine($"Log file created at {logFile}");
        }
    }
}