Example #1
0
        /// <summary>
        /// Exports one WAV clip per usable text part of <paramref name="snip"/> into
        /// <paramref name="ExportFolder"/> and returns the matching transcription-index text
        /// (one tab-separated line per exported clip).
        /// </summary>
        /// <param name="ExportFolder">Destination folder for the numbered .wav files.</param>
        /// <param name="snip">Snippet whose text parts are exported; parts that are not OK or have blank text are skipped.</param>
        /// <param name="yesIfVoice">Yes = Custom Voice index format (file number only); otherwise Acoustic Dataset format (file name with ".wav").</param>
        /// <param name="index">Running file number, zero-padded to 4 digits; incremented once per file written so numbering continues across calls.</param>
        /// <returns>The accumulated transcription index text.</returns>
        private string CreateTextPartFilesFromSnippet(string ExportFolder, TrackSnippetViewModel snip, MessageBoxResult yesIfVoice, ref int index)
        {
            // Collect lines and join once at the end rather than repeated string
            // concatenation (avoids O(n^2) allocations on snippets with many parts).
            var transcriptionLines = new List<string>();

            foreach (var tp in snip.TextParts)
            {
                if (!tp.IsOK || string.IsNullOrWhiteSpace(tp.Text))
                {
                    continue; // skip rejected or empty parts
                }

                var fileNumberString = index.ToString().PadLeft(4, '0');
                var fileName         = Path.Combine(ExportFolder, $"{fileNumberString}.wav");

                if (yesIfVoice == MessageBoxResult.Yes)
                {
                    // Custom Voice index format: file number without extension.
                    transcriptionLines.Add($"{fileNumberString}\t{tp.Text.Trim()}{Environment.NewLine}");
                }
                else
                {
                    // Acoustic Dataset index format: full file name with extension.
                    transcriptionLines.Add($"{fileNumberString}.wav\t{tp.Text.Trim()}{Environment.NewLine}");
                }

                // 800 ms of leading silence is prepended to each exported clip;
                // tp.TextWidth is used here as the clip duration in milliseconds.
                WavFileUtils.TakeClipAddSilence(snip.FilePath, TimeSpan.FromMilliseconds(800), TimeSpan.FromMilliseconds(tp.StartMills), TimeSpan.FromMilliseconds(tp.TextWidth), fileName);

                index++;
            }

            return string.Concat(transcriptionLines);
        }
Example #2
0
        /// <summary>
        /// Creates a timestamped folder under the selected project and clips this
        /// snippet's audio range out of <c>AudioFilePath</c> into it, storing the
        /// resulting path in <c>snippet.FilePath</c>.
        /// </summary>
        /// <param name="snippet">Snippet whose audio range (offset + duration) is extracted.</param>
        /// <param name="ix">Index appended to the output file name to keep it unique.</param>
        private void MakeSnippetAudioFiles(
            TrackSnippetViewModel snippet,
            int ix)
        {
            // Folder name is a local-time stamp, e.g. 25122024130501 (ddMMyyyyHHmmss).
            var newProjectFolder = Path.Combine(Settings.Default.ProjectsFolder, SelectedProject.Name, DateTime.Now.ToString("ddMMyyyyHHmmss"));

            Directory.CreateDirectory(newProjectFolder);

            // NOTE(review): the original used "ix++" here; the post-increment was dead
            // (the parameter is passed by value, and this is its last read), so it is
            // dropped to avoid suggesting the caller sees an updated value.
            snippet.FilePath = Path.Combine(newProjectFolder, $"{Path.GetFileNameWithoutExtension(AudioFilePath)}_{ix}.wav");

            var startTime = TimeSpan.FromTicks(snippet.AudioSnippet.OffsetInTicks);

            // TimeSpan.Zero = no leading silence; clip exactly the recognized range.
            WavFileUtils.TakeClipAddSilence(AudioFilePath, TimeSpan.Zero, startTime, snippet.AudioSnippet.Duration, snippet.FilePath);
        }
Example #3
0
        /// <summary>
        /// Runs continuous speech recognition over <c>AudioFilePath</c> against the selected
        /// Azure Speech endpoint, building one <see cref="TrackSnippetViewModel"/> per
        /// recognized utterance into <paramref name="newCollection"/>. Interim
        /// "Recognizing" events are diffed word-by-word against the running
        /// <c>textPartsAll</c> list to estimate per-word start times and widths.
        /// Completion is signalled through <paramref name="stopRecognition"/> (set by the
        /// Canceled and SessionStopped handlers).
        /// </summary>
        /// <param name="stopRecognition">Completion source resolved when recognition stops or is canceled.</param>
        /// <param name="newCollection">Receives one snippet view-model per final recognition result.</param>
        private async Task GetTranscriptionFromAzure(TaskCompletionSource <int> stopRecognition, List <TrackSnippetViewModel> newCollection)
        {
            // Creates an instance of a speech config with specified subscription key and service region.
            // Replace with your own subscription key and service region (e.g., "westus").
            var config = SpeechConfig.FromSubscription(TranscribeEndpointsVM.SelectedTranscribeEndpoint.Key, TranscribeEndpointsVM.SelectedTranscribeEndpoint.Region);

            // A custom (trained) endpoint overrides the default model when configured.
            if (!string.IsNullOrWhiteSpace(TranscribeEndpointsVM.SelectedTranscribeEndpoint.Endpoint))
            {
                config.EndpointId = TranscribeEndpointsVM.SelectedTranscribeEndpoint.Endpoint;
            }
            config.OutputFormat = OutputFormat.Detailed;

            // Accumulated JSON log (one transLogRow per Recognizing event) and the
            // scale factor mapping audio ticks to a 0-100 progress percentage.
            var    log        = "";
            double OnePercent = 100D / CurrentAudioVM.Duration.TimeSpan.Ticks;
            var    tLog       = new transLog {
                Started = DateTime.Now, FilePath = AudioFilePath
            };

            try
            {
                if (Path.GetExtension(AudioFilePath).ToLower() != ".wav")
                {
                    // extract the WAV
                    // Non-WAV media is converted first; note AudioFilePath is
                    // permanently repointed at the extracted WAV.
                    var wavFileName = Path.Combine(Path.GetDirectoryName(AudioFilePath), Path.GetFileNameWithoutExtension(AudioFilePath) + ".wav");
                    WavFileUtils.ExtractWavFromMedia(AudioFilePath, wavFileName);

                    AudioFilePath = wavFileName;
                }

                using (var audioInput = AudioConfig.FromWavFileInput(AudioFilePath))
                {
                    using (var recognizer = new SpeechRecognizer(config, audioInput))
                    {
                        // Subscribes to events.
                        // Interim-hypothesis handler: fires repeatedly as the service
                        // refines the current utterance. Diffs the new word array
                        // against textPartsAll to detect changed words and to
                        // timestamp newly appeared words.
                        recognizer.Recognizing += (s, e) =>
                        {
                            transLogRow dyn     = new transLogRow();
                            var         txtCnt  = textPartsAll.Count;
                            var         dur     = e.Result.Duration;
                            // Offset + duration = audio position covered so far.
                            var         totDone = TimeSpan.FromTicks(e.Result.OffsetInTicks + dur.Ticks);
                            var         perc    = OnePercent * (dur.Ticks + e.Result.OffsetInTicks);

                            TranscribeInfo = $"{Math.Floor(perc)}%: {newCollection.Count + 1}: {totDone.Hours}:{totDone.Minutes}:{totDone.Seconds}: {txtCnt}: {e.Result.Text}";

                            dyn.Args = e;

                            var textBit       = e.Result.Text;
                            var ary           = textBit.Split(' ');
                            var lastGoodIx    = 0;
                            var goodTextParts = new List <TextPart>();
                            var isChange      = false;

                            // Walk the words already seen: keep each existing TextPart,
                            // but overwrite its text if the service revised the word.
                            for (var a = 0; a < ary.Length; a++)
                            {
                                if (a > txtCnt - 1)
                                {
                                    // More words than tracked parts: the remainder is new.
                                    dyn.Notes += $"[new={txtCnt - a}]";
                                    break;
                                }
                                if (ary[a].Trim() != textPartsAll[a].Text.Trim())
                                {
                                    dyn.Notes           += $"[chg={a}:{textPartsAll[a].Text}={ary[a]}]";
                                    textPartsAll[a].Text = ary[a];
                                    isChange             = true;
                                }
                                goodTextParts.Add(textPartsAll[a]);
                                lastGoodIx = a;
                            }

                            if (lastGoodIx < textPartsAll.Count - 1)
                            {
                                // The new hypothesis is shorter than what we tracked.
                                dyn.Notes += $"[lth chg]";
                            }

                            double detectedEstimate;

                            try
                            {
                                if (goodTextParts.Count >= ary.Length)
                                {
                                    return; // nothing new
                                }

                                // Join all newly appeared words into one TextPart.
                                var newBit = "";
                                for (var x = goodTextParts.Count; x < ary.Length; x++)
                                {
                                    newBit += ary[x] + " ";
                                }

                                // 200 magic number - identification time about 200 mills
                                detectedEstimate = e.Result.Duration.TotalMilliseconds - 200;

                                var sinceLastEvent = detectedEstimate - lastMillisecondsTotal;
                                if (sinceLastEvent < 1)
                                {
                                    sinceLastEvent = 1;
                                }

                                var cnt = goodTextParts.Count;

                                // If events arrived faster than the default word width,
                                // shrink the previous word so it doesn't overlap this one.
                                if (cnt > 1 && !isChange)
                                {
                                    if (sinceLastEvent < ADJUSTED_SIZE_MAX_WORD_LENGTH)
                                    {
                                        dyn.Notes += $"[adj={cnt - 1}, snc={sinceLastEvent}, lgt={goodTextParts[cnt - 1].TextWidth} - {ADJUSTED_SIZE_MAX_WORD_LENGTH - sinceLastEvent}]";
                                        goodTextParts[cnt - 1].TextWidth -= (ADJUSTED_SIZE_MAX_WORD_LENGTH - sinceLastEvent);
                                    }
                                }

                                // Finally, save any new bit(s)
                                goodTextParts.Add(new TextPart {
                                    Text = newBit.Trim(), StartMills = detectedEstimate, TextWidth = ADJUSTED_SIZE_MAX_WORD_LENGTH
                                });
                            }
                            catch (Exception ex)
                            {
                                // NOTE(review): bare rethrow with an unused 'ex' — either
                                // log here or remove the catch entirely; as written it
                                // only exists to reach the finally block.
                                throw;
                            }
                            finally
                            {
                                // Every Recognizing event is appended to the session log.
                                log += JsonConvert.SerializeObject(dyn) + Environment.NewLine;
                            }
                            lastMillisecondsTotal = detectedEstimate; // start point for next text
                                                                      //           lastCharLength = textBit.Length;

                            textPartsAll = goodTextParts;
                        };

                        // Final-result handler: one utterance is complete; wrap it in a
                        // snippet view-model and reset the interim word-tracking state.
                        recognizer.Recognized += (s, e) =>
                        {
                            if (e.Result.Reason == ResultReason.RecognizedSpeech)
                            {
                                //Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
                                var snippet = new TrackSnippetViewModel()
                                {
                                    AudioSnippet  = e.Result,
                                    Scale         = scale,
                                    Gain          = gain,
                                    RawText       = e.Result.Text,
                                    OffsetInTicks = e.Result.OffsetInTicks
                                };

                                snippet.DurationMilliseconds = e.Result.Duration.TotalMilliseconds; //WavFileUtils.GetSoundLength(AudioFilePath);

                                //var rawAry = e.Result.Text.Split(' ');
                                //if (rawAry.Length == textPartsAll.Count)
                                //{
                                //    for(var a = 0; a < rawAry.Length; a++)
                                //    {
                                //        textPartsAll[a].Text = rawAry[a];
                                //    }
                                //}
                                //else
                                //{
                                //    // to do, merge tidied text back into recognised text
                                //}

                                snippet.TextParts = new ObservableCollection <TextPart>(textPartsAll);

                                // Event fires on an SDK thread; marshal the collection
                                // mutation onto the UI dispatcher.
                                parentControl.Dispatcher.Invoke(() => newCollection.Add(snippet));

                                // Reset per-utterance tracking state for the next phrase.
                                textPartsAll          = new List <TextPart>();
                                lastMillisecondsTotal = 0;
                            }
                            else if (e.Result.Reason == ResultReason.NoMatch)
                            {
                                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                            }
                        };

                        recognizer.Canceled += (s, e) =>
                        {
                            Console.WriteLine($"CANCELED: Reason={e.Reason}");

                            if (e.Reason == CancellationReason.Error)
                            {
                                MessageBox.Show($"FAILED: Are you using a valid subscription key and region? ErrorCode = {e.ErrorCode}: ErrorDetails = {e.ErrorDetails}");
                            }

                            // Cancellation (error or end-of-file) ends the wait below.
                            stopRecognition.TrySetResult(0);
                        };

                        recognizer.SessionStarted += (s, e) => Console.WriteLine("\n    Session started event.");

                        recognizer.SessionStopped += (s, e) =>
                        {
                            Console.WriteLine("\n    Session stopped event.");
                            Console.WriteLine("\nStop recognition.");
                            stopRecognition.TrySetResult(0);
                        };

                        // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
                        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                        // Waits for completion.
                        // Use Task.WaitAny to keep the task rooted.
                        // NOTE(review): this blocks a thread-pool thread inside an async
                        // method; "await stopRecognition.Task" would be the non-blocking
                        // equivalent — confirm no caller relies on the blocking behavior.
                        Task.WaitAny(new[] { stopRecognition.Task });

                        // Stops recognition.
                        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
                    }
                }
            }
            catch (Exception excp)
            {
                // NOTE(review): all failures (bad audio path, SDK errors) are swallowed
                // here with only a debug trace — the UI is never informed.
                Debug.WriteLine($"{excp}");
            }
            finally
            {
                if (Settings.Default.TranscriptionLogging)
                {
                    try
                    {
                        // NOTE(review): the nested DateTime.Now.ToString(...) uses one
                        // timestamp as the format string for a second call; digits pass
                        // through custom format strings literally so it happens to work,
                        // but a single ToString("ddMMyyyyHHmmss") is what was intended.
                        var logFile = $"TranscriptionLog_{DateTime.Now.ToString($"{DateTime.Now.ToString("ddMMyyyyHHmmss")}")}.txt";
                        logFile = Path.Combine(Path.GetFullPath(SettingsVM.ProjectsFolder), logFile);
                        File.WriteAllText(logFile, log);

                        tLog.LogPath = logFile;
                        SelectedProject.TranscribeLogs.Add(tLog);
                        Debug.WriteLine($"Log file created at {logFile}");
                    }
                    catch (Exception ex)
                    {
                        // NOTE(review): rethrowing from a finally block replaces any
                        // in-flight exception; 'ex' is also unused — consider logging
                        // and suppressing instead.
                        throw;
                    }
                }
            }
        }