Example #1
        //  === FixSpeakerTags method ===
        // The LongRunningRecognizeResponse does not put SpeakerTag values
        // on the words in the initial results. Once the transcription is complete,
        // it appends one final result whose Transcript field holds the entire text
        // and whose word array contains every word in that text, this time with
        // the SpeakerTag fields populated.
        // FixSpeakerTags moves the SpeakerTag values from the last result in the response
        // to the corresponding words in the initial results and then removes the final result.

        public static Transcribed_Dto FixSpeakerTags(Transcribed_Dto transcribed)
        {
            int resultCount = transcribed.Talks.Count;
            TranscribedTalk_Dto lastResult       = transcribed.Talks[resultCount - 1];
            TranscribedTalk_Dto nextToLastResult = transcribed.Talks[resultCount - 2];

            int lastWordnum       = lastResult.Words[^1].WordNum;
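
The snippet above is cut off before the fix itself. As a rough illustration of the behavior the comments describe, here is a standalone sketch; it assumes the DTO types used above, a settable SpeakerTag property, and System.Linq, and it matches words by their WordNum field. It is not the project's actual implementation.

        // Hypothetical sketch only: copy speaker tags from the final, fully tagged
        // result back onto the earlier results, then drop that final result.
        public static Transcribed_Dto FixSpeakerTagsSketch(Transcribed_Dto transcribed)
        {
            int resultCount = transcribed.Talks.Count;
            TranscribedTalk_Dto lastResult = transcribed.Talks[resultCount - 1];

            foreach (TranscribedWord_Dto tagged in lastResult.Words)
            {
                // Find the word with the same sequence number in the earlier results.
                TranscribedWord_Dto match = transcribed.Talks
                    .Take(resultCount - 1)
                    .SelectMany(t => t.Words)
                    .FirstOrDefault(w => w.WordNum == tagged.WordNum);

                if (match != null)
                {
                    match.SpeakerTag = tagged.SpeakerTag;
                }
            }

            // The final, duplicate result is no longer needed.
            transcribed.Talks.RemoveAt(resultCount - 1);
            return transcribed;
        }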
Example #2
        public void TestTranscriptionOfLocalFile(string language)
        {
            //TranscribeAudio ta = new TranscribeAudio(_config);

            // Test transcription on a local file. We will use synchronous calls to the Google Speech API, which allow a maximum of one minute of audio per request.
            string folder = config.TestdataPath + @"..\testdata\BBH Selectmen\USA_ME_LincolnCounty_BoothbayHarbor_Selectmen\2017-01-09\step 2 extract\";
            //TranscribeResultOrig transcript = transcribe.TranscribeFile(folder + "USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_EN_2017-01-09#00-01-40.flac", language);
            Transcribed_Dto transcript = transcribe.TranscribeLocalFile(folder + "USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_EN_2017-01-09#00-01-40.flac", language);

            string stringValue = JsonConvert.SerializeObject(transcript, Formatting.Indented);
        }
Example #3
        static void FixSpeakerTags(string responseFile, string fixedRsp)
        {
            // Clean up from last run
            File.Delete(fixedRsp);

            string          priorResponse  = File.ReadAllText(responseFile);
            Transcribed_Dto beforeFix      = JsonConvert.DeserializeObject <Transcribed_Dto>(priorResponse);
            Transcribed_Dto afterFix       = TransformResponse.FixSpeakerTags(beforeFix);
            string          afterFixString = JsonConvert.SerializeObject(afterFix, Formatting.Indented);

            File.WriteAllText(fixedRsp, afterFixString);
        }
Example #4
        static void SimplifyRaw(string responseFile, string simplified)
        {
            // Clean up from last run
            File.Delete(simplified);

            string priorResponse = File.ReadAllText(responseFile);
            LongRunningRecognizeResponse beforeFix = JsonConvert.DeserializeObject <LongRunningRecognizeResponse>(priorResponse);
            Transcribed_Dto afterFix       = TransformResponse.Simpify(beforeFix.Results);
            string          afterFixString = JsonConvert.SerializeObject(afterFix, Formatting.Indented);

            File.WriteAllText(simplified, afterFixString);
        }
Example #5
        static void TranscribeVideo(
            SampleVideo sample,              // sample video to use
            string fixedTags,                // file in which to save the fixed transcription
            string audio,                    // file in which to save the extracted audio
            bool useSmallSample,             // if true, use a small sample of the video/audio
            bool useAudioFileAlreadyInCloud, // if true, use prior audio in cloud if it exists
            string rawTranscription)         // file in which to save the raw transcription
        {
            string videofilePath                   = sample.filepath;
            string objectName                      = sample.objectname;
            RepeatedField <string> phrases         = sample.phrases;
            AudioProcessing        audioProcessing = new AudioProcessing();

            string googleCloudBucketName = "govmeeting-transcribe";

            TranscribeParameters transParams = new TranscribeParameters
            {
                audiofilePath              = audio,
                objectName                 = objectName,
                GoogleCloudBucketName      = googleCloudBucketName,
                useAudioFileAlreadyInCloud = useAudioFileAlreadyInCloud,
                language        = "en",
                MinSpeakerCount = 2,
                MaxSpeakerCount = 6,
                phrases         = phrases
            };

            // Clean up from last run
            File.Delete(audio);
            File.Delete(fixedTags);

            if (useSmallSample)
            {
                string shortVideoFile = videofilePath.Replace(".mp4", "-3min.mp4");
                //SplitRecording splitRecording = new SplitRecording();
                audioProcessing.ExtractPart(videofilePath, shortVideoFile, 60, 3 * 60);
                videofilePath = shortVideoFile;
            }

            audioProcessing.Extract(videofilePath, audio);

            GMFileAccess.SetGoogleCredentialsEnvironmentVariable();

            // Transcribe the audio file
            TranscribeAudio transcribe     = new TranscribeAudio();
            Transcribed_Dto response       = transcribe.TranscribeAudioFile(transParams, rawTranscription);
            string          responseString = JsonConvert.SerializeObject(response, Formatting.Indented);

            File.WriteAllText(fixedTags, responseString);

            WriteCopyOfResponse(responseString, fixedTags);
        }
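
For context, a call to this helper might look like the sketch below. The SampleVideo initializer and all file paths are hypothetical; only the parameter order and types come from the signature above.

        // Hypothetical usage of TranscribeVideo (paths and SampleVideo field values are invented).
        static void RunTranscribeVideoSample()
        {
            SampleVideo sample = new SampleVideo
            {
                filepath   = @"C:\samples\meeting.mp4",
                objectname = "meeting.flac",
                phrases    = new RepeatedField<string> { "selectmen", "town manager" }
            };

            TranscribeVideo(
                sample,
                fixedTags: @"C:\samples\meeting-fixed.json",
                audio: @"C:\samples\meeting.flac",
                useSmallSample: true,
                useAudioFileAlreadyInCloud: false,
                rawTranscription: @"C:\samples\meeting-raw.json");
        }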
Example #6
        // Create the EditTranscriptView structure used by EditTranscript
        static void CreateEditTranscriptView(string fixedTags, string editmeetingFile)
        {
            // Clean up from last run
            File.Delete(editmeetingFile);

            // Reformat the response to what the editmeeting routine will use.
            string               responseString = File.ReadAllText(fixedTags);
            Transcribed_Dto      response       = JsonConvert.DeserializeObject <Transcribed_Dto>(responseString);
            ModifyTranscriptJson convert        = new ModifyTranscriptJson();
            EditMeeting_Dto      editmeeting    = convert.Modify(response);
            string               stringValue    = JsonConvert.SerializeObject(editmeeting, Formatting.Indented);

            File.WriteAllText(editmeetingFile, stringValue);
        }
Example #7
        public EditMeeting_Dto Modify2(Transcribed_Dto transcript)
        {
            EditMeeting_Dto editmeeting = new EditMeeting_Dto();
            int             wordNum     = 0; // running word sequence number

            foreach (TranscribedTalk_Dto result in transcript.Talks)
            {
                EditMeetingTalk_Dto talk = new EditMeetingTalk_Dto(result.Transcript, result.Confidence);
                int speaker = -1;

                foreach (TranscribedWord_Dto respword in result.Words)
                {
                    EditMeetingWord_Dto word = new EditMeetingWord_Dto(
                        respword.Word,
                        respword.Confidence,
                        respword.StartTime,
                        respword.EndTime,
                        respword.SpeakerTag,
                        ++wordNum
                        );

                    // Check whether the speaker is the same for all words in this result.
                    // "speaker" is set to -2 as soon as two words have different speakers.
                    if (speaker != -2)
                    {
                        if (speaker == -1)
                        {
                            speaker = word.SpeakerTag;  // we found the first speaker (could also be 0)
                        }
                        else
                        {
                            if (speaker != word.SpeakerTag)
                            {
                                speaker = -2;  // we found two words with different speakers
                            }
                        }
                    }

                    talk.SpeakerName = speaker switch
                    {
                        0 => "UNKOWN",
                        -2 => "DIFFERENT",
                        _ => "Speaker " + speaker.ToString(),
                    };
                    talk.Words.Add(word);
                }
                editmeeting.Talks.Add(talk);
            }
            return(editmeeting);
        }
Example #8
        public Transcribed_Dto TranscribeAudioFile(TranscribeParameters transParams, string rawResponseFile = null)
        {
            LongRunningRecognizeResponse response = UploadAndTranscribeInCloud(transParams);

            // Save the raw response, if we were passed a file path.
            if (!string.IsNullOrEmpty(rawResponseFile))
            {
                string responseString = JsonConvert.SerializeObject(response, Formatting.Indented);
                File.WriteAllText(rawResponseFile, responseString);
            }

            Transcribed_Dto resp = TransformResponse.Simpify(response.Results);

            return(TransformResponse.FixSpeakerTags(resp));
        }
Example #9
        public void Process(string videoFile, string meetingFolder, string language)
        {
            /////// Copy video to meeting folder  /////////

            AudioProcessing audioProcessing = new AudioProcessing();
            string          videofileCopy   = Path.Combine(meetingFolder, "video.mp4");

            // #### If MaxRecordingSize is not zero, we shorten the recording. ####
            if (config.MaxRecordingSize == 0)
            {
                File.Copy(videoFile, videofileCopy);
            }
            else
            {
                audioProcessing.ExtractPart(videoFile, videofileCopy, 0, config.MaxRecordingSize);
            }

            /////// Extract the audio. ////////////////////////

            ExtractAudio extract   = new ExtractAudio();
            string       audioFile = Path.Combine(meetingFolder, "audio.flac");

            audioProcessing.Extract(videofileCopy, audioFile);

            /////// Transcribe the audio file. /////////////

            // We want the object name in the cloud to be the original video file name with ".flac" extension.
            string objectName = Path.GetFileNameWithoutExtension(videoFile) + ".flac";

            TranscribeParameters transParams = new TranscribeParameters
            {
                audiofilePath              = audioFile,
                objectName                 = objectName,
                GoogleCloudBucketName      = config.GoogleCloudBucketName,
                useAudioFileAlreadyInCloud = config.UseAudioFileAlreadyInCloud,
                language        = language,
                MinSpeakerCount = 2,
                MaxSpeakerCount = 6
                                  // TODO Add "phrases" field: names of officers
            };

            Transcribed_Dto transcript = transcribeAudio.TranscribeAudioFile(transParams);

            string stringValue    = JsonConvert.SerializeObject(transcript, Formatting.Indented);
            string outputJsonFile = Path.Combine(meetingFolder, "transcribed.json");

            File.WriteAllText(outputJsonFile, stringValue);
        }
Example #10
        public void TestMoveToCloudAndTranscribe(string language)
        {
            AudioProcessing audioProcessing = new AudioProcessing();

            string baseName     = "USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_EN_2017-02-15";
            string videoFile    = Path.Combine(config.TestdataPath, baseName + ".mp4");
            string outputFolder = Path.Combine(config.TestdataPath, "TestMoveToCloudAndTranscribe");

            GMFileAccess.DeleteAndCreateDirectory(outputFolder);

            string outputBasePath = Path.Combine(outputFolder, baseName);
            string shortFile      = outputBasePath + ".mp4";
            string audioFile      = outputBasePath + ".flac";
            string jsonFile       = outputBasePath + ".json";


            // Extract short version
            //SplitRecording splitRecording = new SplitRecording();
            audioProcessing.ExtractPart(videoFile, shortFile, 60, 4 * 60);

            // Extract audio.
            audioProcessing.Extract(shortFile, audioFile);

            // Transcribe
            //TranscribeAudio ta = new TranscribeAudio(_config);

            //TranscribeResultOrig response = new TranscribeResultOrig();
            Transcribed_Dto response = new Transcribed_Dto();

            // TODO - signature of TranscribeInCloud has changed.
            // response = transcribe.MoveToCloudAndTranscribe(audioFile, baseName + ".flac", config.GoogleCloudBucketName, config.UseAudioFileAlreadyInCloud, language);

            string stringValue = JsonConvert.SerializeObject(response, Formatting.Indented);

            File.WriteAllText(outputBasePath + "-rsp.json", stringValue);

            // Modify Transcript json format
            //ModifyTranscriptJson_1 mt = new ModifyTranscriptJson_1();
            ModifyTranscriptJson mt = new ModifyTranscriptJson();
            //FixasrViewModel fixasr = mt.Modify(response);
            EditMeeting_Dto meetingEditDto = mt.Modify(response);

            // Create JSON file
            //stringValue = JsonConvert.SerializeObject(fixasr, Formatting.Indented);
            stringValue = JsonConvert.SerializeObject(meetingEditDto, Formatting.Indented);
            File.WriteAllText(jsonFile, stringValue);
        }
Example #11
        // Transcribe a local audio file. We can only use this for audio up to one minute long.
        public Transcribed_Dto TranscribeLocalFile(string fileName, string language)
        {
            //    // var speechClient = SpeechClient.Create();
            RecognitionAudio recogAudio = RecognitionAudio.FromFile(fileName);

            var response = speechClient.Recognize(new RecognitionConfig()
            {
                Encoding              = RecognitionConfig.Types.AudioEncoding.Flac,
                SampleRateHertz       = 48000,
                EnableWordTimeOffsets = true,
                LanguageCode          = language,
            }, recogAudio);

            Transcribed_Dto resp = TransformResponse.Simpify(response.Results);

            return(TransformResponse.FixSpeakerTags(resp));
        }
Example #12
        /*  === TransformResponse.Simplify method ===
         *  We want to extract all the useful data from the response that comes back from the cloud.
         *  But we don't want the superfluous fields that make it more complicated to use.
         *
         *  The raw response structure contains:
         *  A single unnamed object with a "Results" array.
         *  The "Results" array consists of unnamed objects, each containing:
         *      "Alternatives" array, "ChannelTag" integer, "LanguageCode" string
         *  The "Alternatives" arrays appear to always consists of a single unnamed object containing:
         *      "Transcript" string, "Confidence" decimal, "Words" array
         *      WHEN DOES THIS EVER CONSIST OF MORE THAN ONE ALTERNATIVE?
         *  The "Words" array consists of unnamed objects containing:
         *      "StartTime" object, "EndTime" object, "Word" object
         *  The "StartTime" and "EndTime" objects both contain:
         *      "Seconds" int, "Nanos" integer
         *  The "Word" objects contain:
         *      "Word" string, "Confidence" decimal, "SpeakerTag" integer
         *
         *  The new structure contains:
         *  A single unnamed object with a "Results" array.
         *  The "Results" array consists of unnamed objects, each containing:
         *      "Transcript" string, "Confidence" decimal, "Words" array and "WordCount" integer
         *  The "Words" array consists of unnamed objects, eash containing:
         *      "Word" string, "Confidence" decimal, "StartTime" integer, "EndTime integer, "speakerTag" integer,
         *      and "WordNum" integer.
         *      Both StartTime and EndTime integers are in milliseconds.
         *      "WordCount" and "WordNum" are new fields added to help in fixing speaker tags,
         *      but we leave them in the final structure for possible future use.
         */

        public static Transcribed_Dto Simpify(RepeatedField <SpeechRecognitionResult> recogResults)
        {
            Transcribed_Dto transcript = new Transcribed_Dto();
            int             altCount   = 0;
            int             wordNum    = 0;

            foreach (SpeechRecognitionResult recogResult in recogResults)
            {
                if (recogResult.Alternatives.Count > 1)
                {
                    altCount++;
                    Console.WriteLine($"ERROR: more than 1 alternative - result {altCount}");
                }

                SpeechRecognitionAlternative recogAlt = recogResult.Alternatives[0];

                TranscribedTalk_Dto result = new TranscribedTalk_Dto(recogAlt.Transcript, recogAlt.Confidence)
                {
                    // The new "WordCount" field in Result is populated with the total word count.
                    WordCount = recogAlt.Words.Count,
                };
                Console.WriteLine($"Next result: {recogAlt.Words.Count} words");

                foreach (var item in recogAlt.Words)
                {
                    long startTime = item.StartTime.Seconds * 1000 + item.StartTime.Nanos / 1000000;
                    long endTime   = item.EndTime.Seconds * 1000 + item.EndTime.Nanos / 1000000;

                    // The new "WordNum" field in RespWord is popluated with the sequencial "wordnum"
                    wordNum++;
                    result.Words.Add(new TranscribedWord_Dto(item.Word, item.Confidence, startTime, endTime, item.SpeakerTag, wordNum));
                }
                transcript.Talks.Add(result);
            }
            return(transcript);
        }
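
The simplified structure described in the comment above is carried by the Transcribed_Dto family of types. Their real definitions live elsewhere in the project; the following is only an inferred sketch of their shape, based on how the constructors and properties are used in these examples. The field types (for example, float for Confidence) are assumptions, and the sketch needs System.Collections.Generic.

        // Inferred shape of the simplified DTOs (an approximation, not the actual project code).
        public class Transcribed_Dto
        {
            public List<TranscribedTalk_Dto> Talks { get; set; } = new List<TranscribedTalk_Dto>();
        }

        public class TranscribedTalk_Dto
        {
            public TranscribedTalk_Dto(string transcript, float confidence)
            {
                Transcript = transcript;
                Confidence = confidence;
            }

            public string Transcript { get; set; }
            public float Confidence { get; set; }
            public int WordCount { get; set; }
            public List<TranscribedWord_Dto> Words { get; set; } = new List<TranscribedWord_Dto>();
        }

        public class TranscribedWord_Dto
        {
            public TranscribedWord_Dto(string word, float confidence, long startTime,
                                       long endTime, int speakerTag, int wordNum)
            {
                Word       = word;
                Confidence = confidence;
                StartTime  = startTime;
                EndTime    = endTime;
                SpeakerTag = speakerTag;
                WordNum    = wordNum;
            }

            public string Word { get; set; }
            public float Confidence { get; set; }
            public long StartTime { get; set; }   // milliseconds
            public long EndTime { get; set; }     // milliseconds
            public int SpeakerTag { get; set; }
            public int WordNum { get; set; }
        }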
Example #13
        public EditMeeting_Dto Modify(Transcribed_Dto transcript)
        {
            EditMeeting_Dto editmeeting = new EditMeeting_Dto();
            int             wordNum     = 0; // running word sequence number

            foreach (TranscribedTalk_Dto result in transcript.Talks)
            {
                EditMeetingTalk_Dto talk = new EditMeetingTalk_Dto(result.Transcript, result.Confidence);
                int speaker = -1;

                foreach (TranscribedWord_Dto respword in result.Words)
                {
                    EditMeetingWord_Dto word = new EditMeetingWord_Dto(
                        respword.Word,
                        respword.Confidence,
                        respword.StartTime,
                        respword.EndTime,
                        respword.SpeakerTag,
                        ++wordNum
                        );
                    if (speaker != word.SpeakerTag)
                    {
                        if (speaker != -1)
                        {
                            editmeeting.Talks.Add(talk);
                            talk = new EditMeetingTalk_Dto(result.Transcript, result.Confidence);
                        }
                        speaker          = word.SpeakerTag;
                        talk.SpeakerName = "Speaker " + speaker.ToString();
                    }
                    talk.Words.Add(word);
                }
                editmeeting.Talks.Add(talk);
            }
            return(editmeeting);
        }