Beispiel #1
0
        /* Extract the audio from the mp4 file in each subfolder of the specified folder.
         * The "Fix" folder for a recording contains one subfolder per
         * segment of the recording: 00-03-00, 00-06-00, 00-09-00, etc.
         * Each of these subfolders is initialized with three files:
         *    "ToFix.mp4"  - the video of this segment
         *    "ToFix.flac" - the audio of this segment
         *    "ToFix.json" - the transcription of this segment
         */
        public void ExtractAll(string inputFolder)
        {
            string[] segmentFolders = Directory.GetDirectories(inputFolder);

            for (int i = 0; i < segmentFolders.Length; i++)
            {
                string segmentFolder = segmentFolders[i];

                // Source video and target audio live side by side in the segment folder.
                // TODO - convert to mp3 instead of flac.
                audioProcessing.Extract(
                    Path.Combine(segmentFolder, "ToFix.mp4"),
                    Path.Combine(segmentFolder, "ToFix.flac"));
            }
        }
Beispiel #2
0
        /// <summary>
        /// Extracts the audio of a sample video, transcribes it and saves the
        /// serialized transcription.
        /// </summary>
        /// <param name="sample">Sample video to use.</param>
        /// <param name="fixedTags">File in which to save the fixed transcription.</param>
        /// <param name="audio">File in which to save the extracted audio.</param>
        /// <param name="useSmallSample">If true, use a small sample of the video/audio.</param>
        /// <param name="useAudioFileAlreadyInCloud">If true, use prior audio in cloud if it exists.</param>
        /// <param name="rawTranscription">File in which to save the raw transcription.</param>
        static void TranscribeVideo(
            SampleVideo sample,
            string fixedTags,
            string audio,
            bool useSmallSample,
            bool useAudioFileAlreadyInCloud,
            string rawTranscription)
        {
            string videofilePath                   = sample.filepath;
            string objectName                      = sample.objectname;
            RepeatedField <string> phrases         = sample.phrases;
            AudioProcessing        audioProcessing = new AudioProcessing();

            string googleCloudBucketName = "govmeeting-transcribe";

            TranscribeParameters transParams = new TranscribeParameters
            {
                audiofilePath              = audio,
                objectName                 = objectName,
                GoogleCloudBucketName      = googleCloudBucketName,
                useAudioFileAlreadyInCloud = useAudioFileAlreadyInCloud,
                language        = "en",
                MinSpeakerCount = 2,
                MaxSpeakerCount = 6,
                phrases         = phrases
            };

            // Clean up from last run
            File.Delete(audio);
            File.Delete(fixedTags);

            if (useSmallSample)
            {
                // Insert "-3min" in front of the actual extension. A plain
                // Replace(".mp4", ...) returns the path unchanged when the extension
                // is not exactly ".mp4" (e.g. ".MP4"), which would make ExtractPart
                // use the same file as input and output, and it would also rewrite
                // any ".mp4" occurring earlier in the path.
                string extension      = Path.GetExtension(videofilePath);
                string pathNoExt      = videofilePath.Substring(0, videofilePath.Length - extension.Length);
                string shortVideoFile = pathNoExt + "-3min" + extension;

                audioProcessing.ExtractPart(videofilePath, shortVideoFile, 60, 3 * 60);
                videofilePath = shortVideoFile;
            }

            audioProcessing.Extract(videofilePath, audio);

            GMFileAccess.SetGoogleCredentialsEnvironmentVariable();

            // Transcribe the audio file
            TranscribeAudio transcribe     = new TranscribeAudio();
            Transcribed_Dto response       = transcribe.TranscribeAudioFile(transParams, rawTranscription);
            string          responseString = JsonConvert.SerializeObject(response, Formatting.Indented);

            File.WriteAllText(fixedTags, responseString);

            WriteCopyOfResponse(responseString, fixedTags);
        }
Beispiel #3
0
        /// <summary>
        /// Places the video in the meeting folder, extracts its audio, transcribes
        /// the audio and writes the transcript as indented JSON.
        /// </summary>
        /// <param name="videoFile">Path of the original video file.</param>
        /// <param name="meetingFolder">Folder that receives "video.mp4", "audio.flac" and "transcribed.json".</param>
        /// <param name="language">Language value passed on in the transcription parameters.</param>
        public void Process(string videoFile, string meetingFolder, string language)
        {
            /////// Copy video to meeting folder  /////////

            AudioProcessing audioProcessing = new AudioProcessing();
            string          videofileCopy   = Path.Combine(meetingFolder, "video.mp4");

            // #### If MaxRecordingSize is not zero, we shorten the recording. ####
            if (config.MaxRecordingSize == 0)
            {
                File.Copy(videoFile, videofileCopy);
            }
            else
            {
                audioProcessing.ExtractPart(videoFile, videofileCopy, 0, config.MaxRecordingSize);
            }

            /////// Extract the audio. ////////////////////////

            string audioFile = Path.Combine(meetingFolder, "audio.flac");

            audioProcessing.Extract(videofileCopy, audioFile);

            /////// Transcribe the audio file. /////////////

            // We want the object name in the cloud to be the original video file name with ".flac" extension.
            string objectName = Path.GetFileNameWithoutExtension(videoFile) + ".flac";

            TranscribeParameters transParams = new TranscribeParameters
            {
                audiofilePath              = audioFile,
                objectName                 = objectName,
                GoogleCloudBucketName      = config.GoogleCloudBucketName,
                useAudioFileAlreadyInCloud = config.UseAudioFileAlreadyInCloud,
                language        = language,
                MinSpeakerCount = 2,
                MaxSpeakerCount = 6
                                  // TODO Add "phrases" field: names of officers
            };

            Transcribed_Dto transcript = transcribeAudio.TranscribeAudioFile(transParams);

            /////// Save the transcript. /////////////

            string stringValue    = JsonConvert.SerializeObject(transcript, Formatting.Indented);
            string outputJsonFile = Path.Combine(meetingFolder, "transcribed.json");

            File.WriteAllText(outputJsonFile, stringValue);
        }
Beispiel #4
0
        /* Cut a short clip from the sample video, extract its audio, then write
         * a transcription response and its converted edit-meeting form as JSON.
         * NOTE: the cloud transcription call is currently disabled (see TODO below),
         * so "language" is not used and the response is a newly constructed Transcribed_Dto.
         */
        public void TestMoveToCloudAndTranscribe(string language)
        {
            AudioProcessing processor = new AudioProcessing();

            string baseName    = "USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_EN_2017-02-15";
            string sourceVideo = Path.Combine(config.TestdataPath, baseName + ".mp4");
            string workFolder  = Path.Combine(config.TestdataPath, "TestMoveToCloudAndTranscribe");

            GMFileAccess.DeleteAndCreateDirectory(workFolder);

            // All outputs share the base name and differ only in their suffix.
            string shortClip = Path.Combine(workFolder, baseName + ".mp4");
            string audioClip = Path.Combine(workFolder, baseName + ".flac");

            // Extract short version, then its audio.
            processor.ExtractPart(sourceVideo, shortClip, 60, 4 * 60);
            processor.Extract(shortClip, audioClip);

            // TODO - signature of TranscribeInCloud has changed.
            // response = transcribe.MoveToCloudAndTranscribe(audioFile, baseName + ".flac", config.GoogleCloudBucketName, config.UseAudioFileAlreadyInCloud, language);
            Transcribed_Dto response = new Transcribed_Dto();

            File.WriteAllText(
                Path.Combine(workFolder, baseName + "-rsp.json"),
                JsonConvert.SerializeObject(response, Formatting.Indented));

            // Modify Transcript json format and create the JSON file.
            EditMeeting_Dto editDto = new ModifyTranscriptJson().Modify(response);

            File.WriteAllText(
                Path.Combine(workFolder, baseName + ".json"),
                JsonConvert.SerializeObject(editDto, Formatting.Indented));
        }