Beispiel #1
0
        /// <summary>
        /// Regroups the words of a transcription response into short text segments
        /// for the fixasr view.
        /// </summary>
        /// <remarks>
        /// Words from all alternatives are appended, in order, to one running line.
        /// A segment is emitted whenever adding the next word, including the space
        /// that separates it from the line, would exceed the per-record character
        /// limit. The first segment is given start time 0; each later segment gets
        /// the start time of its first word. A single word longer than the limit is
        /// still emitted on a line of its own.
        /// </remarks>
        /// <param name="transcript">Response whose alternatives and words are read.</param>
        /// <returns>A new <see cref="FixasrView"/> with <c>lastedit</c> set to 0 and the generated segments.</returns>
        public FixasrView Modify(TranscribeResponse transcript)
        {
            const int  MaxCharactersPerRecord = 40;
            FixasrView fixasr    = new FixasrView();
            string     line      = "";
            int        startTime = 0;

            fixasr.lastedit = 0;

            foreach (RspAlternative alternative in transcript.alternatives)
            {
                foreach (RspWord word in alternative.words)
                {
                    // Nothing buffered yet: the word always opens the line. Flushing here
                    // would only emit an empty segment.
                    if (line.Length == 0)
                    {
                        line = word.text;
                        continue;
                    }

                    // The "+ 1" is the space that will join the word to the line.
                    if (line.Length + 1 + word.text.Length > MaxCharactersPerRecord)
                    {
                        fixasr.asrsegments.Add(NewSegment(startTime, line));
                        line      = word.text;
                        startTime = word.startTime;
                    }
                    else
                    {
                        line = line + " " + word.text;
                    }
                }
            }

            // Emit whatever is still buffered after the last word.
            if (line != "")
            {
                fixasr.asrsegments.Add(NewSegment(startTime, line));
            }
            return(fixasr);
        }
Beispiel #2
0
        /// <summary>
        /// Splits the video, audio and JSON transcript of a meeting into smaller
        /// work segments, so that several people can fix the text at the same time
        /// and less media has to be downloaded at once.
        /// </summary>
        /// <param name="meetingFolder">Meeting folder; output goes to its "FixText" subfolder.</param>
        /// <param name="videofile">Video file handed to <c>SplitRecording.Split</c>.</param>
        /// <param name="fixasrFile">JSON file that is deserialized into a <c>FixasrView</c>.</param>
        /// <param name="segmentSize">Segment size passed to the recording and transcript splitters.</param>
        /// <param name="segmentOverlap">Segment overlap passed to the recording and transcript splitters.</param>
        public void Split(string meetingFolder, string videofile, string fixasrFile,
                          int segmentSize, int segmentOverlap)
        {
            // After this step the recording goes through two manual stages:
            //   1. Users fix errors in the automatically recognized text.
            //   2. Users add metadata tags to the transcript.
            string workFolder = $"{meetingFolder}\\FixText";

            // Load the full transcript before any media is processed.
            FixasrView transcript = JsonConvert.DeserializeObject<FixasrView>(File.ReadAllText(fixasrFile));

            // Split the recording; the return value is used as the number of parts below.
            int partCount = new SplitRecording().Split(videofile, workFolder, segmentSize, segmentOverlap);

            // Some users may prefer audio for fixing the transcript, so extract it as well.
            new ExtractAudio().ExtractAll(workFolder);

            // Split the transcript with the same size, overlap and part count as the media.
            new SplitTranscript().split(transcript, workFolder, segmentSize, segmentOverlap, partCount);
        }
        /// <summary>
        /// Serializes the given view as indented JSON and writes it as the latest
        /// work file for one part of a meeting.
        /// </summary>
        /// <param name="value">View to serialize.</param>
        /// <param name="meetingId">Meeting id used to locate the part folder.</param>
        /// <param name="part">Part number used to locate the part folder.</param>
        /// <returns>The result reported by <c>CircularBuffer.WriteLatest</c>.</returns>
        public bool Put(FixasrView value, long meetingId, int part)
        {
            string folder = GetPartFolder(meetingId, part);
            string json   = JsonConvert.SerializeObject(value, Formatting.Indented);

            var workFiles = new CircularBuffer(folder, WORK_FILE_NAME, config.MaxWorkFileBackups);

            return workFiles.WriteLatest(json);
        }
        /// <summary>
        /// Converts the line-oriented fixasr transcript into a list of talks, one
        /// per uninterrupted stretch of text by a single speaker.
        /// </summary>
        /// <remarks>
        /// Each segment's text is scanned with <c>GetNextSpeaker</c>. Text found
        /// before a new speaker marker is appended to the current talk; the rest of
        /// the line after the last marker is appended to the talk of the speaker
        /// that is current at that point. Talks with no text are never added.
        /// NOTE(review): relies on <c>GetNextSpeaker</c> returning the new speaker's
        /// name through its <c>ref speaker</c> argument when it returns true - confirm.
        /// </remarks>
        /// <param name="fixasr">Transcript whose <c>asrsegments</c> are read in order.</param>
        /// <returns>A new <see cref="AddtagsView"/> whose <c>talks</c> list holds the result.</returns>
        public AddtagsView ConvertFixasrToAddtags(FixasrView fixasr)
        {
            AddtagsView addtags = new AddtagsView();

            addtags.talks = new List <TalksView>();
            TalksView talk = new TalksView();

            talk.said = "";
            foreach (AsrSegment segment in fixasr.asrsegments)
            {
                string text                = segment.said;
                string speaker             = null;
                int    startNextSpokenText = 0;
                int    endLastSpokenText   = -1;
                while (true)
                {
                    int  start        = startNextSpokenText;
                    bool isNewSpeaker = GetNextSpeaker(text, ref startNextSpokenText, ref endLastSpokenText, ref speaker);

                    if (!isNewSpeaker)
                    {
                        // No further speaker marker: the rest of the line belongs to the current talk.
                        if (talk.said != "")
                        {
                            talk.said = talk.said + " ";
                        }
                        talk.said = talk.said + text.Substring(start);
                        break;
                    }

                    // Anything the prior speaker said on this line, before the marker, still
                    // belongs to the current talk. Separate it from text of earlier lines with
                    // a space, as is done for marker-free lines above.
                    if ((endLastSpokenText - start) > 0)
                    {
                        if (talk.said != "")
                        {
                            talk.said = talk.said + " ";
                        }
                        talk.said = talk.said + text.Substring(start, endLastSpokenText - start);
                    }

                    // If the prior speaker said anything at all, close that talk and start a new one.
                    if (talk.said.Length > 0)
                    {
                        addtags.talks.Add(talk);
                        talk      = new TalksView();
                        talk.said = "";
                    }

                    // Always record the new speaker. Previously this happened only when a
                    // prior talk was closed, so the first speaker of the transcript was lost.
                    talk.speaker = speaker;
                }
            }
            if (talk.said.Length > 0)
            {
                addtags.talks.Add(talk);
            }
            return(addtags);
        }
        /// <summary>
        /// Loads the latest work file for one part of a meeting and deserializes it.
        /// </summary>
        /// <param name="meetingId">Meeting id used to locate the part folder.</param>
        /// <param name="part">Part number used to locate the part folder.</param>
        /// <returns>The <c>FixasrView</c> deserialized from the text returned by <c>CircularBuffer.GetLatest</c>.</returns>
        public FixasrView Get(long meetingId, int part)
        {
            var workFiles = new CircularBuffer(GetPartFolder(meetingId, part), WORK_FILE_NAME, config.MaxWorkFileBackups);

            return JsonConvert.DeserializeObject<FixasrView>(workFiles.GetLatest());
        }
        /// <summary>
        /// Distributes the transcript segments over overlapping sections and writes
        /// each finished section with <c>WriteSection</c>.
        /// </summary>
        /// <remarks>
        /// Two output buffers are used. The "active" buffer collects the current
        /// section. Once a segment's time reaches the section boundary, segments are
        /// also added to the "standby" buffer until the overlap has passed. Then the
        /// active buffer is written, replaced by a fresh one, and the two roles are
        /// swapped. On the last part no overlap or swap happens; everything that is
        /// left goes into the active buffer.
        /// </remarks>
        /// <param name="fixasr">Transcript whose <c>asrsegments</c> are distributed.</param>
        /// <param name="outputFolder">Folder passed through to <c>WriteSection</c>.</param>
        /// <param name="sectionSize">Section length, compared against segment start times in whole seconds.</param>
        /// <param name="overlap">Overlap length, in the same unit as <paramref name="sectionSize"/>.</param>
        /// <param name="parts">Total number of parts; the last part is never closed early.</param>
        public void split(FixasrView fixasr, string outputFolder, int sectionSize, int overlap, int parts)
        {
            // Start with two empty buffers and the first section.
            fixasrSegment[0] = new FixasrView { lastedit = 0 };
            fixasrSegment[1] = new FixasrView { lastedit = 0 };
            sectionNumber    = 1;

            int active      = 0; // buffer of the section currently being filled
            int standby     = 1; // buffer of the next section, filled during the overlap
            int currentPart = 1;

            foreach (AsrSegment current in fixasr.asrsegments)
            {
                int seconds = (int)TimeSpan.ParseExact(current.startTime, format, culture).TotalSeconds;

                // Boundaries of the current section, computed before sectionNumber can change.
                int  sectionEnd = sectionNumber * sectionSize;
                int  overlapEnd = sectionEnd + overlap;
                bool onLastPart = currentPart == parts;

                // Inside the overlap window the segment also belongs to the next section.
                if (!onLastPart && seconds >= sectionEnd && seconds <= overlapEnd)
                {
                    fixasrSegment[standby].asrsegments.Add(current);
                }

                // Past the overlap: flush the active section and promote the standby buffer.
                if (!onLastPart && seconds > overlapEnd)
                {
                    WriteSection(outputFolder, active, sectionNumber);
                    sectionNumber++;
                    currentPart++;
                    fixasrSegment[active] = new FixasrView();

                    int previousActive = active;
                    active  = standby;
                    standby = previousActive;
                }

                fixasrSegment[active].asrsegments.Add(current);
            }

            // End of input: write whatever the active buffer still holds.
            if (fixasrSegment[active].asrsegments.Count > 0)
            {
                WriteSection(outputFolder, active, sectionNumber);
            }
        }
Beispiel #7
0
        /// <summary>
        /// Manual test driver: loads a fixed transcript file from the developer's
        /// disk and splits it into four parts of 180 with an overlap of 5.
        /// </summary>
        public void TestSplitTranscript()
        {
            const string fixasrFile   = @"C:\GOVMEETING\_SOURCECODE\src\Datafiles\USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_en\2017-02-15\R3-ToBeFixed.json";
            const string outputFolder = @"C:\GOVMEETING\_SOURCECODE\src\Datafiles\USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_en\2017-02-15\FixText";
            const int    sectionSize  = 180;
            const int    overlap      = 5;
            const int    parts        = 4;

            FixasrView transcript = JsonConvert.DeserializeObject<FixasrView>(File.ReadAllText(fixasrFile));

            new SplitTranscript().split(transcript, outputFolder, sectionSize, overlap, parts);
        }
Beispiel #8
0
        /// <summary>
        /// Manual test driver: reads a stored transcription response, reformats it
        /// with <c>ModifyTranscriptJson.Modify</c> and writes the result as indented
        /// JSON into a freshly created output folder under <c>testdataPath</c>.
        /// </summary>
        public void TestReformatOfTranscribeResponse()
        {
            string sourceFile   = testdataPath + @"\USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_EN_2017-02-15-rsp.json";
            string resultFolder = testdataPath + "\\" + "TestReformatOfTranscribeResponse";

            // Start from an empty output folder.
            FileDataRepositories.GMFileAccess.DeleteAndCreateDirectory(resultFolder);
            string resultFile = resultFolder + @"\USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_EN_2017-02-15.json";

            TranscribeResponse response =
                JsonConvert.DeserializeObject<TranscribeResponse>(File.ReadAllText(sourceFile));

            FixasrView reformatted = new ModifyTranscriptJson().Modify(response);

            File.WriteAllText(resultFile, JsonConvert.SerializeObject(reformatted, Formatting.Indented));
        }
Beispiel #9
0
        /// <summary>
        /// Intended to convert the fixed transcript of a meeting into the addtags
        /// format and store it. Currently a stub: it returns before doing any work.
        /// </summary>
        /// <param name="meeting">Meeting whose <c>Id</c> would be used when storing the result.</param>
        private void StartTagging(Meeting meeting)
        {
            // TODO - Check each part of the transcribed meeting.
            // Each should contain a xxxxx-DONE.json.
            // Append them all together into combinedFixes.
            string combinedFixes = "";

            // The flag is held in a variable, presumably so the code below is not
            // reported as unreachable while the TODO above is open.
            bool notImplementedYet = true;

            if (notImplementedYet)
            {
                return;
            }

            FixasrView  merged = JsonConvert.DeserializeObject<FixasrView>(combinedFixes);
            AddtagsView tagged = new FormatConversions().ConvertFixasrToAddtags(merged);

            addtagsRepository.Put(tagged, meeting.Id);
        }
        /// <summary>
        /// Manual test driver for the full pipeline on a shortened recording:
        /// extract a part of the video, extract its audio, transcribe it in the
        /// cloud, and write both the raw response and the reformatted transcript
        /// as indented JSON.
        /// </summary>
        /// <param name="language">Language argument passed to <c>MoveToCloudAndTranscribe</c>.</param>
        public void TestMoveToCloudAndTranscribe(string language)
        {
            string baseName     = "USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_EN_2017-02-15";
            string sourceVideo  = _config.TestfilesPath + "\\" + baseName + ".mp4";
            string resultFolder = _config.TestfilesPath + "\\" + "TestMoveToCloudAndTranscribe";

            // Start from an empty output folder.
            FileDataRepositories.GMFileAccess.DeleteAndCreateDirectory(resultFolder);

            string resultBase  = resultFolder + "\\" + baseName;
            string shortVideo  = resultBase + ".mp4";
            string shortAudio  = resultBase + ".flac";
            string resultJson  = resultBase + ".json";
            string rawResponse = resultBase + "-rsp.json";

            // Extract a short version of the recording.
            new SplitRecording().ExtractPart(sourceVideo, shortVideo, 60, 4 * 60);

            // Extract the audio of the short version.
            new ExtractAudio().Extract(shortVideo, shortAudio);

            // Transcribe and keep the raw response.
            TranscribeResponse response = transcribe.MoveToCloudAndTranscribe(shortAudio, baseName + ".flac", language);

            File.WriteAllText(rawResponse, JsonConvert.SerializeObject(response, Formatting.Indented));

            // Reformat the response and write the final JSON file.
            FixasrView reformatted = new ModifyTranscriptJson().Modify(response);

            File.WriteAllText(resultJson, JsonConvert.SerializeObject(reformatted, Formatting.Indented));
        }
 /// <summary>
 /// Stores the posted view for the given meeting part by delegating to <c>fixasr.Put</c>.
 /// </summary>
 /// <param name="value">View taken from the request body.</param>
 /// <param name="meetingId">Meeting id passed to <c>fixasr.Put</c>.</param>
 /// <param name="part">Part number passed to <c>fixasr.Put</c>.</param>
 /// <returns>The result of <c>fixasr.Put</c>.</returns>
 public bool Post([FromBody] FixasrView value, int meetingId, int part)
 {
     bool stored = fixasr.Put(value, meetingId, part);

     return stored;
 }
        /// <summary>
        /// Returns the view for the given meeting part by delegating to <c>fixasr.Get</c>.
        /// </summary>
        /// <param name="meetingId">Meeting id taken from the route.</param>
        /// <param name="part">Part number taken from the route.</param>
        /// <returns>The result of <c>fixasr.Get</c>.</returns>
        [HttpGet("{meetingId}/{part}")]        // GET: api/fixasr
        public FixasrView Get(int meetingId, int part)
        {
            return fixasr.Get(meetingId, part);
        }
Beispiel #13
0
        /// <summary>
        /// Runs the processing pipeline for one recording: copy (or, in development,
        /// shorten) the video into the meeting folder, extract the audio, transcribe
        /// it in the cloud, and write both the raw and the reformatted transcript as
        /// indented JSON.
        /// </summary>
        /// <remarks>
        /// Files written to <paramref name="meetingFolder"/>: "01-Video.mp4",
        /// "02-Audio.flac", "03-Transcribed.json" and "04-ToFix.json".
        /// </remarks>
        /// <param name="videoFile">Source video file.</param>
        /// <param name="meetingFolder">Folder that receives all output files.</param>
        /// <param name="language">Language argument passed to <c>MoveToCloudAndTranscribe</c>.</param>
        public void Process(string videoFile, string meetingFolder, string language)
        {
            /////// Copy video to meeting folder  /////////

            string videofileCopy = meetingFolder + "\\" + "01-Video.mp4";

            if (!config.IsDevelopment)
            {
                File.Copy(videoFile, videofileCopy);
            }
            else
            {
                // #### FOR DEVELOPMENT: WE SHORTEN THE RECORDING FILE. ####
                SplitRecording splitRecording = new SplitRecording();
                splitRecording.ExtractPart(videoFile, videofileCopy, 0, config.RecordingSizeForDevelopment);
            }

            /////// Extract the audio. ////////////////////////

            ExtractAudio extract   = new ExtractAudio();
            string       audioFile = meetingFolder + "\\" + "02-Audio.flac";

            extract.Extract(videofileCopy, audioFile);

            /////// Transcribe the audio file. /////////////

            // We want the object name in the cloud to be the original video file name with ".flac" extension.
            string objectName = Path.GetFileNameWithoutExtension(videoFile) + ".flac";

            // Move audio file to cloud and transcribe.
            // TODO - for development, when config.UseAudioFileAlreadyInCloud is set and the object
            // is already in the cloud, use transcribeAudio.TranscribeInCloud(objectName, language).
            TranscribeResponse transcript = transcribeAudio.MoveToCloudAndTranscribe(audioFile, objectName, language);

            string stringValue    = JsonConvert.SerializeObject(transcript, Formatting.Indented);
            string outputJsonFile = meetingFolder + "\\" + "03-Transcribed.json";

            File.WriteAllText(outputJsonFile, stringValue);

            /////// Reformat the JSON transcript to match what the fixasr routine will use.

            ModifyTranscriptJson convert = new ModifyTranscriptJson();

            outputJsonFile = meetingFolder + "\\" + "04-ToFix.json";
            FixasrView fixasr = convert.Modify(transcript);

            stringValue = JsonConvert.SerializeObject(fixasr, Formatting.Indented);
            File.WriteAllText(outputJsonFile, stringValue);

            /////// Split the video, audio and transcript into multiple work segments

            // TODO - not enabled yet: SplitIntoWorkSegments.Split(meetingFolder, videofileCopy,
            // outputJsonFile, config.FixasrSegmentSize, config.FixasrSegmentOverlap).
        }