// GET: api/fixasr
// Fetches the stored text for the requested meeting part from EditMeetingRepo
// and returns it deserialized into an EditMeeting_Dto.
[HttpGet("{meetingId}/{part}")]
public EditMeeting_Dto Get(int meetingId, int part)
{
    // The repository returns a string; it is parsed here as JSON.
    return JsonConvert.DeserializeObject<EditMeeting_Dto>(
        EditMeetingRepo.Get(meetingId, part));
}
/* Break the video, audio and JSON transcript of a meeting into smaller
 * segments, written under the "FixText" subfolder of meetingFolder.
 *
 * After processing, a recording goes through two manual steps:
 *   1. Users correct errors in the automatically recognized text.
 *   2. Users add metadata tags to the transcript.
 * Working on small segments means that:
 *   1. Several volunteers can work on one recording simultaneously.
 *   2. Less video or audio has to be downloaded to a user at a time.
 */
public void Split(string meetingFolder, string videofile, string fixasrFile, int segmentSize, int segmentOverlap)
{
    string splitFolder = Path.Combine(meetingFolder, "FixText");

    // Load the full transcript up front.
    EditMeeting_Dto meetingEditDto =
        JsonConvert.DeserializeObject<EditMeeting_Dto>(File.ReadAllText(fixasrFile));

    // Cut the recording into parts; the returned count is passed on to the
    // transcript splitter below.
    int parts = new SplitRecording().Split(videofile, splitFolder, segmentSize, segmentOverlap);

    // Extract the audio for the segments as well, since some users may prefer
    // to work from audio when fixing the transcript.
    ExtractAll(splitFolder);

    // Cut the transcript using the same segment size, overlap and part count.
    new SplitTranscript().Split(meetingEditDto, splitFolder, segmentSize, segmentOverlap, parts);
}
// NOTE(review): Everything in and around this method is commented out, so
// Split currently has an EMPTY body: calling it does nothing and writes no
// files. The disabled code is kept below for reference. It uses names that are
// not declared in the visible signature (fixasr, FixasrView, AsrSegment,
// WriteSection) -- presumably it was written against an earlier data model;
// confirm and port it to EditMeeting_Dto before re-enabling.

// readonly CultureInfo culture = CultureInfo.CurrentCulture;
// readonly string format = "hh\\:mm\\:ss";
// int sectionNumber;
// readonly FixasrView[] fixasrSegment = new FixasrView[2];

// Intended purpose (per the disabled code): distribute transcript segments
// into overlapping sections and write each section to outputFolder.
// Parameters: meetingEditDto - transcript to split (unused while disabled);
// outputFolder - destination folder; sectionSize / overlap - compared against
// segment start times in seconds in the disabled code; parts - number of parts,
// the last of which gets no overlap handling.
public void Split(EditMeeting_Dto meetingEditDto, string outputFolder, int sectionSize, int overlap, int parts)
{
    // fixasrSegment[0] = new FixasrView();
    // fixasrSegment[1] = new FixasrView();
    // fixasrSegment[0].lastedit = 0;
    // fixasrSegment[1].lastedit = 0;

    // sectionNumber = 1;

    // // Since we want the sections to overlap (if overlap is non-zero), we can be adding to 2 outputs at once
    // // during the overlap. The first output that we start becomes the "primary" output.
    // // When we reach sectionSize for the primary, we open the next output. We now add to both outputs until
    // // we reach "sectionSize + overlap" on the primary. At that time we close the primary and write it to disk.
    // // The output that just started writing to now becomes the primary.

    // int primary = 0;   // current primary output
    // int secondary = 1;
    // int currentPart = 1;

    // foreach (AsrSegment asrsegment in fixasr.asrsegments)
    // {
    //     TimeSpan timespan = TimeSpan.ParseExact(asrsegment.startTime, format, culture);
    //     int currentTime = (int) timespan.TotalSeconds;

    //     // If we are within the overlap time, add to both. Unless we are on the last part.
    //     if ((currentTime >= (sectionNumber * sectionSize)) && (currentTime <= (sectionNumber * sectionSize + overlap))
    //         && (currentPart != parts))
    //     {
    //         fixasrSegment[secondary].asrsegments.Add(asrsegment);
    //     }

    //     // If we are past the overlap. Write out the primary to disk and swap primary/secondary.
    //     // Unless we are on the last part.
    //     if (currentTime > (sectionNumber * sectionSize + overlap)
    //         && (currentPart != parts))
    //     {
    //         WriteSection(outputFolder, primary, sectionNumber);
    //         sectionNumber++;
    //         currentPart++;
    //         fixasrSegment[primary] = new FixasrView();

    //         // Swap primary and secondary
    //         int x = primary;
    //         primary = secondary;
    //         secondary = x;
    //     }
    //     fixasrSegment[primary].asrsegments.Add(asrsegment);
    // }

    // // Handle end of file
    // if (fixasrSegment[primary].asrsegments.Count > 0)
    // {
    //     WriteSection(outputFolder, primary, sectionNumber);
    // }
}
// Build the EditTranscriptView structure consumed by EditTranscript.
// Reads the transcribed JSON from fixedTags, converts it with
// ModifyTranscriptJson.Modify and writes the result, as indented JSON,
// to editmeetingFile.
static void CreateEditTranscriptView(string fixedTags, string editmeetingFile)
{
    // Remove whatever the previous run left behind.
    File.Delete(editmeetingFile);

    // Load the transcription response.
    Transcribed_Dto transcribed =
        JsonConvert.DeserializeObject<Transcribed_Dto>(File.ReadAllText(fixedTags));

    // Reshape it into the form the edit-meeting routine works with.
    EditMeeting_Dto reshaped = new ModifyTranscriptJson().Modify(transcribed);

    File.WriteAllText(
        editmeetingFile,
        JsonConvert.SerializeObject(reshaped, Formatting.Indented));
}
// Convert a transcription result into the edit-meeting structure, producing
// exactly one output talk per input talk.
//
// Every word is copied and given a running sequence number that continues
// across talks. Each talk that contains at least one word gets a SpeakerName:
//   "UNKOWN"     - all words carry speaker tag 0
//   "DIFFERENT"  - the words carry more than one speaker tag
//   "Speaker N"  - all words carry the same tag N
// A talk without words keeps whatever SpeakerName its constructor set.
public EditMeeting_Dto Modify2(Transcribed_Dto transcript)
{
    EditMeeting_Dto editmeeting = new EditMeeting_Dto();
    int wordNum = 0;   // running word sequence number

    foreach (TranscribedTalk_Dto result in transcript.Talks)
    {
        EditMeetingTalk_Dto talk = new EditMeetingTalk_Dto(result.Transcript, result.Confidence);

        // -1 = no word seen yet, -2 = words disagree, otherwise the common tag.
        int speaker = -1;
        bool hasWords = false;

        foreach (TranscribedWord_Dto respword in result.Words)
        {
            EditMeetingWord_Dto word = new EditMeetingWord_Dto(
                respword.Word,
                respword.Confidence,
                respword.StartTime,
                respword.EndTime,
                respword.SpeakerTag,
                ++wordNum
            );
            hasWords = true;

            if (speaker == -1)
            {
                speaker = word.SpeakerTag;   // first speaker found (could also be 0)
            }
            else if (speaker != -2 && speaker != word.SpeakerTag)
            {
                speaker = -2;                // two speakers that do not match
            }

            talk.Words.Add(word);
        }

        // The name depends only on the final value of "speaker", so it is
        // assigned once per talk instead of once per word. The guard keeps the
        // name unassigned for a talk with no words, as before.
        if (hasWords)
        {
            talk.SpeakerName = speaker switch
            {
                // NOTE(review): "UNKOWN" looks like a misspelling of "UNKNOWN".
                // It is left unchanged because it is runtime data that
                // consumers may compare against -- confirm before correcting.
                0 => "UNKOWN",
                -2 => "DIFFERENT",
                _ => "Speaker " + speaker.ToString(),
            };
        }

        editmeeting.Talks.Add(talk);
    }

    return editmeeting;
}
// Manual driver for SplitTranscript.Split: loads a transcript from a fixed
// location on the developer machine and splits it into the "FixText" folder
// next to it. Performs no assertions.
public void TestSplitTranscript()
{
    // Input transcript.
    string fixasrFile = @"C:\GOVMEETING\_SOURCECODE\src\DATAFILES\USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_en\2017-02-15\R3-ToBeFixed.json";
    EditMeeting_Dto meetingEditDto =
        JsonConvert.DeserializeObject<EditMeeting_Dto>(File.ReadAllText(fixasrFile));

    // Destination and split settings.
    string outputFolder = @"C:\GOVMEETING\_SOURCECODE\src\DATAFILES\USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_en\2017-02-15\FixText";
    const int sectionSize = 180;
    const int overlap = 5;
    const int parts = 4;

    SplitTranscript splitter = new SplitTranscript();
    splitter.Split(meetingEditDto, outputFolder, sectionSize, overlap, parts);
}
// Manual driver for the "extract, transcribe, reformat" pipeline.
// It cuts a short clip from the test video, extracts its audio, then writes
// two JSON files into a freshly recreated "TestMoveToCloudAndTranscribe"
// folder under config.TestdataPath: the raw response ("-rsp.json") and the
// reformatted transcript (".json").
//
// NOTE: the cloud transcription call is disabled (see TODO below), so the
// "response" is an empty Transcribed_Dto and "language" is currently unused.
public void TestMoveToCloudAndTranscribe(string language)
{
    AudioProcessing audioProcessing = new AudioProcessing();

    string baseName = "USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_EN_2017-02-15";
    string videoFile = Path.Combine(config.TestdataPath, baseName + ".mp4");

    // Start from an empty output folder.
    string outputFolder = Path.Combine(config.TestdataPath, "TestMoveToCloudAndTranscribe");
    GMFileAccess.DeleteAndCreateDirectory(outputFolder);

    // All output files share this path prefix.
    string outputBasePath = Path.Combine(outputFolder, baseName);
    string shortFile = outputBasePath + ".mp4";
    string audioFile = outputBasePath + ".flac";
    string jsonFile = outputBasePath + ".json";

    // Extract a short version of the video, then its audio.
    audioProcessing.ExtractPart(videoFile, shortFile, 60, 4 * 60);
    audioProcessing.Extract(shortFile, audioFile);

    // Transcribe.
    // TODO - signature of TranscribeInCloud has changed.
    // response = transcribe.MoveToCloudAndTranscribe(audioFile, baseName + ".flac", config.GoogleCloudBucketName, config.UseAudioFileAlreadyInCloud, language);
    Transcribed_Dto response = new Transcribed_Dto();
    File.WriteAllText(
        outputBasePath + "-rsp.json",
        JsonConvert.SerializeObject(response, Formatting.Indented));

    // Convert the response to the edit-meeting format and save it as JSON.
    ModifyTranscriptJson converter = new ModifyTranscriptJson();
    EditMeeting_Dto meetingEditDto = converter.Modify(response);
    File.WriteAllText(
        jsonFile,
        JsonConvert.SerializeObject(meetingEditDto, Formatting.Indented));
}
// Manual driver for ModifyTranscriptJson.Modify: reads a saved transcription
// response ("<base>-rsp.json") from testfilesPath, converts it, and writes the
// result as indented JSON ("<base>.json") into a freshly recreated
// "TestReformatOfTranscribeResponse" folder. Performs no assertions.
public void TestReformatOfTranscribeResponse()
{
    const string baseName = "USA_ME_LincolnCounty_BoothbayHarbor_Selectmen_EN_2017-02-15";

    // Build every path with Path.Combine instead of concatenating a literal
    // "\" so the separator is correct on every platform and this matches how
    // the output folder is built.
    string inputFile = Path.Combine(testfilesPath, baseName + "-rsp.json");
    string outputFolder = Path.Combine(testfilesPath, "TestReformatOfTranscribeResponse");
    GMFileAccess.DeleteAndCreateDirectory(outputFolder);
    string outputFile = Path.Combine(outputFolder, baseName + ".json");

    // Load the saved response.
    string stringValue = File.ReadAllText(inputFile);
    var transcript = JsonConvert.DeserializeObject<Transcribed_Dto>(stringValue);

    // Convert and save.
    ModifyTranscriptJson convert = new ModifyTranscriptJson();
    EditMeeting_Dto meetingEditDto = convert.Modify(transcript);

    stringValue = JsonConvert.SerializeObject(meetingEditDto, Formatting.Indented);
    File.WriteAllText(outputFile, stringValue);
}
// Convert a transcription result into the edit-meeting structure, starting a
// new output talk whenever the speaker tag changes within an input talk.
//
// Every word is copied and given a running sequence number that continues
// across talks. Each output talk is created from the transcript text and
// confidence of the input talk it came from, and is named "Speaker N" after
// the tag of its first word. An input talk without words yields one output
// talk with no words and no SpeakerName assigned here.
public EditMeeting_Dto Modify(Transcribed_Dto transcript)
{
    const int NoSpeakerYet = -1;

    var converted = new EditMeeting_Dto();
    int sequence = 0;

    foreach (TranscribedTalk_Dto source in transcript.Talks)
    {
        var current = new EditMeetingTalk_Dto(source.Transcript, source.Confidence);
        int activeSpeaker = NoSpeakerYet;

        foreach (TranscribedWord_Dto raw in source.Words)
        {
            var word = new EditMeetingWord_Dto(
                raw.Word, raw.Confidence, raw.StartTime, raw.EndTime, raw.SpeakerTag, ++sequence);

            bool sameSpeaker = activeSpeaker == word.SpeakerTag;
            if (!sameSpeaker)
            {
                // A change after the first word closes the current talk and
                // opens a new one based on the same input talk.
                bool talkInProgress = activeSpeaker != NoSpeakerYet;
                if (talkInProgress)
                {
                    converted.Talks.Add(current);
                    current = new EditMeetingTalk_Dto(source.Transcript, source.Confidence);
                }

                activeSpeaker = word.SpeakerTag;
                current.SpeakerName = "Speaker " + activeSpeaker.ToString();
            }

            current.Words.Add(word);
        }

        // Flush the talk that is still open for this input talk.
        converted.Talks.Add(current);
    }

    return converted;
}
// Stores the posted meeting text for the given meeting part.
// The DTO from the request body is serialized to indented JSON and handed to
// EditMeetingRepo.Put; the value returned by Put is passed back to the caller.
public bool Post([FromBody] EditMeeting_Dto value, int meetingId, int part)
{
    return EditMeetingRepo.Put(
        JsonConvert.SerializeObject(value, Formatting.Indented),
        meetingId,
        part);
}