/// <summary>
/// Decodes the batch of audio files.
/// </summary>
/// <param name="batchFile">path to the batch file listing the utterances to decode.</param>
public void decode(string batchFile)
{
    BatchItem batchItem;
    int count = 0;
    try
    {
        recognizer.Allocate();
        setBatchFile(batchFile);
        batchManager.Start();
        this.LogInfo("BatchDecoder: decoding files in " + batchManager.Filename);

        // Decode each batch item until the utterance limit is reached or the batch is exhausted.
        while (count < utteranceId && (batchItem = batchManager.GetNextItem()) != null)
        {
            setInputStream(batchItem.Filename);
            Result result = recognizer.Recognize(batchItem.Transcript);
            this.LogInfo("File : " + batchItem.Filename);
            this.LogInfo("Result: " + result);
            count++;
        }

        batchManager.Stop();
        recognizer.Deallocate();
    }
    catch (IOException io)
    {
        this.LogInfo("I/O error during decoding: " + io.Message);
        throw;
    }
    this.LogInfo("BatchDecoder: " + count + " files decoded");
}
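// Driver sketch (not from the source). The BatchDecoder construction and the batch
// file layout are assumptions; only the decode(batchFile) call is shown above.
static class BatchDecoderDemo
{
    static void Main()
    {
        // Assumption: the decoder is configured elsewhere (acoustic model,
        // dictionary, language model) before decode() is called.
        var decoder = new BatchDecoder();

        // Assumption: "batch.txt" lists one utterance per line, providing the
        // Filename and Transcript that batchManager.GetNextItem() returns.
        decoder.decode("batch.txt");
    }
}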
/// <summary>
/// Starts the recognition process.
/// </summary>
/// <param name="stream">audio stream to recognize.</param>
/// <param name="timeFrame">time range of the stream to decode.</param>
public void StartRecognition(Stream stream, TimeFrame timeFrame)
{
    try
    {
        Recognizer.Allocate();
        Context.SetSpeechSource(stream, timeFrame);
    }
    catch (Exception exception)
    {
        this.LogError(exception);
    }
}
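// Usage sketch (not from the source). Assumes this overload lives on a
// StreamSpeechRecognizer-style class ("recognizer" below) that also exposes
// GetResult() and StopRecognition(); the TimeFrame(start, end) constructor and
// millisecond units are assumptions as well.
private static void RecognizeFirstTenSeconds(StreamSpeechRecognizer recognizer)
{
    using (var stream = File.OpenRead("utterance.wav"))
    {
        // Decode only the first ten seconds of the stream.
        recognizer.StartRecognition(stream, new TimeFrame(0, 10000));

        SpeechResult result;                            // assumed result type
        while ((result = recognizer.GetResult()) != null)
        {
            Console.WriteLine(result.GetHypothesis());  // assumed accessor
        }

        recognizer.StopRecognition();                   // assumed counterpart call
    }
}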
/// <summary>
/// Aligns the given transcript against the audio file and returns word-level timings.
/// </summary>
public List<WordResult> Align(FileInfo audioUrl, List<string> sentenceTranscript)
{
    var transcript = SentenceToWords(sentenceTranscript);
    var aligner = new LongTextAligner(transcript, TupleSize);
    var alignedWords = new Dictionary<int, WordResult>();
    var ranges = new LinkedList<Range>();
    var texts = new LinkedList<List<string>>();
    var timeFrames = new LinkedList<TimeFrame>();

    ranges.AddLast(new Range(0, transcript.Count));
    texts.Offer(transcript);
    TimeFrame totalTimeFrame = TimeFrame.Infinite;
    timeFrames.Offer(totalTimeFrame);
    long lastFrame = TimeFrame.Infinite.End;

    // Three passes with the language model, then a final pass with the aligner grammar.
    for (int i = 0; i < 4; i++)
    {
        if (i == 3)
        {
            _context.SetLocalProperty("decoder->searchManager", "alignerSearchManager");
        }

        while (texts.Count != 0)
        {
            Debug.Assert(texts.Count == ranges.Count);
            Debug.Assert(texts.Count == timeFrames.Count);

            var text = texts.Poll();
            var frame = timeFrames.Poll();
            var range = ranges.Poll();

            if (i < 3 && texts.Count < MinLmAlignSize)
            {
                continue;
            }

            this.LogInfo("Aligning frame " + frame + " to text " + text + " range " + range);

            if (i < 3)
            {
                _languageModel.SetText(text);
            }

            _recognizer.Allocate();

            if (i == 3)
            {
                _grammar.SetWords(text);
            }

            _context.SetSpeechSource(audioUrl.OpenRead(), frame);

            // Collect the timed best result of every utterance in this frame.
            var hypothesis = new List<WordResult>();
            Result speechResult;
            while (null != (speechResult = _recognizer.Recognize()))
            {
                hypothesis.AddRange(speechResult.GetTimedBestResult(false));
            }

            // Remember the end of speech seen in the first pass.
            if (i == 0 && hypothesis.Count > 0)
            {
                lastFrame = hypothesis[hypothesis.Count - 1].TimeFrame.End;
            }

            var words = new List<string>();
            foreach (WordResult wr in hypothesis)
            {
                words.Add(wr.Word.Spelling);
            }

            // Map the hypothesis words back onto positions in the transcript.
            int[] alignment = aligner.Align(words, range);
            List<WordResult> results = hypothesis;
            this.LogInfo("Decoding result is " + results);

            // dumpAlignment(transcript, alignment, results);
            DumpAlignmentStats(transcript, alignment, results);

            for (int j = 0; j < alignment.Length; j++)
            {
                if (alignment[j] != -1)
                {
                    alignedWords.Add(alignment[j], hypothesis[j]);
                }
            }

            _recognizer.Deallocate();
        }

        // Queue the still-unaligned ranges for the next pass.
        ScheduleNextAlignment(transcript, alignedWords, ranges, texts, timeFrames, lastFrame);
    }

    return new List<WordResult>(alignedWords.Values);
}
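// Usage sketch (not from the source). The aligner instance, its construction and
// the SpeechAligner type name are assumptions; only Align(FileInfo, List<string>)
// and the WordResult members it populates (Word.Spelling, TimeFrame) come from the
// method above.
private static void AlignLecture(SpeechAligner aligner)
{
    var audio = new FileInfo("lecture.wav");
    var transcript = new List<string>
    {
        "first sentence of the reference transcript",
        "second sentence of the reference transcript"
    };

    List<WordResult> aligned = aligner.Align(audio, transcript);
    foreach (WordResult word in aligned)
    {
        Console.WriteLine(word.Word.Spelling + " " + word.TimeFrame);
    }
}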
/// <summary>
/// Starts the recognition process, optionally clearing previously cached microphone data.
/// </summary>
/// <param name="clear">clear cached microphone data.</param>
public void StartRecognition(bool clear)
{
    Recognizer.Allocate();
    _microphone.StartRecording();
}
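// Usage sketch (not from the source). Assumes a LiveSpeechRecognizer-style class
// ("recognizer" below); GetResult(), GetHypothesis() and StopRecognition() are
// assumed members modeled on the stream overload earlier in this section.
private static void ListenOnce(LiveSpeechRecognizer recognizer)
{
    recognizer.StartRecognition(true);              // true: discard cached microphone data

    SpeechResult result = recognizer.GetResult();   // assumed: blocks until an utterance is decoded
    if (result != null)
    {
        Console.WriteLine(result.GetHypothesis());  // assumed accessor
    }

    recognizer.StopRecognition();                   // assumed counterpart call
}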