Example 1
0
        /// <summary>
        /// Decodes the batch of audio files listed in the given batch file,
        /// logging the recognition result for each file and a final count.
        /// </summary>
        /// <param name="batchFile">Path of the batch file that lists the audio files to decode.</param>
        /// <exception cref="IOException">Rethrown (after logging) when an I/O error occurs during decoding.</exception>
        public void decode(String batchFile)
        {
            BatchItem batchItem;
            int       count = 0;

            try {
                recognizer.Allocate();
                setBatchFile(batchFile);

                batchManager.Start();
                this.LogInfo("BatchDecoder: decoding files in "
                             + batchManager.Filename);

                // Decode until the utterance limit is reached or the batch is exhausted.
                while (count < utteranceId &&
                       (batchItem = batchManager.GetNextItem()) != null)
                {
                    setInputStream(batchItem.Filename);
                    Result result = recognizer.Recognize(batchItem.Transcript);
                    this.LogInfo("File  : " + batchItem.Filename);
                    this.LogInfo("Result: " + result);
                    count++;
                }
                batchManager.Stop();
                recognizer.Deallocate();
            } catch (IOException io)
            {
                this.LogInfo("I/O error during decoding: " + io.Message);
                // Use 'throw;' instead of 'throw io;' so the original stack trace is preserved (CA2200).
                throw;
            }
            this.LogInfo("BatchDecoder: " + count + " files decoded");
        }
 /// <summary>
 /// Begins a recognition session over the supplied audio stream.
 /// </summary>
 /// <param name="stream">Audio source to recognize.</param>
 /// <param name="timeFrame">Portion of the stream to process.</param>
 public void StartRecognition(Stream stream, TimeFrame timeFrame)
 {
     try
     {
         Recognizer.Allocate();
         Context.SetSpeechSource(stream, timeFrame);
     }
     catch (Exception ex)
     {
         // Startup failures are logged rather than propagated to the caller.
         this.LogError(ex);
     }
 }
Example 3
0
        /// <summary>
        /// Aligns a sentence transcript against the given audio file and returns timed word results.
        /// Runs up to four passes over a queue of (text chunk, time frame, range) work items:
        /// the first three passes decode with a language model built from the chunk text; the
        /// fourth switches the decoder to the aligner search manager and a word grammar.
        /// </summary>
        /// <param name="audioUrl">Audio file to align against.</param>
        /// <param name="sentenceTranscript">Transcript as a list of sentences.</param>
        /// <returns>Word results for every transcript position that was successfully aligned.</returns>
        public List <WordResult> Align(FileInfo audioUrl, List <string> sentenceTranscript)
        {
            var transcript = SentenceToWords(sentenceTranscript);

            var aligner      = new LongTextAligner(transcript, TupleSize);
            var alignedWords = new Dictionary <int, WordResult>();
            var ranges       = new LinkedList <Range>();
            //var texts = new ArrayDeque();
            //var timeFrames = new ArrayDeque();
            var texts      = new LinkedList <List <string> >();
            var timeFrames = new LinkedList <TimeFrame>();

            // Seed the work queues with the whole transcript over an unbounded time frame.
            ranges.AddLast(new Range(0, transcript.Count));
            texts.Offer(transcript);
            TimeFrame totalTimeFrame = TimeFrame.Infinite;

            timeFrames.Offer(totalTimeFrame);
            long lastFrame = TimeFrame.Infinite.End;

            for (int i = 0; i < 4; i++)
            {
                if (i == 3)
                {
                    // Final pass: switch the decoder to the dedicated aligner search manager.
                    _context.SetLocalProperty("decoder->searchManager", "alignerSearchManager");
                }

                while (texts.Count != 0)
                {
                    // The three queues advance in lockstep: one text chunk, its frame, its range.
                    Debug.Assert(texts.Count == ranges.Count);
                    Debug.Assert(texts.Count == timeFrames.Count);

                    var text  = texts.Poll();
                    var frame = timeFrames.Poll();
                    var range = ranges.Poll();

                    // NOTE(review): this tests texts.Count (remaining queue length), not
                    // text.Count (size of the chunk just dequeued) — confirm that skipping
                    // based on queue length rather than chunk size is intended.
                    if (i < 3 && texts.Count < MinLmAlignSize)
                    {
                        continue;
                    }

                    this.LogInfo("Aligning frame " + frame + " to text " + text + " range " + range);

                    if (i < 3)
                    {
                        _languageModel.SetText(text);
                    }

                    _recognizer.Allocate();

                    if (i == 3)
                    {
                        _grammar.SetWords(text);
                    }

                    // NOTE(review): the stream returned by OpenRead() is never disposed in this
                    // method — confirm the context takes ownership and closes it.
                    _context.SetSpeechSource(audioUrl.OpenRead(), frame);

                    // Collect the timed best hypothesis for every result the recognizer produces.
                    var    hypothesis = new List <WordResult>();
                    Result speechResult;
                    while (null != (speechResult = _recognizer.Recognize()))
                    {
                        hypothesis.AddRange(speechResult.GetTimedBestResult(false));
                    }

                    if (i == 0)
                    {
                        // First pass: remember where recognized speech actually ends so later
                        // scheduling can bound its time frames.
                        if (hypothesis.Count > 0)
                        {
                            lastFrame = hypothesis[hypothesis.Count - 1].TimeFrame.End;
                        }
                    }

                    var words = new List <string>();
                    foreach (WordResult wr in hypothesis)
                    {
                        words.Add(wr.Word.Spelling);
                    }
                    int[]             alignment = aligner.Align(words, range);
                    List <WordResult> results   = hypothesis;
                    this.LogInfo("Decoding result is " + results);

                    // dumpAlignment(transcript, alignment, results);
                    DumpAlignmentStats(transcript, alignment, results);

                    // Record each hypothesis word that aligned to a transcript position (-1 = no match).
                    // NOTE(review): Dictionary.Add throws on a duplicate key — this assumes a
                    // transcript position is never aligned twice across passes; verify.
                    for (int j = 0; j < alignment.Length; j++)
                    {
                        if (alignment[j] != -1)
                        {
                            alignedWords.Add(alignment[j], hypothesis[j]);
                        }
                    }

                    _recognizer.Deallocate();
                }
                // Derive new, smaller work items covering the spans still unaligned.
                ScheduleNextAlignment(transcript, alignedWords, ranges, texts, timeFrames, lastFrame);
            }
            return(new List <WordResult>(alignedWords.Values));
        }
 /// <summary>
 /// Starts recognition process.
 /// Recognition process is paused until the next call to startRecognition.
 /// </summary>
 /// <param name="clear">clear cached microphone data.</param>
 public void StartRecognition(bool clear)
 {
     // NOTE(review): the 'clear' parameter is never used in this body — cached
     // microphone data is NOT actually cleared despite the XML doc; confirm
     // against callers whether a clear call is missing here.
     Recognizer.Allocate();
     _microphone.StartRecording();
 }