/// <summary>
/// Opens a new recognition stream and starts a background task that drains
/// <c>bufferQueue</c> into it, publishing an intermediate transcription to
/// <c>result.Text</c> every <c>decodeRate</c> feeds.
/// </summary>
/// <remarks>
/// The background task keeps running while <c>isFeeding</c> is true or buffers
/// remain queued. The task is not awaited by this method (fire-and-forget).
/// </remarks>
private void startSpeechRecognition()
        {
            isFeeding = true;
            stream    = client.CreateStream();
            var feedCount   = 0;
            var decodeRate  = 100; // decode every 100 feeds
            var idleDelayMs = 10;  // pause between polls when no audio is queued

            Task.Run(async () =>
            {
                while (isFeeding || !bufferQueue.IsEmpty)
                {
                    // TryDequeue already reports an empty queue, so no separate IsEmpty check is needed.
                    if (!bufferQueue.TryDequeue(out short[] buffer))
                    {
                        // Nothing to feed yet: yield instead of spinning at full CPU
                        // while waiting for the producer to enqueue more audio.
                        await Task.Delay(idleDelayMs);
                        continue;
                    }

                    client.FeedAudioContent(stream, buffer, Convert.ToUInt32(buffer.Length));

                    if (++feedCount % decodeRate == 0)
                    {
                        var transcription = client.IntermediateDecode(stream);

                        // The text update is routed through the Dispatcher rather than set directly from this task.
                        await Dispatcher.RunAsync(Windows.UI.Core.CoreDispatcherPriority.Normal, () =>
                        {
                            result.Text = transcription;
                        });
                    }
                }
            });
        }
Exemple #2
0
 /// <summary>
 /// Releases a streaming state without decoding the logits computed so far.
 /// Use this when the result of an ongoing streaming inference is no longer
 /// needed and the cost of a final decode should be avoided.
 /// </summary>
 /// <param name="stream">Instance of the stream to destroy; it is disposed by this call.</param>
 public unsafe void FreeStream(MozillaVoiceSttStream stream)
 {
     // Release the native side first, then dispose the managed wrapper.
     var nativeStream = stream.GetNativePointer();
     NativeImp.STT_FreeStream(nativeStream);

     stream.Dispose();
 }
Exemple #3
0
 /// <summary>
 /// Produces the intermediate decoding, with metadata, of a streaming inference that is still in progress.
 /// </summary>
 /// <param name="stream">Instance of the stream to decode.</param>
 /// <param name="aNumResults">Upper bound on the number of candidate transcripts to return; fewer may be returned.</param>
 /// <returns>The STT intermediate result.</returns>
 public unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults)
 {
     var nativeStream = stream.GetNativePointer();
     var nativeResult = NativeImp.STT_IntermediateDecodeWithMetadata(nativeStream, aNumResults);

     // Convert the native result into its managed Metadata representation.
     return nativeResult.PtrToMetadata();
 }
Exemple #4
0
 /// <summary>
 /// Produces the intermediate decoding of a streaming inference that is still in progress.
 /// </summary>
 /// <param name="stream">Instance of the stream to decode.</param>
 /// <returns>The STT intermediate result.</returns>
 public unsafe string IntermediateDecode(MozillaVoiceSttStream stream)
 {
     var nativeStream = stream.GetNativePointer();
     var nativeResult = NativeImp.STT_IntermediateDecode(nativeStream);

     // Convert the native result into a managed string.
     return nativeResult.PtrToString();
 }
Exemple #5
0
 /// <summary>
 /// Ends the ongoing streaming inference and returns the STT result for the whole audio signal, with metadata.
 /// </summary>
 /// <param name="stream">Instance of the stream to finish.</param>
 /// <param name="aNumResults">Upper bound on the number of candidate transcripts to return; fewer may be returned.</param>
 /// <returns>The extended metadata result.</returns>
 public unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults)
 {
     var nativeStream = stream.GetNativePointer();
     var nativeResult = NativeImp.STT_FinishStreamWithMetadata(nativeStream, aNumResults);

     // Convert the native result into its managed Metadata representation.
     return nativeResult.PtrToMetadata();
 }
Exemple #6
0
 /// <summary>
 /// Ends the ongoing streaming inference and returns the STT result for the whole audio signal.
 /// </summary>
 /// <param name="stream">Instance of the stream to finish.</param>
 /// <returns>The STT result.</returns>
 public unsafe string FinishStream(MozillaVoiceSttStream stream)
 {
     var nativeStream = stream.GetNativePointer();
     var nativeResult = NativeImp.STT_FinishStream(nativeStream);

     // Convert the native result into a managed string.
     return nativeResult.PtrToString();
 }
Exemple #7
0
 /// <summary>
 /// Pushes audio samples into an ongoing streaming inference.
 /// </summary>
 /// <param name="stream">Instance of the stream to feed the data.</param>
 /// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
 /// <param name="aBufferSize">Size value forwarded to the native call together with <paramref name="aBuffer"/>; presumably the number of samples in the buffer (confirm against the native API).</param>
 public unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize)
     => NativeImp.STT_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize);