/// <summary>
/// Opens a new recognition stream and starts a background task that drains
/// <c>bufferQueue</c> into it, periodically publishing an intermediate
/// transcription to <c>result.Text</c> through the dispatcher.
/// </summary>
/// <remarks>
/// The background loop keeps running while <c>isFeeding</c> is true or buffers are
/// still queued, so buffers enqueued before feeding was stopped are still consumed.
/// The task is not awaited by this method.
/// </remarks>
private void startSpeechRecognition()
{
    // Number of fed buffers between two intermediate decodes.
    const int decodeRate = 100;
    // Back-off used while the queue is empty, so the worker does not spin a core
    // at 100% waiting for the next buffer to arrive.
    const int idleDelayMilliseconds = 10;

    isFeeding = true;
    stream = client.CreateStream();
    var feedCount = 0;

    Task.Run(async () =>
    {
        while (isFeeding || !bufferQueue.IsEmpty)
        {
            // TryDequeue already reports an empty queue, so no separate IsEmpty probe is needed.
            if (!bufferQueue.TryDequeue(out short[] buffer))
            {
                await Task.Delay(idleDelayMilliseconds);
                continue;
            }

            client.FeedAudioContent(stream, buffer, Convert.ToUInt32(buffer.Length));

            if (++feedCount % decodeRate == 0)
            {
                var transcription = client.IntermediateDecode(stream);
                // result.Text is assigned through the dispatcher rather than from this worker.
                await Dispatcher.RunAsync(Windows.UI.Core.CoreDispatcherPriority.Normal, () =>
                {
                    result.Text = transcription;
                });
            }
        }
    });
}
/// <summary>
/// Discards a streaming state without decoding the computed logits.
/// Use this when the result of an ongoing streaming inference is no longer
/// needed and the costly decode operation should be avoided.
/// </summary>
/// <param name="stream">Instance of the stream to destroy; it is disposed after the native call.</param>
public unsafe void FreeStream(MozillaVoiceSttStream stream)
{
    // Fetch the native handle first, release it natively, then dispose the managed wrapper.
    var nativeState = stream.GetNativePointer();
    NativeImp.STT_FreeStream(nativeState);
    stream.Dispose();
}
/// <summary>
/// Produces the intermediate decoding, with metadata, of a streaming inference that is still in progress.
/// </summary>
/// <param name="stream">Instance of the stream to decode.</param>
/// <param name="aNumResults">Upper bound on the number of candidate transcripts to return. The returned list may be shorter.</param>
/// <returns>The STT intermediate result.</returns>
public unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults)
{
    var nativeMetadata = NativeImp.STT_IntermediateDecodeWithMetadata(stream.GetNativePointer(), aNumResults);
    // Convert the native result into its managed Metadata representation.
    return nativeMetadata.PtrToMetadata();
}
/// <summary>
/// Produces the intermediate decoding of a streaming inference that is still in progress.
/// </summary>
/// <param name="stream">Instance of the stream to decode.</param>
/// <returns>The STT intermediate result.</returns>
public unsafe string IntermediateDecode(MozillaVoiceSttStream stream)
{
    var nativeText = NativeImp.STT_IntermediateDecode(stream.GetNativePointer());
    // Convert the native result into a managed string.
    return nativeText.PtrToString();
}
/// <summary>
/// Ends the ongoing streaming inference and returns the STT result over the whole audio signal, with metadata.
/// </summary>
/// <param name="stream">Instance of the stream to finish.</param>
/// <param name="aNumResults">Upper bound on the number of candidate transcripts to return. The returned list may be shorter.</param>
/// <returns>The extended metadata result.</returns>
public unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults)
{
    var nativeMetadata = NativeImp.STT_FinishStreamWithMetadata(stream.GetNativePointer(), aNumResults);
    // Convert the native result into its managed Metadata representation.
    return nativeMetadata.PtrToMetadata();
}
/// <summary>
/// Ends the ongoing streaming inference and returns the STT result over the whole audio signal.
/// </summary>
/// <param name="stream">Instance of the stream to finish.</param>
/// <returns>The STT result.</returns>
public unsafe string FinishStream(MozillaVoiceSttStream stream)
{
    var nativeText = NativeImp.STT_FinishStream(stream.GetNativePointer());
    // Convert the native result into a managed string.
    return nativeText.PtrToString();
}
/// <summary>
/// Feeds audio samples to an ongoing streaming inference.
/// </summary>
/// <param name="stream">Instance of the stream to feed the data.</param>
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">Number of samples from <paramref name="aBuffer"/> to feed. Must not exceed the length of <paramref name="aBuffer"/>.</param>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="aBufferSize"/> is larger than the number of samples available in <paramref name="aBuffer"/>.
/// </exception>
public unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize)
{
    // The sample count is handed to native code as-is, so a count larger than the
    // managed array would make the native side read past the end of the buffer.
    uint availableSamples = aBuffer == null ? 0u : (uint)aBuffer.Length;
    if (aBufferSize > availableSamples)
    {
        throw new System.ArgumentOutOfRangeException(
            nameof(aBufferSize),
            aBufferSize,
            "The sample count exceeds the length of the supplied audio buffer.");
    }

    NativeImp.STT_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize);
}