/// <summary>
/// Destroys a streaming state without decoding the computed logits.
/// Use this when you no longer need the result of an ongoing streaming
/// inference and want to skip the costly decode operation.
/// </summary>
public unsafe void DiscardStream()
{
    // Passed by ref: the native call may update/clear the state pointer.
    NativeImp.DS_DiscardStream(ref _streamingStatePP);
}
/// <summary>
/// Runs the DeepSpeech model over a complete audio buffer and returns extended metadata.
/// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <param name="aSampleRate">The sample-rate of the audio signal.</param>
/// <returns>The extended metadata. The user is responsible for freeing the struct. Returns NULL on error.</returns>
public unsafe Models.Metadata SpeechToTextWithMetadata(short[] aBuffer, uint aBufferSize, uint aSampleRate)
{
    var metadataPtr = NativeImp.DS_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize, aSampleRate);
    // Marshal the native metadata pointer into a managed struct.
    return metadataPtr.PtrToMetadata();
}
/// <summary>
/// Closes the ongoing streaming inference and returns the STT result
/// over the whole audio signal as extended metadata.
/// </summary>
/// <returns>The extended metadata. The user is responsible for freeing the struct.</returns>
public unsafe Models.Metadata FinishStreamWithMetadata() =>
    NativeImp.DS_FinishStreamWithMetadata(_streamingStatePP).PtrToMetadata();
/// <summary>
/// Prints the versions of TensorFlow and DeepSpeech.
/// </summary>
public unsafe void PrintVersions() => NativeImp.DS_PrintVersions();
/// <summary>
/// Sets the beam width value used by the model.
/// </summary>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width value generates better results at the cost of decoding time.</param>
/// <exception cref="ArgumentException">Thrown on failure.</exception>
public unsafe void SetModelBeamWidth(uint aBeamWidth)
{
    // EvaluateResultCode translates a non-success code into an exception.
    var status = NativeImp.DS_SetModelBeamWidth(_modelStatePP, aBeamWidth);
    EvaluateResultCode(status);
}
/// <summary>
/// Feeds audio samples to an ongoing streaming inference.
/// </summary>
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate.</param>
/// <param name="aBufferSize">The number of samples in the audio buffer.</param>
public unsafe void FeedAudioContent(short[] aBuffer, uint aBufferSize)
{
    NativeImp.DS_FeedAudioContent(_streamingStatePP, aBuffer, aBufferSize);
}
/// <summary>
/// Frees associated resources and destroys the model object.
/// </summary>
public unsafe void Dispose() => NativeImp.DS_FreeModel(_modelStatePP);
/// <summary>
/// Runs the DeepSpeech model over a complete audio buffer and returns results including metadata.
/// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
/// <returns>The extended metadata. Returns NULL on error.</returns>
public unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, uint aBufferSize, uint aNumResults)
{
    var nativeResult = NativeImp.DS_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize, aNumResults);
    // Marshal the native metadata pointer into a managed struct.
    return nativeResult.PtrToMetadata();
}
/// <summary>
/// Destroys a streaming state without decoding the computed logits.
/// Use this when you no longer need the result of an ongoing streaming
/// inference and want to skip the costly decode operation.
/// </summary>
public unsafe void FreeStream()
{
    // Passed by ref: the native call may update/clear the state pointer.
    NativeImp.DS_FreeStream(ref _streamingStatePP);
}
/// <summary>
/// Frees a DeepSpeech-allocated Metadata struct.
/// </summary>
/// <param name="intPtr">Pointer to the native Metadata struct to free.</param>
public unsafe void FreeMetadata(IntPtr intPtr)
{
    NativeImp.DS_FreeMetadata(intPtr);
}
/// <summary>
/// Creates a new streaming inference state.
/// </summary>
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
public unsafe void CreateStream()
{
    // EvaluateResultCode translates a non-success code into an exception.
    var status = NativeImp.DS_CreateStream(_modelStatePP, ref _streamingStatePP);
    EvaluateResultCode(status);
}
/// <summary>
/// Frees associated resources and destroys the model object.
/// </summary>
public unsafe void Dispose() => NativeImp.DS_DestroyModel(_modelStatePP);
/// <summary>
/// Runs the DeepSpeech model over a complete audio buffer to perform Speech-To-Text.
/// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <param name="aSampleRate">The sample-rate of the audio signal.</param>
/// <returns>The STT result. The user is responsible for freeing the string. Returns NULL on error.</returns>
public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize, uint aSampleRate)
{
    // NOTE(review): the native result is returned directly here, unlike the
    // other SpeechToText overload which calls PtrToString() — presumably this
    // binding's NativeImp declares a marshaled string return; verify against NativeImp.
    return NativeImp.DS_SpeechToText(_modelStatePP, aBuffer, aBufferSize, aSampleRate);
}
/// <summary>
/// Destroys a streaming state without decoding the computed logits.
/// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation.
/// </summary>
/// <param name="stream">Instance of the stream to free; it is disposed after the native state is released.</param>
public unsafe void FreeStream(DeepSpeechStream stream)
{
    NativeImp.DS_FreeStream(stream.GetNativePointer());
    // Dispose the managed wrapper so it cannot be reused after the native state is gone.
    stream.Dispose();
}
/// <summary>
/// Disables decoding using an external scorer.
/// </summary>
/// <exception cref="ArgumentException">Thrown when an external scorer is not enabled.</exception>
public unsafe void DisableExternalScorer()
{
    // EvaluateResultCode translates a non-success code into an exception.
    var status = NativeImp.DS_DisableExternalScorer(_modelStatePP);
    EvaluateResultCode(status);
}
/// <summary>
/// Runs the DeepSpeech model over a complete audio buffer to perform Speech-To-Text.
/// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <returns>The STT result. Returns NULL on error.</returns>
public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize)
{
    var nativeResult = NativeImp.DS_SpeechToText(_modelStatePP, aBuffer, aBufferSize);
    // Marshal the native string pointer into a managed string.
    return nativeResult.PtrToString();
}
/// <summary>
/// Feeds audio samples to an ongoing streaming inference.
/// </summary>
/// <param name="stream">Instance of the stream to feed the data.</param>
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio buffer.</param>
public unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize)
{
    NativeImp.DS_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize);
}
/// <summary>
/// Gets the beam width value used by the model. If SetModelBeamWidth was not
/// called before, returns the default value loaded from the model file.
/// </summary>
/// <returns>Beam width value used by the model.</returns>
public unsafe uint GetModelBeamWidth() =>
    NativeImp.DS_GetModelBeamWidth(_modelStatePP);
/// <summary>
/// Closes the ongoing streaming inference and returns the STT result over the whole audio signal.
/// </summary>
/// <param name="stream">Instance of the stream to finish.</param>
/// <returns>The STT result.</returns>
public unsafe string FinishStream(DeepSpeechStream stream)
{
    var nativeResult = NativeImp.DS_FinishStream(stream.GetNativePointer());
    // Marshal the native string pointer into a managed string.
    return nativeResult.PtrToString();
}
/// <summary>
/// Returns the sample rate expected by the model.
/// </summary>
/// <returns>Sample rate.</returns>
public unsafe int GetModelSampleRate() =>
    NativeImp.DS_GetModelSampleRate(_modelStatePP);
/// <summary>
/// Closes the ongoing streaming inference and returns the STT result over the
/// whole audio signal, including metadata.
/// </summary>
/// <param name="stream">Instance of the stream to finish.</param>
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
/// <returns>The extended metadata result.</returns>
public unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults)
{
    var nativeResult = NativeImp.DS_FinishStreamWithMetadata(stream.GetNativePointer(), aNumResults);
    // Marshal the native metadata pointer into a managed struct.
    return nativeResult.PtrToMetadata();
}
/// <summary>
/// Closes the ongoing streaming inference and returns the STT result over the whole audio signal.
/// </summary>
/// <returns>The STT result. The user is responsible for freeing the string.</returns>
public unsafe string FinishStream() =>
    NativeImp.DS_FinishStream(_streamingStatePP).PtrToString();
/// <summary>
/// Computes the intermediate decoding of an ongoing streaming inference.
/// </summary>
/// <param name="stream">Instance of the stream to decode.</param>
/// <returns>The STT intermediate result.</returns>
public unsafe string IntermediateDecode(DeepSpeechStream stream)
{
    var nativeResult = NativeImp.DS_IntermediateDecode(stream.GetNativePointer());
    // Marshal the native string pointer into a managed string.
    return nativeResult.PtrToString();
}
/// <summary>
/// Computes the intermediate decoding of an ongoing streaming inference.
/// This is an expensive process as the decoder implementation isn't currently
/// capable of streaming, so it always starts from the beginning of the audio.
/// </summary>
/// <returns>The STT intermediate result. The user is responsible for freeing the string.</returns>
public unsafe string IntermediateDecode()
{
    // NOTE(review): the native result is returned directly here, unlike the
    // overload taking a stream which calls PtrToString() — presumably this
    // binding's NativeImp declares a marshaled string return; verify against NativeImp.
    return NativeImp.DS_IntermediateDecode(_streamingStatePP);
}
/// <summary>
/// Computes the intermediate decoding of an ongoing streaming inference, including metadata.
/// </summary>
/// <param name="stream">Instance of the stream to decode.</param>
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
/// <returns>The STT intermediate result.</returns>
public unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults)
{
    var nativeResult = NativeImp.DS_IntermediateDecodeWithMetadata(stream.GetNativePointer(), aNumResults);
    // Marshal the native metadata pointer into a managed struct.
    return nativeResult.PtrToMetadata();
}
/// <summary>
/// Creates a new streaming inference state.
/// </summary>
/// <param name="aPreAllocFrames">Number of timestep frames to reserve.
/// One timestep is equivalent to two window lengths (20ms).
/// If set to 0 we reserve enough frames for 3 seconds of audio (150).</param>
/// <param name="aSampleRate">The sample-rate of the audio signal.</param>
/// <returns>Zero for success, non-zero on failure.</returns>
public unsafe int SetupStream(uint aPreAllocFrames, uint aSampleRate)
{
    // The streaming state pointer is populated by ref on success.
    return NativeImp.DS_SetupStream(_modelStatePP, aPreAllocFrames, aSampleRate, ref _streamingStatePP);
}
/// <summary>
/// Returns the version of this library. The returned version is a semantic
/// version (SemVer 2.0.0).
/// </summary>
public unsafe string Version() =>
    NativeImp.DS_Version().PtrToString();
/// <summary>
/// Frees a DeepSpeech-allocated string.
/// </summary>
/// <param name="intPtr">Pointer to the native string to free.</param>
public unsafe void FreeString(IntPtr intPtr)
{
    NativeImp.DS_FreeString(intPtr);
}
/// <summary>
/// Creates a new streaming inference state.
/// </summary>
/// <param name="aSampleRate">The sample-rate of the audio signal.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
public unsafe void SetupStream(uint aSampleRate)
{
    // EvaluateResultCode translates a non-success code into an exception.
    var status = NativeImp.DS_SetupStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
    EvaluateResultCode(status);
}