Beispiel #1
0
        /// <summary>
        /// Processes one frame of audio and returns the transcription inferred for it.
        /// </summary>
        /// <param name="pcm">
        /// A frame of audio samples. It must be non-null, non-empty and contain exactly <c>FrameLength</c> samples
        /// (the value obtained from `pv_cheetah_frame_length()`).
        /// The incoming audio needs to have a sample rate equal to `pv_sample_rate()` and be 16-bit linearly-encoded.
        /// Cheetah operates on single-channel audio.
        /// </param>
        /// <returns>
        /// A <c>CheetahTranscript</c> built from the transcript text and the endpoint flag reported by the native call.
        /// </returns>
        /// <exception cref="CheetahInvalidArgumentException">
        /// Thrown when <paramref name="pcm"/> is null, empty, or not exactly <c>FrameLength</c> samples long.
        /// </exception>
        /// <remarks>
        /// When the native call reports a non-success status, the exception produced by
        /// <c>PvStatusToException</c> for that status is thrown.
        /// </remarks>
        public CheetahTranscript Process(Int16[] pcm)
        {
            // The null test has to come first and has to short-circuit (`||`, not `|`);
            // otherwise a null frame is dereferenced and surfaces as a NullReferenceException
            // instead of the documented argument exception.
            if (pcm == null || pcm.Length == 0)
            {
                throw new CheetahInvalidArgumentException("Input audio frame is empty");
            }

            if (pcm.Length != FrameLength)
            {
                throw new CheetahInvalidArgumentException($"Input audio frame size ({pcm.Length}) was not the size specified by Cheetah engine ({FrameLength}). " +
                                                          $"Use cheetah.FrameLength to get the correct size.");
            }

            IntPtr   transcriptPtr = IntPtr.Zero;
            bool     isEndpoint    = false;
            PvStatus status        = pv_cheetah_process(_libraryPointer, pcm, out transcriptPtr, out isEndpoint);

            if (status != PvStatus.SUCCESS)
            {
                throw PvStatusToException(status, "Cheetah failed to process the audio frame.");
            }

            // Copy the native string into a managed string before handing the pointer to pv_free
            // (presumably releases the native buffer; the pointer must not be read afterwards).
            string transcript = Marshal.PtrToStringAnsi(transcriptPtr);

            pv_free(transcriptPtr);
            return new CheetahTranscript(transcript, isEndpoint);
        }
Beispiel #2
0
        /// <summary>
        /// Creates an instance of the Cheetah Speech-to-Text engine.
        /// </summary>
        /// <param name="accessKey">AccessKey obtained from Picovoice Console (https://console.picovoice.ai/). Must not be null or empty.</param>
        /// <param name="modelPath">
        /// Path to the file containing model parameters. The file must exist; this constructor does not
        /// substitute a default location (NOTE(review): any default-path resolution presumably happens in the caller — confirm).
        /// </param>
        /// <param name="endpointDurationSec">
        /// Duration of endpoint in seconds. A speech endpoint is detected when there is a segment of audio (with a duration
        /// specified herein) after an utterance without any speech in it. Set to `0` to disable. Must not be negative or NaN.
        /// </param>
        /// <exception cref="CheetahInvalidArgumentException">
        /// Thrown when <paramref name="accessKey"/> is null or empty, or when <paramref name="endpointDurationSec"/> is negative or NaN.
        /// </exception>
        /// <exception cref="CheetahIOException">Thrown when no file exists at <paramref name="modelPath"/>.</exception>
        /// <remarks>
        /// When native initialization reports a non-success status, the exception produced by
        /// <c>PvStatusToException</c> for that status is thrown.
        /// </remarks>
        private Cheetah(
            string accessKey,
            string modelPath,
            float endpointDurationSec = 1.0f)
        {
            if (string.IsNullOrEmpty(accessKey))
            {
                throw new CheetahInvalidArgumentException("No AccessKey provided to Cheetah");
            }

            if (!File.Exists(modelPath))
            {
                throw new CheetahIOException($"Couldn't find model file at '{modelPath}'");
            }

            // NaN compares false against every number, so `< 0` alone would let it through
            // to the native library; reject it explicitly.
            if (float.IsNaN(endpointDurationSec) || endpointDurationSec < 0)
            {
                throw new CheetahInvalidArgumentException("`endpointDurationSec` must be either `0` or a positive number");
            }

            PvStatus status = pv_cheetah_init(
                accessKey,
                modelPath,
                endpointDurationSec,
                out _libraryPointer);

            if (status != PvStatus.SUCCESS)
            {
                // Supply a message so the resulting exception is not thrown with empty text.
                throw PvStatusToException(status, "Cheetah init failed.");
            }

            // Cache the values reported by the native library on the instance.
            Version     = Marshal.PtrToStringAnsi(pv_cheetah_version());
            SampleRate  = pv_sample_rate();
            FrameLength = pv_cheetah_frame_length();
        }
Beispiel #3
0
        /// <summary>
        /// Marks the end of the audio stream, flushes internal state of the object, and returns any remaining transcript.
        /// </summary>
        /// <returns>
        /// A <c>CheetahTranscript</c> holding the remaining transcript text; its endpoint flag is always <c>false</c>.
        /// </returns>
        /// <remarks>
        /// When the native flush call reports a non-success status, the exception produced by
        /// <c>PvStatusToException</c> for that status is thrown.
        /// </remarks>
        public CheetahTranscript Flush()
        {
            IntPtr   transcriptPtr = IntPtr.Zero;
            PvStatus status        = pv_cheetah_flush(_libraryPointer, out transcriptPtr);

            if (status != PvStatus.SUCCESS)
            {
                // Report the operation that actually failed (flush, not frame processing).
                throw PvStatusToException(status, "Cheetah failed to flush.");
            }

            // Copy the native string into a managed string before handing the pointer to pv_free
            // (presumably releases the native buffer; the pointer must not be read afterwards).
            string transcript = Marshal.PtrToStringAnsi(transcriptPtr);

            pv_free(transcriptPtr);
            return new CheetahTranscript(transcript, false);
        }
Beispiel #4
0
        /// <summary>
        /// Converts a Picovoice status code to the relevant .NET exception.
        /// </summary>
        /// <param name="status">Picovoice library status code.</param>
        /// <param name="message">
        /// Message given to the created exception. Defaults to an empty string. It is not used for
        /// status codes that have no mapping; those get a fixed message instead.
        /// </param>
        /// <returns>The .NET exception for the status code. It is created here but not thrown; the caller throws it.</returns>
        private static Exception PvStatusToException(PvStatus status, string message = "")
        {
            Exception mapped;

            switch (status)
            {
                case PvStatus.OUT_OF_MEMORY:
                    mapped = new CheetahMemoryException(message);
                    break;

                case PvStatus.IO_ERROR:
                    mapped = new CheetahIOException(message);
                    break;

                case PvStatus.INVALID_ARGUMENT:
                    mapped = new CheetahInvalidArgumentException(message);
                    break;

                case PvStatus.STOP_ITERATION:
                    mapped = new CheetahStopIterationException(message);
                    break;

                case PvStatus.KEY_ERROR:
                    mapped = new CheetahKeyException(message);
                    break;

                case PvStatus.INVALID_STATE:
                    mapped = new CheetahInvalidStateException(message);
                    break;

                case PvStatus.RUNTIME_ERROR:
                    mapped = new CheetahRuntimeException(message);
                    break;

                case PvStatus.ACTIVATION_ERROR:
                    mapped = new CheetahActivationException(message);
                    break;

                case PvStatus.ACTIVATION_LIMIT_REACHED:
                    mapped = new CheetahActivationLimitException(message);
                    break;

                case PvStatus.ACTIVATION_THROTTLED:
                    mapped = new CheetahActivationThrottledException(message);
                    break;

                case PvStatus.ACTIVATION_REFUSED:
                    mapped = new CheetahActivationRefusedException(message);
                    break;

                default:
                    // Any status without an explicit mapping gets the generic exception and a fixed message.
                    mapped = new CheetahException("Unmapped error code returned from Cheetah.");
                    break;
            }

            return mapped;
        }