Esempio n. 1
0
        /// <summary>
        /// Feeds the test recording to Cheetah one frame at a time, appends the flushed
        /// remainder, and checks that the full transcript equals REF_TRANSCRIPT and that
        /// the last processed frame was reported as an endpoint.
        /// </summary>
        public void TestProcess()
        {
            // Third argument is presumably the endpoint duration in seconds — confirm against Cheetah.Create.
            using Cheetah cheetah = Cheetah.Create(ACCESS_KEY, null, 0.2f);
            string testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples/test.wav");
            List<short> pcm = GetPcmFromFile(testAudioPath, cheetah.SampleRate);

            int frameLen = cheetah.FrameLength;
            // Integer division truncates, so any trailing partial frame is dropped.
            int frameCount = pcm.Count / frameLen;

            string transcript = "";
            bool isEndpoint = false;

            for (int i = 0; i < frameCount; i++)
            {
                List<short> frame = pcm.GetRange(i * frameLen, frameLen);
                CheetahTranscript transcriptObj = cheetah.Process(frame.ToArray());
                transcript += transcriptObj.Transcript;
                // Only the value from the final frame survives the loop.
                isEndpoint = transcriptObj.IsEndpoint;
            }

            // Flush returns whatever text the engine was still buffering.
            CheetahTranscript finalTranscriptObj = cheetah.Flush();
            transcript += finalTranscriptObj.Transcript;

            // Expected value goes first so a failure message labels the values correctly.
            Assert.AreEqual(REF_TRANSCRIPT, transcript);
            Assert.IsTrue(isEndpoint);
        }
Esempio n. 2
0
        /// <summary>
        /// Creates Cheetah with an explicitly supplied model file, feeds it the test
        /// recording one frame at a time, appends the flushed remainder, and checks that
        /// the full transcript equals REF_TRANSCRIPT.
        /// </summary>
        public void TestCustomModel()
        {
            string testModelPath = Path.Combine(_relativeDir, "lib/common/cheetah_params.pv");

            using Cheetah cheetah = Cheetah.Create(ACCESS_KEY, testModelPath);
            string testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples/test.wav");
            List<short> pcm = GetPcmFromFile(testAudioPath, cheetah.SampleRate);

            int frameLen = cheetah.FrameLength;
            // Integer division truncates, so any trailing partial frame is dropped.
            int frameCount = pcm.Count / frameLen;

            string transcript = "";

            for (int i = 0; i < frameCount; i++)
            {
                List<short> frame = pcm.GetRange(i * frameLen, frameLen);
                CheetahTranscript transcriptObj = cheetah.Process(frame.ToArray());
                transcript += transcriptObj.Transcript;
            }

            // Flush returns whatever text the engine was still buffering.
            CheetahTranscript finalTranscriptObj = cheetah.Flush();
            transcript += finalTranscriptObj.Transcript;

            // Expected value goes first so a failure message labels the values correctly.
            Assert.AreEqual(REF_TRANSCRIPT, transcript);
        }
Esempio n. 3
0
        /// <summary>
        /// Creates an input audio stream and instantiates an instance of Cheetah object,
        /// then transcribes recorded frames to the console until interrupted.
        /// </summary>
        /// <param name="accessKey">AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).</param>
        /// <param name="modelPath">Absolute path to the file containing model parameters. If not set it will be set to the default location.</param>
        /// <param name="endpointDurationSec">
        /// Duration of endpoint in seconds. A speech endpoint is detected when there is a segment of audio (with a duration specified herein) after
        /// an utterance without any speech in it. Set to `0` to disable.
        /// </param>
        /// <param name="audioDeviceIndex">Optional argument. If provided, audio is recorded from this input device. Otherwise, the default audio input device is used.</param>
        public static void RunDemo(
            string accessKey,
            string modelPath,
            float endpointDurationSec,
            int audioDeviceIndex)
        {
            // `using` guarantees the engine is released even if the recorder cannot be created or started.
            using Cheetah cheetah = Cheetah.Create(
                accessKey,
                modelPath,
                endpointDurationSec);

            PvRecorder recorder = PvRecorder.Create(audioDeviceIndex, cheetah.FrameLength);

            try
            {
                recorder.Start();

                // NOTE(review): the handler does not set ConsoleCancelEventArgs.Cancel, so the
                // runtime presumably terminates the process once it returns — confirm whether
                // the cleanup below is expected to run on CTRL+C.
                Console.CancelKeyPress += (s, o) =>
                {
                    Console.WriteLine("Stopping...");
                };

                Console.WriteLine($"\nUsing device: {recorder.SelectedDevice}");
                Console.WriteLine(">>> Press `CTRL+C` to exit:\n");

                while (true)
                {
                    short[] pcm = recorder.Read();

                    CheetahTranscript transcriptObj = cheetah.Process(pcm);
                    if (!String.IsNullOrEmpty(transcriptObj.Transcript))
                    {
                        Console.Write(transcriptObj.Transcript);
                    }

                    // At an endpoint, flush the remaining buffered text and end the console line.
                    if (transcriptObj.IsEndpoint)
                    {
                        CheetahTranscript finalTranscriptObj = cheetah.Flush();
                        Console.WriteLine(finalTranscriptObj.Transcript);
                    }
                }
            }
            catch (CheetahActivationLimitException)
            {
                Console.WriteLine($"AccessKey '{accessKey}' has reached its processing limit.");
            }
            finally
            {
                recorder.Dispose();
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Reads through input file and prints the transcription returned by Cheetah.
        /// </summary>
        /// <param name="accessKey">AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).</param>
        /// <param name="inputAudioPath">Required argument. Absolute path to input audio file.</param>
        /// <param name="modelPath">Absolute path to the file containing model parameters. If not set it will be set to the default location.</param>
        public static void RunDemo(
            string accessKey,
            string inputAudioPath,
            string modelPath)
        {
            // init Cheetah speech-to-text engine
            using Cheetah cheetah = Cheetah.Create(
                      accessKey,
                      modelPath);

            // The file is only read, so open it read-only; this also works for write-protected files.
            using BinaryReader reader = new BinaryReader(File.OpenRead(inputAudioPath));
            ValidateWavFile(reader, cheetah.SampleRate, 16, out short numChannels);

            short[] cheetahFrame = new short[cheetah.FrameLength];
            int frameIndex = 0;

            // Bytes consumed per loop iteration: one 16-bit sample, plus the skipped
            // right-channel sample for stereo input.
            int bytesPerStep = numChannels == 2 ? 2 * sizeof(short) : sizeof(short);
            Stream stream = reader.BaseStream;

            // Stop when fewer than one full step remains, so a trailing partial sample
            // does not cause an EndOfStreamException.
            while (stream.Length - stream.Position >= bytesPerStep)
            {
                cheetahFrame[frameIndex++] = reader.ReadInt16();

                // skip right channel
                if (numChannels == 2)
                {
                    reader.ReadInt16();
                }

                if (frameIndex < cheetahFrame.Length)
                {
                    continue;
                }

                frameIndex = 0;
                try
                {
                    CheetahTranscript transcriptObj = cheetah.Process(cheetahFrame);
                    if (!String.IsNullOrEmpty(transcriptObj.Transcript))
                    {
                        Console.Write(transcriptObj.Transcript);
                    }
                }
                catch (CheetahActivationLimitException)
                {
                    // Stop here: the engine cannot process further audio, and the
                    // `using` declaration above takes care of disposing it.
                    Console.WriteLine($"AccessKey '{accessKey}' has reached its processing limit.");
                    return;
                }
            }

            // Samples left in a partially filled frame are not processed; Flush only
            // returns text the engine was still buffering.
            CheetahTranscript finalTranscriptObj = cheetah.Flush();
            string transcript = finalTranscriptObj.Transcript;

            if (!String.IsNullOrEmpty(transcript))
            {
                Console.WriteLine(transcript);
            }
        }