/// <summary>
/// Feeds the <c>test.wav</c> sample through Cheetah one frame at a time, then flushes,
/// and checks the accumulated transcript against <c>REF_TRANSCRIPT</c> as well as the
/// endpoint flag reported for the last processed frame.
/// </summary>
public void TestProcess()
{
    using Cheetah cheetah = Cheetah.Create(ACCESS_KEY, null, 0.2f);
    string testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples/test.wav");
    List<short> pcm = GetPcmFromFile(testAudioPath, cheetah.SampleRate);

    int frameLen = cheetah.FrameLength;

    // Integer division already truncates; trailing samples that do not fill a whole
    // frame are not processed.
    int frameCount = pcm.Count / frameLen;

    string transcript = "";
    bool isEndpoint = false;
    for (int i = 0; i < frameCount; i++)
    {
        int start = i * frameLen;
        List<short> frame = pcm.GetRange(start, frameLen);
        CheetahTranscript transcriptObj = cheetah.Process(frame.ToArray());
        transcript += transcriptObj.Transcript;

        // Overwritten on every iteration: only the last frame's flag is asserted below.
        isEndpoint = transcriptObj.IsEndpoint;
    }

    CheetahTranscript finalTranscriptObj = cheetah.Flush();
    transcript += finalTranscriptObj.Transcript;

    // Expected value first, actual second, so failure output labels them correctly.
    Assert.AreEqual(REF_TRANSCRIPT, transcript);
    Assert.IsTrue(isEndpoint);
}
/// <summary>
/// Checks that a newly created Cheetah instance reports a usable frame length.
/// </summary>
public void TestFrameLength()
{
    using Cheetah cheetah = Cheetah.Create(ACCESS_KEY);

    // Parsing the property's own string form back into an int cannot fail, so it proves
    // nothing about the value. Assert on the value itself: a non-positive frame length
    // cannot be used to slice audio into frames.
    Assert.IsTrue(cheetah.FrameLength > 0, "Cheetah did not return a valid frame length.");
}
/// <summary>
/// Checks that a newly created Cheetah instance reports a usable sample rate.
/// </summary>
public void TestSampleRate()
{
    using Cheetah cheetah = Cheetah.Create(ACCESS_KEY);

    // Parsing the property's own string form back into an int says nothing about the
    // value, so assert directly that the reported rate is positive.
    Assert.IsTrue(cheetah.SampleRate > 0, "Cheetah did not return a valid sample rate.");
}
/// <summary>
/// Creates Cheetah with an explicitly supplied model file, feeds the <c>test.wav</c>
/// sample through it one frame at a time, flushes, and checks the accumulated
/// transcript against <c>REF_TRANSCRIPT</c>.
/// </summary>
public void TestCustomModel()
{
    string testModelPath = Path.Combine(_relativeDir, "lib/common/cheetah_params.pv");
    using Cheetah cheetah = Cheetah.Create(ACCESS_KEY, testModelPath);

    string testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples/test.wav");
    List<short> pcm = GetPcmFromFile(testAudioPath, cheetah.SampleRate);

    int frameLen = cheetah.FrameLength;

    // Integer division already truncates; trailing samples that do not fill a whole
    // frame are not processed.
    int frameCount = pcm.Count / frameLen;

    string transcript = "";
    for (int i = 0; i < frameCount; i++)
    {
        int start = i * frameLen;
        List<short> frame = pcm.GetRange(start, frameLen);
        CheetahTranscript transcriptObj = cheetah.Process(frame.ToArray());
        transcript += transcriptObj.Transcript;
    }

    CheetahTranscript finalTranscriptObj = cheetah.Flush();
    transcript += finalTranscriptObj.Transcript;

    // Expected value first, actual second, so failure output labels them correctly.
    Assert.AreEqual(REF_TRANSCRIPT, transcript);
}
/// <summary>
/// Creates an input audio stream and a Cheetah instance, then prints transcripts of the
/// recorded audio until the loop is interrupted.
/// </summary>
/// <param name="accessKey">AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).</param>
/// <param name="modelPath">Absolute path to the file containing model parameters. If not set it will be set to the default location.</param>
/// <param name="endpointDurationSec">
/// Duration of endpoint in seconds. A speech endpoint is detected when there is a segment of audio (with a duration specified herein) after
/// an utterance without any speech in it. Set to `0` to disable.
/// </param>
/// <param name="audioDeviceIndex">Optional argument. If provided, audio is recorded from this input device. Otherwise, the default audio input device is used.</param>
public static void RunDemo(
    string accessKey,
    string modelPath,
    float endpointDurationSec,
    int audioDeviceIndex)
{
    // Disposed when the method exits, including when recorder setup below throws.
    using Cheetah cheetah = Cheetah.Create(
        accessKey,
        modelPath,
        endpointDurationSec);

    PvRecorder recorder = null;
    try
    {
        recorder = PvRecorder.Create(audioDeviceIndex, cheetah.FrameLength);
        recorder.Start();

        // NOTE(review): this handler only prints a message and does not set Cancel on the
        // event args, so presumably the runtime still terminates the process once it
        // returns - confirm that the cleanup below is not relied on for CTRL+C.
        Console.CancelKeyPress += (s, o) =>
        {
            Console.WriteLine("Stopping...");
        };

        Console.WriteLine($"\nUsing device: {recorder.SelectedDevice}");
        Console.WriteLine(">>> Press `CTRL+C` to exit:\n");

        while (true)
        {
            short[] pcm = recorder.Read();

            CheetahTranscript transcriptObj = cheetah.Process(pcm);
            if (!String.IsNullOrEmpty(transcriptObj.Transcript))
            {
                Console.Write(transcriptObj.Transcript);
            }

            // At an endpoint, flush the remaining text and finish the current output line.
            if (transcriptObj.IsEndpoint)
            {
                CheetahTranscript finalTranscriptObj = cheetah.Flush();
                Console.WriteLine(finalTranscriptObj.Transcript);
            }
        }
    }
    catch (CheetahActivationLimitException)
    {
        Console.WriteLine($"AccessKey '{accessKey}' has reached its processing limit.");
    }
    finally
    {
        // The recorder may be null if its creation failed.
        recorder?.Dispose();
    }
}
/// <summary>
/// Reads through input file and prints the transcription returned by Cheetah.
/// </summary>
/// <param name="accessKey">AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).</param>
/// <param name="inputAudioPath">Required argument. Absolute path to input audio file.</param>
/// <param name="modelPath">Absolute path to the file containing model parameters. If not set it will be set to the default location.</param>
public static void RunDemo(
    string accessKey,
    string inputAudioPath,
    string modelPath)
{
    // init Cheetah speech-to-text engine
    using Cheetah cheetah = Cheetah.Create(
        accessKey,
        modelPath);

    // The file is only read, so open it read-only instead of requesting write access.
    using BinaryReader reader = new BinaryReader(File.OpenRead(inputAudioPath));
    ValidateWavFile(reader, cheetah.SampleRate, 16, out short numChannels);

    short[] cheetahFrame = new short[cheetah.FrameLength];
    int frameIndex = 0;

    while (reader.BaseStream.Position != reader.BaseStream.Length)
    {
        cheetahFrame[frameIndex++] = reader.ReadInt16();

        if (frameIndex == cheetahFrame.Length)
        {
            try
            {
                CheetahTranscript transcriptObj = cheetah.Process(cheetahFrame);
                if (!String.IsNullOrEmpty(transcriptObj.Transcript))
                {
                    Console.Write(transcriptObj.Transcript);
                }
            }
            catch (CheetahActivationLimitException)
            {
                // Stop here: the engine cannot process any further audio. The using
                // declarations above release the engine and the reader on return.
                Console.WriteLine($"AccessKey '{accessKey}' has reached its processing limit.");
                return;
            }

            frameIndex = 0;
        }

        // skip right channel
        if (numChannels == 2)
        {
            reader.ReadInt16();
        }
    }

    // Samples left in a partially filled frame at end of file are not processed.
    CheetahTranscript finalTranscriptObj = cheetah.Flush();
    string transcript = finalTranscriptObj.Transcript;
    if (!String.IsNullOrEmpty(transcript))
    {
        Console.WriteLine(transcript);
    }
}
/// <summary>
/// Verifies that a Cheetah instance exposes a version string that is neither null,
/// empty, nor whitespace.
/// </summary>
public void TestVersion()
{
    using Cheetah cheetah = Cheetah.Create(ACCESS_KEY);

    var reportedVersion = cheetah?.Version;
    bool versionIsBlank = string.IsNullOrWhiteSpace(reportedVersion);

    Assert.IsFalse(versionIsBlank, "Cheetah did not return a valid version number.");
}