/// <summary>
/// Feeds an out-of-context audio sample through Rhino frame by frame and
/// verifies that inference finalizes with the command NOT understood.
/// </summary>
public void TestOutOfContext()
{
    using Rhino r = SetUpClass();
    string testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples/test_out_of_context.wav");
    List<short> data = GetPcmFromFile(testAudioPath, r.SampleRate);

    int frameLen = r.FrameLength;
    // Integer division already truncates toward zero; the previous
    // Math.Floor((float)(...)) round-trip was redundant (and the unused
    // `results` list has been removed).
    int frameCount = data.Count / frameLen;

    bool isFinalized = false;
    for (int i = 0; i < frameCount; i++)
    {
        List<short> frame = data.GetRange(i * frameLen, frameLen);
        isFinalized = r.Process(frame.ToArray());
        if (isFinalized)
        {
            break;
        }
    }

    Assert.IsTrue(isFinalized, "Failed to finalize.");
    Inference inference = r.GetInference();
    Assert.IsFalse(inference.IsUnderstood, "Shouldn't be able to understand.");
}
/// <summary>
/// Reads through input file and prints the inference result returned by Rhino.
/// </summary>
/// <param name="inputAudioPath">Required argument. Absolute path to input audio file.</param>
/// <param name="contextPath">Required argument. Absolute path to the Rhino context file.</param>
/// <param name="modelPath">Absolute path to the file containing model parameters. If not set it will be set to the default location.</param>
/// <param name="sensitivity">
/// Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in
/// fewer misses at the cost of (potentially) increasing the erroneous inference rate. If not set, the default value of 0.5 will be used.
/// </param>
public static void RunDemo(string inputAudioPath, string contextPath, string modelPath, float sensitivity)
{
    // init rhino speech-to-intent engine
    using Rhino rhino = Rhino.Create(contextPath, modelPath, sensitivity);

    // open and validate wav file
    using BinaryReader reader = new BinaryReader(File.Open(inputAudioPath, FileMode.Open));
    ValidateWavFile(reader, rhino.SampleRate, 16, out short numChannels);

    // accumulate mono samples into a frame-sized buffer and hand each full
    // frame to Rhino until it finalizes an inference or the file runs out
    short[] frameBuffer = new short[rhino.FrameLength];
    int samplesInFrame = 0;
    while (reader.BaseStream.Position != reader.BaseStream.Length)
    {
        frameBuffer[samplesInFrame] = reader.ReadInt16();
        samplesInFrame++;

        if (samplesInFrame == frameBuffer.Length)
        {
            samplesInFrame = 0;
            if (rhino.Process(frameBuffer))
            {
                Inference inference = rhino.GetInference();
                if (!inference.IsUnderstood)
                {
                    Console.WriteLine("Didn't understand the command.");
                }
                else
                {
                    Console.WriteLine("{");
                    Console.WriteLine($" intent : '{inference.Intent}'");
                    Console.WriteLine(" slots : {");
                    foreach (KeyValuePair<string, string> slot in inference.Slots)
                    {
                        Console.WriteLine($" {slot.Key} : '{slot.Value}'");
                    }
                    Console.WriteLine(" }");
                    Console.WriteLine("}");
                }
                return;
            }
        }

        // skip right channel
        if (numChannels == 2)
        {
            reader.ReadInt16();
        }
    }

    Console.WriteLine("Reached end of audio file before Rhino returned an inference.");
}
/// <summary>
/// Runs one test audio file through Rhino and asserts the inference outcome.
/// </summary>
/// <param name="rhino">Initialized Rhino instance.</param>
/// <param name="audioFileName">File name under resources/audio_samples.</param>
/// <param name="isWithinContext">Whether the spoken command belongs to the loaded context.</param>
/// <param name="expectedIntent">Expected intent; only used when <paramref name="isWithinContext"/> is true.</param>
/// <param name="expectedSlots">Expected slot key/value pairs; only used when <paramref name="isWithinContext"/> is true.</param>
private void RunTestCase(
    Rhino rhino,
    string audioFileName,
    bool isWithinContext,
    string expectedIntent = null,
    Dictionary<string, string> expectedSlots = null)
{
    string testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples", audioFileName);
    List<short> data = GetPcmFromFile(testAudioPath, rhino.SampleRate);

    int frameLen = rhino.FrameLength;
    // Integer division already truncates; the previous Math.Floor((float)(...))
    // round-trip was redundant (and the unused `results` list has been removed).
    int frameCount = data.Count / frameLen;

    bool isFinalized = false;
    for (int i = 0; i < frameCount; i++)
    {
        List<short> frame = data.GetRange(i * frameLen, frameLen);
        isFinalized = rhino.Process(frame.ToArray());
        if (isFinalized)
        {
            break;
        }
    }

    Assert.IsTrue(isFinalized, "Failed to finalize.");
    Inference inference = rhino.GetInference();
    if (isWithinContext)
    {
        Assert.IsTrue(inference.IsUnderstood, "Couldn't understand.");
        Assert.AreEqual(expectedIntent, inference.Intent, "Incorrect intent.");
        // Check the count as well as the contents: the subset check alone
        // passed trivially when inference returned fewer slots than expected.
        Assert.AreEqual(expectedSlots.Count, inference.Slots.Count, "Incorrect number of slots.");
        Assert.IsTrue(inference.Slots.All(kv =>
            expectedSlots.ContainsKey(kv.Key) && expectedSlots[kv.Key] == kv.Value));
    }
    else
    {
        Assert.IsFalse(inference.IsUnderstood, "Shouldn't be able to understand.");
    }
}
/// <summary>
/// Feeds a within-context audio sample through Rhino frame by frame and
/// verifies the expected intent and slot values are inferred.
/// </summary>
public void TestWithinContext()
{
    using Rhino r = SetUpClass();
    string testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples/test_within_context.wav");
    List<short> data = GetPcmFromFile(testAudioPath, r.SampleRate);

    int frameLen = r.FrameLength;
    // Integer division already truncates; the previous Math.Floor((float)(...))
    // round-trip was redundant (and the unused `results` list has been removed).
    int frameCount = data.Count / frameLen;

    bool isFinalized = false;
    for (int i = 0; i < frameCount; i++)
    {
        List<short> frame = data.GetRange(i * frameLen, frameLen);
        isFinalized = r.Process(frame.ToArray());
        if (isFinalized)
        {
            break;
        }
    }

    Assert.IsTrue(isFinalized, "Failed to finalize.");
    Inference inference = r.GetInference();
    Assert.IsTrue(inference.IsUnderstood, "Couldn't understand.");
    Assert.AreEqual("orderDrink", inference.Intent, "Incorrect intent.");

    Dictionary<string, string> expectedSlotValues = new Dictionary<string, string>()
    {
        { "size", "medium" },
        { "numberOfShots", "double shot" },
        { "coffeeDrink", "americano" },
        { "milkAmount", "lots of milk" },
        { "sugarAmount", "some sugar" },
    };
    // Check the count as well as the contents: the subset check alone passed
    // trivially when inference returned fewer slots than expected.
    Assert.AreEqual(expectedSlotValues.Count, inference.Slots.Count, "Incorrect number of slots.");
    Assert.IsTrue(inference.Slots.All(kv =>
        expectedSlotValues.ContainsKey(kv.Key) && expectedSlotValues[kv.Key] == kv.Value));
}
/// <summary>
/// Creates an input audio stream, instantiates an instance of Rhino object, and infers the intent from spoken commands.
/// </summary>
/// <param name="contextPath">
/// Absolute path to file containing context model (file with `.rhn` extension). A context represents the set of
/// expressions(spoken commands), intents, and intent arguments(slots) within a domain of interest.
/// </param>
/// <param name="modelPath">Absolute path to the file containing model parameters. If not set it will be set to the default location.</param>
/// <param name="sensitivity">
/// Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in
/// fewer misses at the cost of (potentially) increasing the erroneous inference rate. If not set, the default value of 0.5 will be used.
/// </param>
/// <param name="audioDeviceIndex">Optional argument. If provided, audio is recorded from this input device. Otherwise, the default audio input device is used.</param>
/// <param name="outputPath">Optional argument. If provided, recorded audio will be stored in this location at the end of the run.</param>
public static void RunDemo(string contextPath, string modelPath, float sensitivity, int? audioDeviceIndex = null, string outputPath = null)
{
    Rhino rhino = null;
    BinaryWriter outputFileWriter = null;
    int totalSamplesWritten = 0;
    try
    {
        // init rhino speech-to-intent engine
        rhino = Rhino.Create(contextPath, modelPath, sensitivity);

        // open stream to output file; the header written here is a placeholder
        // that is rewritten with the real sample count in the finally block
        if (!string.IsNullOrWhiteSpace(outputPath))
        {
            outputFileWriter = new BinaryWriter(new FileStream(outputPath, FileMode.OpenOrCreate, FileAccess.Write));
            WriteWavHeader(outputFileWriter, 1, 16, 16000, 0);
        }

        // choose audio device
        string deviceName = null;
        if (audioDeviceIndex != null)
        {
            List<string> captureDeviceList = ALC.GetStringList(GetEnumerationStringList.CaptureDeviceSpecifier).ToList();
            if (captureDeviceList != null && audioDeviceIndex.Value < captureDeviceList.Count)
            {
                deviceName = captureDeviceList[audioDeviceIndex.Value];
            }
            else
            {
                // BUGFIX: the original split literal concatenated to "...to showavailable inputs"
                throw new ArgumentException(
                    "No input device found with the specified index. Use --show_audio_devices to show available inputs",
                    "--audio_device_index");
            }
        }

        Console.WriteLine(rhino.ContextInfo);
        Console.WriteLine("Listening...\n");

        // create and start recording
        short[] recordingBuffer = new short[rhino.FrameLength];
        ALCaptureDevice captureDevice = ALC.CaptureOpenDevice(deviceName, 16000, ALFormat.Mono16, rhino.FrameLength * 2);
        try
        {
            ALC.CaptureStart(captureDevice);
            while (!Console.KeyAvailable)
            {
                int samplesAvailable = ALC.GetAvailableSamples(captureDevice);
                if (samplesAvailable > rhino.FrameLength)
                {
                    ALC.CaptureSamples(captureDevice, ref recordingBuffer[0], rhino.FrameLength);
                    bool isFinalized = rhino.Process(recordingBuffer);
                    if (isFinalized)
                    {
                        Inference inference = rhino.GetInference();
                        if (inference.IsUnderstood)
                        {
                            Console.WriteLine("{");
                            Console.WriteLine($" intent : '{inference.Intent}'");
                            Console.WriteLine(" slots : {");
                            foreach (KeyValuePair<string, string> slot in inference.Slots)
                            {
                                Console.WriteLine($" {slot.Key} : '{slot.Value}'");
                            }
                            Console.WriteLine(" }");
                            Console.WriteLine("}");
                        }
                        else
                        {
                            Console.WriteLine("Didn't understand the command.");
                        }
                    }

                    if (outputFileWriter != null)
                    {
                        foreach (short sample in recordingBuffer)
                        {
                            outputFileWriter.Write(sample);
                        }
                        totalSamplesWritten += recordingBuffer.Length;
                    }
                }
                Thread.Yield();
            }

            Console.WriteLine("Stopping...");
        }
        finally
        {
            // BUGFIX: stop and close the capture device even if Process/GetInference throws;
            // previously an exception in the loop leaked the open capture device
            ALC.CaptureStop(captureDevice);
            ALC.CaptureCloseDevice(captureDevice);
        }
    }
    finally
    {
        if (outputFileWriter != null)
        {
            // write size to header and clean up
            WriteWavHeader(outputFileWriter, 1, 16, 16000, totalSamplesWritten);
            outputFileWriter.Flush();
            outputFileWriter.Dispose();
        }
        rhino?.Dispose();
    }
}
/// <summary>
/// Creates an input audio stream, instantiates an instance of Rhino object, and infers the intent from spoken commands.
/// </summary>
/// <param name="accessKey">AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).</param>
/// <param name="contextPath">
/// Absolute path to file containing context model (file with `.rhn` extension). A context represents the set of
/// expressions(spoken commands), intents, and intent arguments(slots) within a domain of interest.
/// </param>
/// <param name="modelPath">Absolute path to the file containing model parameters. If not set it will be set to the default location.</param>
/// <param name="sensitivity">
/// Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in
/// fewer misses at the cost of (potentially) increasing the erroneous inference rate. If not set, the default value of 0.5 will be used.
/// </param>
/// <param name="requireEndpoint">
/// If set to `true`, Rhino requires an endpoint (chunk of silence) before finishing inference.
/// </param>
/// <param name="audioDeviceIndex">Optional argument. If provided, audio is recorded from this input device. Otherwise, the default audio input device is used.</param>
/// <param name="outputPath">Optional argument. If provided, recorded audio will be stored in this location at the end of the run.</param>
public static void RunDemo(
    string accessKey,
    string contextPath,
    string modelPath,
    float sensitivity,
    bool requireEndpoint,
    int audioDeviceIndex,
    string outputPath = null)
{
    Rhino rhino = null;
    BinaryWriter outputFileWriter = null;
    int totalSamplesWritten = 0;

    // init rhino speech-to-intent engine
    rhino = Rhino.Create(
        accessKey,
        contextPath,
        modelPath,
        sensitivity,
        requireEndpoint);

    // open stream to output file
    if (!string.IsNullOrWhiteSpace(outputPath))
    {
        outputFileWriter = new BinaryWriter(new FileStream(outputPath, FileMode.OpenOrCreate, FileAccess.Write));
        WriteWavHeader(outputFileWriter, 1, 16, 16000, 0);
    }

    // On Ctrl+C: patch the wav header with the real sample count, then release resources.
    Console.CancelKeyPress += (s, o) =>
    {
        Console.WriteLine("Stopping...");
        if (outputFileWriter != null)
        {
            WriteWavHeader(outputFileWriter, 1, 16, 16000, totalSamplesWritten);
            outputFileWriter.Flush();
            outputFileWriter.Dispose();
        }
        rhino?.Dispose();
    };

    // create and start recording
    using (PvRecorder recorder = PvRecorder.Create(audioDeviceIndex, rhino.FrameLength))
    {
        recorder.Start();
        Console.WriteLine(rhino.ContextInfo);
        Console.WriteLine($"\nUsing device: {recorder.SelectedDevice}");
        Console.WriteLine("Listening...\n");

        // Loop forever; the CancelKeyPress handler above performs shutdown.
        for (; ; )
        {
            short[] frame = recorder.Read();

            if (rhino.Process(frame))
            {
                Inference result = rhino.GetInference();
                if (!result.IsUnderstood)
                {
                    Console.WriteLine("Didn't understand the command.");
                }
                else
                {
                    Console.WriteLine("{");
                    Console.WriteLine($" intent : '{result.Intent}'");
                    Console.WriteLine(" slots : {");
                    foreach (KeyValuePair<string, string> slot in result.Slots)
                    {
                        Console.WriteLine($" {slot.Key} : '{slot.Value}'");
                    }
                    Console.WriteLine(" }");
                    Console.WriteLine("}");
                }
            }

            if (outputFileWriter != null)
            {
                foreach (short sample in frame)
                {
                    outputFileWriter.Write(sample);
                }
                totalSamplesWritten += frame.Length;
            }

            Thread.Yield();
        }
    }
}