Example #1
0
        // Feeds the out-of-context sample to Rhino one frame at a time and asserts that
        // inference finalizes without the command being understood.
        public void TestOutOfContext()
        {
            using Rhino r = SetUpClass();
            int frameLen = r.FrameLength;

            string       testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples/test_out_of_context.wav");
            List <short> data          = GetPcmFromFile(testAudioPath, r.SampleRate);

            // Integer division already discards the trailing partial frame.
            int  frameCount  = data.Count / frameLen;
            bool isFinalized = false;

            // Stop feeding audio as soon as Rhino reports that inference is finalized.
            for (int i = 0; i < frameCount && !isFinalized; i++)
            {
                List <short> frame = data.GetRange(i * frameLen, frameLen);
                isFinalized = r.Process(frame.ToArray());
            }
            Assert.IsTrue(isFinalized, "Failed to finalize.");

            Inference inference = r.GetInference();

            Assert.IsFalse(inference.IsUnderstood, "Shouldn't be able to understand.");
        }
Example #2
0
        /// <summary>
        /// Reads through input file and prints the inference result returned by Rhino.
        /// </summary>
        /// <remarks>
        /// Processing stops at the first finalized inference. If the audio runs out first, a message
        /// saying so is printed instead. Only the first (left) channel of a two-channel file is used.
        /// </remarks>
        /// <param name="inputAudioPath">Required argument. Absolute path to input audio file.</param>
        /// <param name="contextPath">Required argument. Absolute path to the Rhino context file.</param>
        /// <param name="modelPath">Absolute path to the file containing model parameters. If not set it will be set to the default location.</param>
        /// <param name="sensitivity">
        /// Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in
        /// fewer misses at the cost of (potentially) increasing the erroneous inference rate. If not set, the default value of 0.5 will be used.
        /// </param>
        public static void RunDemo(string inputAudioPath, string contextPath, string modelPath, float sensitivity)
        {
            // init rhino speech-to-intent engine
            using Rhino rhino = Rhino.Create(contextPath, modelPath, sensitivity);

            // open and validate wav file
            // (read-only access: the demo never writes to the input, and read/write access
            // would fail on read-only files)
            using BinaryReader reader = new BinaryReader(File.OpenRead(inputAudioPath));
            ValidateWavFile(reader, rhino.SampleRate, 16, out short numChannels);

            // read audio and send frames to rhino
            short[] rhinoFrame = new short[rhino.FrameLength];
            int     frameIndex = 0;

            // Bytes consumed per loop iteration: one 16-bit sample, plus the skipped
            // right-channel sample for stereo input.
            bool isStereo            = numChannels == 2;
            int  bytesPerSampleFrame = isStereo ? 2 * sizeof(short) : sizeof(short);

            // Only iterate while a complete sample frame remains, so a truncated file
            // (odd trailing byte, missing right-channel sample) ends the loop instead of
            // throwing EndOfStreamException.
            while (reader.BaseStream.Length - reader.BaseStream.Position >= bytesPerSampleFrame)
            {
                rhinoFrame[frameIndex++] = reader.ReadInt16();

                // skip right channel
                if (isStereo)
                {
                    reader.ReadInt16();
                }

                if (frameIndex < rhinoFrame.Length)
                {
                    continue;
                }

                // a full frame has been collected
                frameIndex = 0;
                if (!rhino.Process(rhinoFrame))
                {
                    continue;
                }

                Inference inference = rhino.GetInference();
                if (inference.IsUnderstood)
                {
                    Console.WriteLine("{");
                    Console.WriteLine($"  intent : '{inference.Intent}'");
                    Console.WriteLine("  slots : {");
                    foreach (KeyValuePair <string, string> slot in inference.Slots)
                    {
                        Console.WriteLine($"    {slot.Key} : '{slot.Value}'");
                    }
                    Console.WriteLine("  }");
                    Console.WriteLine("}");
                }
                else
                {
                    Console.WriteLine("Didn't understand the command.");
                }
                return;
            }

            Console.WriteLine("Reached end of audio file before Rhino returned an inference.");
        }
Example #3
0
        /// <summary>
        /// Streams an audio sample through Rhino frame by frame and asserts on the resulting inference.
        /// </summary>
        /// <param name="rhino">Engine instance used to process the audio.</param>
        /// <param name="audioFileName">File name of the sample, resolved under <c>resources/audio_samples</c>.</param>
        /// <param name="isWithinContext">
        /// <c>true</c> if the command is expected to be understood; <c>false</c> if it is expected to be rejected.
        /// </param>
        /// <param name="expectedIntent">Intent expected when <paramref name="isWithinContext"/> is <c>true</c>.</param>
        /// <param name="expectedSlots">
        /// Slots expected when <paramref name="isWithinContext"/> is <c>true</c>. <c>null</c> is treated as "no slots".
        /// </param>
        private void RunTestCase(
            Rhino rhino,
            string audioFileName,
            bool isWithinContext,
            string expectedIntent = null,
            Dictionary <string, string> expectedSlots = null)
        {
            int          frameLen      = rhino.FrameLength;
            string       testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples", audioFileName);
            List <short> data          = GetPcmFromFile(testAudioPath, rhino.SampleRate);

            // Integer division already discards the trailing partial frame.
            int  frameCount  = data.Count / frameLen;
            bool isFinalized = false;

            // Stop feeding audio as soon as Rhino reports that inference is finalized.
            for (int i = 0; i < frameCount && !isFinalized; i++)
            {
                List <short> frame = data.GetRange(i * frameLen, frameLen);
                isFinalized = rhino.Process(frame.ToArray());
            }
            Assert.IsTrue(isFinalized, "Failed to finalize.");

            Inference inference = rhino.GetInference();

            if (!isWithinContext)
            {
                Assert.IsFalse(inference.IsUnderstood, "Shouldn't be able to understand.");
                return;
            }

            Assert.IsTrue(inference.IsUnderstood, "Couldn't understand.");
            Assert.AreEqual(expectedIntent, inference.Intent, "Incorrect intent.");

            // A missing expectation means "no slots" rather than a NullReferenceException in the lambda.
            Dictionary <string, string> expected = expectedSlots ?? new Dictionary <string, string>();

            // Matching counts plus "every inferred slot is expected" makes this an exact comparison,
            // so an inference that drops an expected slot no longer passes.
            Assert.AreEqual(expected.Count, inference.Slots.Count(), "Incorrect number of slots.");
            Assert.IsTrue(inference.Slots.All((keyValuePair) =>
                                              expected.TryGetValue(keyValuePair.Key, out string expectedValue) &&
                                              expectedValue == keyValuePair.Value),
                          "Incorrect slot values.");
        }
Example #4
0
        // Feeds the within-context sample to Rhino one frame at a time and asserts that the
        // "orderDrink" intent is inferred with exactly the expected slot values.
        public void TestWithinContext()
        {
            using Rhino r = SetUpClass();
            int frameLen = r.FrameLength;

            string       testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples/test_within_context.wav");
            List <short> data          = GetPcmFromFile(testAudioPath, r.SampleRate);

            // Integer division already discards the trailing partial frame.
            int  frameCount  = data.Count / frameLen;
            bool isFinalized = false;

            // Stop feeding audio as soon as Rhino reports that inference is finalized.
            for (int i = 0; i < frameCount && !isFinalized; i++)
            {
                List <short> frame = data.GetRange(i * frameLen, frameLen);
                isFinalized = r.Process(frame.ToArray());
            }
            Assert.IsTrue(isFinalized, "Failed to finalize.");

            Inference inference = r.GetInference();

            Assert.IsTrue(inference.IsUnderstood, "Couldn't understand.");
            Assert.AreEqual("orderDrink", inference.Intent, "Incorrect intent.");

            Dictionary <string, string> expectedSlotValues = new Dictionary <string, string>()
            {
                { "size", "medium" },
                { "numberOfShots", "double shot" },
                { "coffeeDrink", "americano" },
                { "milkAmount", "lots of milk" },
                { "sugarAmount", "some sugar" },
            };

            // Matching counts plus "every inferred slot is expected" makes this an exact comparison,
            // so an inference that drops an expected slot no longer passes.
            Assert.AreEqual(expectedSlotValues.Count, inference.Slots.Count(), "Incorrect number of slots.");
            Assert.IsTrue(inference.Slots.All((keyValuePair) =>
                                              expectedSlotValues.TryGetValue(keyValuePair.Key, out string expectedValue) &&
                                              expectedValue == keyValuePair.Value),
                          "Incorrect slot values.");
        }
Example #5
0
        /// <summary>
        /// Creates an input audio stream, instantiates an instance of Rhino object, and infers the intent from spoken commands.
        /// </summary>
        /// <remarks>
        /// Recording continues until a key is pressed. The capture device, the output file and the Rhino
        /// instance are released even if an error occurs while recording.
        /// </remarks>
        /// <param name="contextPath">
        /// Absolute path to file containing context model (file with `.rhn` extension). A context represents the set of
        /// expressions(spoken commands), intents, and intent arguments(slots) within a domain of interest.
        /// </param>
        /// <param name="modelPath">Absolute path to the file containing model parameters. If not set it will be set to the default location.</param>
        /// <param name="sensitivity">
        /// Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in
        /// fewer misses at the cost of (potentially) increasing the erroneous inference rate. If not set, the default value of 0.5 will be used.
        /// </param>
        /// <param name="audioDeviceIndex">Optional argument. If provided, audio is recorded from this input device. Otherwise, the default audio input device is used.</param>
        /// <param name="outputPath">Optional argument. If provided, recorded audio will be stored in this location at the end of the run.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="audioDeviceIndex"/> does not refer to an available capture device.
        /// </exception>
        public static void RunDemo(string contextPath, string modelPath, float sensitivity, int? audioDeviceIndex = null, string outputPath = null)
        {
            Rhino        rhino               = null;
            BinaryWriter outputFileWriter    = null;
            int          totalSamplesWritten = 0;

            try
            {
                // init rhino speech-to-intent engine
                rhino = Rhino.Create(contextPath, modelPath, sensitivity);

                // open stream to output file
                if (!string.IsNullOrWhiteSpace(outputPath))
                {
                    // FileMode.Create truncates an existing file; OpenOrCreate would leave stale
                    // bytes after the new audio when overwriting a longer recording.
                    outputFileWriter = new BinaryWriter(new FileStream(outputPath, FileMode.Create, FileAccess.Write));
                    WriteWavHeader(outputFileWriter, 1, 16, 16000, 0);
                }

                // choose audio device
                string deviceName = null;
                if (audioDeviceIndex != null)
                {
                    List <string> captureDeviceList = ALC.GetStringList(GetEnumerationStringList.CaptureDeviceSpecifier).ToList();
                    int           deviceIndex       = audioDeviceIndex.Value;

                    // Reject negative indices here as well, so the caller gets the descriptive
                    // ArgumentException rather than an out-of-range error from the list indexer.
                    if (deviceIndex < 0 || deviceIndex >= captureDeviceList.Count)
                    {
                        throw new ArgumentException("No input device found with the specified index. Use --show_audio_devices to show " +
                                                    "available inputs", "--audio_device_index");
                    }
                    deviceName = captureDeviceList[deviceIndex];
                }

                Console.WriteLine(rhino.ContextInfo);
                Console.WriteLine("Listening...\n");

                // create and start recording
                short[]         recordingBuffer = new short[rhino.FrameLength];
                ALCaptureDevice captureDevice   = ALC.CaptureOpenDevice(deviceName, 16000, ALFormat.Mono16, rhino.FrameLength * 2);
                try
                {
                    ALC.CaptureStart(captureDevice);
                    while (!Console.KeyAvailable)
                    {
                        int samplesAvailable = ALC.GetAvailableSamples(captureDevice);

                        // A frame can be captured as soon as exactly FrameLength samples are buffered.
                        if (samplesAvailable >= rhino.FrameLength)
                        {
                            ALC.CaptureSamples(captureDevice, ref recordingBuffer[0], rhino.FrameLength);
                            bool isFinalized = rhino.Process(recordingBuffer);
                            if (isFinalized)
                            {
                                Inference inference = rhino.GetInference();
                                if (inference.IsUnderstood)
                                {
                                    Console.WriteLine("{");
                                    Console.WriteLine($"  intent : '{inference.Intent}'");
                                    Console.WriteLine("  slots : {");
                                    foreach (KeyValuePair <string, string> slot in inference.Slots)
                                    {
                                        Console.WriteLine($"    {slot.Key} : '{slot.Value}'");
                                    }
                                    Console.WriteLine("  }");
                                    Console.WriteLine("}");
                                }
                                else
                                {
                                    Console.WriteLine("Didn't understand the command.");
                                }
                            }

                            if (outputFileWriter != null)
                            {
                                foreach (short sample in recordingBuffer)
                                {
                                    outputFileWriter.Write(sample);
                                }
                                totalSamplesWritten += recordingBuffer.Length;
                            }
                        }
                        Thread.Yield();
                    }

                    Console.WriteLine("Stopping...");
                }
                finally
                {
                    // stop and clean up the capture device, even if processing threw
                    ALC.CaptureStop(captureDevice);
                    ALC.CaptureCloseDevice(captureDevice);
                }
            }
            finally
            {
                try
                {
                    if (outputFileWriter != null)
                    {
                        // write size to header and clean up
                        WriteWavHeader(outputFileWriter, 1, 16, 16000, totalSamplesWritten);
                        outputFileWriter.Flush();
                        outputFileWriter.Dispose();
                    }
                }
                finally
                {
                    // release the engine even if finalizing the output file fails
                    rhino?.Dispose();
                }
            }
        }
Example #6
0
        /// <summary>
        /// Creates an input audio stream, instantiates an instance of Rhino object, and infers the intent from spoken commands.
        /// </summary>
        /// <remarks>
        /// Recording continues until Ctrl+C is pressed. The key press only requests a stop: the recording
        /// loop exits after the current frame, and the recorder, the output file and the Rhino instance are
        /// then released on the calling thread, after which this method returns.
        /// </remarks>
        /// <param name="accessKey">AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).</param>
        /// <param name="contextPath">
        /// Absolute path to file containing context model (file with `.rhn` extension). A context represents the set of
        /// expressions(spoken commands), intents, and intent arguments(slots) within a domain of interest.
        /// </param>
        /// <param name="modelPath">Absolute path to the file containing model parameters. If not set it will be set to the default location.</param>
        /// <param name="sensitivity">
        /// Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in
        /// fewer misses at the cost of (potentially) increasing the erroneous inference rate. If not set, the default value of 0.5 will be used.
        /// </param>
        /// <param name="requireEndpoint">
        /// If set to `true`, Rhino requires an endpoint (chunk of silence) before finishing inference.
        /// </param>
        /// <param name="audioDeviceIndex">Optional argument. If provided, audio is recorded from this input device. Otherwise, the default audio input device is used.</param>
        /// <param name="outputPath">Optional argument. If provided, recorded audio will be stored in this location at the end of the run.</param>
        public static void RunDemo(
            string accessKey,
            string contextPath,
            string modelPath,
            float sensitivity,
            bool requireEndpoint,
            int audioDeviceIndex,
            string outputPath = null)
        {
            Rhino        rhino               = null;
            BinaryWriter outputFileWriter    = null;
            int          totalSamplesWritten = 0;
            bool         stopRequested       = false;

            // The handler runs on a different thread than the recording loop, so it only raises a
            // flag. Disposing the engine or the writer here would race with the loop still using them.
            ConsoleCancelEventHandler onCancelKeyPress = (s, o) =>
            {
                // keep the process alive so that the cleanup below can run
                o.Cancel = true;
                Console.WriteLine("Stopping...");
                Volatile.Write(ref stopRequested, true);
            };
            Console.CancelKeyPress += onCancelKeyPress;

            try
            {
                // init rhino speech-to-intent engine
                rhino = Rhino.Create(
                    accessKey,
                    contextPath,
                    modelPath,
                    sensitivity,
                    requireEndpoint);

                // open stream to output file
                if (!string.IsNullOrWhiteSpace(outputPath))
                {
                    // FileMode.Create truncates an existing file; OpenOrCreate would leave stale
                    // bytes after the new audio when overwriting a longer recording.
                    outputFileWriter = new BinaryWriter(new FileStream(outputPath, FileMode.Create, FileAccess.Write));
                    WriteWavHeader(outputFileWriter, 1, 16, 16000, 0);
                }

                // create and start recording
                using (PvRecorder recorder = PvRecorder.Create(audioDeviceIndex, rhino.FrameLength))
                {
                    recorder.Start();
                    Console.WriteLine(rhino.ContextInfo);
                    Console.WriteLine($"\nUsing device: {recorder.SelectedDevice}");
                    Console.WriteLine("Listening...\n");

                    while (!Volatile.Read(ref stopRequested))
                    {
                        short[] pcm         = recorder.Read();
                        bool    isFinalized = rhino.Process(pcm);
                        if (isFinalized)
                        {
                            Inference inference = rhino.GetInference();
                            if (inference.IsUnderstood)
                            {
                                Console.WriteLine("{");
                                Console.WriteLine($"  intent : '{inference.Intent}'");
                                Console.WriteLine("  slots : {");
                                foreach (KeyValuePair <string, string> slot in inference.Slots)
                                {
                                    Console.WriteLine($"    {slot.Key} : '{slot.Value}'");
                                }
                                Console.WriteLine("  }");
                                Console.WriteLine("}");
                            }
                            else
                            {
                                Console.WriteLine("Didn't understand the command.");
                            }
                        }

                        if (outputFileWriter != null)
                        {
                            foreach (short sample in pcm)
                            {
                                outputFileWriter.Write(sample);
                            }
                            totalSamplesWritten += pcm.Length;
                        }
                        Thread.Yield();
                    }
                }
            }
            finally
            {
                // Unsubscribe so the handler does not outlive this call.
                Console.CancelKeyPress -= onCancelKeyPress;

                try
                {
                    if (outputFileWriter != null)
                    {
                        // write size to header and clean up
                        WriteWavHeader(outputFileWriter, 1, 16, 16000, totalSamplesWritten);
                        outputFileWriter.Flush();
                        outputFileWriter.Dispose();
                    }
                }
                finally
                {
                    // release the engine even if finalizing the output file fails
                    rhino?.Dispose();
                }
            }
        }