private static void processCommand(ref SystemSpeechRecognizer recognizer, string input)
        {
            switch (input)
            {
            case "Reload grammars":
            {
                var gw = new AllXMLGrammarWriter();
                gw.ReadFileAndConvert();
                string updatedGrammar = gw.GetResultString();

                DateTime post_time = new DateTime();
                //String originalGrammar = File.ReadAllText("GeneratedGrammar.grxml");

                Message<System.Collections.Generic.IEnumerable<string>> updateRequest =
                    new Message<System.Collections.Generic.IEnumerable<string>>(
                        new string[] {
                        updatedGrammar
                        //originalGrammar
                    }, post_time, post_time, 9876, ReloadMessageIDCurrent++);
                recognizer.SetGrammars(updateRequest);
                gw.WriteToFile();
                break;
            }

            default:
                break;
            }
        }
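For context, here is a minimal driver loop (not part of the original source) that reads console input and forwards each line to processCommand. The recognizer is assumed to have been constructed elsewhere, and the "Exit" sentinel is a hypothetical choice for illustration.

        // Sketch only: forwards typed commands (e.g. "Reload grammars") to processCommand.
        // Assumes `recognizer` was built elsewhere; "Exit" is a made-up sentinel.
        private static void CommandLoop(SystemSpeechRecognizer recognizer)
        {
            string line;
            while ((line = Console.ReadLine()) != null && line != "Exit")
            {
                processCommand(ref recognizer, line);
            }
        }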
Example #2
        public KioskInputTextPreProcessor(Pipeline pipeline, SystemSpeechRecognizer rec)
            : base(pipeline)
        {
            this.recognizer = rec;

            this.AutoResponse = pipeline.CreateEmitter<string>(this, nameof(this.AutoResponse));
        }
        public static ConsumerProducer<AudioBuffer, IStreamingSpeechRecognitionResult> CreateSpeechRecognizer(Pipeline pipeline)
        {
            var recognizer = new SystemSpeechRecognizer(
                pipeline,
                new SystemSpeechRecognizerConfiguration()
            {
                Language = "en-US",
                Grammars = new GrammarInfo[]
                {
                    new GrammarInfo()
                    {
                        Name = Program.AppName, FileName = @"Resources\BaseGrammar.grxml"
                    }
                }
            });

            return recognizer;
        }
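A minimal usage sketch for the factory above, assuming live microphone input and the same result properties (IsFinal, Text, Confidence) used in the other examples in this collection:

        // Sketch only: wire CreateSpeechRecognizer into a pipeline fed by the default microphone.
        public static void RunSpeechRecognizerSketch()
        {
            using (var pipeline = Pipeline.Create())
            {
                var audio = new AudioCapture(pipeline, WaveFormat.Create16kHz1Channel16BitPcm());
                var recognizer = CreateSpeechRecognizer(pipeline);

                audio.PipeTo(recognizer);

                // Keep only final hypotheses and print them.
                recognizer.Out.Where(r => r.IsFinal).Do(r =>
                    Console.WriteLine($"{r.Text} (confidence: {r.Confidence})"));

                pipeline.Run();
            }
        }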
        public async Task StartStreamingRecognitionTest_RealTime()
        {
            using var recognizer = new SystemSpeechRecognizer();

            using var recognition = await recognizer.StartStreamingRecognitionAsync();

            recognition.PreviewReceived += (_, value) =>
            {
                Console.WriteLine($"{DateTime.Now:h:mm:ss.fff} {nameof(recognition.PreviewReceived)}: {value}");
            };
            recognition.Stopped += (_, value) =>
            {
                Console.WriteLine($"{DateTime.Now:h:mm:ss.fff} {nameof(recognition.Stopped)}: {value}");
            };

            await Task.Delay(TimeSpan.FromSeconds(5));

            await recognition.StopAsync();
        }
        static void Main(string[] args)
        {
            Console.WriteLine("Starting speech test...");
            using (var p = Pipeline.Create())
            {
                Microsoft.Psi.Kinect.v1.KinectSensor kinectSensor = new Microsoft.Psi.Kinect.v1.KinectSensor(p);


                // Create System.Speech recognizer component
                var recognizer = new SystemSpeechRecognizer(
                    p,
                    new SystemSpeechRecognizerConfiguration()
                {
                    Language = "en-US",
                    Grammars = new GrammarInfo[]
                    {
                        new GrammarInfo()
                        {
                            Name = "Kinect v1 Speech Test", FileName = "SampleGrammar.grxml"
                        }
                    }
                });

                kinectSensor.Audio.PipeTo(recognizer);

                var finalResults = recognizer.Out.Where(result => result.IsFinal);

                // Print the recognized text of the final recognition result to the console.
                finalResults.Do(result =>
                {
                    var ssrResult = result as SpeechRecognitionResult;
                    Console.WriteLine($"{ssrResult.Text} (confidence: {ssrResult.Confidence})");
                });

                p.Run();
            }
        }
Example #6
        /// <summary>
        /// Builds and runs a speech recognition pipeline using the .NET System.Speech recognizer and a set of fixed grammars.
        /// </summary>
        /// <param name="outputLogPath">The path under which to write log data.</param>
        /// <param name="inputLogPath">The path from which to read audio input data.</param>
        public static void RunSystemSpeech(string outputLogPath = null, string inputLogPath = null)
        {
            // Create the pipeline object.
            using (Pipeline pipeline = Pipeline.Create())
            {
                // Use either live audio from the microphone or audio from a previously saved log
                IProducer<AudioBuffer> audioInput = null;
                if (inputLogPath != null)
                {
                    // Open the MicrophoneAudio stream from the last saved log
                    var store = PsiStore.Open(pipeline, Program.AppName, inputLogPath);
                    audioInput = store.OpenStream<AudioBuffer>($"{Program.AppName}.MicrophoneAudio");
                }
                else
                {
                    // Create the AudioCapture component to capture audio from the default device in 16 kHz 1-channel
                    // PCM format as required by both the voice activity detector and speech recognition components.
                    audioInput = new AudioCapture(pipeline, WaveFormat.Create16kHz1Channel16BitPcm());
                }

                // Create System.Speech recognizer component
                var recognizer = new SystemSpeechRecognizer(
                    pipeline,
                    new SystemSpeechRecognizerConfiguration()
                {
                    Language = "en-US",
                    Grammars = new GrammarInfo[]
                    {
                        new GrammarInfo()
                        {
                            Name = Program.AppName, FileName = "SampleGrammar.grxml"
                        },
                    },
                });

                // Subscribe the recognizer to the input audio
                audioInput.PipeTo(recognizer);

                // Partial and final speech recognition results are posted on the same stream. Here
                // we use Psi's Where() operator to filter out only the final recognition results.
                var finalResults = recognizer.Out.Where(result => result.IsFinal);

                // Print the final recognition result to the console.
                finalResults.Do(result =>
                {
                    Console.WriteLine($"{result.Text} (confidence: {result.Confidence})");
                });

                // Create a data store to log the data to if necessary. A data store is necessary
                // only if output logging is enabled.
                var dataStore = CreateDataStore(pipeline, outputLogPath);

                // For disk logging only
                if (dataStore != null)
                {
                    // Log the microphone audio and recognition results
                    audioInput.Write($"{Program.AppName}.MicrophoneAudio", dataStore);
                    finalResults.Write($"{Program.AppName}.FinalRecognitionResults", dataStore);
                }

                // Register an event handler to catch pipeline errors
                pipeline.PipelineExceptionNotHandled += Pipeline_PipelineException;

                // Register an event handler to be notified when the pipeline completes
                pipeline.PipelineCompleted += Pipeline_PipelineCompleted;

                // Run the pipeline
                pipeline.RunAsync();

                // The file SampleGrammar.grxml defines a grammar to transcribe numbers
                Console.WriteLine("Say any number between 0 and 100");

                Console.WriteLine("Press any key to exit...");
                Console.ReadKey(true);
            }
        }
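The handlers Pipeline_PipelineException and Pipeline_PipelineCompleted registered above are not included in this excerpt. A plausible sketch, assuming the Microsoft.Psi EventHandler<PipelineExceptionNotHandledEventArgs> / EventHandler<PipelineCompletedEventArgs> signatures; the console-logging bodies are illustrative, not the authors' implementation:

        // Illustrative only: the original handler bodies are not shown in this excerpt.
        private static void Pipeline_PipelineException(object sender, PipelineExceptionNotHandledEventArgs e)
        {
            Console.WriteLine($"Pipeline error: {e.Exception.Message}");
        }

        private static void Pipeline_PipelineCompleted(object sender, PipelineCompletedEventArgs e)
        {
            Console.WriteLine("Pipeline completed.");
        }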
Example #7
        /// <summary>
        /// Tests speech recognition from any Wave file containing PCM audio in a format that the
        /// speech recognizer component accepts.
        /// </summary>
        /// <param name="filename">The Wave file containing the audio data.</param>
        /// <param name="expectedText">The expected recognized text.</param>
        /// <param name="srgsXmlGrammar">The grammar to use when decoding.</param>
        private void RecognizeSpeechFromWaveFile(string filename, string expectedText, string srgsXmlGrammar = null)
        {
            if (SpeechRecognitionEngine.InstalledRecognizers().Count == 0)
            {
                // Skip test if no installed recognizers on system.
                return;
            }

            // Read the WaveFormat from the file header so we can set the recognizer configuration.
            WaveFormat format = WaveFileHelper.ReadWaveFileHeader(filename);

            // Initialize components and wire up pipeline.
            using (var pipeline = Pipeline.Create(nameof(this.RecognizeSpeechFromWaveFile)))
            {
                var recognizer = new SystemSpeechRecognizer(pipeline, new SystemSpeechRecognizerConfiguration()
                {
                    BufferLengthInMs = 10000, InputFormat = format
                });
                var audioInput = new WaveFileAudioSource(pipeline, filename);
                audioInput.Out.PipeTo(recognizer.In);

                // Test dynamic update of speech recognition grammar
                if (srgsXmlGrammar != null)
                {
                    var grammarUpdater = Generators.Return<IEnumerable<string>>(pipeline, new string[] { srgsXmlGrammar });
                    grammarUpdater.PipeTo(recognizer.ReceiveGrammars);
                }

                // Add results from outputs. Note that we need to call DeepClone on each result as we
                // do not want them to be reused by the runtime.
                var results = new List<IStreamingSpeechRecognitionResult>();
                recognizer.Out.Do(r => results.Add(r.DeepClone()));
                recognizer.PartialRecognitionResults.Do(r => results.Add(r.DeepClone()));

                // Run pipeline and wait for completion.
                pipeline.Run();

                Assert.IsTrue(results.Count > 0, "No recognition results!");
                Assert.IsTrue(results.Count > 1, "No partial hypotheses!");

                // Verify partial results.
                for (int i = 0; i < results.Count - 1; ++i)
                {
                    var partialResult = results[i];
                    Assert.IsFalse(partialResult.IsFinal);
                    Assert.IsTrue(partialResult.Confidence.HasValue);
                    Assert.IsTrue(partialResult.Confidence.Value > 0);
                    Assert.IsFalse(string.IsNullOrEmpty(partialResult.Text));
                }

                // Verify final results.
                var finalResult = results.Last();
                Assert.IsTrue(finalResult.IsFinal);
                Assert.IsTrue(finalResult.Confidence.HasValue);
                Assert.IsTrue(finalResult.Confidence.Value > 0);
                Assert.AreEqual(expectedText, finalResult.Text, true);
                Assert.IsTrue(finalResult.Alternates.Length > 0);
                Assert.AreEqual(expectedText, finalResult.Alternates[0].Text, true);
                Assert.AreEqual(finalResult.Alternates[0].Confidence.Value, finalResult.Confidence.Value);
                Assert.IsTrue(finalResult.Audio.Length > 0);
            }
        }
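A hypothetical caller for the test helper above, exercising the dynamic-grammar path: the wave file, expected text, and inline SRGS grammar below are made up for illustration, and only the call pattern comes from the helper itself.

        [TestMethod]
        public void RecognizeColorFromWaveFile()
        {
            // Hypothetical inline SRGS grammar; the file name and expected text are placeholders.
            const string srgs = @"<?xml version=""1.0"" encoding=""utf-8""?>
<grammar version=""1.0"" xml:lang=""en-US"" root=""colors"" xmlns=""http://www.w3.org/2001/06/grammar"">
  <rule id=""colors"">
    <one-of>
      <item>red</item>
      <item>green</item>
      <item>blue</item>
    </one-of>
  </rule>
</grammar>";

            this.RecognizeSpeechFromWaveFile(@"Resources\Colors.wav", "red", srgs);
        }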
        /// <summary>
        /// SetupPsi() is called at application startup. It is responsible for
        /// building and starting the Psi pipeline.
        /// </summary>
        public void SetupPsi()
        {
            Console.WriteLine("================================================================================");
            Console.WriteLine("                               Kiosk Awareness sample");
            Console.WriteLine("================================================================================");
            Console.WriteLine();

            this.pipeline = Pipeline.Create();

            // Next register an event handler to catch pipeline errors
            this.pipeline.PipelineCompletionEvent += this.PipelineCompletionEvent;

            /*
             * // bool usingKqml = false;
             * string facilitatorIP = null;
             * int facilitatorPort = -1;
             * int localPort = -1;
             *
             *
             * if (arguments.Length > 0)
             * {
             * if (arguments.Length < 3)
             * {
             *     Console.WriteLine("Usage for running with a facilitator: \nKioskMain facilitatorIP facilitatorPort localPort");
             *     return;
             * }
             *
             * // usingKqml = true;
             *
             * facilitatorIP = arguments[0];
             * facilitatorPort = int.Parse(arguments[1]);
             * localPort = int.Parse(arguments[2]);
             * }
             */

            // bool showLiveVisualization = false;
            string inputLogPath = null;
            // string outputLogPath = null;

            DateTime startTime = DateTime.Now;

            IProducer<AudioBuffer> audioInput = SetupAudioInput(this.pipeline, inputLogPath, ref startTime);

            // Create our webcam
            MediaCapture webcam = new MediaCapture(this.pipeline, 320, 240, 10);

            Debug.WriteLine("Open webcam");

            FaceCasClassifier f = new FaceCasClassifier();

            Debug.WriteLine("Load classifier");
            Debug.WriteLine(f);

            var mouthOpenAsBool = webcam.Out.ToGrayViaOpenCV(f, FrameCount).Select(
                (img, e) =>
            {
                // Debug.WriteLine(FrameCount % 10);
                bool mouthOpen = (Math.Abs(DisNose) / (4 * Math.Abs(DisLipMiddle))) < 3;
                Console.WriteLine(Math.Abs(DisLipMiddle) + " " + Math.Abs(DisLipRight) + " " + Math.Abs(DisLipLeft) + " " + (Math.Abs(DisNose) / (4 * Math.Abs(DisLipMiddle))) + " " + mouthOpen);
                this.DispImage.UpdateImage(img);
                return mouthOpen;
            });

            var mouthAndSpeech = audioInput.Pair(mouthOpenAsBool).Where(t => true).Select(t => t.Item1);

            SystemSpeechRecognizer recognizer = SetupSpeechRecognizer(this.pipeline);

            mouthAndSpeech.PipeTo(recognizer);

            var finalResults = recognizer.Out.Where(result => result.IsFinal);

            finalResults.Do(result =>
            {
                var ssrResult = result as SpeechRecognitionResult;
                Console.WriteLine($"{ssrResult.Text} (confidence: {ssrResult.Confidence})");
            });

            var text = finalResults.Select(result =>
            {
                var ssrResult = result as SpeechRecognitionResult;
                return ssrResult.Text;
            });

            // Finally start the pipeline running
            try
            {
                this.pipeline.RunAsync();
            }
            catch (AggregateException exp)
            {
                MessageBox.Show("Error! " + exp.InnerException.Message);
            }
        }
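SetupSpeechRecognizer, referenced above and in the next example, is not part of this excerpt. A plausible sketch mirroring the SystemSpeechRecognizer construction used elsewhere in this collection (the grammar name and file name are assumptions):

        // Sketch only: mirrors the recognizer configuration seen in the other examples.
        private static SystemSpeechRecognizer SetupSpeechRecognizer(Pipeline pipeline)
        {
            return new SystemSpeechRecognizer(
                pipeline,
                new SystemSpeechRecognizerConfiguration()
                {
                    Language = "en-US",
                    Grammars = new GrammarInfo[]
                    {
                        new GrammarInfo()
                        {
                            Name = Program.AppName, FileName = "SampleGrammar.grxml"
                        },
                    },
                });
        }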
        public static void StartListeningAndLooking(string[] args, bool live_visual_flag, bool store_visual_flag, string inputStorePath, string outputStorePath, bool usingKqml, String[] compargs)
        {
            using (Pipeline pipeline = Pipeline.Create())
            {
                string facilitatorIP   = null;
                int    facilitatorPort = -1;
                int    localPort       = -1;

                /*
                 * if (args.Length > 0)
                 * {
                 *  if (args.Length < 3)
                 *  {
                 *      Console.WriteLine("Usage for running with a facilitator: \nKioskMain facilitatorIP facilitatorPort localPort");
                 *      return;
                 *  }
                 *  usingKqml = true;
                 *
                 *  facilitatorIP = args[0];
                 *  facilitatorPort = int.Parse(args[1]);
                 *  localPort = int.Parse(args[2]);
                 * }
                 */
                string outputLogPath = null;

                if (outputStorePath != null && outputStorePath != "" && Directory.Exists(outputStorePath))
                {
                    outputLogPath = outputStorePath;
                }
                Console.WriteLine(outputLogPath == null);

                string inputLogPath = null;

                if (inputStorePath != null && inputStorePath != "" && Directory.Exists(inputStorePath))
                {
                    inputLogPath = inputStorePath;
                }
                Console.WriteLine(inputLogPath == null);

                bool showLiveVisualization = live_visual_flag;

                // Needed only for live visualization
                DateTime startTime = DateTime.Now;

                // Use either live audio from the microphone or audio from a previously saved log
                IProducer<AudioBuffer> audioInput = SetupAudioInput(pipeline, inputLogPath, ref startTime);

                // Create our webcam
                MediaCapture webcam = new MediaCapture(pipeline, 320, 240, 10);

                FaceCasClassifier f = new FaceCasClassifier();

                Console.WriteLine("Load classifier");
                Console.WriteLine(f);

                // Bind the webcam's output to our display image.
                // The "Do" operator is executed on each sample from the stream (webcam.Out), which are the images coming from the webcam
                var processedVideo = inputLogPath != null ? SetupVideoInput(pipeline, inputLogPath, ref startTime) : webcam.Out.ToGrayViaOpenCV(f).EncodeJpeg(90, DeliveryPolicy.LatestMessage);

                var mouthOpenAsInt = processedVideo.Select(
                    (img, e) =>
                {
                    // Debug.WriteLine(FrameCount % 10);
                    // Console.WriteLine(Math.Abs(DisLipMiddle) + " " + Math.Abs(DisLipRight) + " " + Math.Abs(DisLipLeft) + " " + (Math.Abs(DisNose) / (4 * Math.Abs(DisLipMiddle))) + " " + mouthOpen);
                    //return MouthOpen;
                    return MouthOpen;
                });

                /*
                 * var hasFaceAsBool = webcam.Out.ToGrayViaOpenCV(f).Select(
                 * (img, e) =>
                 * {
                 *  bool hasFacebool = false;
                 *  if (HasFace == 1)
                 *  {
                 *      hasFacebool = true;
                 *  }
                 *  else
                 *  {
                 *      hasFacebool = false;
                 *  }
                 *  return hasFacebool;
                 * });
                 */

                var mouthAndSpeech = audioInput.Pair(mouthOpenAsInt).Where(t => t.Item2 > -1).Select(t => t.Item1);

                SystemSpeechRecognizer recognizer = SetupSpeechRecognizer(pipeline);

                // Subscribe the recognizer to the input audio
                mouthAndSpeech.PipeTo(recognizer);
                //audioInput.PipeTo(recognizer);

                // Partial and final speech recognition results are posted on the same stream. Here
                // we use Psi's Where() operator to filter out only the final recognition results.
                var finalResults = inputLogPath != null ? SetupSpeechInput(pipeline, inputLogPath, ref startTime) : recognizer.Out.Where(result => result.IsFinal);

                // Print the recognized text of the final recognition result to the console.
                finalResults.Do(result =>
                {
                    var ssrResult = result as SpeechRecognitionResult;
                    Console.WriteLine($"{ssrResult.Text} (confidence: {ssrResult.Confidence})");
                });

                var finalResultsHighCf = finalResults.Where(t => (t as SpeechRecognitionResult).Confidence > 0.6).Select(t =>
                {
                    Console.WriteLine("Good Confidence!");
                    return t;
                });

                // Get just the text from the Speech Recognizer.  We may want to add another filter to only get text if confidence > 0.8
                var text = finalResultsHighCf.Pair(mouthOpenAsInt).Select(result =>
                {
                    var ssrResult = result.Item1 as SpeechRecognitionResult;
                    int userid    = result.Item2;
                    Console.WriteLine("user" + userid + "+" + ssrResult.Text);
                    return("user" + userid + "+" + ssrResult.Text);
                });

                // Setup KQML connection to Companion

                NU.Kqml.SocketStringConsumer kqml = null;
                if (usingKqml)
                {
                    facilitatorIP   = compargs[0];
                    facilitatorPort = Convert.ToInt32(compargs[1]);
                    localPort       = Convert.ToInt32(compargs[2]);
                    Console.WriteLine("Your Companion IP address is: " + facilitatorIP);
                    Console.WriteLine("Your Companion port is: " + facilitatorPort);
                    Console.WriteLine("Your local port is: " + localPort);

                    kqml = new NU.Kqml.SocketStringConsumer(pipeline, facilitatorIP, facilitatorPort, localPort);

                    text.PipeTo(kqml.In);
                }

                // Create a data store to log the data to if necessary. A data store is necessary
                // only if output logging or live visualization are enabled.
                Console.WriteLine(outputLogPath == null);
                var dataStore = CreateDataStore(pipeline, outputLogPath, showLiveVisualization);
                Console.WriteLine(dataStore == null);
                Console.WriteLine("dataStore is empty");
                // For disk logging or live visualization only
                if (dataStore != null)
                {
                    // Log the microphone audio and recognition results
                    processedVideo.Write($"{Program.AppName}.WebCamProcessedVideo", dataStore);
                    audioInput.Write($"{Program.AppName}.MicrophoneAudio", dataStore);
                    finalResults.Write($"{Program.AppName}.FinalRecognitionResults", dataStore);

                    Console.WriteLine("Stored the data here! ");
                }

                // Ignore this block if live visualization is not enabled
                if (showLiveVisualization)
                {
                    // Create the visualization client
                    var visualizationClient = new VisualizationClient();

                    // Clear all data if the visualizer is already open
                    visualizationClient.ClearAll();

                    // Put the visualization client in live mode to visualize live data
                    visualizationClient.SetLiveMode(startTime);

                    // Plot the video stream in a new panel
                    visualizationClient.AddXYPanel();
                    //processedVideo.Show(visualizationClient);

                    // Plot the microphone audio stream in a new panel
                    visualizationClient.AddTimelinePanel();
                    //audioInput.Show(visualizationClient);

                    // Plot the recognition results in a new panel
                    visualizationClient.AddTimelinePanel();
                    //finalResults.Show(visualizationClient);
                }

                if (store_visual_flag)
                {
                    // Create the visualization client
                    var visualizationClient = new VisualizationClient();

                    // Clear all data if the visualizer is already open
                    visualizationClient.ClearAll();

                    // Put the visualization client in live mode to visualize live data
                    visualizationClient.SetLiveMode(startTime);

                    // Plot the video stream in a new panel
                    visualizationClient.AddXYPanel();
                    processedVideo.Show(visualizationClient);

                    // Plot the microphone audio stream in a new panel
                    visualizationClient.AddTimelinePanel();
                    audioInput.Show(visualizationClient);

                    // Plot the recognition results in a new panel
                    visualizationClient.AddTimelinePanel();
                    finalResults.Show(visualizationClient);
                }

                // Register an event handler to catch pipeline errors
                pipeline.PipelineCompletionEvent += PipelineCompletionEvent;

                // Run the pipeline
                pipeline.RunAsync();

                Console.WriteLine("Press any key to exit...");
                Console.ReadKey(true);

                // if (kqml != null) kqml.Stop();
            }
        }