/// <summary>
/// SetupPsi() is called at application startup. It is responsible for
/// building and starting the Psi pipeline.
/// </summary>
public void SetupPsi()
{
    Console.WriteLine("================================================================================");
    Console.WriteLine(" Kiosk Awareness sample");
    Console.WriteLine("================================================================================");
    Console.WriteLine();

    this.pipeline = Pipeline.Create();

    // Next register an event handler to catch pipeline errors
    this.pipeline.PipelineCompletionEvent += this.PipelineCompletionEvent;

    /*
     * // bool usingKqml = false;
     * string facilitatorIP = null;
     * int facilitatorPort = -1;
     * int localPort = -1;
     *
     * if (arguments.Length > 0)
     * {
     *     if (arguments.Length < 3)
     *     {
     *         Console.WriteLine("Usage for running with a facilitator: \nKioskMain facilitatorIP facilitatorPort localPort");
     *         return;
     *     }
     *
     *     // usingKqml = true;
     *     facilitatorIP = arguments[0];
     *     facilitatorPort = int.Parse(arguments[1]);
     *     localPort = int.Parse(arguments[2]);
     * }
     */

    // bool showLiveVisualization = false;
    string inputLogPath = null;
    // string outputLogPath = null;

    DateTime startTime = DateTime.Now;

    IProducer<AudioBuffer> audioInput = SetupAudioInput(this.pipeline, inputLogPath, ref startTime);

    // Create our webcam
    MediaCapture webcam = new MediaCapture(this.pipeline, 320, 240, 10);
    Debug.WriteLine("Open webcam");

    FaceCasClassifier f = new FaceCasClassifier();
    Debug.WriteLine("Load classifier");
    Debug.WriteLine(f);

    var mouthOpenAsBool = webcam.Out.ToGrayViaOpenCV(f, FrameCount).Select(
        (img, e) =>
        {
            // Debug.WriteLine(FrameCount % 10);
            bool mouthOpen = (Math.Abs(DisNose) / (4 * Math.Abs(DisLipMiddle))) < 3;
            Console.WriteLine(
                Math.Abs(DisLipMiddle) + " " + Math.Abs(DisLipRight) + " " + Math.Abs(DisLipLeft) + " " +
                (Math.Abs(DisNose) / (4 * Math.Abs(DisLipMiddle))) + " " + mouthOpen);
            this.DispImage.UpdateImage(img);
            return mouthOpen;
        });

    var mouthAndSpeech = audioInput.Pair(mouthOpenAsBool)
        .Where(t => true)
        .Select(t => t.Item1);

    SystemSpeechRecognizer recognizer = SetupSpeechRecognizer(this.pipeline);
    mouthAndSpeech.PipeTo(recognizer);

    var finalResults = recognizer.Out.Where(result => result.IsFinal);

    finalResults.Do(result =>
    {
        var ssrResult = result as SpeechRecognitionResult;
        Console.WriteLine($"{ssrResult.Text} (confidence: {ssrResult.Confidence})");
    });

    var text = finalResults.Select(result =>
    {
        var ssrResult = result as SpeechRecognitionResult;
        return ssrResult.Text;
    });

    // Finally start the pipeline running
    try
    {
        this.pipeline.RunAsync();
    }
    catch (AggregateException exp)
    {
        MessageBox.Show("Error! " + exp.InnerException.Message);
    }
}
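
// The PipelineCompletionEvent handler registered above is not shown in this section. The sketch
// below is a minimal, assumption-based version written against the older Psi API, in which
// PipelineCompletionEventArgs exposes an Errors collection; it simply reports any exceptions the
// pipeline surfaced on completion. (In the static Program class further below, the same handler
// would be declared static.)
private void PipelineCompletionEvent(object sender, PipelineCompletionEventArgs e)
{
    Console.WriteLine($"Pipeline execution completed with {e.Errors.Count} errors");

    // Print any exceptions that were thrown while the pipeline was running
    foreach (var error in e.Errors)
    {
        Console.WriteLine(error);
    }
}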
/// <summary>
/// Here we define a Psi extension. This extension will take a stream of images (source)
/// and create a new stream of converted images.
/// </summary>
/// <param name="source">Our source producer (source stream of image samples)</param>
/// <param name="f">A wrapper face classifier object (null means use the default)</param>
/// <param name="framecount">An integer to control the frame number</param>
/// <param name="deliveryPolicy">Our delivery policy (null means use the default)</param>
/// <returns>The new stream of converted images.</returns>
public static IProducer<Shared<Image>> ToGrayViaOpenCV(this IProducer<Shared<Image>> source, FaceCasClassifier f = null, int framecount = 0, DeliveryPolicy deliveryPolicy = null)
{
    // Process informs the pipeline that we want to call our lambda ("(srcImage, env, e) => {...}") with each image
    // from the stream.
    return source.Process<Shared<Image>, Shared<Image>>(
        (srcImage, env, e) =>
        {
            // Our lambda here is called with each image sample from our stream and calls OpenCV to convert
            // the image into a grayscale image. We then post the resulting grayscale image to our event queue
            // so that the Psi pipeline will send it to the next component.

            // Have Psi allocate a new image. We will convert the current image ('srcImage') into this new image.
            using (var destImage = ImagePool.GetOrCreate(srcImage.Resource.Width, srcImage.Resource.Height, PixelFormat.Gray_8bpp))
            {
                // Call into our OpenCV wrapper to convert the source image ('srcImage') into the newly created image ('destImage').
                // Note: since srcImage & destImage are Shared<> we need to access the Microsoft.Psi.Imaging.Image data via the Resource member.
                OpenCVMethods.ToGray(
                    srcImage.ToImageBuffer(),
                    destImage.ToImageBuffer(),
                    f,
                    ref MainWindow.DisNose,
                    ref MainWindow.DisLipMiddle,
                    ref MainWindow.DisLipRight,
                    ref MainWindow.DisLipLeft,
                    ref MainWindow.HasFace);

                // Debug.WriteLine(MainWindow.MouthOpen);
                e.Post(destImage, env.OriginatingTime);
            }
        },
        deliveryPolicy);
}
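
// SetupAudioInput() is called by both pipelines in this sample but is not shown in this section.
// The sketch below is an assumption-based reconstruction, not necessarily the sample's exact code:
// when an input log path is given it replays the "{Program.AppName}.MicrophoneAudio" stream (the
// name used by the Write() calls further below) from a previously saved store; otherwise it
// captures live 16 kHz, 16-bit, single-channel audio with Microsoft.Psi.Audio.AudioCapture.
// Configuration class names may differ slightly across Psi releases.
private static IProducer<AudioBuffer> SetupAudioInput(Pipeline pipeline, string inputLogPath, ref DateTime startTime)
{
    IProducer<AudioBuffer> audioInput = null;

    if (inputLogPath != null)
    {
        // Open the audio stream from a previously saved log
        var store = Store.Open(pipeline, Program.AppName, inputLogPath);
        audioInput = store.OpenStream<AudioBuffer>($"{Program.AppName}.MicrophoneAudio");

        // Rebase the start time to the beginning of the data in the store
        startTime = store.OriginatingTimeInterval.Left;
    }
    else
    {
        // Capture live audio from the default microphone in a format the speech recognizer accepts
        audioInput = new AudioCapture(
            pipeline,
            new AudioCaptureConfiguration() { OutputFormat = WaveFormat.Create16kHz1Channel16BitPcm() });
    }

    return audioInput;
}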
public static void StartListeningAndLooking(string[] args, bool live_visual_flag, bool store_visual_flag, string inputStorePath, string outputStorePath, bool usingKqml, string[] compargs)
{
    using (Pipeline pipeline = Pipeline.Create())
    {
        string facilitatorIP = null;
        int facilitatorPort = -1;
        int localPort = -1;

        /*
         * if (args.Length > 0)
         * {
         *     if (args.Length < 3)
         *     {
         *         Console.WriteLine("Usage for running with a facilitator: \nKioskMain facilitatorIP facilitatorPort localPort");
         *         return;
         *     }
         *
         *     usingKqml = true;
         *     facilitatorIP = args[0];
         *     facilitatorPort = int.Parse(args[1]);
         *     localPort = int.Parse(args[2]);
         * }
         */

        string outputLogPath = null;
        if (!string.IsNullOrEmpty(outputStorePath) && Directory.Exists(outputStorePath))
        {
            outputLogPath = outputStorePath;
        }

        Console.WriteLine(outputLogPath == null);

        string inputLogPath = null;
        if (!string.IsNullOrEmpty(inputStorePath) && Directory.Exists(inputStorePath))
        {
            inputLogPath = inputStorePath;
        }

        Console.WriteLine(inputLogPath == null);

        bool showLiveVisualization = live_visual_flag;

        // Needed only for live visualization
        DateTime startTime = DateTime.Now;

        // Use either live audio from the microphone or audio from a previously saved log
        IProducer<AudioBuffer> audioInput = SetupAudioInput(pipeline, inputLogPath, ref startTime);

        // Create our webcam
        MediaCapture webcam = new MediaCapture(pipeline, 320, 240, 10);

        FaceCasClassifier f = new FaceCasClassifier();
        Console.WriteLine("Load classifier");
        Console.WriteLine(f);

        // Bind the webcam's output to our display image.
        // The "Do" operator is executed on each sample from the stream (webcam.Out), which are the images coming from the webcam.
        var processedVideo = inputLogPath != null
            ? SetupVideoInput(pipeline, inputLogPath, ref startTime)
            : webcam.Out.ToGrayViaOpenCV(f).EncodeJpeg(90, DeliveryPolicy.LatestMessage);

        var mouthOpenAsInt = processedVideo.Select(
            (img, e) =>
            {
                // Debug.WriteLine(FrameCount % 10);
                // Console.WriteLine(Math.Abs(DisLipMiddle) + " " + Math.Abs(DisLipRight) + " " + Math.Abs(DisLipLeft) + " " + (Math.Abs(DisNose) / (4 * Math.Abs(DisLipMiddle))) + " " + mouthOpen);
                return MouthOpen;
            });

        /*
         * var hasFaceAsBool = webcam.Out.ToGrayViaOpenCV(f).Select(
         *     (img, e) =>
         *     {
         *         bool hasFacebool = false;
         *         if (HasFace == 1)
         *         {
         *             hasFacebool = true;
         *         }
         *         else
         *         {
         *             hasFacebool = false;
         *         }
         *         return hasFacebool;
         *     });
         */

        var mouthAndSpeech = audioInput.Pair(mouthOpenAsInt)
            .Where(t => t.Item2 > -1)
            .Select(t => t.Item1);

        SystemSpeechRecognizer recognizer = SetupSpeechRecognizer(pipeline);

        // Subscribe the recognizer to the input audio
        mouthAndSpeech.PipeTo(recognizer);
        // audioInput.PipeTo(recognizer);

        // Partial and final speech recognition results are posted on the same stream. Here
        // we use Psi's Where() operator to filter out only the final recognition results.
        var finalResults = inputLogPath != null
            ? SetupSpeechInput(pipeline, inputLogPath, ref startTime)
            : recognizer.Out.Where(result => result.IsFinal);

        // Print the recognized text of the final recognition result to the console.
        finalResults.Do(result =>
        {
            var ssrResult = result as SpeechRecognitionResult;
            Console.WriteLine($"{ssrResult.Text} (confidence: {ssrResult.Confidence})");
        });

        var finalResultsHighCf = finalResults.Where(t => (t as SpeechRecognitionResult).Confidence > 0.6).Select(t =>
        {
            Console.WriteLine("Good Confidence!");
            return t;
        });

        // Get just the text from the Speech Recognizer. We may want to add another filter
        // to only get text if confidence > 0.8.
        var text = finalResultsHighCf.Pair(mouthOpenAsInt).Select(result =>
        {
            var ssrResult = result.Item1 as SpeechRecognitionResult;
            int userid = result.Item2;
            Console.WriteLine("user" + userid + "+" + ssrResult.Text);
            return "user" + userid + "+" + ssrResult.Text;
        });

        // Setup KQML connection to Companion
        NU.Kqml.SocketStringConsumer kqml = null;
        if (usingKqml)
        {
            facilitatorIP = compargs[0];
            facilitatorPort = Convert.ToInt32(compargs[1]);
            localPort = Convert.ToInt32(compargs[2]);
            Console.WriteLine("Your Companion IP address is: " + facilitatorIP);
            Console.WriteLine("Your Companion port is: " + facilitatorPort);
            Console.WriteLine("Your local port is: " + localPort);

            kqml = new NU.Kqml.SocketStringConsumer(pipeline, facilitatorIP, facilitatorPort, localPort);
            text.PipeTo(kqml.In);
        }

        // Create a data store to log the data to if necessary. A data store is necessary
        // only if output logging or live visualization is enabled.
        Console.WriteLine(outputLogPath == null);
        var dataStore = CreateDataStore(pipeline, outputLogPath, showLiveVisualization);
        Console.WriteLine(dataStore == null ? "dataStore is empty" : "dataStore created");

        // For disk logging or live visualization only
        if (dataStore != null)
        {
            // Log the processed video, microphone audio, and recognition results
            processedVideo.Write($"{Program.AppName}.WebCamProcessedVideo", dataStore);
            audioInput.Write($"{Program.AppName}.MicrophoneAudio", dataStore);
            finalResults.Write($"{Program.AppName}.FinalRecognitionResults", dataStore);
            Console.WriteLine("Stored the data here!");
        }

        // Ignore this block if live visualization is not enabled
        if (showLiveVisualization)
        {
            // Create the visualization client
            var visualizationClient = new VisualizationClient();

            // Clear all data if the visualizer is already open
            visualizationClient.ClearAll();

            // Create the visualization client to visualize live data
            visualizationClient.SetLiveMode(startTime);

            // Plot the video stream in a new panel
            visualizationClient.AddXYPanel();
            // processedVideo.Show(visualizationClient);

            // Plot the microphone audio stream in a new panel
            visualizationClient.AddTimelinePanel();
            // audioInput.Show(visualizationClient);

            // Plot the recognition results in a new panel
            visualizationClient.AddTimelinePanel();
            // finalResults.Show(visualizationClient);
        }

        if (store_visual_flag)
        {
            // Create the visualization client
            var visualizationClient = new VisualizationClient();

            // Clear all data if the visualizer is already open
            visualizationClient.ClearAll();

            // Create the visualization client to visualize live data
            visualizationClient.SetLiveMode(startTime);

            // Plot the video stream in a new panel
            visualizationClient.AddXYPanel();
            processedVideo.Show(visualizationClient);

            // Plot the microphone audio stream in a new panel
            visualizationClient.AddTimelinePanel();
            audioInput.Show(visualizationClient);

            // Plot the recognition results in a new panel
            visualizationClient.AddTimelinePanel();
            finalResults.Show(visualizationClient);
        }

        // Register an event handler to catch pipeline errors
        pipeline.PipelineCompletionEvent += PipelineCompletionEvent;

        // Run the pipeline
        pipeline.RunAsync();

        Console.WriteLine("Press any key to exit...");
        Console.ReadKey(true);

        // if (kqml != null) kqml.Stop();
    }
}
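
// Two more helpers referenced above, SetupSpeechRecognizer() and CreateDataStore(), are not shown
// in this section. The sketches below are assumption-based reconstructions rather than the sample's
// definitive code: the grammar file name "SampleGrammar.grxml" is a placeholder, and the
// Store.Create / SystemSpeechRecognizerConfiguration calls follow the older Psi API used elsewhere
// in this sample; adjust them to whatever the project actually ships.
private static SystemSpeechRecognizer SetupSpeechRecognizer(Pipeline pipeline)
{
    // Create the System.Speech recognizer component, constrained to English plus an application grammar
    return new SystemSpeechRecognizer(
        pipeline,
        new SystemSpeechRecognizerConfiguration()
        {
            Language = "en-US",
            Grammars = new GrammarInfo[]
            {
                new GrammarInfo() { Name = Program.AppName, FileName = "SampleGrammar.grxml" }, // placeholder grammar file
            },
        });
}

private static Exporter CreateDataStore(Pipeline pipeline, string outputLogPath, bool showLiveVisualization)
{
    // Write to the requested output path; if only live visualization is enabled, fall back to a temp folder
    string dataStoreRoot = outputLogPath ?? (showLiveVisualization ? Path.Combine(Path.GetTempPath(), Program.AppName) : null);

    // No store is needed when neither logging nor live visualization is requested
    return (dataStoreRoot != null) ? Store.Create(pipeline, Program.AppName, dataStoreRoot) : null;
}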