/// <summary>
/// Builds and runs a webcam pipeline and records the data to a Psi store.
/// </summary>
/// <param name="pathToStore">The path to the directory where the store should be saved.</param>
public static void RecordAudioVideo(string pathToStore)
{
    // Create the pipeline object.
    using (Pipeline pipeline = Pipeline.Create())
    {
        var visualizationClient = new VisualizationClient();

        // Register an event handler to catch pipeline errors
        // (a hypothetical sketch of this handler follows this method)
        pipeline.PipelineCompletionEvent += PipelineCompletionEvent;

        // Clear all data if the visualizer is already open
        visualizationClient.ClearAll();

        // Set the visualization client to visualize live data
        visualizationClient.SetLiveMode();

        // Create the store
        Data.Exporter store = Store.Create(pipeline, ApplicationName, pathToStore);

        // Create our webcam
        MediaCapture webcam = new MediaCapture(pipeline, 1920, 1080, 30);

        // Create the AudioCapture component to capture audio from the default device as 16 kHz, 1-channel PCM
        IProducer<AudioBuffer> audioInput = new AudioCapture(pipeline, new AudioCaptureConfiguration() { OutputFormat = WaveFormat.Create16kHz1Channel16BitPcm() });

        var images = webcam.Out.EncodeJpeg(90, DeliveryPolicy.LatestMessage).Out;

        // Attach the webcam's image output to the store. We write the images to the store as compressed JPEGs.
        Store.Write(images, "Image", store, true, DeliveryPolicy.LatestMessage);

        // Attach the audio input to the store
        Store.Write(audioInput.Out, "Audio", store, true, DeliveryPolicy.LatestMessage);

        // Create an XY panel in PsiStudio to display the images
        visualizationClient.AddXYPanel();
        images.Show(visualizationClient);

        // Create a timeline panel in PsiStudio to display the audio waveform
        visualizationClient.AddTimelinePanel();
        audioInput.Out.Show(visualizationClient);

        // Run the pipeline
        pipeline.RunAsync();

        Console.WriteLine("Press any key to finish recording");
        Console.ReadKey();
    }
}
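// The PipelineCompletionEvent handler registered above is not shown in this sample.
// Below is a minimal sketch, assuming the older Microsoft.Psi pipeline API in which
// PipelineCompletionEventArgs exposes an Errors collection; treat it as an illustration
// rather than the project's actual handler.
private static void PipelineCompletionEvent(object sender, PipelineCompletionEventArgs e)
{
    // Report how the pipeline finished and surface any component errors
    Console.WriteLine($"Pipeline execution completed with {e.Errors.Count} errors");
    foreach (var error in e.Errors)
    {
        Console.WriteLine(error);
    }
}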
public static void SetupDataStore(Pipeline pipeline, string outputStorePath, string inputStorePath, bool showLive, Microsoft.Psi.Kinect.v1.KinectSensor kinectSensor, SkeletonFaceTracker faceTracker, IProducer<IStreamingSpeechRecognitionResult> speechRecog)
{
    string outputLogPath = null;
    if (!string.IsNullOrEmpty(outputStorePath))
    {
        if (!Directory.Exists(outputStorePath))
        {
            Directory.CreateDirectory(outputStorePath);
        }

        outputLogPath = outputStorePath;
    }

    Console.WriteLine("outputLogPath is null: " + (outputLogPath == null));

    string inputLogPath = null;
    if (!string.IsNullOrEmpty(inputStorePath) && Directory.Exists(inputStorePath))
    {
        inputLogPath = inputStorePath;
    }

    Console.WriteLine("inputLogPath is null: " + (inputLogPath == null));

    // Needed only for live visualization
    DateTime startTime = DateTime.Now;

    // Create a data store to log the data to if necessary. A data store is necessary
    // only if output logging or live visualization are enabled.
    // (CreateDataStore is a helper defined elsewhere; a hypothetical sketch follows this method.)
    var dataStore = CreateDataStore(pipeline, outputLogPath, showLive);
    Console.WriteLine("dataStore is null: " + (dataStore == null));

    // For disk logging or live visualization only
    if (dataStore != null)
    {
        // Log the Kinect microphone audio and the recognition results
        // kinectSensor.ColorImage.Write("Kiosk.KinectSensor.ColorImage", dataStore);
        kinectSensor.Audio.Write("Kiosk.KinectSensor.Audio", dataStore);
        // faceTracker.Write("Kiosk.FaceTracker", dataStore);
        speechRecog.Write("Kiosk.FinalRecognitionResults", dataStore);
        Console.WriteLine("Stored the data here!");
    }

    // Ignore this block if live visualization is not enabled
    if (showLive)
    {
        // Create the visualization client
        var visualizationClient = new VisualizationClient();

        // Clear all data if the visualizer is already open
        visualizationClient.ClearAll();

        // Set the visualization client to visualize live data
        visualizationClient.SetLiveMode(startTime);

        // Plot the video stream in a new panel
        // visualizationClient.AddXYPanel();
        // kinectSensor.ColorImage.Show(visualizationClient);

        // Plot the microphone audio stream in a new panel
        // visualizationClient.AddTimelinePanel();
        // kinectSensor.Audio.Show(visualizationClient);

        // Plot the face tracking results in a new panel
        // visualizationClient.AddTimelinePanel();
        // faceTracker.Show(visualizationClient);

        // Plot the recognition results in a new panel
        // visualizationClient.AddTimelinePanel();
        // speechRecog.Show(visualizationClient);
    }
}
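// CreateDataStore is a helper defined elsewhere in the project. A minimal sketch of one
// plausible implementation is shown below; the store name "Kiosk" and the temp-folder
// fallback are assumptions, not the project's actual values.
private static Microsoft.Psi.Data.Exporter CreateDataStore(Pipeline pipeline, string outputLogPath, bool showLiveVisualization)
{
    // A store is only needed when logging to disk or visualizing live data
    if (outputLogPath == null && !showLiveVisualization)
    {
        return null;
    }

    // Fall back to the system temp folder when only live visualization is enabled (assumption)
    return Store.Create(pipeline, "Kiosk", outputLogPath ?? System.IO.Path.GetTempPath());
}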
/// <summary>
/// This is the main code for our multimodal speech detection demo.
/// </summary>
private void PerformMultiModalSpeechDetection()
{
    Console.WriteLine("Initializing Psi.");

    bool detected = false;

    // First create our \psi pipeline
    using (var pipeline = Pipeline.Create("MultiModalSpeechDetection"))
    {
        VisualizationClient visualizationClient = new VisualizationClient();

        // Register an event handler to catch pipeline errors
        pipeline.PipelineCompletionEvent += PipelineCompletionEvent;

        // Clear all data if the visualizer is already open
        visualizationClient.ClearAll();

        // Set the visualization client to visualize live data
        visualizationClient.SetLiveMode();

        // Next create our Kinect sensor. We will be using the color images, face tracking, and audio from the Kinect sensor.
        var kinectSensorConfig = new KinectSensorConfiguration();
        kinectSensorConfig.OutputColor = true;
        kinectSensorConfig.OutputAudio = true;
        kinectSensorConfig.OutputBodies = true; // In order to detect faces with the Kinect we must also enable detection of bodies
        var kinectSensor = new KinectSensor(pipeline, kinectSensorConfig);
        var kinectFaceDetector = new Microsoft.Psi.Kinect.Face.KinectFaceDetector(pipeline, kinectSensor, Microsoft.Psi.Kinect.Face.KinectFaceDetectorConfiguration.Default);

        // Create our voice activity detector
        var speechDetector = new SystemVoiceActivityDetector(pipeline);
        var convertedAudio = kinectSensor.Audio.Resample(WaveFormat.Create16kHz1Channel16BitPcm());
        convertedAudio.PipeTo(speechDetector);

        // Use the Kinect's face tracker to determine whether the mouth is open
        var mouthOpenAsFloat = kinectFaceDetector.Faces.Select((List<Microsoft.Psi.Kinect.Face.KinectFace> list) =>
        {
            if (!detected)
            {
                detected = true;
                Console.WriteLine("Found your face");
            }

            bool open = (list[0] != null) ? list[0].FaceProperties[Microsoft.Kinect.Face.FaceProperty.MouthOpen] == Microsoft.Kinect.DetectionResult.Yes : false;
            return open ? 1.0 : 0.0;
        });

        // Next take the "mouthOpen" value and apply a hold to it (so that instead of 1,0,1,0,1 we see something like 1,1,1,1,0.8,0.6,0.4).
        // (Hold is a custom extension defined elsewhere in the sample; a hypothetical sketch follows this method.)
        var mouthOpen = mouthOpenAsFloat.Hold(0.1);

        // Next join the results of the speechDetector with the mouthOpen generator and only select samples where
        // we have detected speech and the mouth was open.
        // ("hundredMs" is a join window defined elsewhere in the sample)
        var mouthAndSpeechDetector = speechDetector.Join(mouthOpen, hundredMs).Select((t, e) => t.Item1 && t.Item2);

        // Convert our speech into text
        var speechRecognition = convertedAudio.SpeechToText(mouthAndSpeechDetector);
        speechRecognition.Do((s, t) =>
        {
            if (s.Item1.Length > 0)
            {
                Console.WriteLine("You said: " + s.Item1);
            }
        });

        // Create a stream of landmarks (points) from the face detector
        var facePoints = new List<Tuple<System.Windows.Point, string>>();
        var landmarks = kinectFaceDetector.Faces.Select((List<Microsoft.Psi.Kinect.Face.KinectFace> list) =>
        {
            facePoints.Clear();

            System.Windows.Point pt1 = new System.Windows.Point(
                list[0].FacePointsInColorSpace[Microsoft.Kinect.Face.FacePointType.EyeLeft].X,
                list[0].FacePointsInColorSpace[Microsoft.Kinect.Face.FacePointType.EyeLeft].Y);
            facePoints.Add(Tuple.Create(pt1, string.Empty));

            System.Windows.Point pt2 = new System.Windows.Point(
                list[0].FacePointsInColorSpace[Microsoft.Kinect.Face.FacePointType.EyeRight].X,
                list[0].FacePointsInColorSpace[Microsoft.Kinect.Face.FacePointType.EyeRight].Y);
            facePoints.Add(Tuple.Create(pt2, string.Empty));

            System.Windows.Point pt3 = new System.Windows.Point(
                list[0].FacePointsInColorSpace[Microsoft.Kinect.Face.FacePointType.MouthCornerLeft].X,
                list[0].FacePointsInColorSpace[Microsoft.Kinect.Face.FacePointType.MouthCornerLeft].Y);
            facePoints.Add(Tuple.Create(pt3, string.Empty));

            System.Windows.Point pt4 = new System.Windows.Point(
                list[0].FacePointsInColorSpace[Microsoft.Kinect.Face.FacePointType.MouthCornerRight].X,
                list[0].FacePointsInColorSpace[Microsoft.Kinect.Face.FacePointType.MouthCornerRight].Y);
            facePoints.Add(Tuple.Create(pt4, string.Empty));

            System.Windows.Point pt5 = new System.Windows.Point(
                list[0].FacePointsInColorSpace[Microsoft.Kinect.Face.FacePointType.Nose].X,
                list[0].FacePointsInColorSpace[Microsoft.Kinect.Face.FacePointType.Nose].Y);
            facePoints.Add(Tuple.Create(pt5, string.Empty));

            return facePoints;
        });

        // ********************************************************************
        // Finally, create a live visualization using PsiStudio.
        // We must persist our streams to a store for live visualization to work properly.
        // ********************************************************************

        // Create a store for the data. The live visualizer can only read data from a store.
        var pathToStore = Environment.GetFolderPath(Environment.SpecialFolder.MyVideos);
        Microsoft.Psi.Data.Exporter store = Store.Create(pipeline, ApplicationName, pathToStore);

        visualizationClient.AddTimelinePanel().Configuration.ShowLegend = true;
        mouthOpen.Select(v => v ? 1d : 0d).Write("MouthOpen", store).Show(visualizationClient);

        visualizationClient.AddTimelinePanel().Configuration.ShowLegend = true;
        speechDetector.Select(v => v ? 1d : 0d).Write("VAD", store).Show(visualizationClient);

        visualizationClient.AddTimelinePanel().Configuration.ShowLegend = true;
        mouthAndSpeechDetector.Write("Join(MouthOpen,VAD)", store).Show(visualizationClient);

        visualizationClient.AddTimelinePanel().Configuration.ShowLegend = true;
        kinectSensor.Audio.Write("Audio", store).Show(visualizationClient);

        var panel = visualizationClient.AddXYPanel();
        var images = kinectSensor.ColorImage.EncodeJpeg(90, DeliveryPolicy.LatestMessage).Out;
        Store.Write(images, "Images", store, true, DeliveryPolicy.LatestMessage);
        images.Show(visualizationClient);

        var ptsVis = landmarks.Write("FaceLandmarks", store).Show(visualizationClient);
        ptsVis.Configuration.Radius = 3;
        ptsVis.Configuration.XMax = Width;
        ptsVis.Configuration.YMax = Height;

        // Run the pipeline
        pipeline.Run();
    }
}
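// The Hold extension used above is defined elsewhere in the sample and its exact behavior is
// not shown here. The sketch below is purely illustrative (it would live in a static class):
// a decaying hold over a double stream, matching the comment's description (1,1,1,1,0.8,0.6,0.4),
// that emits true while the held value stays above the given threshold. The threshold and decay
// values are assumptions, not the project's actual parameters.
public static IProducer<bool> Hold(this IProducer<double> source, double threshold, double decayPerMessage = 0.2)
{
    double held = 0.0;
    return source.Select(value =>
    {
        // Take the new value if it is higher; otherwise decay the currently held value
        held = value >= held ? value : Math.Max(0.0, held - decayPerMessage);
        return held > threshold;
    });
}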
public static void StartListeningAndLooking(string[] args, bool live_visual_flag, bool store_visual_flag, string inputStorePath, string outputStorePath, bool usingKqml, string[] compargs)
{
    using (Pipeline pipeline = Pipeline.Create())
    {
        string facilitatorIP = null;
        int facilitatorPort = -1;
        int localPort = -1;

        /*
        if (args.Length > 0)
        {
            if (args.Length < 3)
            {
                Console.WriteLine("Usage for running with a facilitator: \nKioskMain facilitatorIP facilitatorPort localPort");
                return;
            }

            usingKqml = true;

            facilitatorIP = args[0];
            facilitatorPort = int.Parse(args[1]);
            localPort = int.Parse(args[2]);
        }
        */

        string outputLogPath = null;
        if (!string.IsNullOrEmpty(outputStorePath) && Directory.Exists(outputStorePath))
        {
            outputLogPath = outputStorePath;
        }

        Console.WriteLine("outputLogPath is null: " + (outputLogPath == null));

        string inputLogPath = null;
        if (!string.IsNullOrEmpty(inputStorePath) && Directory.Exists(inputStorePath))
        {
            inputLogPath = inputStorePath;
        }

        Console.WriteLine("inputLogPath is null: " + (inputLogPath == null));

        bool showLiveVisualization = live_visual_flag;

        // Needed only for live visualization
        DateTime startTime = DateTime.Now;

        // Use either live audio from the microphone or audio from a previously saved log.
        // (A hypothetical sketch of SetupAudioInput follows this method.)
        IProducer<AudioBuffer> audioInput = SetupAudioInput(pipeline, inputLogPath, ref startTime);

        // Create our webcam
        MediaCapture webcam = new MediaCapture(pipeline, 320, 240, 10);

        FaceCasClassifier f = new FaceCasClassifier();

        Console.WriteLine("Load classifier");
        Console.WriteLine(f);

        // Bind the webcam's output to our display image.
        // The operator is executed on each sample from the stream (webcam.Out), i.e. on each image coming from the webcam.
        var processedVideo = inputLogPath != null ? SetupVideoInput(pipeline, inputLogPath, ref startTime) : webcam.Out.ToGrayViaOpenCV(f).EncodeJpeg(90, DeliveryPolicy.LatestMessage);

        var mouthOpenAsInt = processedVideo.Select(
            (img, e) =>
            {
                // Debug.WriteLine(FrameCount % 10);
                // Console.WriteLine(Math.Abs(DisLipMiddle) + " " + Math.Abs(DisLipRight) + " " + Math.Abs(DisLipLeft) + " " + (Math.Abs(DisNose) / (4 * Math.Abs(DisLipMiddle))) + " " + mouthOpen);

                // MouthOpen is a field maintained elsewhere (presumably updated by ToGrayViaOpenCV)
                return MouthOpen;
            });

        /*
        var hasFaceAsBool = webcam.Out.ToGrayViaOpenCV(f).Select(
            (img, e) =>
            {
                bool hasFacebool = false;
                if (HasFace == 1)
                {
                    hasFacebool = true;
                }
                else
                {
                    hasFacebool = false;
                }

                return hasFacebool;
            });
        */

        var mouthAndSpeech = audioInput.Pair(mouthOpenAsInt).Where(t => t.Item2 > -1).Select(t => t.Item1);

        SystemSpeechRecognizer recognizer = SetupSpeechRecognizer(pipeline);

        // Subscribe the recognizer to the input audio
        mouthAndSpeech.PipeTo(recognizer);
        // audioInput.PipeTo(recognizer);

        // Partial and final speech recognition results are posted on the same stream. Here
        // we use Psi's Where() operator to filter out only the final recognition results.
        var finalResults = inputLogPath != null ? SetupSpeechInput(pipeline, inputLogPath, ref startTime) : recognizer.Out.Where(result => result.IsFinal);

        // Print the recognized text of the final recognition result to the console.
        finalResults.Do(result =>
        {
            var ssrResult = result as SpeechRecognitionResult;
            Console.WriteLine($"{ssrResult.Text} (confidence: {ssrResult.Confidence})");
        });

        var finalResultsHighCf = finalResults.Where(t => (t as SpeechRecognitionResult).Confidence > 0.6).Select(t =>
        {
            Console.WriteLine("Good Confidence!");
            return t;
        });

        // Get just the text from the speech recognizer.
        // We may want to add another filter to only keep text when confidence > 0.8.
        var text = finalResultsHighCf.Pair(mouthOpenAsInt).Select(result =>
        {
            var ssrResult = result.Item1 as SpeechRecognitionResult;
            int userid = result.Item2;
            Console.WriteLine("user" + userid + "+" + ssrResult.Text);
            return "user" + userid + "+" + ssrResult.Text;
        });

        // Set up the KQML connection to Companion
        NU.Kqml.SocketStringConsumer kqml = null;
        if (usingKqml)
        {
            facilitatorIP = compargs[0];
            facilitatorPort = Convert.ToInt32(compargs[1]);
            localPort = Convert.ToInt32(compargs[2]);
            Console.WriteLine("Your Companion IP address is: " + facilitatorIP);
            Console.WriteLine("Your Companion port is: " + facilitatorPort);
            Console.WriteLine("Your local port is: " + localPort);

            kqml = new NU.Kqml.SocketStringConsumer(pipeline, facilitatorIP, facilitatorPort, localPort);

            text.PipeTo(kqml.In);
        }

        // Create a data store to log the data to if necessary. A data store is necessary
        // only if output logging or live visualization are enabled.
        var dataStore = CreateDataStore(pipeline, outputLogPath, showLiveVisualization);
        Console.WriteLine("dataStore is null: " + (dataStore == null));

        // For disk logging or live visualization only
        if (dataStore != null)
        {
            // Log the processed video, the microphone audio, and the recognition results
            processedVideo.Write($"{Program.AppName}.WebCamProcessedVideo", dataStore);
            audioInput.Write($"{Program.AppName}.MicrophoneAudio", dataStore);
            finalResults.Write($"{Program.AppName}.FinalRecognitionResults", dataStore);
            Console.WriteLine("Stored the data here!");
        }

        // Ignore this block if live visualization is not enabled
        if (showLiveVisualization)
        {
            // Create the visualization client
            var visualizationClient = new VisualizationClient();

            // Clear all data if the visualizer is already open
            visualizationClient.ClearAll();

            // Set the visualization client to visualize live data
            visualizationClient.SetLiveMode(startTime);

            // Plot the video stream in a new panel
            visualizationClient.AddXYPanel();
            // processedVideo.Show(visualizationClient);

            // Plot the microphone audio stream in a new panel
            visualizationClient.AddTimelinePanel();
            // audioInput.Show(visualizationClient);

            // Plot the recognition results in a new panel
            visualizationClient.AddTimelinePanel();
            // finalResults.Show(visualizationClient);
        }

        if (store_visual_flag)
        {
            // Create the visualization client
            var visualizationClient = new VisualizationClient();

            // Clear all data if the visualizer is already open
            visualizationClient.ClearAll();

            // Set the visualization client to visualize live data
            visualizationClient.SetLiveMode(startTime);

            // Plot the video stream in a new panel
            visualizationClient.AddXYPanel();
            processedVideo.Show(visualizationClient);

            // Plot the microphone audio stream in a new panel
            visualizationClient.AddTimelinePanel();
            audioInput.Show(visualizationClient);

            // Plot the recognition results in a new panel
            visualizationClient.AddTimelinePanel();
            finalResults.Show(visualizationClient);
        }

        // Register an event handler to catch pipeline errors
        pipeline.PipelineCompletionEvent += PipelineCompletionEvent;

        // Run the pipeline
        pipeline.RunAsync();

        Console.WriteLine("Press any key to exit...");
        Console.ReadKey(true);

        // if (kqml != null) kqml.Stop();
    }
}
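// SetupAudioInput, SetupVideoInput, SetupSpeechInput, and SetupSpeechRecognizer are helpers
// defined elsewhere in the project. As an illustration, a minimal sketch of SetupAudioInput is
// given below, assuming the usual \psi pattern of replaying a stream from an existing store
// when an input log path is supplied; the store name and the stream name are assumptions.
private static IProducer<AudioBuffer> SetupAudioInput(Pipeline pipeline, string inputLogPath, ref DateTime startTime)
{
    IProducer<AudioBuffer> audioInput = null;
    if (inputLogPath != null)
    {
        // Replay the logged audio and align the start time with the original recording
        var store = Store.Open(pipeline, Program.AppName, inputLogPath);
        audioInput = store.OpenStream<AudioBuffer>($"{Program.AppName}.MicrophoneAudio");
        startTime = store.OriginatingTimeInterval.Left;
    }
    else
    {
        // Otherwise capture live audio from the default microphone as 16 kHz, 1-channel PCM
        audioInput = new AudioCapture(pipeline, new AudioCaptureConfiguration()
        {
            OutputFormat = WaveFormat.Create16kHz1Channel16BitPcm(),
        });
    }

    return audioInput;
}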