Ejemplo n.º 1
0
        /// <summary>
        /// Builds and runs a webcam + microphone pipeline, recording both streams to a Psi store
        /// and showing them live in PsiStudio.
        /// </summary>
        /// <param name="pathToStore">The path to the directory where the store should be saved.</param>
        public static void RecordAudioVideo(string pathToStore)
        {
            // The pipeline owns every component; disposing it tears the whole graph down.
            using (var pipeline = Pipeline.Create())
            {
                VisualizationClient vizClient = new VisualizationClient();

                // Surface pipeline errors through the shared completion handler.
                pipeline.PipelineCompletionEvent += PipelineCompletionEvent;

                // Reset any visualizer that is already open, then switch it to live mode.
                vizClient.ClearAll();
                vizClient.SetLiveMode();

                // The store both streams are persisted into.
                var exporter = Store.Create(pipeline, ApplicationName, pathToStore);

                // Full-HD video at 30 fps.
                var camera = new MediaCapture(pipeline, 1920, 1080, 30);

                // 16 kHz, mono, 16-bit PCM audio from the default capture device.
                IProducer<AudioBuffer> microphone = new AudioCapture(pipeline, new AudioCaptureConfiguration()
                {
                    OutputFormat = WaveFormat.Create16kHz1Channel16BitPcm()
                });

                // Compress frames to JPEG (quality 90); drop stale frames under load.
                var encodedImages = camera.Out.EncodeJpeg(90, DeliveryPolicy.LatestMessage).Out;

                // Persist the compressed video and the raw audio to the store.
                Store.Write(encodedImages, "Image", exporter, true, DeliveryPolicy.LatestMessage);
                Store.Write(microphone.Out, "Audio", exporter, true, DeliveryPolicy.LatestMessage);

                // XY panel for the images.
                vizClient.AddXYPanel();
                encodedImages.Show(vizClient);

                // Timeline panel for the audio waveform.
                vizClient.AddTimelinePanel();
                microphone.Out.Show(vizClient);

                // Start the pipeline and record until a key is pressed.
                pipeline.RunAsync();

                Console.WriteLine("Press any key to finish recording");
                Console.ReadKey();
            }
        }
        /// <summary>
        /// Configures data persistence and (optionally) live visualization for the kiosk pipeline.
        /// A store is created only when an output path is supplied or live visualization is enabled.
        /// </summary>
        /// <param name="pipeline">The pipeline to attach the logging components to.</param>
        /// <param name="outputStorePath">Directory to write the output store to (created if missing); null/empty disables disk logging.</param>
        /// <param name="inputStorePath">Directory of a previously recorded store; must already exist to be used.</param>
        /// <param name="showLive">Whether to open a live visualization session in PsiStudio.</param>
        /// <param name="kinectSensor">Kinect (v1) sensor whose audio stream is logged.</param>
        /// <param name="faceTracker">Face tracker stream; currently not logged (see commented line).</param>
        /// <param name="speechRecog">Stream of final speech recognition results to log.</param>
        public static void SetupDataStore(Pipeline pipeline, string outputStorePath, string inputStorePath, bool showLive,
                                          Microsoft.Psi.Kinect.v1.KinectSensor kinectSensor, SkeletonFaceTracker faceTracker, IProducer <IStreamingSpeechRecognitionResult> speechRecog)
        {
            // Resolve the output directory, creating it on demand. Null means "no disk logging".
            string outputLogPath = null;
            if (!string.IsNullOrEmpty(outputStorePath))
            {
                if (!Directory.Exists(outputStorePath))
                {
                    Directory.CreateDirectory(outputStorePath);
                }
                outputLogPath = outputStorePath;
            }

            // Resolve the input directory; it must already exist to be usable.
            // NOTE(review): inputLogPath is computed but not consumed in this method —
            // presumably callers relied on the old debug print; verify before removing entirely.
            string inputLogPath = null;
            if (!string.IsNullOrEmpty(inputStorePath) && Directory.Exists(inputStorePath))
            {
                inputLogPath = inputStorePath;
            }

            // Needed only for live visualization.
            DateTime startTime = DateTime.Now;

            // Create a data store to log the data to if necessary. A data store is necessary
            // only if output logging or live visualization are enabled.
            var dataStore = CreateDataStore(pipeline, outputLogPath, showLive);

            // For disk logging or live visualization only.
            if (dataStore != null)
            {
                // Log the microphone audio and recognition results.
                //kinectSensor.ColorImage.Write("Kiosk.KinectSensor.ColorImage", dataStore);
                kinectSensor.Audio.Write("Kiosk.KinectSensor.Audio", dataStore);
                //faceTracker.Write("Kiosk.FaceTracker", dataStore);
                speechRecog.Write("Kiosk.FinalRecognitionResults", dataStore);

                Console.WriteLine("Stored the data here! ");
            }
            else
            {
                // BUGFIX: this message used to print unconditionally, even when a store existed.
                Console.WriteLine("dataStore is empty");
            }

            // Ignore this block if live visualization is not enabled.
            if (showLive)
            {
                // Create the visualization client.
                var visualizationClient = new VisualizationClient();

                // Clear all data if the visualizer is already open.
                visualizationClient.ClearAll();

                // Set the visualization client to visualize live data from the recording start time.
                visualizationClient.SetLiveMode(startTime);

                // Plot the video stream in a new panel
                //visualizationClient.AddXYPanel();
                //kinectSensor.ColorImage.Show(visualizationClient);

                // Plot the microphone audio stream in a new panel
                //visualizationClient.AddTimelinePanel();
                //kinectSensor.Audio.Show(visualizationClient);

                // Plot the recognition results in a new panel
                //visualizationClient.AddTimelinePanel();
                //faceTracker.Show(visualizationClient);

                // Plot the recognition results in a new panel
                //visualizationClient.AddTimelinePanel();
                //speechRecog.Show(visualizationClient);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// This is the main code for our Multimodal Speech Detection demo: it combines the
        /// Kinect face tracker (mouth open) with a voice activity detector, recognizes speech
        /// only when both agree, and visualizes every stream live in PsiStudio.
        /// </summary>
        private void PerformMultiModalSpeechDetection()
        {
            Console.WriteLine("Initializing Psi.");

            bool detected = false;

            // First create our \Psi pipeline
            using (var pipeline = Pipeline.Create("MultiModalSpeechDetection"))
            {
                VisualizationClient visualizationClient = new VisualizationClient();

                // Register an event handler to catch pipeline errors
                pipeline.PipelineCompletionEvent += PipelineCompletionEvent;

                // Clear all data if the visualizer is already open
                visualizationClient.ClearAll();

                // Set the visualization client to visualize live data
                visualizationClient.SetLiveMode();

                // Next create our Kinect sensor. We will be using the color images, face tracking, and audio from the Kinect sensor
                var kinectSensorConfig = new KinectSensorConfiguration();
                kinectSensorConfig.OutputColor  = true;
                kinectSensorConfig.OutputAudio  = true;
                kinectSensorConfig.OutputBodies = true; // In order to detect faces using Kinect you must also enable detection of bodies
                var kinectSensor       = new KinectSensor(pipeline, kinectSensorConfig);
                var kinectFaceDetector = new Microsoft.Psi.Kinect.Face.KinectFaceDetector(pipeline, kinectSensor, Microsoft.Psi.Kinect.Face.KinectFaceDetectorConfiguration.Default);

                // Create our Voice Activation Detector
                var speechDetector = new SystemVoiceActivityDetector(pipeline);
                var convertedAudio = kinectSensor.Audio.Resample(WaveFormat.Create16kHz1Channel16BitPcm());
                convertedAudio.PipeTo(speechDetector);

                // Use the Kinect's face tracker to determine if the mouth is open (1.0) or not (0.0).
                var mouthOpenAsFloat = kinectFaceDetector.Faces.Select((List <Microsoft.Psi.Kinect.Face.KinectFace> list) =>
                {
                    if (!detected)
                    {
                        detected = true;
                        Console.WriteLine("Found your face");
                    }

                    // BUGFIX: guard against an empty face list before indexing (the original
                    // only null-checked list[0], which itself throws when the list is empty).
                    bool open = list.Count > 0 && list[0] != null
                        && list[0].FaceProperties[Microsoft.Kinect.Face.FaceProperty.MouthOpen] == Microsoft.Kinect.DetectionResult.Yes;
                    return open ? 1.0 : 0.0;
                });

                // Next take the "mouthOpen" value and create a hold on that value (so that we don't see 1,0,1,0,1 but instead would see 1,1,1,1,0.8,0.6,0.4)
                var mouthOpen = mouthOpenAsFloat.Hold(0.1);

                // Next join the results of the speechDetector with the mouthOpen generator and only select samples where
                // we have detected speech and that the mouth was open.
                var mouthAndSpeechDetector = speechDetector.Join(mouthOpen, hundredMs).Select((t, e) => t.Item1 && t.Item2);

                // Convert our speech into text
                var speechRecognition = convertedAudio.SpeechToText(mouthAndSpeechDetector);
                speechRecognition.Do((s, t) =>
                {
                    if (s.Item1.Length > 0)
                    {
                        Console.WriteLine("You said: " + s.Item1);
                    }
                });

                // Create a stream of landmarks (points) from the face detector.
                // BUGFIX: build a fresh list per message (the original reused a single shared
                // list across all messages) and guard against frames with no tracked face,
                // which previously threw a NullReferenceException.
                var landmarks = kinectFaceDetector.Faces.Select((List <Microsoft.Psi.Kinect.Face.KinectFace> list) =>
                {
                    var facePoints = new List <Tuple <System.Windows.Point, string> >();
                    if (list.Count == 0 || list[0] == null)
                    {
                        return facePoints;
                    }

                    var points = list[0].FacePointsInColorSpace;

                    // Converts one named Kinect face point into an unlabeled WPF point.
                    void AddPoint(Microsoft.Kinect.Face.FacePointType type)
                    {
                        var pt = new System.Windows.Point(points[type].X, points[type].Y);
                        facePoints.Add(Tuple.Create(pt, string.Empty));
                    }

                    AddPoint(Microsoft.Kinect.Face.FacePointType.EyeLeft);
                    AddPoint(Microsoft.Kinect.Face.FacePointType.EyeRight);
                    AddPoint(Microsoft.Kinect.Face.FacePointType.MouthCornerLeft);
                    AddPoint(Microsoft.Kinect.Face.FacePointType.MouthCornerRight);
                    AddPoint(Microsoft.Kinect.Face.FacePointType.Nose);
                    return facePoints;
                });

                // ********************************************************************
                // Finally create a Live Visualizer using PsiStudio.
                // We must persist our streams to a store in order for Live Viz to work properly
                // ********************************************************************

                // Create store for the data. Live Visualizer can only read data from a store.
                var pathToStore = Environment.GetFolderPath(Environment.SpecialFolder.MyVideos);
                Microsoft.Psi.Data.Exporter store = Store.Create(pipeline, ApplicationName, pathToStore);

                visualizationClient.AddTimelinePanel().Configuration.ShowLegend = true;
                mouthOpen.Select(v => v ? 1d : 0d).Write("MouthOpen", store).Show(visualizationClient);

                visualizationClient.AddTimelinePanel().Configuration.ShowLegend = true;
                speechDetector.Select(v => v ? 1d : 0d).Write("VAD", store).Show(visualizationClient);

                visualizationClient.AddTimelinePanel().Configuration.ShowLegend = true;
                mouthAndSpeechDetector.Write("Join(MouthOpen,VAD)", store).Show(visualizationClient);

                visualizationClient.AddTimelinePanel().Configuration.ShowLegend = true;
                kinectSensor.Audio.Write("Audio", store).Show(visualizationClient);

                var panel  = visualizationClient.AddXYPanel();
                var images = kinectSensor.ColorImage.EncodeJpeg(90, DeliveryPolicy.LatestMessage).Out;
                Store.Write(images, "Images", store, true, DeliveryPolicy.LatestMessage);
                images.Show(visualizationClient);

                var ptsVis = landmarks.Write("FaceLandmarks", store).Show(visualizationClient);
                ptsVis.Configuration.Radius = 3;
                ptsVis.Configuration.XMax   = Width;
                ptsVis.Configuration.YMax   = Height;
                pipeline.Run();
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds and runs the full kiosk pipeline: webcam-based mouth detection, microphone
        /// audio, speech recognition gated on mouth state, optional KQML forwarding to a
        /// Companion agent, optional disk logging, and optional visualization.
        /// </summary>
        /// <param name="args">Raw command-line arguments (currently unused; see commented block).</param>
        /// <param name="live_visual_flag">Whether to open live visualization panels.</param>
        /// <param name="store_visual_flag">Whether to open visualization panels that also show the streams.</param>
        /// <param name="inputStorePath">Directory of a previously recorded store to replay, or null/empty for live input.</param>
        /// <param name="outputStorePath">Existing directory to log streams to, or null/empty to disable logging.</param>
        /// <param name="usingKqml">Whether to forward recognized text to a Companion over KQML.</param>
        /// <param name="compargs">Companion connection settings: [facilitatorIP, facilitatorPort, localPort].</param>
        public static void StartListeningAndLooking(string[] args, bool live_visual_flag, bool store_visual_flag, string inputStorePath, string outputStorePath, bool usingKqml, String[] compargs)
        {
            using (Pipeline pipeline = Pipeline.Create())
            {
                string facilitatorIP   = null;
                int    facilitatorPort = -1;
                int    localPort       = -1;

                /*
                 * if (args.Length > 0)
                 * {
                 *  if (args.Length < 3)
                 *  {
                 *      Console.WriteLine("Usage for running with a facilitator: \nKioskMain facilitatorIP facilitatorPort localPort");
                 *      return;
                 *  }
                 *  usingKqml = true;
                 *
                 *  facilitatorIP = args[0];
                 *  facilitatorPort = int.Parse(args[1]);
                 *  localPort = int.Parse(args[2]);
                 * }
                 */

                // Resolve the output directory; unlike SetupDataStore it must already exist here.
                string outputLogPath = null;
                if (!string.IsNullOrEmpty(outputStorePath) && Directory.Exists(outputStorePath))
                {
                    outputLogPath = outputStorePath;
                }

                // Resolve the input (replay) directory; null means "use live devices".
                string inputLogPath = null;
                if (!string.IsNullOrEmpty(inputStorePath) && Directory.Exists(inputStorePath))
                {
                    inputLogPath = inputStorePath;
                }

                bool showLiveVisualization = live_visual_flag;

                // Needed only for live visualization.
                DateTime startTime = DateTime.Now;

                // Use either live audio from the microphone or audio from a previously saved log.
                IProducer <AudioBuffer> audioInput = SetupAudioInput(pipeline, inputLogPath, ref startTime);

                // Create our webcam (low resolution/frame rate is sufficient for face detection).
                MediaCapture webcam = new MediaCapture(pipeline, 320, 240, 10);

                FaceCasClassifier f = new FaceCasClassifier();

                Console.WriteLine("Load classifier");
                Console.WriteLine(f);

                // Either replay processed video from the input log, or run webcam frames through
                // the OpenCV face/mouth detector and JPEG-encode them.
                var processedVideo = inputLogPath != null
                    ? SetupVideoInput(pipeline, inputLogPath, ref startTime)
                    : webcam.Out.ToGrayViaOpenCV(f).EncodeJpeg(90, DeliveryPolicy.LatestMessage);

                // Sample the detector's MouthOpen value once per processed frame.
                // NOTE(review): MouthOpen appears to be a side channel updated by
                // ToGrayViaOpenCV — confirm against that operator's implementation.
                var mouthOpenAsInt = processedVideo.Select((img, e) => MouthOpen);

                /*
                 * var hasFaceAsBool = webcam.Out.ToGrayViaOpenCV(f).Select(
                 * (img, e) =>
                 * {
                 *  bool hasFacebool = false;
                 *  if (HasFace == 1)
                 *  {
                 *      hasFacebool = true;
                 *  }
                 *  else
                 *  {
                 *      hasFacebool = false;
                 *  }
                 *  return hasFacebool;
                 * });
                 */

                // Keep only the audio that arrives while a mouth is detected (MouthOpen > -1).
                var mouthAndSpeech = audioInput.Pair(mouthOpenAsInt)
                                               .Where(t => t.Item2 > -1)
                                               .Select(t => t.Item1);

                SystemSpeechRecognizer recognizer = SetupSpeechRecognizer(pipeline);

                // Subscribe the recognizer to the mouth-gated audio.
                mouthAndSpeech.PipeTo(recognizer);
                //audioInput.PipeTo(recognizer);

                // Partial and final speech recognition results are posted on the same stream. Here
                // we use Psi's Where() operator to filter out only the final recognition results.
                var finalResults = inputLogPath != null
                    ? SetupSpeechInput(pipeline, inputLogPath, ref startTime)
                    : recognizer.Out.Where(result => result.IsFinal);

                // Print the recognized text of the final recognition result to the console.
                finalResults.Do(result =>
                {
                    var ssrResult = result as SpeechRecognitionResult;
                    Console.WriteLine($"{ssrResult.Text} (confidence: {ssrResult.Confidence})");
                });

                // Keep only confident results (confidence > 0.6).
                var finalResultsHighCf = finalResults.Where(t => (t as SpeechRecognitionResult).Confidence > 0.6).Select(t =>
                {
                    Console.WriteLine("Good Confidence!");
                    return t;
                });

                // Get just the text from the Speech Recognizer, tagged with the detected user id.
                var text = finalResultsHighCf.Pair(mouthOpenAsInt).Select(result =>
                {
                    var ssrResult = result.Item1 as SpeechRecognitionResult;
                    int userid    = result.Item2;
                    Console.WriteLine("user" + userid + "+" + ssrResult.Text);
                    return "user" + userid + "+" + ssrResult.Text;
                });

                // Setup KQML connection to Companion.
                NU.Kqml.SocketStringConsumer kqml = null;
                if (usingKqml)
                {
                    facilitatorIP   = compargs[0];
                    facilitatorPort = Convert.ToInt32(compargs[1]);
                    localPort       = Convert.ToInt32(compargs[2]);
                    Console.WriteLine("Your Companion IP address is: " + facilitatorIP);
                    Console.WriteLine("Your Companion port is: " + facilitatorPort);
                    Console.WriteLine("Your local port is: " + localPort);

                    kqml = new NU.Kqml.SocketStringConsumer(pipeline, facilitatorIP, facilitatorPort, localPort);

                    text.PipeTo(kqml.In);
                }

                // Create a data store to log the data to if necessary. A data store is necessary
                // only if output logging or live visualization are enabled.
                var dataStore = CreateDataStore(pipeline, outputLogPath, showLiveVisualization);

                // For disk logging or live visualization only.
                if (dataStore != null)
                {
                    // Log the processed video, microphone audio, and recognition results.
                    processedVideo.Write($"{Program.AppName}.WebCamProcessedVideo", dataStore);
                    audioInput.Write($"{Program.AppName}.MicrophoneAudio", dataStore);
                    finalResults.Write($"{Program.AppName}.FinalRecognitionResults", dataStore);

                    Console.WriteLine("Stored the data here! ");
                }
                else
                {
                    // BUGFIX: this message used to print unconditionally, even when a store existed.
                    Console.WriteLine("dataStore is empty");
                }

                // Ignore this block if live visualization is not enabled.
                if (showLiveVisualization)
                {
                    // Create the visualization client.
                    var visualizationClient = new VisualizationClient();

                    // Clear all data if the visualizer is already open.
                    visualizationClient.ClearAll();

                    // Create the visualization client to visualize live data.
                    visualizationClient.SetLiveMode(startTime);

                    // Plot the video stream in a new panel
                    visualizationClient.AddXYPanel();
                    //processedVideo.Show(visualizationClient);

                    // Plot the microphone audio stream in a new panel
                    visualizationClient.AddTimelinePanel();
                    //audioInput.Show(visualizationClient);

                    // Plot the recognition results in a new panel
                    visualizationClient.AddTimelinePanel();
                    //finalResults.Show(visualizationClient);
                }

                if (store_visual_flag)
                {
                    // Create the visualization client.
                    var visualizationClient = new VisualizationClient();

                    // Clear all data if the visualizer is already open.
                    visualizationClient.ClearAll();

                    // Create the visualization client to visualize live data.
                    visualizationClient.SetLiveMode(startTime);

                    // Plot the video stream in a new panel.
                    visualizationClient.AddXYPanel();
                    processedVideo.Show(visualizationClient);

                    // Plot the microphone audio stream in a new panel.
                    visualizationClient.AddTimelinePanel();
                    audioInput.Show(visualizationClient);

                    // Plot the recognition results in a new panel.
                    visualizationClient.AddTimelinePanel();
                    finalResults.Show(visualizationClient);
                }

                // Register an event handler to catch pipeline errors.
                pipeline.PipelineCompletionEvent += PipelineCompletionEvent;

                // Run the pipeline asynchronously and block until a key is pressed.
                pipeline.RunAsync();

                Console.WriteLine("Press any key to exit...");
                Console.ReadKey(true);

                // if (kqml != null) kqml.Stop();
            }
        }