private Vector3 raycast(Point cameraPoint, Matrix4x4 cameraToWorldMatrix, CameraIntrinsics camIntrinsics)
        {
            // Convert the detected point (e.g. the first point of a detected contour)
            // to a Windows.Foundation.Point so it can be unprojected.
            Windows.Foundation.Point orgpoint = new Windows.Foundation.Point(cameraPoint.x, cameraPoint.y);

            // Unproject the pixel coordinates into a camera-space ray from the camera origin,
            // expressed as X, Y coordinates on a plane one meter from the camera.
            System.Numerics.Vector2 result = camIntrinsics.UnprojectAtUnitDepth(orgpoint);

            // Manual calibration: offset the result (about 5 cm on the y-axis) to improve unprojection accuracy.
            UnityEngine.Vector3 pos = new UnityEngine.Vector3(result.X + unprojectionOffset.x, result.Y + unprojectionOffset.y, 1.0f);

            // Convert from camera coordinates to world coordinates and raycast in that direction.
            // The camera uses a right-handed coordinate system; convert to Unity's left-handed one.
            Quaternion rotation   = Quaternion.LookRotation(-cameraToWorldMatrix.GetColumn(2), cameraToWorldMatrix.GetColumn(1));
            Vector3    rayForward = Vector3.Normalize(rotation * pos);
            Vector3    cameraPos  = cameraToWorldMatrix.GetColumn(3);

            RaycastHit hit = new RaycastHit();

            // Return the hit point on the spatial mesh, or a fallback point 5 m along the ray if nothing is hit.
            return Physics.Raycast(cameraPos, rayForward, out hit, Mathf.Infinity, this.SpatialAwarnessLayerMask) ?
                   hit.point : cameraPos + rayForward * 5.0f;
        }
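
        // For reference: UnprojectAtUnitDepth is the standard pinhole back-projection
        // (pixel minus principal point, divided by focal length). A minimal sketch of the
        // same math, assuming an ideal pinhole model with no lens distortion (the real
        // CameraIntrinsics also handles distortion, and the Y sign convention may differ);
        // the method name and parameters here are illustrative, not part of the sample:
        private static System.Numerics.Vector2 UnprojectAtUnitDepthSketch(
            float pixelX, float pixelY,         // pixel coordinates
            float focalX, float focalY,         // focal length in pixels (CameraIntrinsics.FocalLength)
            float principalX, float principalY) // principal point in pixels (CameraIntrinsics.PrincipalPoint)
        {
            // A pixel maps to ((px - cx) / fx, (py - cy) / fy) on the plane one meter in
            // front of the camera; appending a Z component (-1 in this right-handed
            // convention) and normalizing yields the ray direction used in these examples.
            return new System.Numerics.Vector2(
                (pixelX - principalX) / focalX,
                (pixelY - principalY) / focalY);
        }
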
        void ProcessFrame(SpatialCoordinateSystem worldCoordinateSystem)
        {
            if (!IsInValidateStateToProcessFrame())
            {
                return;
            }

            // Obtain the most recently captured frame.
            FrameGrabber.Frame frame = frameGrabber.LastFrame;

            if (frame.mediaFrameReference == null)
            {
                return;
            }

            MediaFrameReference mediaFrameReference = frame.mediaFrameReference;

            SpatialCoordinateSystem cameraCoordinateSystem = mediaFrameReference.CoordinateSystem;
            CameraIntrinsics cameraIntrinsics = mediaFrameReference.VideoMediaFrame.CameraIntrinsics;

            Matrix4x4? cameraToWorld = cameraCoordinateSystem.TryGetTransformTo(worldCoordinateSystem);

            if (!cameraToWorld.HasValue)
            {
                return;
            }

            // Average human face width, used below to estimate depth from the detected face size.
            float averageFaceWidthInMeters = 0.15f;

            float pixelsPerMeterAlongX = cameraIntrinsics.FocalLength.X;
            float averagePixelsForFaceAt1Meter = pixelsPerMeterAlongX * averageFaceWidthInMeters;
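            // Worked example of this ratio: with FocalLength.X ≈ 1000 px (illustrative; read
            // the real value from cameraIntrinsics), a 0.15 m wide face projects to ≈ 150 px
            // at 1 m. A detected face 300 px wide is then estimated at 150 / 300 = 0.5 m,
            // and one 75 px wide at 150 / 75 = 2 m.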

            // Place the label 25cm above the center of the face.
            Vector3 labelOffsetInWorldSpace = new Vector3(0.0f, 0.25f, 0.0f);            

            frameAnalyzer.AnalyzeFrame(mediaFrameReference, (status, detectedPersons) =>
            {
                if (status > 0 && detectedPersons.Count > 0)
                {
                    FrameAnalyzer.Bounds? bestRect = null;
                    Vector3 bestRectPositionInCameraSpace = Vector3.Zero;
                    float bestDotProduct = -1.0f;
                    FrameAnalyzer.DetectedPerson bestPerson = null; 

                    foreach (var dp in detectedPersons)
                    {
                        Debug.WriteLine($"Detected person: {dp}");

                        Point faceRectCenterPoint = new Point(
                            dp.bounds.left + dp.bounds.width / 2,
                            dp.bounds.top + dp.bounds.height / 2
                            );

                        // Calculate the vector towards the face at 1 meter.
                        Vector2 centerOfFace = cameraIntrinsics.UnprojectAtUnitDepth(faceRectCenterPoint);

                        // Add the Z component and normalize.
                        Vector3 vectorTowardsFace = Vector3.Normalize(new Vector3(centerOfFace.X, centerOfFace.Y, -1.0f));

                        // Get the dot product between the vector towards the face and the gaze vector.
                        // The closer the dot product is to 1.0, the closer the face is to the middle of the video image.
                        float dotFaceWithGaze = Vector3.Dot(vectorTowardsFace, -Vector3.UnitZ);                        

                        // Pick the faceRect that best matches the user's gaze.
                        if (dotFaceWithGaze > bestDotProduct)
                        {
                            // Estimate depth using the ratio of the current faceRect width with the average faceRect width at 1 meter.
                            float estimatedFaceDepth = averagePixelsForFaceAt1Meter / (float)dp.bounds.width;

                            // Scale the vector towards the face by the estimated depth (the label offset is applied later, in world space).
                            Vector3 targetPositionInCameraSpace = vectorTowardsFace * estimatedFaceDepth;

                            bestDotProduct = dotFaceWithGaze;
                            bestRect = dp.bounds;
                            bestRectPositionInCameraSpace = targetPositionInCameraSpace;
                            bestPerson = dp; 
                        }                         
                    }

                    if (bestRect.HasValue)
                    {
                        // Transform the label position from camera space to world space.
                        Vector3 bestRectPositionInWorldspace = Vector3.Transform(bestRectPositionInCameraSpace, cameraToWorld.Value);
                        Vector3 labelPosition = bestRectPositionInWorldspace + labelOffsetInWorldSpace;                          

                        quadRenderer.TargetPosition = labelPosition;
                        textRenderer.RenderTextOffscreen($"{bestPerson.name}, {bestPerson.gender}, Age: {bestPerson.age}");

                        lastFaceDetectedTimestamp = Utils.GetCurrentUnixTimestampMillis();
                    }               
                }
            }); 
        }
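
        // The dot-product ranking above generalizes to any set of camera-space rays: the
        // camera looks down -UnitZ in this right-handed convention, so Dot(direction, -UnitZ)
        // is the cosine of the angle between a detection and the view center (1.0 = dead
        // center). A minimal standalone sketch, assuming pre-normalized direction vectors
        // (the helper name is illustrative, not part of the sample):
        private static int IndexClosestToGaze(System.Collections.Generic.IReadOnlyList<System.Numerics.Vector3> directions)
        {
            int bestIndex = -1;
            float bestDot = -1.0f;
            for (int i = 0; i < directions.Count; i++)
            {
                // Cosine of the angle to the optical axis; larger means closer to the image center.
                float dot = System.Numerics.Vector3.Dot(directions[i], -System.Numerics.Vector3.UnitZ);
                if (dot > bestDot)
                {
                    bestDot = dot;
                    bestIndex = i;
                }
            }
            return bestIndex;
        }
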
Example #3
    public async Task EvaluateVideoFrameAsync(VideoFrame frame, VideoMediaFrame videoMediaFrame, SpatialCoordinateSystem worldCoordinateSystem, SpatialCoordinateSystem cameraCoordinateSystem)
    {
        if (frame != null)
        {
            try
            {
                TimeRecorder.Restart();

                // A matrix to transform the camera coordinate system to the world coordinate system.
                // TryGetTransformTo returns null when the camera cannot be located, so bail out instead of force-casting.
                Matrix4x4? cameraToWorldNullable = cameraCoordinateSystem.TryGetTransformTo(worldCoordinateSystem);
                if (cameraToWorldNullable == null)
                {
                    return;
                }
                Matrix4x4 cameraToWorld = cameraToWorldNullable.Value;

                // Internal orientation (intrinsics) of the camera.
                CameraIntrinsics cameraIntrinsics = videoMediaFrame.CameraIntrinsics;

                // The frame from the depth camera.
                DepthMediaFrame depthFrame = videoMediaFrame.DepthMediaFrame;

                // Not working; creating the mapper currently throws an error:
                // DepthCorrelatedCoordinateMapper depthFrameMapper = depthFrame.TryCreateCoordinateMapper(cameraIntrinsics, cameraCoordinateSystem);

                ONNXModelInput inputData = new ONNXModelInput();
                inputData.Data = frame;

                // Evaluate the ONNX model against the frame.
                var output = await Model.EvaluateAsync(inputData).ConfigureAwait(false);

                TimeRecorder.Stop();

                string timeStamp = $"({DateTime.Now})";
                // $" Evaluation took {TimeRecorder.ElapsedMilliseconds}ms\n";

                foreach (var prediction in output)
                {
                    var product = prediction.TagName;     // detected class label
                    var loss    = prediction.Probability; // confidence score in [0, 1]

                    if (loss > 0.5f)
                    {
                        float left   = prediction.BoundingBox.Left;
                        float top    = prediction.BoundingBox.Top;
                        float right  = prediction.BoundingBox.Left + prediction.BoundingBox.Width;
                        float bottom = prediction.BoundingBox.Top + prediction.BoundingBox.Height;
                        float x      = prediction.BoundingBox.Left + prediction.BoundingBox.Width / 2;
                        float y      = prediction.BoundingBox.Top + prediction.BoundingBox.Height / 2;

                        Direct3DSurfaceDescription pixelData = frame.Direct3DSurface.Description;
                        int height = pixelData.Height;
                        int width  = pixelData.Width;

                        Vector3 ImageToWorld(float X, float Y)
                        {
                            // Optionally remove lens distortion first:
                            // Point objectCenterPoint = cameraIntrinsics.UndistortPoint(new Point(x, y));

                            // Screen space -> camera space:
                            // unproject the pixel coordinate of the object center onto a plane one meter from the camera.
                            Vector2 objectCenter = cameraIntrinsics.UnprojectAtUnitDepth(new Point(X * width, Y * height));

                            // Construct a ray towards the object.
                            Vector3 vectorTowardsObject = Vector3.Normalize(new Vector3(objectCenter.X, objectCenter.Y, -1.0f));

                            // Estimate the object's distance from its apparent width
                            // (less accurate than using the depth frame).
                            // Earlier magic number: ~940 px width for an average vending machine at 2 m:
                            // float estimatedVendingMachineDepth = (0.94f / prediction.BoundingBox.Width) * 2;
                            float estimatedVendingMachineDepth = (0.3f / prediction.BoundingBox.Width) * 1;

                            // Multiply the ray towards the object by the distance to get the object's position in the camera coordinate system.
                            Vector3 vectorToObject = vectorTowardsObject * estimatedVendingMachineDepth;

                            // Camera space -> world space:
                            // transform the object position from the camera coordinate system to the world coordinate system.
                            Vector3 targetPositionInWorldSpace = Vector3.Transform(vectorToObject, cameraToWorld);

                            return targetPositionInWorldSpace;
                        }


                        Vector3 objectCenterInWorld = ImageToWorld(x, y);
                        Vector3 objectTopLeft       = ImageToWorld(left, top);
                        Vector3 objectTopRight      = ImageToWorld(right, top);
                        Vector3 objectBotLeft       = ImageToWorld(left, bottom);
                        float   widthInWorld        = Vector3.Distance(objectTopLeft, objectTopRight);
                        float   heightInWorld       = widthInWorld / (width * prediction.BoundingBox.Width) * (height * prediction.BoundingBox.Height);
                        var     lossStr             = (loss * 100.0f).ToString("#0.00") + "%";
                        // Debug alternative: lossStr = $"{prediction.BoundingBox.Width * width}x{prediction.BoundingBox.Height * height}";
                        UnityApp.StoreNetworkResult(timeStamp, product, lossStr, objectCenterInWorld.X, objectCenterInWorld.Y, objectCenterInWorld.Z, widthInWorld, heightInWorld);
                    }
                }
            }
            catch (Exception ex)
            {
                ModifyText(ex.Message);
            }
        }
    }
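
    // CustomVision-style predictions use normalized [0,1] bounding boxes, so every pixel
    // quantity above is recovered by scaling with the frame dimensions. A minimal sketch
    // of that conversion, assuming the same left/top/width/height layout as
    // prediction.BoundingBox (the helper name and tuple shape are illustrative):
    private static (float centerX, float centerY, float widthPx, float heightPx) NormalizedBoxToPixels(
        float left, float top, float boxWidth, float boxHeight, int frameWidth, int frameHeight)
    {
        // Scale the normalized box by the frame size; the center is the midpoint of the box.
        float widthPx  = boxWidth * frameWidth;
        float heightPx = boxHeight * frameHeight;
        float centerX  = (left + boxWidth / 2) * frameWidth;
        float centerY  = (top + boxHeight / 2) * frameHeight;
        return (centerX, centerY, widthPx, heightPx);
    }
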
Example #4
    private void ProcessFaces(List<BitmapBounds> faces, MediaFrameReference frame, SpatialCoordinateSystem worldCoordSystem)
    {
        VideoMediaFrameFormat   videoFormat            = frame.VideoMediaFrame.VideoFormat;
        SpatialCoordinateSystem cameraCoordinateSystem = frame.CoordinateSystem;
        CameraIntrinsics        cameraIntrinsics       = frame.VideoMediaFrame.CameraIntrinsics;

        System.Numerics.Matrix4x4? cameraToWorld = cameraCoordinateSystem.TryGetTransformTo(worldCoordSystem);

        // If we can't locate the world, this transform will be null.
        if (!cameraToWorld.HasValue)
        {
            return;
        }

        float textureWidthInv  = 1.0f / videoFormat.Width;
        float textureHeightInv = 1.0f / videoFormat.Height;

        // The face analysis returns very "tight fitting" rectangles.
        // We add some padding to make the visuals more appealing.
        int   paddingForFaceRect       = 24;
        float averageFaceWidthInMeters = 0.15f;

        float pixelsPerMeterAlongX         = cameraIntrinsics.FocalLength.X;
        float averagePixelsForFaceAt1Meter = pixelsPerMeterAlongX * averageFaceWidthInMeters;

        // Place the cube 25cm above the center of the face.
        System.Numerics.Vector3 cubeOffsetInWorldSpace = new System.Numerics.Vector3(0.0f, 0.25f, 0.0f);
        BitmapBounds            bestRect = new BitmapBounds();

        System.Numerics.Vector3 bestRectPositionInCameraSpace = System.Numerics.Vector3.Zero;
        float bestDotProduct = -1.0f;

        foreach (BitmapBounds faceRect in faces)
        {
            Point faceRectCenterPoint = new Point(faceRect.X + faceRect.Width / 2u, faceRect.Y + faceRect.Height / 2u);

            // Calculate the vector towards the face at 1 meter.
            System.Numerics.Vector2 centerOfFace = cameraIntrinsics.UnprojectAtUnitDepth(faceRectCenterPoint);

            // Add the Z component and normalize.
            System.Numerics.Vector3 vectorTowardsFace = System.Numerics.Vector3.Normalize(new System.Numerics.Vector3(centerOfFace.X, centerOfFace.Y, -1.0f));

            // Estimate depth using the ratio of the current faceRect width with the average faceRect width at 1 meter.
            float estimatedFaceDepth = averagePixelsForFaceAt1Meter / faceRect.Width;

            // Get the dot product between the vector towards the face and the gaze vector.
            // The closer the dot product is to 1.0, the closer the face is to the middle of the video image.
            float dotFaceWithGaze = System.Numerics.Vector3.Dot(vectorTowardsFace, -System.Numerics.Vector3.UnitZ);

            // Scale the vector towards the face by the estimated depth (the cube offset is added after the loop, in world space).
            System.Numerics.Vector3 targetPositionInCameraSpace = vectorTowardsFace * estimatedFaceDepth;

            // Pick the faceRect that best matches the user's gaze.
            if (dotFaceWithGaze > bestDotProduct)
            {
                bestDotProduct = dotFaceWithGaze;
                bestRect       = faceRect;
                bestRectPositionInCameraSpace = targetPositionInCameraSpace;
            }
        }

        // Transform the cube from Camera space to World space.
        System.Numerics.Vector3 bestRectPositionInWorldspace = System.Numerics.Vector3.Transform(bestRectPositionInCameraSpace, cameraToWorld.Value);

        cubeRenderer.SetTargetPosition(bestRectPositionInWorldspace + cubeOffsetInWorldSpace);

        // Texture coordinates are in [0,1], but our faceRect is in [0,Width] and [0,Height], so we normalize.
        // We also pad the faceRects to make them more visually appealing.
        float normalizedWidth  = (bestRect.Width + paddingForFaceRect * 2u) * textureWidthInv;
        float normalizedHeight = (bestRect.Height + paddingForFaceRect * 2u) * textureHeightInv;
        float normalizedX      = (bestRect.X - paddingForFaceRect) * textureWidthInv;
        float normalizedY      = (bestRect.Y - paddingForFaceRect) * textureHeightInv;
    }
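
    // Note the padded rectangle above can extend past the image border (for example,
    // normalizedX goes negative whenever bestRect.X < paddingForFaceRect). A clamped
    // variant of the same normalization, as a standalone sketch (the helper name and
    // signature are illustrative, not part of the original sample):
    private static void NormalizeFaceRect(
        BitmapBounds rect, int padding, uint imageWidth, uint imageHeight,
        out float x, out float y, out float width, out float height)
    {
        // Pad on all sides, clamp to the image bounds, then scale into [0, 1].
        float left   = Math.Max(0.0f, (float)rect.X - padding);
        float top    = Math.Max(0.0f, (float)rect.Y - padding);
        float right  = Math.Min((float)imageWidth, (float)(rect.X + rect.Width) + padding);
        float bottom = Math.Min((float)imageHeight, (float)(rect.Y + rect.Height) + padding);

        x      = left / imageWidth;
        y      = top / imageHeight;
        width  = (right - left) / imageWidth;
        height = (bottom - top) / imageHeight;
    }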