Example #1
        private IEnumerable<byte> ApplyTensorAsMask(byte[] data, TensorFloat tensorFloat, float cutoff)
        {
            var tensorData = tensorFloat.GetAsVectorView().ToArray();

            for (int i = 0; i < data.Length; i += 4)
            {
                var alpha = Math.Clamp(tensorData[i / 4], 0, 1);

                if (alpha > cutoff)
                {
                    yield return Convert.ToByte(data[i + 2] * alpha);
                    yield return Convert.ToByte(data[i + 1] * alpha);
                    yield return Convert.ToByte(data[i + 0] * alpha);
                    yield return Convert.ToByte(alpha * 255);
                }
                else
                {
                    yield return 0;
                    yield return 0;
                    yield return 0;
                    yield return 0;
                }
            }
        }
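Since the method is a lazy iterator, a caller has to materialize it. A hypothetical call site, assuming data is a BGRA8 pixel buffer with one tensor value per pixel (pixelData and maskTensor are placeholder names):

            // Hypothetical usage: bake the masked BGRA pixels into an array for display.
            byte[] maskedPixels = ApplyTensorAsMask(pixelData, maskTensor, cutoff: 0.5f).ToArray();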
Example #2
        private async Task ToImage(TensorFloat tensorFloat, Image image)
        {
            var pixels = tensorFloat
                         .GetAsVectorView()
                         .SelectMany(f =>
                         {
                             byte v = Convert.ToByte(f * 255);
                             return new byte[] { v, v, v, 255 };
                         })
                         .ToArray();

            var writeableBitmap = new WriteableBitmap(320, 320);

            // Open a stream to copy the image contents to the WriteableBitmap's pixel buffer
            using (Stream stream = writeableBitmap.PixelBuffer.AsStream())
            {
                await stream.WriteAsync(pixels, 0, pixels.Length);
            }

            var dest       = SoftwareBitmap.CreateCopyFromBuffer(writeableBitmap.PixelBuffer, BitmapPixelFormat.Bgra8, 320, 320, BitmapAlphaMode.Premultiplied);
            var destSource = new SoftwareBitmapSource();
            await destSource.SetBitmapAsync(dest);

            image.Source = destSource;
        }
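ToImage assumes the tensor is a 320x320 single-channel map with values in [0, 1]. A hypothetical call site (PreviewImage is an assumed XAML Image element):

            // Hypothetical usage: render a 1x320x320 mask tensor into a XAML Image control.
            await ToImage(outputTensor, PreviewImage);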
Example #3
        /// <summary>
        /// Post-processing.
        /// Generates a list of bounding boxes containing the detected face info.
        /// </summary>
        /// <param name="boundingBoxCollection">Empty list of FaceDetectionRectangle to store results.</param>
        /// <param name="scores">Score output of the ONNX model.</param>
        /// <param name="boxes">Box output of the ONNX model.</param>
        /// <param name="scoreThreshold">Threshold between 0 and 1 for filtering boxes by score.</param>
        private static void GenerateBBox(
            ICollection<FaceDetectionRectangle> boundingBoxCollection,
            TensorFloat scores, TensorFloat boxes,
            float scoreThreshold)
        {
            IReadOnlyList<float> vectorBoxes  = boxes.GetAsVectorView();
            IList<float>         boxList      = vectorBoxes.ToList();
            IReadOnlyList<float> vectorScores = scores.GetAsVectorView();
            IList<float>         scoreList    = vectorScores.ToList();

            long numAnchors = scores.Shape[1];

            if (numAnchors <= 0)
            {
                return;
            }

            for (var i = 0; i < numAnchors; i++)
            {
                if (scoreList[i * 2 + 1] > scoreThreshold)
                {
                    var rect = new FaceDetectionRectangle
                    {
                        X1    = boxList[i * 4] * inputImageDataWidth,
                        Y1    = boxList[i * 4 + 1] * inputImageDataHeight,
                        X2    = boxList[i * 4 + 2] * inputImageDataWidth,
                        Y2    = boxList[i * 4 + 3] * inputImageDataHeight,
                        Score = Clip(scoreList[i * 2 + 1], 0, 1)
                    };

                    boundingBoxCollection.Add(rect);
                }
            }
        }
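Clip is a one-line helper that isn't shown in this example; a minimal sketch consistent with how it is used above:

        // Assumed helper: clamps a value to the [min, max] range.
        private static float Clip(float value, float min, float max)
        {
            return Math.Max(min, Math.Min(max, value));
        }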
Example #4
        private async Task LoadAndEvaluateModelAsync(VideoFrame _inputFrame, string _modelFileName)
        {
            LearningModelBinding _binding     = null;
            VideoFrame           _outputFrame = null;
            LearningModelSession _session;

            try
            {
                // Load and create the model
                if (_model == null)
                {
                    var modelFile =
                        await StorageFile.GetFileFromApplicationUriAsync(new Uri($"ms-appx:///{_modelFileName}"));

                    _model = await LearningModel.LoadFromStorageFileAsync(modelFile);
                }

                // Create the evaluation session with the model
                _session = new LearningModelSession(_model);

                // Get input and output features of the model
                var inputFeatures  = _model.InputFeatures.ToList();
                var outputFeatures = _model.OutputFeatures.ToList();

                // Create binding and then bind input/ output features
                _binding = new LearningModelBinding(_session);

                _inputImageDescriptor =
                    inputFeatures.FirstOrDefault(feature => feature.Kind == LearningModelFeatureKind.Tensor) as TensorFeatureDescriptor;

                _outputTensorDescriptor =
                    outputFeatures.FirstOrDefault(feature => feature.Kind == LearningModelFeatureKind.Tensor) as TensorFeatureDescriptor;

                TensorFloat       outputTensor = TensorFloat.Create(_outputTensorDescriptor.Shape);
                ImageFeatureValue imageTensor  = ImageFeatureValue.CreateFromVideoFrame(_inputFrame);

                // Bind inputs + outputs
                _binding.Bind(_inputImageDescriptor.Name, imageTensor);
                _binding.Bind(_outputTensorDescriptor.Name, outputTensor);

                // Evaluate and get the results
                var results = await _session.EvaluateAsync(_binding, "test");

                Debug.WriteLine("ResultsEvaluated: " + results.ToString());

                var outputTensorList = outputTensor.GetAsVectorView();
                var resultsList      = new List<float>(outputTensorList.Count);
                for (int i = 0; i < outputTensorList.Count; i++)
                {
                    resultsList.Add(outputTensorList[i]);
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine($"error: {ex.Message}");
                _model = null;
            }
        }
Example #5
        private async Task<List<float>> EvaluateFrame(VideoFrame frame)
        {
            _binding.Clear();
            _binding.Bind("input_1:0", frame);
            var results = await _session.EvaluateAsync(_binding, "");

            TensorFloat result = results.Outputs["Identity:0"] as TensorFloat;
            var         shape  = result.Shape;
            var         data   = result.GetAsVectorView();

            return data.ToList();
        }
Example #6
        internal async Task<List<DetectionResult>> EvaluateFrame(VideoFrame frame)
        {
            _binding.Clear();
            _binding.Bind("input_1:0", frame);
            var results = await _session.EvaluateAsync(_binding, "");

            TensorFloat result     = results.Outputs["Identity:0"] as TensorFloat;
            var         shape      = result.Shape;
            var         data       = result.GetAsVectorView();
            var         detections = ParseResult(data.ToArray());

            Comparer cp = new Comparer();

            detections.Sort(cp);
            return NMS(detections);
        }
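ParseResult, Comparer, and NMS are project helpers that aren't shown here. As a rough sketch of what the NMS step typically does, assuming a hypothetical DetectionResult with X1/Y1/X2/Y2 properties and a list already sorted by descending score:

        // A minimal IoU-based non-maximum-suppression sketch (not the project's actual NMS;
        // the X1/Y1/X2/Y2 properties on DetectionResult are assumed here).
        private static List<DetectionResult> NMS(List<DetectionResult> detections, float iouThreshold = 0.45f)
        {
            var kept = new List<DetectionResult>();
            foreach (var candidate in detections) // assumed sorted by descending score
            {
                if (!kept.Any(k => IoU(k, candidate) > iouThreshold))
                {
                    kept.Add(candidate);
                }
            }
            return kept;
        }

        private static float IoU(DetectionResult a, DetectionResult b)
        {
            float interW = Math.Max(0, Math.Min(a.X2, b.X2) - Math.Max(a.X1, b.X1));
            float interH = Math.Max(0, Math.Min(a.Y2, b.Y2) - Math.Max(a.Y1, b.Y1));
            float inter  = interW * interH;
            float union  = (a.X2 - a.X1) * (a.Y2 - a.Y1) + (b.X2 - b.X1) * (b.Y2 - b.Y1) - inter;
            return union <= 0 ? 0 : inter / union;
        }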
Example #7
        internal String Evaluate()
        {
            // input tensor shape is [1x4]
            long[] shape = new long[2];
            shape[0] = 1;
            shape[1] = 4;

            // set up the input tensor
            float[] input_data = new float[4];
            input_data[0] = _sepal_length;
            input_data[1] = _sepal_width;
            input_data[2] = _petal_length;
            input_data[3] = _petal_width;
            TensorFloat tensor_float = TensorFloat.CreateFromArray(shape, input_data);

            // bind the tensor to "input"
            var binding = new LearningModelBinding(_session);

            binding.Bind("input", tensor_float);

            // evaluate
            var results = _session.Evaluate(binding, "");

            // get the results
            TensorFloat prediction      = (TensorFloat)results.Outputs.First().Value;
            var         prediction_data = prediction.GetAsVectorView();

            // find the highest predicted value
            int   max_index = 0;
            float max_value = 0;

            for (int i = 0; i < prediction_data.Count; i++)
            {
                var val = prediction_data.ElementAt(i);
                if (val > max_value)
                {
                    max_value = val;
                    max_index = i;
                }
            }

            // return the label corresponding to the highest predicted value
            return _labels.ElementAt(max_index);
        }
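A hypothetical call site for Evaluate, assuming the _sepal_*/_petal_* fields and _labels are populated as above:

            // Hypothetical usage: classify a single iris measurement.
            _sepal_length = 5.1f;
            _sepal_width  = 3.5f;
            _petal_length = 1.4f;
            _petal_width  = 0.2f;
            string label = Evaluate(); // returns whichever entry of _labels scored highest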
Example #8
        /// <summary>
        /// Processes the landmark tensor and generates the list of face landmark points.
        /// </summary>
        /// <param name="landmarkTensors">Landmark output of the ONNX model.</param>
        /// <param name="imageX">X start position of the image.</param>
        /// <param name="imageY">Y start position of the image.</param>
        /// <param name="imageWidth">Width of the image.</param>
        /// <param name="imageHeight">Height of the image.</param>
        public static FaceLandmarks Predict(TensorFloat landmarkTensors, int imageX, int imageY, int imageWidth, int imageHeight)
        {
            var faceLandmarks = new FaceLandmarks();

            IReadOnlyList<float> vectorLandmarks   = landmarkTensors.GetAsVectorView();
            IList<float>         landmarkFloatList = vectorLandmarks.ToList();
            long numAnchors = (long)Math.Ceiling(landmarkTensors.Shape[1] * 0.5);

            for (var i = 0; i < numAnchors; i++)
            {
                var mark = new FaceLandmark
                {
                    X = landmarkFloatList[i * 2] * imageWidth + imageX,
                    Y = landmarkFloatList[i * 2 + 1] * imageHeight + imageY
                };

                faceLandmarks.landmarkList.Add(mark);
            }

            return faceLandmarks;
        }
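The FaceLandmark and FaceLandmarks types are not included in this example; minimal sketches consistent with the usage above (assumed shapes, not the original definitions):

        // Assumed shapes of the landmark types used above.
        public class FaceLandmark
        {
            public float X { get; set; }
            public float Y { get; set; }
        }

        public class FaceLandmarks
        {
            public IList<FaceLandmark> landmarkList { get; } = new List<FaceLandmark>();
        }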
Example #9
        private async void Current_SoftwareBitmapFrameCaptured(object sender, SoftwareBitmapEventArgs e)
        {
            Debug.WriteLine("FrameCaptured");
            Debug.WriteLine($"Frame evaluation started {DateTime.Now}");
            if (e.SoftwareBitmap != null)
            {
                BitmapPixelFormat bpf = e.SoftwareBitmap.BitmapPixelFormat;

                var uncroppedBitmap = SoftwareBitmap.Convert(e.SoftwareBitmap, BitmapPixelFormat.Nv12);
                var faces           = await _faceDetector.DetectFacesAsync(uncroppedBitmap);

                if (faces.Count > 0)
                {
                    // Crop the image to focus on the face portion
                    var        faceBox    = faces[0].FaceBox;
                    VideoFrame inputFrame = VideoFrame.CreateWithSoftwareBitmap(e.SoftwareBitmap);
                    VideoFrame tmp        = new VideoFrame(e.SoftwareBitmap.BitmapPixelFormat, (int)(faceBox.Width + faceBox.Width % 2) - 2, (int)(faceBox.Height + faceBox.Height % 2) - 2);
                    await inputFrame.CopyToAsync(tmp, faceBox, null);

                    // Crop the image to fit the model input requirements
                    VideoFrame croppedInputImage = new VideoFrame(BitmapPixelFormat.Gray8, (int)_inputImageDescriptor.Shape[3], (int)_inputImageDescriptor.Shape[2]);
                    var        srcBounds         = GetCropBounds(
                        tmp.SoftwareBitmap.PixelWidth,
                        tmp.SoftwareBitmap.PixelHeight,
                        croppedInputImage.SoftwareBitmap.PixelWidth,
                        croppedInputImage.SoftwareBitmap.PixelHeight);
                    await tmp.CopyToAsync(croppedInputImage, srcBounds, null);

                    ImageFeatureValue imageTensor = ImageFeatureValue.CreateFromVideoFrame(croppedInputImage);

                    _binding = new LearningModelBinding(_session);

                    TensorFloat outputTensor        = TensorFloat.Create(_outputTensorDescriptor.Shape);
                    List<float> _outputVariableList = new List<float>();

                    // Bind inputs + outputs
                    _binding.Bind(_inputImageDescriptor.Name, imageTensor);
                    _binding.Bind(_outputTensorDescriptor.Name, outputTensor);

                    // Evaluate results
                    var results = await _session.EvaluateAsync(_binding, Guid.NewGuid().ToString());

                    Debug.WriteLine("ResultsEvaluated: " + results.ToString());

                    var outputTensorList = outputTensor.GetAsVectorView();
                    var resultsList      = new List<float>(outputTensorList.Count);
                    for (int i = 0; i < outputTensorList.Count; i++)
                    {
                        resultsList.Add(outputTensorList[i]);
                    }

                    var softMaxexOutputs = SoftMax(resultsList);

                    double maxProb  = 0;
                    int    maxIndex = 0;

                    // Comb through the evaluation results
                    for (int i = 0; i < Constants.POTENTIAL_EMOJI_NAME_LIST.Count(); i++)
                    {
                        // Record the dominant emotion probability & its location
                        if (softMaxexOutputs[i] > maxProb)
                        {
                            maxIndex = i;
                            maxProb  = softMaxexOutputs[i];
                        }

                        //for evaluations run on the EmotionPage, record info about single specific emotion of interest
                        if (CurrentEmojis._currentEmoji != null && Constants.POTENTIAL_EMOJI_NAME_LIST[i].Equals(CurrentEmojis._currentEmoji.Name))
                        {
                            SoftwareBitmap potentialBestPic;

                            try
                            {
                                potentialBestPic = SoftwareBitmap.Convert(uncroppedBitmap, BitmapPixelFormat.Bgra8);
                            }
                            catch (Exception ex)
                            {
                                Debug.WriteLine($"Error converting SoftwareBitmap. Details:{ex.Message}. Attempting to continue...");
                                return;
                            }

                            await Windows.ApplicationModel.Core.CoreApplication.MainView.CoreWindow.Dispatcher.RunAsync(
                                CoreDispatcherPriority.Normal,
                                async () =>
                            {
                                // Give user immediate visual feedback by updating success gauge
                                ScoreUpdated?.Invoke(this, new EmotionPageGaugeScoreEventArgs()
                                {
                                    Score = softMaxexOutputs[i]
                                });

                                // Save original pic for each emotion no matter how bad it is (and record its associated info)
                                double bestScore = CurrentEmojis._emojis.Emojis[CurrentEmojis._currentEmojiIndex].BestScore;
                                if (softMaxexOutputs[i] > bestScore)
                                {
                                    CurrentEmojis._emojis.Emojis[CurrentEmojis._currentEmojiIndex].BestScore = softMaxexOutputs[i];

                                    var source = new SoftwareBitmapSource();

                                    await source.SetBitmapAsync(potentialBestPic);

                                    // Create format of potentialBestPic to be displayed in a gif later
                                    SoftwareBitmap tmpBitmap = potentialBestPic;
                                    WriteableBitmap wb       = new WriteableBitmap(tmpBitmap.PixelWidth, tmpBitmap.PixelHeight);
                                    tmpBitmap.CopyToBuffer(wb.PixelBuffer);

                                    CurrentEmojis._emojis.Emojis[CurrentEmojis._currentEmojiIndex].BestPic      = source;
                                    CurrentEmojis._emojis.Emojis[CurrentEmojis._currentEmojiIndex].ShowOopsIcon = false;
                                    CurrentEmojis._emojis.Emojis[CurrentEmojis._currentEmojiIndex].BestPicWB    = wb;
                                }
                            }
                            );
                        }
                    }

                    Debug.WriteLine($"Probability = {maxProb}, Threshold set to = {Constants.CLASSIFICATION_CERTAINTY_THRESHOLD}, Emotion = {Constants.POTENTIAL_EMOJI_NAME_LIST[maxIndex]}");

                    // For evaluations run on the MainPage, update the emoji carousel
                    if (maxProb >= Constants.CLASSIFICATION_CERTAINTY_THRESHOLD)
                    {
                        Debug.WriteLine("first page emoji should start to update");
                        IntelligenceServiceEmotionClassified?.Invoke(this, new ClassifiedEmojiEventArgs(CurrentEmojis._emojis.Emojis[maxIndex]));
                    }

                    // Dispose of resources
                    if (e.SoftwareBitmap != null)
                    {
                        e.SoftwareBitmap.Dispose();
                        e.SoftwareBitmap = null;
                    }
                }
            }
            IntelligenceServiceProcessingCompleted?.Invoke(this, null);
            Debug.WriteLine($"Frame evaluation finished {DateTime.Now}");
        }
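SoftMax is not shown in this example; a minimal, numerically stable sketch consistent with how it is used above:

        // Minimal SoftMax sketch (assumed behavior): subtracts the max before exponentiating
        // to avoid overflow, then normalizes to a probability distribution.
        private static List<float> SoftMax(List<float> values)
        {
            float max  = values.Max();
            var   exps = values.Select(v => (float)Math.Exp(v - max)).ToList();
            float sum  = exps.Sum();
            return exps.Select(e => e / sum).ToList();
        }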
Example #10
        /// <summary>
        /// Extract bounding boxes and their probabilities from the prediction output.
        /// </summary>
        private ExtractBoxResult ExtractBoxes(TensorFloat predictionOutput, float[] anchors)
        {
            var shape = predictionOutput.Shape;

            Debug.Assert(shape.Count == 4, "The model output has unexpected shape");
            Debug.Assert(shape[0] == 1, "The batch size must be 1");

            IReadOnlyList<float> outputs = predictionOutput.GetAsVectorView();

            var numAnchor = anchors.Length / 2;
            var channels  = shape[1];
            var height    = shape[2];
            var width     = shape[3];

            Debug.Assert(channels % numAnchor == 0);
            var numClass = (channels / numAnchor) - 5;

            Debug.Assert(numClass == this.labels.Count);

            var boxes = new List<BoundingBox>();
            var probs = new List<float[]>();

            for (int gridY = 0; gridY < height; gridY++)
            {
                for (int gridX = 0; gridX < width; gridX++)
                {
                    int offset     = 0;
                    int stride     = (int)(height * width);
                    int baseOffset = gridX + gridY * (int)width;

                    for (int i = 0; i < numAnchor; i++)
                    {
                        var x = (Logistic(outputs[baseOffset + (offset++ * stride)]) + gridX) / width;
                        var y = (Logistic(outputs[baseOffset + (offset++ * stride)]) + gridY) / height;
                        var w = (float)Math.Exp(outputs[baseOffset + (offset++ * stride)]) * anchors[i * 2] / width;
                        var h = (float)Math.Exp(outputs[baseOffset + (offset++ * stride)]) * anchors[i * 2 + 1] / height;

                        x = x - (w / 2);
                        y = y - (h / 2);

                        var objectness = Logistic(outputs[baseOffset + (offset++ * stride)]);

                        var classProbabilities = new float[numClass];
                        for (int j = 0; j < numClass; j++)
                        {
                            classProbabilities[j] = outputs[baseOffset + (offset++ * stride)];
                        }
                        var max = classProbabilities.Max();
                        for (int j = 0; j < numClass; j++)
                        {
                            classProbabilities[j] = (float)Math.Exp(classProbabilities[j] - max);
                        }
                        var sum = classProbabilities.Sum();
                        for (int j = 0; j < numClass; j++)
                        {
                            classProbabilities[j] *= objectness / sum;
                        }

                        if (classProbabilities.Max() > this.probabilityThreshold)
                        {
                            boxes.Add(new BoundingBox(x, y, w, h));
                            probs.Add(classProbabilities);
                        }
                    }
                    Debug.Assert(offset == channels);
                }
            }

            Debug.Assert(boxes.Count == probs.Count);
            return new ExtractBoxResult
            {
                Boxes = boxes,
                Probs = probs
            };
        }
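Logistic is a small helper referenced above but not shown; a minimal sketch of the usual sigmoid:

        // Minimal sigmoid sketch for the Logistic helper referenced above (assumed behavior).
        private static float Logistic(float value)
        {
            return 1f / (1f + (float)Math.Exp(-value));
        }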
Example #11
        private async void Current_SoftwareBitmapFrameCaptured(object sender, SoftwareBitmapEventArgs e)
        {
            Debug.WriteLine("FrameCaptured");
            Debug.WriteLine($"Frame evaluation started {DateTime.Now}");
            if (e.SoftwareBitmap != null)
            {
                BitmapPixelFormat bpf = e.SoftwareBitmap.BitmapPixelFormat;

                var uncroppedBitmap = SoftwareBitmap.Convert(e.SoftwareBitmap, BitmapPixelFormat.Nv12);
                var faces           = await _faceDetector.DetectFacesAsync(uncroppedBitmap);

                if (faces.Count > 0)
                {
                    // Crop the image to focus on the face portion
                    var        faceBox    = faces[0].FaceBox;
                    VideoFrame inputFrame = VideoFrame.CreateWithSoftwareBitmap(e.SoftwareBitmap);
                    VideoFrame tmp        = new VideoFrame(e.SoftwareBitmap.BitmapPixelFormat, (int)(faceBox.Width + faceBox.Width % 2) - 2, (int)(faceBox.Height + faceBox.Height % 2) - 2);
                    await inputFrame.CopyToAsync(tmp, faceBox, null);

                    // Crop the image to fit the model input requirements
                    VideoFrame croppedInputImage = new VideoFrame(BitmapPixelFormat.Gray8, (int)_inputImageDescriptor.Shape[3], (int)_inputImageDescriptor.Shape[2]);
                    var        srcBounds         = GetCropBounds(
                        tmp.SoftwareBitmap.PixelWidth,
                        tmp.SoftwareBitmap.PixelHeight,
                        croppedInputImage.SoftwareBitmap.PixelWidth,
                        croppedInputImage.SoftwareBitmap.PixelHeight);
                    await tmp.CopyToAsync(croppedInputImage, srcBounds, null);

                    ImageFeatureValue imageTensor = ImageFeatureValue.CreateFromVideoFrame(croppedInputImage);

                    _binding = new LearningModelBinding(_session);

                    TensorFloat outputTensor        = TensorFloat.Create(_outputTensorDescriptor.Shape);
                    List<float> _outputVariableList = new List<float>();

                    // Bind inputs + outputs
                    _binding.Bind(_inputImageDescriptor.Name, imageTensor);
                    _binding.Bind(_outputTensorDescriptor.Name, outputTensor);

                    // Evaluate results
                    var results = await _session.EvaluateAsync(_binding, Guid.NewGuid().ToString());

                    Debug.WriteLine("ResultsEvaluated: " + results.ToString());

                    var outputTensorList = outputTensor.GetAsVectorView();
                    var resultsList      = new List<float>(outputTensorList.Count);
                    for (int i = 0; i < outputTensorList.Count; i++)
                    {
                        resultsList.Add(outputTensorList[i]);
                    }

                    var softMaxexOutputs = SoftMax(resultsList);

                    double maxProb  = 0;
                    int    maxIndex = 0;

                    // Comb through the evaluation results
                    for (int i = 0; i < Constants.POTENTIAL_EMOJI_NAME_LIST.Count(); i++)
                    {
                        // Record the dominant emotion probability & its location
                        if (softMaxexOutputs[i] > maxProb)
                        {
                            maxIndex = i;
                            maxProb  = softMaxexOutputs[i];
                        }
                    }

                    Debug.WriteLine($"Probability = {maxProb}, Threshold set to = {Constants.CLASSIFICATION_CERTAINTY_THRESHOLD}, Emotion = {Constants.POTENTIAL_EMOJI_NAME_LIST[maxIndex]}");

                    // For evaluations run on the MainPage, update the emoji carousel
                    if (maxProb >= Constants.CLASSIFICATION_CERTAINTY_THRESHOLD)
                    {
                        Debug.WriteLine("first page emoji should start to update");
                        IntelligenceServiceEmotionClassified?.Invoke(this, new ClassifiedEmojiEventArgs(CurrentEmojis._emojis.Emojis[maxIndex]));
                    }

                    // Dispose of resources
                    if (e.SoftwareBitmap != null)
                    {
                        e.SoftwareBitmap.Dispose();
                        e.SoftwareBitmap = null;
                    }
                }
            }
            IntelligenceServiceProcessingCompleted?.Invoke(this, null);
            Debug.WriteLine($"Frame evaluation finished {DateTime.Now}");
        }
Example #12
        private unsafe TensorFloat CustomTensorize(List<VideoFrame> frameList, List<float> mean, List<float> std, bool toRGB = false)
        {
            int               temp_len       = frameList.Count();
            SoftwareBitmap    softwareBitmap = frameList[0].SoftwareBitmap;
            Int32             height         = softwareBitmap.PixelHeight;
            Int32             width          = softwareBitmap.PixelWidth;
            BitmapPixelFormat pixelFormat    = softwareBitmap.BitmapPixelFormat;

            Int32 channels = BitmapPixelFormat.Gray8 == pixelFormat ? 1 : 3;

            List<Int64> shape = new List<Int64>()
            {
                1, temp_len, channels, height, width
            }; // B, T, C, H, W

            // The image data in the buffer is interleaved BGRA-BGRA-BGRA-....
            // We transform it to planar order: BBBB...GGGG...RRRR... (alpha dropped).
            TensorFloat tf = TensorFloat.Create(shape);
            byte*       pCPUTensorbyte;
            float*      pCPUTensor;
            uint        uCapacity;

            var tfr  = tf.CreateReference();
            var tfr2 = (IMemoryBufferByteAccess)tfr;

            tfr2.GetBuffer(out pCPUTensorbyte, out uCapacity);
            pCPUTensor = (float *)pCPUTensorbyte;

            for (Int32 t = 0; t < temp_len; t += 1)
            {
                VideoFrame     frame           = frameList[t];
                SoftwareBitmap softwareBitmap2 = frame.SoftwareBitmap;
                // 1. Get the access to buffer of softwarebitmap
                BitmapBuffer           spBitmapBuffer = softwareBitmap2.LockBuffer(BitmapBufferAccessMode.Read);
                IMemoryBufferReference reference      = spBitmapBuffer.CreateReference();

                byte *pData;
                uint  size;
                ((IMemoryBufferByteAccess)reference).GetBuffer(out pData, out size);

                // 2. Transform the data in buffer to a vector of float
                var offset = (height * width * channels) * t;
                if (BitmapPixelFormat.Bgra8 == pixelFormat)
                {
                    for (UInt32 i = 0; i < size; i += 4)
                    {
                        if (toRGB)
                        {
                            // toRGB: write planes in R, G, B order.
                            // In the BGRA8 source, index 0 is B, 1 is G, 2 is R, 3 is alpha (dropped).
                            UInt32 pixelInd = i / 4;
                            pCPUTensor[offset + (height * width * 0) + pixelInd] = (((float)pData[i + 2]) - mean[0]) / std[0];
                            pCPUTensor[offset + (height * width * 1) + pixelInd] = (((float)pData[i + 1]) - mean[1]) / std[1];
                            pCPUTensor[offset + (height * width * 2) + pixelInd] = (((float)pData[i + 0]) - mean[2]) / std[2];
                        }
                        else
                        {
                            // Keep the source order: planes written as B, G, R.
                            UInt32 pixelInd = i / 4;
                            pCPUTensor[offset + (height * width * 0) + pixelInd] = (((float)pData[i + 0]) - mean[0]) / std[0];
                            pCPUTensor[offset + (height * width * 1) + pixelInd] = (((float)pData[i + 1]) - mean[1]) / std[1];
                            pCPUTensor[offset + (height * width * 2) + pixelInd] = (((float)pData[i + 2]) - mean[2]) / std[2];
                        }
                    }
                }
                else if (BitmapPixelFormat.Rgba8 == pixelFormat)
                {
                    for (UInt32 i = 0; i < size; i += 4)
                    {
                        // In the RGBA8 source, index 0 is R, 1 is G, 2 is B, 3 is alpha (dropped).
                        if (toRGB)
                        {
                            // toRGB: keep the source order; planes written as R, G, B.
                            UInt32 pixelInd = i / 4;
                            pCPUTensor[offset + (height * width * 0) + pixelInd] = (((float)pData[i + 0]) - mean[0]) / std[0];
                            pCPUTensor[offset + (height * width * 1) + pixelInd] = (((float)pData[i + 1]) - mean[1]) / std[1];
                            pCPUTensor[offset + (height * width * 2) + pixelInd] = (((float)pData[i + 2]) - mean[2]) / std[2];
                        }
                        else
                        {
                            // Swap R and B: planes written as B, G, R.
                            UInt32 pixelInd = i / 4;
                            pCPUTensor[offset + (height * width * 0) + pixelInd] = (((float)pData[i + 2]) - mean[0]) / std[0];
                            pCPUTensor[offset + (height * width * 1) + pixelInd] = (((float)pData[i + 1]) - mean[1]) / std[1];
                            pCPUTensor[offset + (height * width * 2) + pixelInd] = (((float)pData[i + 0]) - mean[2]) / std[2];
                        }
                        }
                    }
                }
                else if (BitmapPixelFormat.Gray8 == pixelFormat)
                {
                    for (UInt32 i = 0; i < size; i += 4)
                    {
                        // Note: this branch still reads 4-byte BGRA pixels and collapses each
                        // to a single luminance value (ITU-R BT.709 weights).
                        UInt32 pixelInd = i / 4;
                        float  red      = (float)pData[i + 2];
                        float  green    = (float)pData[i + 1];
                        float  blue     = (float)pData[i];
                        float  gray     = 0.2126f * red + 0.7152f * green + 0.0722f * blue;
                        pCPUTensor[offset + pixelInd] = gray;
                    }
                }
            }

            // To prevent the following error, copy the data into a fresh tensor and use that
            // as the model input: "The tensor has outstanding memory buffer references
            // that must be closed prior to evaluation!"
            TensorFloat ret = TensorFloat.CreateFromIterable(
                tf.Shape,
                tf.GetAsVectorView());

            return ret;
        }
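The unsafe buffer access above goes through IMemoryBufferByteAccess, whose declaration is not included in the snippet. The standard COM interop declaration (as documented for raw access to Windows IMemoryBuffer instances) looks like this:

        // Standard COM interop declaration for raw IMemoryBuffer access
        // (requires System.Runtime.InteropServices and compiling with /unsafe).
        [ComImport]
        [Guid("5B0D3235-4DBA-4D44-865E-8F1D0E4FD04D")]
        [InterfaceType(InterfaceTypeAttribute.InterfaceIsIUnknown)]
        unsafe interface IMemoryBufferByteAccess
        {
            void GetBuffer(out byte* buffer, out uint capacity);
        }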
Example #13
        /// <summary>
        /// Extract bounding boxes and their probabilities from the prediction output.
        /// </summary>
        private (IList<BoundingBox>, IList<float[]>) ExtractBoxes(TensorFloat predictionOutput, float[] anchors)
        {
            IReadOnlyList<float> outputs = predictionOutput.GetAsVectorView();

            var shape     = predictionOutput.Shape;
            var numAnchor = anchors.Length / 2;
            var channels  = shape[1];
            var height    = shape[2];
            var width     = shape[3];
            var numClass  = (channels / numAnchor) - 5;

            var boxes = new List<BoundingBox>();
            var probs = new List<float[]>();

            for (int gridY = 0; gridY < height; gridY++)
            {
                for (int gridX = 0; gridX < width; gridX++)
                {
                    int offset     = 0;
                    int stride     = (int)(height * width);
                    int baseOffset = gridX + gridY * (int)width;

                    for (int i = 0; i < numAnchor; i++)
                    {
                        var x = (Logistic(outputs[baseOffset + (offset++ * stride)]) + gridX) / width;
                        var y = (Logistic(outputs[baseOffset + (offset++ * stride)]) + gridY) / height;
                        var w = (float)Math.Exp(outputs[baseOffset + (offset++ * stride)]) * anchors[i * 2] / width;
                        var h = (float)Math.Exp(outputs[baseOffset + (offset++ * stride)]) * anchors[i * 2 + 1] / height;

                        x = x - (w / 2);
                        y = y - (h / 2);

                        var objectness = Logistic(outputs[baseOffset + (offset++ * stride)]);

                        var classProbabilities = new float[numClass];
                        for (int j = 0; j < numClass; j++)
                        {
                            classProbabilities[j] = outputs[baseOffset + (offset++ * stride)];
                        }
                        var max = classProbabilities.Max();
                        for (int j = 0; j < numClass; j++)
                        {
                            classProbabilities[j] = (float)Math.Exp(classProbabilities[j] - max);
                        }
                        var sum = classProbabilities.Sum();
                        for (int j = 0; j < numClass; j++)
                        {
                            classProbabilities[j] *= objectness / sum;
                        }

                        if (classProbabilities.Max() > this.probabilityThreshold)
                        {
                            boxes.Add(new BoundingBox(x, y, w, h));
                            probs.Add(classProbabilities);
                        }
                    }
                }
            }

            return (boxes, probs);
        }