private IEnumerable<byte> ApplyTensorAsMask(byte[] data, TensorFloat tensorFloat, float cutoff)
{
    var tensorData = tensorFloat.GetAsVectorView().ToArray();
    for (int i = 0; i < data.Length; i += 4)
    {
        // One mask value per 4-byte pixel, clamped to [0, 1].
        var alpha = Math.Clamp(tensorData[i / 4], 0, 1);
        if (alpha > cutoff)
        {
            // Reverse the channel order of the pixel (e.g. BGRA -> RGBA) and scale by the mask value.
            yield return Convert.ToByte(data[i + 2] * alpha);
            yield return Convert.ToByte(data[i + 1] * alpha);
            yield return Convert.ToByte(data[i + 0] * alpha);
            yield return Convert.ToByte(alpha * 255);
        }
        else
        {
            // Below the cutoff: emit a fully transparent pixel.
            yield return 0;
            yield return 0;
            yield return 0;
            yield return 0;
        }
    }
}
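// NOTE: a hypothetical usage sketch for ApplyTensorAsMask. `bgraPixels` (the
// source image as BGRA bytes) and `maskTensor` (a segmentation mask with one
// 0..1 float per pixel) are illustrative names, not part of the sample.
byte[] maskedPixels = ApplyTensorAsMask(bgraPixels, maskTensor, cutoff: 0.5f).ToArray();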
private async Task ToImage(TensorFloat tensorFloat, Image image)
{
    // Expand each float (0..1) into a grayscale BGRA pixel.
    var pixels = tensorFloat
        .GetAsVectorView()
        .SelectMany(f =>
        {
            byte v = Convert.ToByte(f * 255);
            return new byte[] { v, v, v, 255 };
        })
        .ToArray();

    // The model output is assumed to be 320 x 320.
    var writeableBitmap = new WriteableBitmap(320, 320);

    // Open a stream to copy the image contents to the WriteableBitmap's pixel buffer
    using (Stream stream = writeableBitmap.PixelBuffer.AsStream())
    {
        await stream.WriteAsync(pixels, 0, pixels.Length);
    }

    var dest = SoftwareBitmap.CreateCopyFromBuffer(writeableBitmap.PixelBuffer, BitmapPixelFormat.Bgra8, 320, 320, BitmapAlphaMode.Premultiplied);
    var destSource = new SoftwareBitmapSource();
    await destSource.SetBitmapAsync(dest);
    image.Source = destSource;
}
/// <summary>
/// Post-processing: generates a list of bounding boxes containing the detected face info.
/// </summary>
/// <param name="boundingBoxCollection">Empty collection of FaceDetectionRectangle to store results.</param>
/// <param name="scores">Score output of the ONNX model.</param>
/// <param name="boxes">Box output of the ONNX model.</param>
/// <param name="scoreThreshold">Score threshold between 0 and 1 used to filter boxes.</param>
private static void GenerateBBox(
    ICollection<FaceDetectionRectangle> boundingBoxCollection,
    TensorFloat scores,
    TensorFloat boxes,
    float scoreThreshold)
{
    IReadOnlyList<float> vectorBoxes = boxes.GetAsVectorView();
    IList<float> boxList = vectorBoxes.ToList();
    IReadOnlyList<float> vectorScores = scores.GetAsVectorView();
    IList<float> scoreList = vectorScores.ToList();

    long numAnchors = scores.Shape[1];
    if (numAnchors <= 0)
    {
        return;
    }

    for (var i = 0; i < numAnchors; i++)
    {
        // Each anchor has two scores (background, face); keep anchors whose face score passes the threshold.
        if (scoreList[i * 2 + 1] > scoreThreshold)
        {
            var rect = new FaceDetectionRectangle
            {
                X1 = boxList[i * 4] * inputImageDataWidth,
                Y1 = boxList[i * 4 + 1] * inputImageDataHeight,
                X2 = boxList[i * 4 + 2] * inputImageDataWidth,
                Y2 = boxList[i * 4 + 3] * inputImageDataHeight,
                Score = Clip(scoreList[i * 2 + 1], 0, 1)
            };
            boundingBoxCollection.Add(rect);
        }
    }
}
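// NOTE: a minimal sketch of the Clip helper referenced in GenerateBBox; the
// original implementation is not shown, so treat this as an assumption.
private static float Clip(float value, float min, float max)
{
    return Math.Min(Math.Max(value, min), max);
}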
private async Task LoadAndEvaluateModelAsync(VideoFrame _inputFrame, string _modelFileName)
{
    LearningModelBinding _binding = null;
    VideoFrame _outputFrame = null;
    LearningModelSession _session;
    try
    {
        // Load and create the model
        if (_model == null)
        {
            var modelFile = await StorageFile.GetFileFromApplicationUriAsync(new Uri($"ms-appx:///{_modelFileName}"));
            _model = await LearningModel.LoadFromStorageFileAsync(modelFile);
        }

        // Create the evaluation session with the model
        _session = new LearningModelSession(_model);

        // Get the input and output features of the model
        var inputFeatures = _model.InputFeatures.ToList();
        var outputFeatures = _model.OutputFeatures.ToList();

        // Create the binding, then bind the input/output features
        _binding = new LearningModelBinding(_session);
        _inputImageDescriptor = inputFeatures.FirstOrDefault(feature => feature.Kind == LearningModelFeatureKind.Tensor) as TensorFeatureDescriptor;
        _outputTensorDescriptor = outputFeatures.FirstOrDefault(feature => feature.Kind == LearningModelFeatureKind.Tensor) as TensorFeatureDescriptor;

        TensorFloat outputTensor = TensorFloat.Create(_outputTensorDescriptor.Shape);
        ImageFeatureValue imageTensor = ImageFeatureValue.CreateFromVideoFrame(_inputFrame);

        // Bind inputs + outputs
        _binding.Bind(_inputImageDescriptor.Name, imageTensor);
        _binding.Bind(_outputTensorDescriptor.Name, outputTensor);

        // Evaluate and get the results
        var results = await _session.EvaluateAsync(_binding, "test");
        Debug.WriteLine("ResultsEvaluated: " + results.ToString());

        var outputTensorList = outputTensor.GetAsVectorView();
        var resultsList = new List<float>(outputTensorList.Count);
        for (int i = 0; i < outputTensorList.Count; i++)
        {
            resultsList.Add(outputTensorList[i]);
        }
    }
    catch (Exception ex)
    {
        Debug.WriteLine($"error: {ex.Message}");
        _model = null;
    }
}
private async Task<List<float>> EvaluateFrame(VideoFrame frame)
{
    _binding.Clear();
    _binding.Bind("input_1:0", frame);
    var results = await _session.EvaluateAsync(_binding, "");

    TensorFloat result = results.Outputs["Identity:0"] as TensorFloat;
    var shape = result.Shape;
    var data = result.GetAsVectorView();
    return data.ToList();
}
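// NOTE: a minimal sketch of the _session/_binding setup that EvaluateFrame
// assumes. The model file path is an assumption; the tensor names
// ("input_1:0", "Identity:0") come from the snippet above.
private LearningModelSession _session;
private LearningModelBinding _binding;

private async Task InitModelAsync()
{
    // Load the ONNX model packaged with the app (path is illustrative).
    var modelFile = await StorageFile.GetFileFromApplicationUriAsync(new Uri("ms-appx:///Assets/model.onnx"));
    var model = await LearningModel.LoadFromStorageFileAsync(modelFile);
    _session = new LearningModelSession(model);
    _binding = new LearningModelBinding(_session);
}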
internal async Task<List<DetectionResult>> EvaluateFrame(VideoFrame frame)
{
    _binding.Clear();
    _binding.Bind("input_1:0", frame);
    var results = await _session.EvaluateAsync(_binding, "");

    TensorFloat result = results.Outputs["Identity:0"] as TensorFloat;
    var shape = result.Shape;
    var data = result.GetAsVectorView();
    var detections = ParseResult(data.ToArray());

    // Sort by probability, then apply non-maximum suppression.
    Comparer cp = new Comparer();
    detections.Sort(cp);
    return NMS(detections);
}
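// NOTE: Comparer and NMS are referenced above but not defined in this snippet.
// Below is a minimal sketch that assumes DetectionResult exposes a `prob`
// score and a `bbox` as [top, left, bottom, right] floats; the real type may
// differ, so treat the member names as illustrative.
internal class Comparer : IComparer<DetectionResult>
{
    // Sort detections by descending probability.
    public int Compare(DetectionResult a, DetectionResult b)
    {
        return b.prob.CompareTo(a.prob);
    }
}

private static float ComputeIoU(DetectionResult a, DetectionResult b)
{
    // Intersection-over-union of the two boxes.
    float top = Math.Max(a.bbox[0], b.bbox[0]);
    float left = Math.Max(a.bbox[1], b.bbox[1]);
    float bottom = Math.Min(a.bbox[2], b.bbox[2]);
    float right = Math.Min(a.bbox[3], b.bbox[3]);
    float intersection = Math.Max(0, bottom - top) * Math.Max(0, right - left);
    float areaA = (a.bbox[2] - a.bbox[0]) * (a.bbox[3] - a.bbox[1]);
    float areaB = (b.bbox[2] - b.bbox[0]) * (b.bbox[3] - b.bbox[1]);
    float union = areaA + areaB - intersection;
    return union <= 0 ? 0 : intersection / union;
}

private static List<DetectionResult> NMS(List<DetectionResult> detections, float iouThreshold = 0.45f)
{
    // Greedy non-maximum suppression: keep a detection only if it does not
    // overlap a higher-scoring kept detection by more than the threshold.
    var kept = new List<DetectionResult>();
    foreach (var candidate in detections) // assumed sorted by descending probability
    {
        if (kept.All(k => ComputeIoU(k, candidate) < iouThreshold))
        {
            kept.Add(candidate);
        }
    }
    return kept;
}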
internal String Evaluate()
{
    // The input tensor shape is [1 x 4]
    long[] shape = new long[2];
    shape[0] = 1;
    shape[1] = 4;

    // Set up the input tensor
    float[] input_data = new float[4];
    input_data[0] = _sepal_length;
    input_data[1] = _sepal_width;
    input_data[2] = _petal_length;
    input_data[3] = _petal_width;
    TensorFloat tensor_float = TensorFloat.CreateFromArray(shape, input_data);

    // Bind the tensor to "input"
    var binding = new LearningModelBinding(_session);
    binding.Bind("input", tensor_float);

    // Evaluate
    var results = _session.Evaluate(binding, "");

    // Get the results
    TensorFloat prediction = (TensorFloat)results.Outputs.First().Value;
    var prediction_data = prediction.GetAsVectorView();

    // Find the highest predicted value
    int max_index = 0;
    float max_value = 0;
    for (int i = 0; i < prediction_data.Count; i++)
    {
        var val = prediction_data.ElementAt(i);
        if (val > max_value)
        {
            max_value = val;
            max_index = i;
        }
    }

    // Return the label corresponding to the highest predicted value
    return _labels.ElementAt(max_index);
}
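// NOTE: a minimal sketch of the state Evaluate() relies on. The label order
// must match the model's output layer; the iris class names below are an
// assumption for illustration.
private LearningModelSession _session;
private readonly List<string> _labels = new List<string> { "Iris-setosa", "Iris-versicolor", "Iris-virginica" };
private float _sepal_length, _sepal_width, _petal_length, _petal_width;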
/// <summary>
/// Processes the landmark tensor and generates a list of face landmark points.
/// </summary>
/// <param name="landmarkTensors">Landmark output of the ONNX model.</param>
/// <param name="imageX">X start position of the image.</param>
/// <param name="imageY">Y start position of the image.</param>
/// <param name="imageWidth">Width of the image.</param>
/// <param name="imageHeight">Height of the image.</param>
public static FaceLandmarks Predict(TensorFloat landmarkTensors, int imageX, int imageY, int imageWidth, int imageHeight)
{
    var faceLandmarks = new FaceLandmarks();

    IReadOnlyList<float> vectorLandmarks = landmarkTensors.GetAsVectorView();
    IList<float> landmarkFloatList = vectorLandmarks.ToList();

    // Each landmark is an (x, y) pair, so there are Shape[1] / 2 landmarks.
    long numAnchors = (long)Math.Ceiling(landmarkTensors.Shape[1] * 0.5);
    for (var i = 0; i < numAnchors; i++)
    {
        var mark = new FaceLandmark
        {
            X = landmarkFloatList[i * 2] * imageWidth + imageX,
            Y = landmarkFloatList[i * 2 + 1] * imageHeight + imageY
        };
        faceLandmarks.landmarkList.Add(mark);
    }
    return faceLandmarks;
}
private async void Current_SoftwareBitmapFrameCaptured(object sender, SoftwareBitmapEventArgs e)
{
    Debug.WriteLine("FrameCaptured");
    Debug.WriteLine($"Frame evaluation started {DateTime.Now}");
    if (e.SoftwareBitmap != null)
    {
        BitmapPixelFormat bpf = e.SoftwareBitmap.BitmapPixelFormat;
        var uncroppedBitmap = SoftwareBitmap.Convert(e.SoftwareBitmap, BitmapPixelFormat.Nv12);
        var faces = await _faceDetector.DetectFacesAsync(uncroppedBitmap);
        if (faces.Count > 0)
        {
            // Crop the image to focus on the face portion
            var faceBox = faces[0].FaceBox;
            VideoFrame inputFrame = VideoFrame.CreateWithSoftwareBitmap(e.SoftwareBitmap);
            VideoFrame tmp = new VideoFrame(
                e.SoftwareBitmap.BitmapPixelFormat,
                (int)(faceBox.Width + faceBox.Width % 2) - 2,
                (int)(faceBox.Height + faceBox.Height % 2) - 2);
            await inputFrame.CopyToAsync(tmp, faceBox, null);

            // Crop the image to fit the model's input requirements
            VideoFrame croppedInputImage = new VideoFrame(
                BitmapPixelFormat.Gray8,
                (int)_inputImageDescriptor.Shape[3],
                (int)_inputImageDescriptor.Shape[2]);
            var srcBounds = GetCropBounds(
                tmp.SoftwareBitmap.PixelWidth,
                tmp.SoftwareBitmap.PixelHeight,
                croppedInputImage.SoftwareBitmap.PixelWidth,
                croppedInputImage.SoftwareBitmap.PixelHeight);
            await tmp.CopyToAsync(croppedInputImage, srcBounds, null);

            ImageFeatureValue imageTensor = ImageFeatureValue.CreateFromVideoFrame(croppedInputImage);
            _binding = new LearningModelBinding(_session);
            TensorFloat outputTensor = TensorFloat.Create(_outputTensorDescriptor.Shape);

            // Bind inputs + outputs
            _binding.Bind(_inputImageDescriptor.Name, imageTensor);
            _binding.Bind(_outputTensorDescriptor.Name, outputTensor);

            // Evaluate results (Guid.NewGuid() supplies a unique correlation id; new Guid() is always the empty GUID)
            var results = await _session.EvaluateAsync(_binding, Guid.NewGuid().ToString());
            Debug.WriteLine("ResultsEvaluated: " + results.ToString());

            var outputTensorList = outputTensor.GetAsVectorView();
            var resultsList = new List<float>(outputTensorList.Count);
            for (int i = 0; i < outputTensorList.Count; i++)
            {
                resultsList.Add(outputTensorList[i]);
            }
            var softMaxOutputs = SoftMax(resultsList);

            double maxProb = 0;
            int maxIndex = 0;
            // Comb through the evaluation results
            for (int i = 0; i < Constants.POTENTIAL_EMOJI_NAME_LIST.Count(); i++)
            {
                // Record the dominant emotion probability & its location
                if (softMaxOutputs[i] > maxProb)
                {
                    maxIndex = i;
                    maxProb = softMaxOutputs[i];
                }

                // For evaluations run on the EmotionPage, record info about the single specific emotion of interest
                if (CurrentEmojis._currentEmoji != null && Constants.POTENTIAL_EMOJI_NAME_LIST[i].Equals(CurrentEmojis._currentEmoji.Name))
                {
                    SoftwareBitmap potentialBestPic;
                    try
                    {
                        potentialBestPic = SoftwareBitmap.Convert(uncroppedBitmap, BitmapPixelFormat.Bgra8);
                    }
                    catch (Exception ex)
                    {
                        Debug.WriteLine($"Error converting SoftwareBitmap. Details: {ex.Message}. Attempting to continue...");
                        return;
                    }

                    await Windows.ApplicationModel.Core.CoreApplication.MainView.CoreWindow.Dispatcher.RunAsync(
                        CoreDispatcherPriority.Normal,
                        async () =>
                        {
                            // Give the user immediate visual feedback by updating the success gauge
                            ScoreUpdated?.Invoke(this, new EmotionPageGaugeScoreEventArgs() { Score = softMaxOutputs[i] });

                            // Save the pic for each emotion no matter how bad it is (and record its associated info)
                            double bestScore = CurrentEmojis._emojis.Emojis[CurrentEmojis._currentEmojiIndex].BestScore;
                            if (softMaxOutputs[i] > bestScore)
                            {
                                CurrentEmojis._emojis.Emojis[CurrentEmojis._currentEmojiIndex].BestScore = softMaxOutputs[i];
                                var source = new SoftwareBitmapSource();
                                await source.SetBitmapAsync(potentialBestPic);

                                // Create a format of potentialBestPic that can be displayed in a gif later
                                SoftwareBitmap tmpBitmap = potentialBestPic;
                                WriteableBitmap wb = new WriteableBitmap(tmpBitmap.PixelWidth, tmpBitmap.PixelHeight);
                                tmpBitmap.CopyToBuffer(wb.PixelBuffer);

                                CurrentEmojis._emojis.Emojis[CurrentEmojis._currentEmojiIndex].BestPic = source;
                                CurrentEmojis._emojis.Emojis[CurrentEmojis._currentEmojiIndex].ShowOopsIcon = false;
                                CurrentEmojis._emojis.Emojis[CurrentEmojis._currentEmojiIndex].BestPicWB = wb;
                            }
                        });
                }
            }

            Debug.WriteLine($"Probability = {maxProb}, Threshold set to = {Constants.CLASSIFICATION_CERTAINTY_THRESHOLD}, Emotion = {Constants.POTENTIAL_EMOJI_NAME_LIST[maxIndex]}");

            // For evaluations run on the MainPage, update the emoji carousel
            if (maxProb >= Constants.CLASSIFICATION_CERTAINTY_THRESHOLD)
            {
                Debug.WriteLine("first page emoji should start to update");
                IntelligenceServiceEmotionClassified?.Invoke(this, new ClassifiedEmojiEventArgs(CurrentEmojis._emojis.Emojis[maxIndex]));
            }

            // Dispose of resources
            if (e.SoftwareBitmap != null)
            {
                e.SoftwareBitmap.Dispose();
                e.SoftwareBitmap = null;
            }
        }
    }
    IntelligenceServiceProcessingCompleted?.Invoke(this, null);
    Debug.WriteLine($"Frame evaluation finished {DateTime.Now}");
}
/// <summary>
/// Extract bounding boxes and their probabilities from the prediction output.
/// </summary>
private ExtractBoxResult ExtractBoxes(TensorFloat predictionOutput, float[] anchors)
{
    var shape = predictionOutput.Shape;
    Debug.Assert(shape.Count == 4, "The model output has unexpected shape");
    Debug.Assert(shape[0] == 1, "The batch size must be 1");

    IReadOnlyList<float> outputs = predictionOutput.GetAsVectorView();

    var numAnchor = anchors.Length / 2;
    var channels = shape[1];
    var height = shape[2];
    var width = shape[3];

    Debug.Assert(channels % numAnchor == 0);
    var numClass = (channels / numAnchor) - 5;
    Debug.Assert(numClass == this.labels.Count);

    var boxes = new List<BoundingBox>();
    var probs = new List<float[]>();
    for (int gridY = 0; gridY < height; gridY++)
    {
        for (int gridX = 0; gridX < width; gridX++)
        {
            int offset = 0;
            int stride = (int)(height * width);
            int baseOffset = gridX + gridY * (int)width;

            for (int i = 0; i < numAnchor; i++)
            {
                var x = (Logistic(outputs[baseOffset + (offset++ * stride)]) + gridX) / width;
                var y = (Logistic(outputs[baseOffset + (offset++ * stride)]) + gridY) / height;
                var w = (float)Math.Exp(outputs[baseOffset + (offset++ * stride)]) * anchors[i * 2] / width;
                var h = (float)Math.Exp(outputs[baseOffset + (offset++ * stride)]) * anchors[i * 2 + 1] / height;

                x = x - (w / 2);
                y = y - (h / 2);

                var objectness = Logistic(outputs[baseOffset + (offset++ * stride)]);

                var classProbabilities = new float[numClass];
                for (int j = 0; j < numClass; j++)
                {
                    classProbabilities[j] = outputs[baseOffset + (offset++ * stride)];
                }

                var max = classProbabilities.Max();
                for (int j = 0; j < numClass; j++)
                {
                    classProbabilities[j] = (float)Math.Exp(classProbabilities[j] - max);
                }
                var sum = classProbabilities.Sum();
                for (int j = 0; j < numClass; j++)
                {
                    classProbabilities[j] *= objectness / sum;
                }

                if (classProbabilities.Max() > this.probabilityThreshold)
                {
                    boxes.Add(new BoundingBox(x, y, w, h));
                    probs.Add(classProbabilities);
                }
            }
            Debug.Assert(offset == channels);
        }
    }
    Debug.Assert(boxes.Count == probs.Count);
    return new ExtractBoxResult() { Boxes = boxes, Probs = probs };
}
private async void Current_SoftwareBitmapFrameCaptured(object sender, SoftwareBitmapEventArgs e)
{
    Debug.WriteLine("FrameCaptured");
    Debug.WriteLine($"Frame evaluation started {DateTime.Now}");
    if (e.SoftwareBitmap != null)
    {
        BitmapPixelFormat bpf = e.SoftwareBitmap.BitmapPixelFormat;
        var uncroppedBitmap = SoftwareBitmap.Convert(e.SoftwareBitmap, BitmapPixelFormat.Nv12);
        var faces = await _faceDetector.DetectFacesAsync(uncroppedBitmap);
        if (faces.Count > 0)
        {
            // Crop the image to focus on the face portion
            var faceBox = faces[0].FaceBox;
            VideoFrame inputFrame = VideoFrame.CreateWithSoftwareBitmap(e.SoftwareBitmap);
            VideoFrame tmp = new VideoFrame(
                e.SoftwareBitmap.BitmapPixelFormat,
                (int)(faceBox.Width + faceBox.Width % 2) - 2,
                (int)(faceBox.Height + faceBox.Height % 2) - 2);
            await inputFrame.CopyToAsync(tmp, faceBox, null);

            // Crop the image to fit the model's input requirements
            VideoFrame croppedInputImage = new VideoFrame(
                BitmapPixelFormat.Gray8,
                (int)_inputImageDescriptor.Shape[3],
                (int)_inputImageDescriptor.Shape[2]);
            var srcBounds = GetCropBounds(
                tmp.SoftwareBitmap.PixelWidth,
                tmp.SoftwareBitmap.PixelHeight,
                croppedInputImage.SoftwareBitmap.PixelWidth,
                croppedInputImage.SoftwareBitmap.PixelHeight);
            await tmp.CopyToAsync(croppedInputImage, srcBounds, null);

            ImageFeatureValue imageTensor = ImageFeatureValue.CreateFromVideoFrame(croppedInputImage);
            _binding = new LearningModelBinding(_session);
            TensorFloat outputTensor = TensorFloat.Create(_outputTensorDescriptor.Shape);

            // Bind inputs + outputs
            _binding.Bind(_inputImageDescriptor.Name, imageTensor);
            _binding.Bind(_outputTensorDescriptor.Name, outputTensor);

            // Evaluate results (Guid.NewGuid() supplies a unique correlation id; new Guid() is always the empty GUID)
            var results = await _session.EvaluateAsync(_binding, Guid.NewGuid().ToString());
            Debug.WriteLine("ResultsEvaluated: " + results.ToString());

            var outputTensorList = outputTensor.GetAsVectorView();
            var resultsList = new List<float>(outputTensorList.Count);
            for (int i = 0; i < outputTensorList.Count; i++)
            {
                resultsList.Add(outputTensorList[i]);
            }
            var softMaxOutputs = SoftMax(resultsList);

            double maxProb = 0;
            int maxIndex = 0;
            // Comb through the evaluation results
            for (int i = 0; i < Constants.POTENTIAL_EMOJI_NAME_LIST.Count(); i++)
            {
                // Record the dominant emotion probability & its location
                if (softMaxOutputs[i] > maxProb)
                {
                    maxIndex = i;
                    maxProb = softMaxOutputs[i];
                }
            }
            Debug.WriteLine($"Probability = {maxProb}, Threshold set to = {Constants.CLASSIFICATION_CERTAINTY_THRESHOLD}, Emotion = {Constants.POTENTIAL_EMOJI_NAME_LIST[maxIndex]}");

            // For evaluations run on the MainPage, update the emoji carousel
            if (maxProb >= Constants.CLASSIFICATION_CERTAINTY_THRESHOLD)
            {
                Debug.WriteLine("first page emoji should start to update");
                IntelligenceServiceEmotionClassified?.Invoke(this, new ClassifiedEmojiEventArgs(CurrentEmojis._emojis.Emojis[maxIndex]));
            }

            // Dispose of resources
            if (e.SoftwareBitmap != null)
            {
                e.SoftwareBitmap.Dispose();
                e.SoftwareBitmap = null;
            }
        }
    }
    IntelligenceServiceProcessingCompleted?.Invoke(this, null);
    Debug.WriteLine($"Frame evaluation finished {DateTime.Now}");
}
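// NOTE: a minimal, numerically stable sketch of the SoftMax helper used by the
// two frame-evaluation snippets above; the original implementation and its
// exact return type are not shown, so treat this as an assumption.
private static List<double> SoftMax(List<float> inputs)
{
    // Subtract the max before exponentiating to avoid overflow.
    float max = inputs.Max();
    var exps = inputs.Select(v => Math.Exp(v - max)).ToList();
    double sum = exps.Sum();
    return exps.Select(e => e / sum).ToList();
}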
unsafe private TensorFloat CustomTensorize(List<VideoFrame> frameList, List<float> mean, List<float> std, bool toRGB = false)
{
    int temp_len = frameList.Count();
    SoftwareBitmap softwareBitmap = frameList[0].SoftwareBitmap;
    Int32 height = softwareBitmap.PixelHeight;
    Int32 width = softwareBitmap.PixelWidth;
    BitmapPixelFormat pixelFormat = softwareBitmap.BitmapPixelFormat;
    Int32 channels = BitmapPixelFormat.Gray8 == pixelFormat ? 1 : 3;
    List<Int64> shape = new List<Int64>() { 1, temp_len, channels, height, width }; // B,T,C,H,W

    // The image data in the buffer is interleaved (BGRA-BGRA-BGRA...).
    // Transform it to planar order (BBBB...GGGG...RRRR...) and drop the alpha channel.
    TensorFloat tf = TensorFloat.Create(shape);
    byte* pCPUTensorbyte;
    float* pCPUTensor;
    uint uCapacity;
    var tfr = tf.CreateReference();
    var tfr2 = (IMemoryBufferByteAccess)tfr;
    tfr2.GetBuffer(out pCPUTensorbyte, out uCapacity);
    pCPUTensor = (float*)pCPUTensorbyte;

    for (Int32 t = 0; t < temp_len; t += 1)
    {
        VideoFrame frame = frameList[t];
        SoftwareBitmap softwareBitmap2 = frame.SoftwareBitmap;

        // 1. Get access to the buffer of the SoftwareBitmap
        BitmapBuffer spBitmapBuffer = softwareBitmap2.LockBuffer(BitmapBufferAccessMode.Read);
        IMemoryBufferReference reference = spBitmapBuffer.CreateReference();
        byte* pData;
        uint size;
        ((IMemoryBufferByteAccess)reference).GetBuffer(out pData, out size);

        // 2. Transform the data in the buffer to a vector of normalized floats
        var offset = (height * width * channels) * t;
        if (BitmapPixelFormat.Bgra8 == pixelFormat)
        {
            for (UInt32 i = 0; i < size; i += 4)
            {
                UInt32 pixelInd = i / 4;
                if (toRGB)
                {
                    // The model expects RGB: plane 0 is R, 1 is G, 2 is B; alpha is dropped.
                    pCPUTensor[offset + (height * width * 0) + pixelInd] = (((float)pData[i + 2]) - mean[0]) / std[0];
                    pCPUTensor[offset + (height * width * 1) + pixelInd] = (((float)pData[i + 1]) - mean[1]) / std[1];
                    pCPUTensor[offset + (height * width * 2) + pixelInd] = (((float)pData[i + 0]) - mean[2]) / std[2];
                }
                else
                {
                    // The model expects BGR: plane 0 is B, 1 is G, 2 is R; alpha is dropped.
                    pCPUTensor[offset + (height * width * 0) + pixelInd] = (((float)pData[i + 0]) - mean[0]) / std[0];
                    pCPUTensor[offset + (height * width * 1) + pixelInd] = (((float)pData[i + 1]) - mean[1]) / std[1];
                    pCPUTensor[offset + (height * width * 2) + pixelInd] = (((float)pData[i + 2]) - mean[2]) / std[2];
                }
            }
        }
        else if (BitmapPixelFormat.Rgba8 == pixelFormat)
        {
            for (UInt32 i = 0; i < size; i += 4)
            {
                UInt32 pixelInd = i / 4;
                if (toRGB)
                {
                    // The model expects RGB: copy the channels in source order; alpha is dropped.
                    pCPUTensor[offset + (height * width * 0) + pixelInd] = (((float)pData[i + 0]) - mean[0]) / std[0];
                    pCPUTensor[offset + (height * width * 1) + pixelInd] = (((float)pData[i + 1]) - mean[1]) / std[1];
                    pCPUTensor[offset + (height * width * 2) + pixelInd] = (((float)pData[i + 2]) - mean[2]) / std[2];
                }
                else
                {
                    // The model expects BGR: reverse the channel order; alpha is dropped.
                    pCPUTensor[offset + (height * width * 0) + pixelInd] = (((float)pData[i + 2]) - mean[0]) / std[0];
                    pCPUTensor[offset + (height * width * 1) + pixelInd] = (((float)pData[i + 1]) - mean[1]) / std[1];
                    pCPUTensor[offset + (height * width * 2) + pixelInd] = (((float)pData[i + 0]) - mean[2]) / std[2];
                }
            }
        }
        else if (BitmapPixelFormat.Gray8 == pixelFormat)
        {
            // Gray8 stores one byte per pixel; copy each byte directly.
            for (UInt32 i = 0; i < size; i += 1)
            {
                pCPUTensor[offset + i] = (float)pData[i];
            }
        }
    }

    // To prevent the following error, copy the data to another instance and use that as the model input:
    // "The tensor has outstanding memory buffer references that must be closed prior to evaluation!"
    TensorFloat ret = TensorFloat.CreateFromIterable(tf.Shape, tf.GetAsVectorView());
    return ret;
}
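// NOTE: CustomTensorize casts buffer references to IMemoryBufferByteAccess,
// which the app must declare itself (it requires System.Runtime.InteropServices
// and compiling with /unsafe). A typical declaration looks like the sketch
// below; the GUID is the documented COM interface id for this interface.
[ComImport]
[Guid("5B0D3235-4DBA-4D44-865E-8F1D0E4FD04D")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
unsafe interface IMemoryBufferByteAccess
{
    void GetBuffer(out byte* buffer, out uint capacity);
}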
/// <summary>
/// Extract bounding boxes and their probabilities from the prediction output.
/// </summary>
private (IList<BoundingBox>, IList<float[]>) ExtractBoxes(TensorFloat predictionOutput, float[] anchors)
{
    IReadOnlyList<float> outputs = predictionOutput.GetAsVectorView();
    var shape = predictionOutput.Shape;

    var numAnchor = anchors.Length / 2;
    var channels = shape[1];
    var height = shape[2];
    var width = shape[3];
    var numClass = (channels / numAnchor) - 5;

    var boxes = new List<BoundingBox>();
    var probs = new List<float[]>();
    for (int gridY = 0; gridY < height; gridY++)
    {
        for (int gridX = 0; gridX < width; gridX++)
        {
            int offset = 0;
            int stride = (int)(height * width);
            int baseOffset = gridX + gridY * (int)width;

            for (int i = 0; i < numAnchor; i++)
            {
                // Decode the box center, width, and height for this anchor.
                var x = (Logistic(outputs[baseOffset + (offset++ * stride)]) + gridX) / width;
                var y = (Logistic(outputs[baseOffset + (offset++ * stride)]) + gridY) / height;
                var w = (float)Math.Exp(outputs[baseOffset + (offset++ * stride)]) * anchors[i * 2] / width;
                var h = (float)Math.Exp(outputs[baseOffset + (offset++ * stride)]) * anchors[i * 2 + 1] / height;

                // Convert from center coordinates to the top-left corner.
                x = x - (w / 2);
                y = y - (h / 2);

                var objectness = Logistic(outputs[baseOffset + (offset++ * stride)]);

                var classProbabilities = new float[numClass];
                for (int j = 0; j < numClass; j++)
                {
                    classProbabilities[j] = outputs[baseOffset + (offset++ * stride)];
                }

                // Softmax over the class scores, scaled by the objectness score.
                var max = classProbabilities.Max();
                for (int j = 0; j < numClass; j++)
                {
                    classProbabilities[j] = (float)Math.Exp(classProbabilities[j] - max);
                }
                var sum = classProbabilities.Sum();
                for (int j = 0; j < numClass; j++)
                {
                    classProbabilities[j] *= objectness / sum;
                }

                if (classProbabilities.Max() > this.probabilityThreshold)
                {
                    boxes.Add(new BoundingBox(x, y, w, h));
                    probs.Add(classProbabilities);
                }
            }
        }
    }
    return (boxes, probs);
}
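// NOTE: a minimal sketch of the Logistic (sigmoid) helper used by both
// ExtractBoxes variants above; the original implementation is not shown.
private static float Logistic(float x)
{
    return 1f / (1f + (float)Math.Exp(-x));
}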