/// <summary>
/// Parses net output (detect) to predictions.
/// </summary>
/// <remarks>
/// Box coordinates are mapped back to the original image by removing the padding and dividing by a
/// single gain (the smaller of the x and y gains), then clipped to the image bounds.
/// Only the loop over boxes runs in parallel; the per-class work for one box is small and runs
/// sequentially on the worker that owns the box.
/// </remarks>
/// <param name="output">
/// Network output indexed as <c>[0, box, value]</c>: values 0-3 are center x, center y, width and height,
/// value 4 is the object confidence and values 5 and up are the per-class scores. For every box that
/// passes the object-confidence threshold the class scores are overwritten in place with
/// <c>obj_conf * cls_conf</c>.
/// </param>
/// <param name="image">Original image; only its width and height are read.</param>
/// <returns>
/// One prediction per (box, class) pair whose combined confidence is above <c>_model.MulConfidence</c>,
/// in no particular order.
/// </returns>
private List<YoloPrediction> ParseDetect(DenseTensor<float> output, Image image)
{
    var result = new ConcurrentBag<YoloPrediction>();

    var (w, h) = (image.Width, image.Height); // image w and h
    var (xGain, yGain) = (_model.Width / (float)w, _model.Height / (float)h); // x, y gains
    var gain = Math.Min(xGain, yGain); // gain = resized / original
    var (xPad, yPad) = ((_model.Width - w * gain) / 2, (_model.Height - h * gain) / 2); // left, top pads

    int boxCount = (int)output.Length / _model.Dimensions;

    Parallel.For(0, boxCount, i =>
    {
        float objConfidence = output[0, i, 4];

        if (objConfidence <= _model.Confidence)
        {
            return; // skip low obj_conf results
        }

        // The box is identical for every class of this row, so compute it once.
        float halfWidth = output[0, i, 2] / 2;
        float halfHeight = output[0, i, 3] / 2;

        float xMin = ((output[0, i, 0] - halfWidth) - xPad) / gain; // unpad bbox tlx to original
        float yMin = ((output[0, i, 1] - halfHeight) - yPad) / gain; // unpad bbox tly to original
        float xMax = ((output[0, i, 0] + halfWidth) - xPad) / gain; // unpad bbox brx to original
        float yMax = ((output[0, i, 1] + halfHeight) - yPad) / gain; // unpad bbox bry to original

        xMin = Clamp(xMin, 0, w); // clip bbox tlx to boundaries
        yMin = Clamp(yMin, 0, h); // clip bbox tly to boundaries
        xMax = Clamp(xMax, 0, w - 1); // clip bbox brx to boundaries
        yMax = Clamp(yMax, 0, h - 1); // clip bbox bry to boundaries

        for (int k = 5; k < _model.Dimensions; k++)
        {
            // mul_conf = obj_conf * cls_conf, written back so the tensor ends up in the same state as before
            float mulConfidence = output[0, i, k] * objConfidence;
            output[0, i, k] = mulConfidence;

            if (mulConfidence <= _model.MulConfidence)
            {
                continue; // skip low mul_conf results
            }

            YoloLabel label = _model.Labels[k - 5];

            var prediction = new YoloPrediction(label, mulConfidence)
            {
                Rectangle = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin)
            };

            result.Add(prediction);
        }
    });

    return result.ToList();
}
/// <summary>
/// Converts the raw detect-layer tensor into a list of predictions.
/// </summary>
/// <remarks>
/// Coordinates are scaled back to the original image with independent x and y gains; no padding is
/// removed and no clipping is applied. The class scores of every row that passes the object-confidence
/// threshold are overwritten in place with <c>obj_conf * cls_conf</c>.
/// </remarks>
/// <param name="output">
/// Network output indexed as <c>[0, row, value]</c>: values 0-3 are center x, center y, width and height,
/// value 4 is the object confidence, values 5 and up are class scores.
/// </param>
/// <param name="image">Original image; only its width and height are read.</param>
/// <returns>
/// One prediction per (row, class) pair whose combined confidence is above <c>_model.MulConfidence</c>,
/// in row-then-class order.
/// </returns>
private List<YoloPrediction> ParseDetect(DenseTensor<float> output, Image image)
{
    var xGain = _model.Width / (float)image.Width;
    var yGain = _model.Height / (float)image.Height;

    var predictions = new List<YoloPrediction>();
    var rowCount = output.Length / _model.Dimensions;

    for (int row = 0; row < rowCount; row++)
    {
        if (output[0, row, 4] <= _model.Confidence)
        {
            continue; // object confidence too low
        }

        // Phase 1: turn every class score into obj_conf * cls_conf (stored back into the tensor).
        for (int cls = 5; cls < _model.Dimensions; cls++)
        {
            output[0, row, cls] *= output[0, row, 4];
        }

        // Phase 2: emit a prediction for every class that clears the combined threshold.
        for (int cls = 5; cls < _model.Dimensions; cls++)
        {
            var score = output[0, row, cls];

            if (score <= _model.MulConfidence)
            {
                continue; // combined confidence too low
            }

            var centerX = output[0, row, 0];
            var centerY = output[0, row, 1];
            var halfWidth = output[0, row, 2] / 2;
            var halfHeight = output[0, row, 3] / 2;

            var left = (centerX - halfWidth) / xGain;
            var top = (centerY - halfHeight) / yGain;
            var right = (centerX + halfWidth) / xGain;
            var bottom = (centerY + halfHeight) / yGain;

            YoloLabel label = _model.Labels[cls - 5];

            predictions.Add(new YoloPrediction(label, score)
            {
                Rectangle = new RectangleF(left, top, right - left, bottom - top)
            });
        }
    }

    return predictions;
}
/// <summary>
/// Parses net outputs (sigmoid) to predictions.
/// </summary>
/// <remarks>
/// Each output tensor is read as a flat sequence laid out as <c>[anchor, row, column, value]</c>.
/// Every cell is passed through <c>Sigmoid</c>, decoded with the stride and anchor of its output,
/// mapped back to the original image (padding removed, divided by the smaller of the x and y gains)
/// and clipped to the image bounds. Only the best-scoring class of a cell is reported.
/// Work is parallelised over grid rows; the cells of one row are processed sequentially, so each
/// parallel work item is a whole row rather than a single cell.
/// </remarks>
/// <param name="output">Raw output tensors, one per entry of <c>_model.Shapes</c>, <c>_model.Strides</c> and <c>_model.Anchors</c>.</param>
/// <param name="image">Original image; only its width and height are read.</param>
/// <returns>
/// At most one prediction per (output, anchor, cell) whose object confidence and combined confidence
/// both exceed their thresholds, in no particular order.
/// </returns>
private List<YoloPrediction> ParseSigmoid(DenseTensor<float>[] output, Image image)
{
    var result = new ConcurrentBag<YoloPrediction>();

    var (w, h) = (image.Width, image.Height); // image w and h
    var (xGain, yGain) = (_model.Width / (float)w, _model.Height / (float)h); // x, y gains
    var gain = Math.Min(xGain, yGain); // gain = resized / original
    var (xPad, yPad) = ((_model.Width - w * gain) / 2, (_model.Height - h * gain) / 2); // left, top pads

    for (int i = 0; i < output.Length; i++) // iterate model outputs
    {
        int shapes = _model.Shapes[i]; // shapes per output
        var stride = _model.Strides[i];
        var anchors = _model.Anchors[i];
        DenseTensor<float> tensor = output[i];

        // Bound by the anchors of THIS output, which is also what gets indexed below.
        for (int a = 0; a < anchors.Length; a++) // iterate anchors
        {
            var anchor = anchors[a];
            int anchorOffset = shapes * shapes * a;

            Parallel.For(0, shapes, y => // iterate shapes (rows)
            {
                for (int x = 0; x < shapes; x++) // iterate shapes (columns)
                {
                    int offset = (anchorOffset + shapes * y + x) * _model.Dimensions;

                    float[] buffer = tensor.Skip(offset).Take(_model.Dimensions).Select(Sigmoid).ToArray();

                    if (buffer[4] <= _model.Confidence)
                    {
                        continue; // skip low obj_conf results
                    }

                    List<float> scores = buffer.Skip(5).Select(b => b * buffer[4]).ToList(); // mul_conf = obj_conf * cls_conf

                    float mulConfidence = scores.Max(); // max confidence score

                    if (mulConfidence <= _model.MulConfidence)
                    {
                        continue; // skip low mul_conf results
                    }

                    float rawX = (buffer[0] * 2 - 0.5f + x) * stride; // predicted bbox x (center)
                    float rawY = (buffer[1] * 2 - 0.5f + y) * stride; // predicted bbox y (center)

                    float rawW = (float)Math.Pow(buffer[2] * 2, 2) * anchor[0]; // predicted bbox w
                    float rawH = (float)Math.Pow(buffer[3] * 2, 2) * anchor[1]; // predicted bbox h

                    float[] xyxy = Xywh2xyxy(new float[] { rawX, rawY, rawW, rawH });

                    float xMin = Clamp((xyxy[0] - xPad) / gain, 0, w); // unpad, clip tlx
                    float yMin = Clamp((xyxy[1] - yPad) / gain, 0, h); // unpad, clip tly
                    float xMax = Clamp((xyxy[2] - xPad) / gain, 0, w - 1); // unpad, clip brx
                    float yMax = Clamp((xyxy[3] - yPad) / gain, 0, h - 1); // unpad, clip bry

                    YoloLabel label = _model.Labels[scores.IndexOf(mulConfidence)];

                    var prediction = new YoloPrediction(label, mulConfidence)
                    {
                        Rectangle = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin)
                    };

                    result.Add(prediction);
                }
            });
        }
    }

    return result.ToList();
}
/// <summary>
/// Parses net output to predictions.
/// </summary>
/// <remarks>
/// Each output tensor is read as a flat sequence laid out as <c>[anchor, row, column, value]</c>.
/// Every cell is passed through <c>Sigmoid</c>, decoded with the stride and anchor of its output and
/// scaled back to the original image with independent x and y gains (no padding removal, no clipping).
/// Only the best-scoring class of a cell is reported.
/// </remarks>
/// <param name="output">Raw output tensors, one per entry of <c>_shapes</c>, <c>_strides</c> and <c>_anchors</c>.</param>
/// <param name="image">Original image; only its width and height are read.</param>
/// <returns>
/// At most one prediction per (output, anchor, cell), in iteration order. A cell is kept when its object
/// confidence is not below <c>_model.Confidence</c> and its best combined confidence is above
/// <c>_model.MulConfidence</c>.
/// </returns>
private List<YoloPrediction> ParseOutput(DenseTensor<float>[] output, Image image)
{
    var result = new List<YoloPrediction>();

    var (xGain, yGain) = (_model.Width / (float)image.Width, _model.Height / (float)image.Height);

    for (int i = 0; i < output.Length; i++) // iterate outputs
    {
        int shapes = _shapes[i]; // shapes per output

        // Bound by the anchors of THIS output, which is also what gets indexed below.
        for (int a = 0; a < _anchors[i].Length; a++) // iterate anchors
        {
            for (int y = 0; y < shapes; y++) // iterate rows
            {
                for (int x = 0; x < shapes; x++) // iterate columns
                {
                    int offset = (shapes * shapes * a + shapes * y + x) * _model.Dimensions;

                    float[] buffer = output[i].Skip(offset).Take(_model.Dimensions).Select(Sigmoid).ToArray();

                    var objConfidence = buffer[4]; // extract object confidence

                    if (objConfidence < _model.Confidence) // check predicted object confidence
                    {
                        continue;
                    }

                    // The lambda parameter must not reuse the name of the column index `x`.
                    List<float> scores = buffer.Skip(5).Select(score => score * objConfidence).ToList();

                    float mulConfidence = scores.Max(); // find the best label

                    if (mulConfidence <= _model.MulConfidence) // check obj_conf * cls_conf confidence
                    {
                        continue;
                    }

                    var rawX = (buffer[0] * 2 - 0.5f + x) * _strides[i]; // predicted bbox x (center)
                    var rawY = (buffer[1] * 2 - 0.5f + y) * _strides[i]; // predicted bbox y (center)

                    var rawW = MathF.Pow(buffer[2] * 2, 2) * _anchors[i][a][0]; // predicted bbox width
                    var rawH = MathF.Pow(buffer[3] * 2, 2) * _anchors[i][a][1]; // predicted bbox height

                    float[] xyxy = Xywh2xyxy(new float[] { rawX, rawY, rawW, rawH });

                    var xMin = xyxy[0] / xGain; // final bbox tlx scaled with ratio (to original size)
                    var yMin = xyxy[1] / yGain; // final bbox tly scaled with ratio (to original size)
                    var xMax = xyxy[2] / xGain; // final bbox brx scaled with ratio (to original size)
                    var yMax = xyxy[3] / yGain; // final bbox bry scaled with ratio (to original size)

                    YoloLabel label = _model.Labels[scores.IndexOf(mulConfidence)];

                    var prediction = new YoloPrediction(label, mulConfidence)
                    {
                        Rectangle = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin)
                    };

                    result.Add(prediction);
                }
            }
        }
    }

    return result;
}