/// <summary>
/// Tells whether no score of the same keypoint channel inside a square window
/// around (heatmapX, heatmapY) is strictly greater than <paramref name="score"/>.
/// </summary>
/// <param name="keypointId">Index on the last axis of <paramref name="scores"/>.</param>
/// <param name="score">Score to compare the window against.</param>
/// <param name="heatmapY">Window centre on axis 1 of <paramref name="scores"/>.</param>
/// <param name="heatmapX">Window centre on axis 2 of <paramref name="scores"/>.</param>
/// <param name="localMaximumRadius">Half-size of the window, in cells.</param>
/// <param name="scores">Four-dimensional score array; only index 0 of axis 0 is read.</param>
/// <returns><c>true</c> when no cell in the (clamped) window exceeds <paramref name="score"/>.</returns>
bool ScoreIsMaximumInLocalWindow(
    int keypointId, float score, int heatmapY, int heatmapX,
    int localMaximumRadius, float[,,,] scores)
{
    int rowCount = scores.GetLength(1);
    int columnCount = scores.GetLength(2);

    // Clamp the window to the array so cells near the border are handled too.
    int firstRow = Mathf.Max(heatmapY - localMaximumRadius, 0);
    int rowLimit = Mathf.Min(heatmapY + localMaximumRadius + 1, rowCount);
    int firstColumn = Mathf.Max(heatmapX - localMaximumRadius, 0);
    int columnLimit = Mathf.Min(heatmapX + localMaximumRadius + 1, columnCount);

    for (int row = firstRow; row < rowLimit; row++)
    {
        for (int column = firstColumn; column < columnLimit; column++)
        {
            // A single strictly larger neighbour is enough to reject the candidate.
            if (scores[0, row, column, keypointId] > score)
            {
                return false;
            }
        }
    }

    return true;
}
/// <summary>
/// Runs the session on <paramref name="image"/> and converts the graph outputs
/// "detection_boxes", "detection_scores", "detection_classes", "num_detections"
/// and "detection_masks" into one <see cref="RecognitionResult"/> per detection.
/// </summary>
/// <param name="image">Tensor fed to the graph node "image_tensor".</param>
/// <returns>
/// One result per detection reported by "num_detections". Only batch index 0 of
/// every output tensor is read.
/// </returns>
public RecognitionResult[] Recognize(Tensor image)
{
    Output input = _graph["image_tensor"];
    Output[] outputs = new Output[]
    {
        _graph["detection_boxes"],
        _graph["detection_scores"],
        _graph["detection_classes"],
        _graph["num_detections"],
        _graph["detection_masks"]
    };

    Tensor[] finalTensor = _session.Run(new Output[] { input }, new Tensor[] { image }, outputs);
    // NOTE(review): the returned tensors are never disposed here — confirm whether the caller
    // or the session owns them.

    int numDetections = (int) (finalTensor[3].Data as float[])[0];
    float[,,] detectionBoxes = finalTensor[0].JaggedData as float[,,];
    float[,] detectionScores = finalTensor[1].JaggedData as float[,];
    float[,] detectionClasses = finalTensor[2].JaggedData as float[,];
    float[,,,] detectionMask = finalTensor[4].JaggedData as float[,,,];

    List<RecognitionResult> results = new List<RecognitionResult>();
    for (int i = 0; i < numDetections; i++)
    {
        // Copy the two trailing axes of the mask tensor for this detection.
        float[,] mask = new float[detectionMask.GetLength(2), detectionMask.GetLength(3)];
        for (int j = 0; j < mask.GetLength(0); j++)
        {
            for (int k = 0; k < mask.GetLength(1); k++)
            {
                mask[j, k] = detectionMask[0, i, j, k];
            }
        }

        RecognitionResult r = new RecognitionResult();
        r.Class = (int) detectionClasses[0, i];
        // Labels is indexed with Class - 1, i.e. class ids are treated as 1-based.
        r.Label = Labels[r.Class - 1];
        r.Probability = detectionScores[0, i];
        r.Region = new float[]
        {
            detectionBoxes[0, i, 0],
            detectionBoxes[0, i, 1],
            detectionBoxes[0, i, 2],
            detectionBoxes[0, i, 3]
        };
        r.Mask = mask;

        // Add only once the result is fully populated, so the stored entry always carries its mask.
        results.Add(r);
    }

    return results.ToArray();
}
/// <summary>
/// Writes a four-dimensional float array to the binary output: first the four
/// axis lengths (axis 0 to axis 3), then every element with the last axis
/// varying fastest.
/// </summary>
/// <param name="obj1">The array to write; must be a <c>float[,,,]</c>.</param>
/// <param name="bout1">The output; must be a <c>BOutputBin</c>.</param>
/// <param name="version">Not used by this method.</param>
public override void write(Object obj1, BOutput bout1, long version)
{
    BOutputBin binaryOutput = (BOutputBin)bout1;
    BBufferBin buffer = binaryOutput.bbuf;
    float[,,,] values = (float[,,,])obj1;

    // Header: one length per axis, outermost axis first.
    for (int axis = 0; axis < 4; axis++)
    {
        buffer.putLength(values.GetLength(axis));
    }

    // foreach over a rectangular array visits elements with the rightmost index
    // changing fastest, which is the order the nested index loops would produce.
    foreach (float value in values)
    {
        buffer.putFloat(value);
    }
}
/// <summary>
/// Writes a bracketed, space-separated text rendering of a four-dimensional
/// array to the console, followed by "\r\n". Every nesting level is wrapped in
/// square brackets and each value is followed by a single space.
/// </summary>
/// <param name="input">The array to print.</param>
public static void print(float[,,,] input)
{
    int dim0 = input.GetLength(0);
    int dim1 = input.GetLength(1);
    int dim2 = input.GetLength(2);
    int dim3 = input.GetLength(3);

    StringBuilder text = new StringBuilder();
    text.Append("[");
    for (int a = 0; a < dim0; a++)
    {
        text.Append("[");
        for (int b = 0; b < dim1; b++)
        {
            text.Append("[");
            for (int c = 0; c < dim2; c++)
            {
                text.Append("[");
                for (int d = 0; d < dim3; d++)
                {
                    // Default (current-culture) formatting of the value, then a separator.
                    text.Append(input[a, b, c, d].ToString());
                    text.Append(" ");
                }
                text.Append("]");
            }
            text.Append("]");
        }
        text.Append("]");
    }
    text.Append("]\r\n");

    Console.Write(text.ToString());
}
/// <summary>
/// Returns the four axis lengths of <paramref name="input"/> and prints them to
/// the console as "(a,b,c,d)" followed by a line break.
/// </summary>
/// <param name="input">The array whose shape is requested.</param>
/// <returns>A new four-element array holding the lengths of axes 0 to 3.</returns>
public static int[] get_shape(float[,,,] input)
{
    int[] shape = new int[4];
    for (int axis = 0; axis < shape.Length; axis++)
    {
        shape[axis] = input.GetLength(axis);
    }

    string rendered = string.Format("({0},{1},{2},{3})", shape[0], shape[1], shape[2], shape[3]);
    Console.WriteLine(rendered);

    return shape;
}
/// <summary>
/// Applies <c>DepthwiseConv2D</c> with <paramref name="kernel1"/> and an all-zero bias,
/// then <c>Conv2d</c> with <paramref name="kernel2"/> and <paramref name="bias"/> on the
/// intermediate result.
/// </summary>
/// <param name="kernel1">Kernel passed to the depthwise stage.</param>
/// <param name="kernel2">Kernel passed to the second convolution stage.</param>
/// <param name="bias">Bias passed to the second stage only.</param>
/// <param name="src">Input passed to the depthwise stage.</param>
/// <param name="IsPaddingSame">Forwarded unchanged to both stages.</param>
/// <param name="strideX">Forwarded unchanged to both stages.</param>
/// <param name="strideY">Forwarded unchanged to both stages.</param>
/// <returns>The output of the second stage.</returns>
protected float[,,] SeparableConv2D(float[,,,] kernel1, float[,,,] kernel2, float[] bias, float[,,] src, bool IsPaddingSame = false, int strideX = 1, int strideY = 1)
{
    // The depthwise stage gets a zero bias whose length is the last axis of kernel1.
    // NOTE(review): confirm DepthwiseConv2D expects a bias of this length rather than
    // one entry per output channel.
    float[] depthwiseBias = new float[kernel1.GetLength(3)];

    float[,,] depthwiseResult = DepthwiseConv2D(kernel1, depthwiseBias, src, IsPaddingSame, strideX, strideY);

    // NOTE(review): the same strides are applied in both stages, so a non-unit stride
    // is applied twice — confirm this is intended.
    return Conv2d(kernel2, bias, depthwiseResult, IsPaddingSame, strideX, strideY);
}
/// <summary>
/// Transposed convolution: every input cell scatters <c>src * kernel</c> into an output
/// of size (W * strideX, H * strideY, filterCount) that starts out filled with the bias.
/// </summary>
/// <param name="kernel">Kernel indexed as [kx, ky, inputChannel, filter].</param>
/// <param name="bias">One bias value per filter (last axis of <paramref name="kernel"/>).</param>
/// <param name="strideX">Output step per input cell along axis 0.</param>
/// <param name="strideY">Output step per input cell along axis 1.</param>
/// <param name="src">Input indexed as [x, y, channel].</param>
/// <returns>
/// The output array. Kernel taps that would land beyond the last output row or column
/// (possible whenever the kernel is larger than the stride) are dropped, so such inputs
/// no longer fail with an index error.
/// </returns>
protected float[,,] Conv2DTr(float[,,,] kernel, float[] bias, int strideX, int strideY, float[,,] src)
{
    int W = src.GetLength(0);
    int H = src.GetLength(1);
    int Deep = src.GetLength(2);
    int kernelW = kernel.GetLength(0);
    int kernelH = kernel.GetLength(1);
    int filterCount = kernel.GetLength(3);
    int resW = W * strideX;
    int resH = H * strideY;

    float[,,] res = new float[resW, resH, filterCount];

    // Start every output cell at the bias of its filter.
    for (int x = 0; x < resW; x++)
    {
        for (int y = 0; y < resH; y++)
        {
            for (int f = 0; f < filterCount; f++)
            {
                res[x, y, f] = bias[f];
            }
        }
    }

    for (int x = 0; x < W; x++)
    {
        for (int y = 0; y < H; y++)
        {
            for (int f = 0; f < filterCount; f++)
            {
                for (int d = 0; d < Deep; d++)
                {
                    float value = src[x, y, d];
                    for (int kx = 0; kx < kernelW; kx++)
                    {
                        int outX = x * strideX + kx;
                        if (outX >= resW)
                        {
                            // Remaining taps only move further out of the output.
                            break;
                        }

                        for (int ky = 0; ky < kernelH; ky++)
                        {
                            int outY = y * strideY + ky;
                            if (outY >= resH)
                            {
                                break;
                            }

                            res[outX, outY, f] += value * kernel[kx, ky, d, f];
                        }
                    }
                }
            }
        }
    }

    return res;
}
/// <summary>
/// Copies the pixel data of <paramref name="mat"/> into an Emgu image, runs the network
/// on it and draws a red rectangle on <paramref name="mat"/> for every detection whose
/// score (column 2 of the network output) exceeds <c>_probability</c>.
/// </summary>
/// <param name="mat">Source image; it is drawn on in place.</param>
/// <returns>The same <paramref name="mat"/> instance.</returns>
public Mat Detect(Mat mat)
{
    // Copies _width * _height * ElemSize() bytes, so mat is expected to hold at least that much data.
    byte[] array = new byte[_width * _height * mat.ElemSize()];
    Marshal.Copy(mat.DataStart, array, 0, array.Length);

    using (Image<Bgr, byte> image1 = new Image<Bgr, byte>(_width, _height))
    {
        image1.Bytes = array;
        var frame = image1.Mat;

        // NOTE(review): detections are scaled to a fixed 640x480 even though the image is
        // _width x _height — confirm these always match.
        int cols = 640;
        int rows = 480;

        // The blob is a native Mat; dispose it once the forward pass is finished.
        using (Emgu.CV.Mat blob = DnnInvoke.BlobFromImage(frame, 1, new System.Drawing.Size(300, 300), default(MCvScalar), false, false))
        {
            _net.SetInput(blob);
            using (Emgu.CV.Mat matt = _net.Forward())
            {
                float[,,,] flt = (float[,,,])matt.GetData();
                for (int x = 0; x < flt.GetLength(2); x++)
                {
                    if (flt[0, 0, x, 2] > _probability)
                    {
                        // Columns 3..6 are multiplied by the frame size to obtain pixel corners.
                        int X1 = Convert.ToInt32(flt[0, 0, x, 3] * cols);
                        int Y1 = Convert.ToInt32(flt[0, 0, x, 4] * rows);
                        int X2 = Convert.ToInt32(flt[0, 0, x, 5] * cols);
                        int Y2 = Convert.ToInt32(flt[0, 0, x, 6] * rows);
                        mat.Rectangle(new OpenCvSharp.Rect(X1, Y1, X2 - X1, Y2 - Y1), Scalar.Red);
                    }
                }
            }
        }
    }

    return mat;
}
/// <summary>
/// Reads the displacement stored for <paramref name="edgeId"/> at the cell addressed by
/// <paramref name="point"/>. The last axis is split in two halves: channel
/// <c>edgeId</c> becomes the Y component and channel <c>half + edgeId</c> the X component.
/// </summary>
/// <param name="edgeId">Edge index inside each half of the last axis.</param>
/// <param name="point">Cell to read; <c>Y</c> indexes axis 1 and <c>X</c> axis 2.</param>
/// <param name="displacements">Four-dimensional array; only index 0 of axis 0 is read.</param>
/// <returns>The displacement as (X, Y).</returns>
System.Drawing.PointF GetDisplacement(int edgeId, System.Drawing.Point point, float[,,,] displacements)
{
    int half = displacements.GetLength(3) / 2;
    int row = point.Y;
    int column = point.X;

    float dx = displacements[0, row, column, half + edgeId];
    float dy = displacements[0, row, column, edgeId];

    return new System.Drawing.PointF(dx, dy);
}
/// <summary>
/// Reads the displacement stored for <paramref name="edgeId"/> at the cell addressed by
/// <paramref name="point"/>. The last axis is split in two halves: channel
/// <c>edgeId</c> becomes the second vector component and channel <c>half + edgeId</c>
/// the first.
/// </summary>
/// <param name="edgeId">Edge index inside each half of the last axis.</param>
/// <param name="point">Cell to read; <c>y</c> indexes axis 1 and <c>x</c> axis 2.</param>
/// <param name="displacements">Four-dimensional array; only index 0 of axis 0 is read.</param>
/// <returns>The displacement vector.</returns>
Vector2 GetDisplacement(int edgeId, Vector2Int point, float[,,,] displacements)
{
    int half = displacements.GetLength(3) / 2;
    int row = point.y;
    int column = point.x;

    float first = displacements[0, row, column, half + edgeId];
    float second = displacements[0, row, column, edgeId];

    return new Vector2(first, second);
}
/// <summary>
/// Averages all values stored under index 0 of the first axis. Entries at other
/// indices of the first axis are not read.
/// </summary>
/// <param name="tensor">Four-dimensional input array.</param>
/// <returns>
/// Sum of the visited values divided by the product of the lengths of axes 1 to 3
/// (NaN when that product is zero).
/// </returns>
public static double mean(float[,,,] tensor)
{
    int length1 = tensor.GetLength(1);
    int length2 = tensor.GetLength(2);
    int length3 = tensor.GetLength(3);
    int elementCount = length1 * length2 * length3;

    double total = 0.0;
    for (int a = 0; a < length1; a++)
    {
        for (int b = 0; b < length2; b++)
        {
            for (int c = 0; c < length3; c++)
            {
                total = total + tensor[0, a, b, c];
            }
        }
    }

    return total / elementCount;
}
/// <summary>
/// Receives an image from the camera, runs the detection network on it, draws a box and a
/// label for every detection with a confidence of at least 0.6 and shows the frame.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Camera_ImageGrabbed(object sender, EventArgs e)
{
    camera.Retrieve(frame);

    // The blob and the network output are native matrices; release them every frame.
    using (Mat blobs = DnnInvoke.BlobFromImage(frame, 1.0, new System.Drawing.Size(detectionSize, detectionSize), swapRB: true))
    {
        net.SetInput(blobs);
        using (Mat outp = net.Forward())
        {
            float[,,,] boxes = (float[,,,])outp.GetData();
            for (int i = 0; i < boxes.GetLength(2); i++)
            {
                // The values are already floats: use them directly instead of formatting
                // them with the current culture and parsing the text back.
                int classID = Convert.ToInt32(boxes[0, 0, i, 1]);
                float confidence = boxes[0, 0, i, 2];
                if (confidence < 0.6)
                {
                    continue;
                }

                // Columns 3..6 are scaled by the display resolution to obtain pixel corners.
                float Xstart = boxes[0, 0, i, 3] * resolutionX;
                float Ystart = boxes[0, 0, i, 4] * resolutionY;
                float Xend = boxes[0, 0, i, 5] * resolutionX;
                float Yend = boxes[0, 0, i, 6] * resolutionY;

                System.Drawing.Rectangle rect = new System.Drawing.Rectangle
                {
                    X = (int)Xstart,
                    Y = (int)Ystart,
                    Height = (int)(Yend - Ystart),
                    Width = (int)(Xend - Xstart)
                };

                // labels is indexed with classID - 1, i.e. class ids are treated as 1-based.
                string label = labels[classID - 1];

                frame.Draw(rect, new Bgr(0, 255, 0), 2);
                // Filled strip above the box that serves as background for the label text.
                frame.Draw(new System.Drawing.Rectangle((int)Xstart, (int)Ystart - 35, label.Length * 18, 35), new Bgr(0, 255, 0), -1);
                CvInvoke.PutText(frame, label, new System.Drawing.Point((int)Xstart, (int)Ystart - 10), FontFace.HersheySimplex, 1.0, new MCvScalar(0, 0, 0), 2);
            }
        }
    }

    Dispatcher.Invoke(new Action(() =>
    {
        img.Source = frame.Bitmap.BitmapToBitmapSource();
    }));
}
/// <summary>
/// Scans every cell and keypoint channel of <paramref name="scores"/> and queues those
/// that are not below the threshold and pass <c>ScoreIsMaximumInLocalWindow</c>.
/// </summary>
/// <param name="scoreThreshold">Scores below this value are skipped.</param>
/// <param name="localMaximumRadius">Window radius forwarded to the local-maximum test.</param>
/// <param name="scores">Four-dimensional score array; only index 0 of axis 0 is read.</param>
/// <returns>A queue of the accepted parts, each pushed with its score as the key.</returns>
PriorityQueue<float, PartWithScore> BuildPartWithScoreQueue(
    float scoreThreshold, int localMaximumRadius, float[,,,] scores)
{
    int rowCount = scores.GetLength(1);
    int columnCount = scores.GetLength(2);
    int keypointCount = scores.GetLength(3);

    var candidates = new PriorityQueue<float, PartWithScore>();

    for (int row = 0; row < rowCount; row++)
    {
        for (int column = 0; column < columnCount; column++)
        {
            for (int keypoint = 0; keypoint < keypointCount; keypoint++)
            {
                float candidateScore = scores[0, row, column, keypoint];

                // Root candidates must reach the threshold ...
                bool belowThreshold = candidateScore < scoreThreshold;

                // ... and must be the maximum of their local window.
                if (!belowThreshold &&
                    ScoreIsMaximumInLocalWindow(keypoint, candidateScore, row, column, localMaximumRadius, scores))
                {
                    var part = new Part(column, row, keypoint);
                    candidates.Push(candidateScore, new PartWithScore(candidateScore, part));
                }
            }
        }
    }

    return candidates;
}
/// <summary>
/// Runs the face detector on <paramref name="image"/> and sorts every detection with a
/// confidence above 0.5 into one of the two supplied lists.
/// </summary>
/// <param name="image">The image to analyse.</param>
/// <param name="fullFaceRegions">Receives regions that lie completely inside the image.</param>
/// <param name="partialFaceRegions">Receives regions that extend beyond the image.</param>
public void Detect(Mat image, List<Rectangle> fullFaceRegions, List<Rectangle> partialFaceRegions)
{
    const int networkInputSize = 300;
    const float minimumConfidence = 0.5f;

    MCvScalar channelMeans = new MCvScalar(104, 177, 123);
    Size frameSize = image.Size;

    using (Mat inputBlob = DnnInvoke.BlobFromImage(
               image, 1.0, new Size(networkInputSize, networkInputSize), channelMeans, false, false))
    {
        _faceDetector.SetInput(inputBlob, "data");
    }

    using (Mat detection = _faceDetector.Forward("detection_out"))
    {
        Rectangle frameBounds = new Rectangle(Point.Empty, frameSize);
        float[,,,] rows = detection.GetData(true) as float[,,,];
        int rowCount = rows.GetLength(2);

        for (int row = 0; row < rowCount; row++)
        {
            // Column 2 holds the confidence of the detection.
            if (!(rows[0, 0, row, 2] > minimumConfidence))
            {
                continue;
            }

            // Columns 3..6 are multiplied by the image size to obtain pixel corners.
            float left = rows[0, 0, row, 3] * frameSize.Width;
            float top = rows[0, 0, row, 4] * frameSize.Height;
            float right = rows[0, 0, row, 5] * frameSize.Width;
            float bottom = rows[0, 0, row, 6] * frameSize.Height;

            Rectangle faceRegion = Rectangle.Round(RectangleF.FromLTRB(left, top, right, bottom));

            List<Rectangle> destination = frameBounds.Contains(faceRegion)
                ? fullFaceRegions
                : partialFaceRegions;
            destination.Add(faceRegion);
        }
    }
}
/// <summary>
/// Builds one pose instance starting from <paramref name="root"/>: the root keypoint is
/// placed first, then the edge list is walked backwards using
/// <paramref name="displacementsBwd"/> and forwards using
/// <paramref name="displacementsFwd"/>, filling every keypoint that is still unset.
/// </summary>
/// <param name="root">Part and score the instance is seeded with.</param>
/// <param name="scores">Score array; its last axis length is the number of keypoints.</param>
/// <param name="offsets">Offset array forwarded to the coordinate helpers.</param>
/// <param name="outputStride">Stride forwarded to the coordinate helpers.</param>
/// <param name="displacementsFwd">Displacements used on the parent-to-child pass.</param>
/// <param name="displacementsBwd">Displacements used on the child-to-parent pass.</param>
/// <returns>One keypoint slot per part; slots that were never reached keep their default value.</returns>
Keypoint[] DecodePose(PartWithScore root, float[,,,] scores, float[,,,] offsets, int outputStride, float[,,,] displacementsFwd, float[,,,] displacementsBwd)
{
    int partCount = scores.GetLength(3);
    int edgeCount = parentToChildEdges.Length;
    Keypoint[] pose = new Keypoint[partCount];

    // Seed the instance with the root part.
    var seedPart = root.part;
    var seedScore = root.score;
    var seedPosition = GetImageCoords(seedPart, outputStride, offsets);
    pose[seedPart.id] = new Keypoint(seedScore, seedPosition, partNames[seedPart.id]);

    // Backward pass: last edge first, from the child end towards the parent end.
    int backwardEdge = edgeCount;
    while (backwardEdge-- > 0)
    {
        int known = parentToChildEdges[backwardEdge];
        int missing = childToParentEdges[backwardEdge];

        // Only extend from a keypoint that is already set to one that is not.
        if (!(pose[known].score > 0.0f))
        {
            continue;
        }
        if (!(pose[missing].score == 0.0f))
        {
            continue;
        }

        pose[missing] = TraverseToTargetKeypoint(
            backwardEdge, pose[known], missing, scores, offsets, outputStride, displacementsBwd);
    }

    // Forward pass: first edge first, from the parent end towards the child end.
    for (int forwardEdge = 0; forwardEdge < edgeCount; forwardEdge++)
    {
        int known = childToParentEdges[forwardEdge];
        int missing = parentToChildEdges[forwardEdge];

        if (!(pose[known].score > 0.0f))
        {
            continue;
        }
        if (!(pose[missing].score == 0.0f))
        {
            continue;
        }

        pose[missing] = TraverseToTargetKeypoint(
            forwardEdge, pose[known], missing, scores, offsets, outputStride, displacementsFwd);
    }

    return pose;
}
/// <summary>
/// Convolves <paramref name="input"/> with <paramref name="filter"/> by unfolding both,
/// multiplying the unfolded matrices and folding the product into <paramref name="output"/>.
/// </summary>
/// <param name="input">
/// Input array; its axes must equal (inputDepth, inputHeight, inputWidth).
/// </param>
/// <param name="filter">
/// Filter array; its axes must equal (filterCount, inputDepth, kernelHeight, kernelWidth).
/// </param>
/// <param name="output">
/// Array receiving the result; its axes must equal (filterCount, outputHeight, outputWidth).
/// </param>
/// <exception cref="ArgumentException">
/// Thrown when one of the arrays does not have the expected size.
/// </exception>
public void Convolve(float[,,] input, float[,,,] filter, float[,,] output)
{
    bool inputFits = input.GetLength(0) == this.inputDepth
        && input.GetLength(1) == this.inputHeight
        && input.GetLength(2) == this.inputWidth;
    if (!inputFits)
    {
        throw new ArgumentException("Wrong input size.", nameof(input));
    }

    bool filterFits = filter.GetLength(0) == this.filterCount
        && filter.GetLength(1) == this.inputDepth
        && filter.GetLength(2) == this.kernelHeight
        && filter.GetLength(3) == this.kernelWidth;
    if (!filterFits)
    {
        throw new ArgumentException("Wrong input size.", nameof(filter));
    }

    bool outputFits = output.GetLength(0) == this.filterCount
        && output.GetLength(1) == this.outputHeight
        && output.GetLength(2) == this.outputWidth;
    if (!outputFits)
    {
        throw new ArgumentException("Wrong input size.", nameof(output));
    }

    // Unfold both operands, multiply them as plain matrices, then fold the product back.
    this.UnfoldConvolutionInput(input);
    this.UnfoldConvolutionFilter(filter);
    MatrixHelper.Multiply(this.inputUnfolded, this.filterUnfolded, this.outputUnfolded);
    this.FoldConvolutionOutput(output);
}
/// <summary>
/// Asserts that <paramref name="arrayData"/> has the shape and data type of
/// <paramref name="expectedData"/> and that every element matches within 1e-8.
/// Two NaN values count as equal; infinities count as equal only when they have the
/// same sign.
/// </summary>
/// <param name="arrayData">The array under test.</param>
/// <param name="expectedData">The expected values.</param>
internal void AssertArray(ndarray arrayData, float[,,,] expectedData)
{
    int lengthd0 = expectedData.GetLength(0);
    int lengthd1 = expectedData.GetLength(1);
    int lengthd2 = expectedData.GetLength(2);
    int lengthd3 = expectedData.GetLength(3);

    AssertShape(arrayData, lengthd0, lengthd1, lengthd2, lengthd3);
    AssertDataTypes(arrayData, expectedData);

    for (int i = 0; i < lengthd0; i++)
    {
        ndarray dim1Data = arrayData[i] as ndarray;
        for (int j = 0; j < lengthd1; j++)
        {
            ndarray dim2Data = dim1Data[j] as ndarray;
            for (int k = 0; k < lengthd2; k++)
            {
                ndarray dim3Data = dim2Data[k] as ndarray;
                for (int l = 0; l < lengthd3; l++)
                {
                    float E1 = expectedData[i, j, k, l];
                    float A1 = (float)dim3Data[l];

                    if (float.IsNaN(E1) && float.IsNaN(A1))
                    {
                        continue;
                    }

                    // Require the same infinity: the exact comparison rejects a
                    // +Infinity / -Infinity mix, which then falls through to the assert.
                    if (float.IsInfinity(E1) && E1 == A1)
                    {
                        continue;
                    }

                    Assert.AreEqual(E1, A1, 0.00000001);
                }
            }
        }
    }
}
/// <summary>
/// Searches for faces in a list of images (SSD).
/// </summary>
/// <param name="imagePaths">Paths of the image files to process.</param>
/// <returns>
/// One entry per image, in input order. Each entry holds one
/// { left, top, width, height } array per detection whose score exceeds 0.2.
/// </returns>
public List<int[][]> DetectFacesSDD(List<string> imagePaths)
{
    List<int[][]> allFaces = new List<int[][]>();
    int count = 0;

    // The network is loaded once, on first use, and shared by all images.
    Net net = null;
    try
    {
        // Search for faces in every image.
        foreach (var file in imagePaths)
        {
            List<int[]> faces = new List<int[]>();
            using (Image<Bgr, byte> image = new Image<Bgr, byte>(file))
            {
                int cols = image.Width;
                int rows = image.Height;

                if (net == null)
                {
                    net = DnnInvoke.ReadNetFromTensorflow(_modelFile, _configFile);
                }

                // The blob and the output are native matrices; release them per image.
                using (Mat blob = DnnInvoke.BlobFromImage(image.Mat, 1, new System.Drawing.Size(300, 300), default(MCvScalar), false, false))
                {
                    net.SetInput(blob);
                    using (Mat mat = net.Forward())
                    {
                        float[,,,] flt = (float[,,,])mat.GetData();
                        for (int x = 0; x < flt.GetLength(2); x++)
                        {
                            if (flt[0, 0, x, 2] > 0.2)
                            {
                                // Columns 3..6 are scaled by the image size; the far
                                // corner is converted into a width and a height.
                                int left = Convert.ToInt32(flt[0, 0, x, 3] * cols);
                                int top = Convert.ToInt32(flt[0, 0, x, 4] * rows);
                                int width = Convert.ToInt32(flt[0, 0, x, 5] * cols) - left;
                                int height = Convert.ToInt32(flt[0, 0, x, 6] * rows) - top;
                                faces.Add(new[] { left, top, width, height });
                            }
                        }
                    }
                }
            }

            allFaces.Add(faces.ToArray());

            // Progress output: zero-based index of the image just processed.
            Console.WriteLine(count);
            count++;
        }
    }
    finally
    {
        if (net != null)
        {
            net.Dispose();
        }
    }

    return allFaces;
}
/**
 * Produces the keypoint at the far end of edge `edgeId`, given that the keypoint at
 * the near end (`sourceKeypoint`) is already known. The displacement stored for the
 * edge at the source cell is added to the source position; the resulting point is
 * snapped to a heatmap cell, and the target's score and offset are read from that cell.
 */
Keypoint TraverseToTargetKeypoint(
    int edgeId, Keypoint sourceKeypoint, int targetKeypointId,
    float[,,,] scores, float[,,,] offsets, int outputStride,
    float[,,,] displacements)
{
    var rowCount = scores.GetLength(1);
    var columnCount = scores.GetLength(2);

    // Cell nearest to the source position (nearest-neighbour lookup).
    var sourceCell = GetStridedIndexNearPoint(
        sourceKeypoint.position, outputStride, rowCount, columnCount);

    // Follow the edge's displacement from the source position.
    var shift = GetDisplacement(edgeId, sourceCell, displacements);
    var shiftedPosition = AddVectors(sourceKeypoint.position, shift);

    // Cell nearest to the displaced position.
    var targetCell = GetStridedIndexNearPoint(
        shiftedPosition, outputStride, rowCount, columnCount);
    var cellY = targetCell.y;
    var cellX = targetCell.x;

    var refinement = GetOffsetPoint(cellY, cellX, targetKeypointId, offsets);
    var targetScore = scores[0, cellY, cellX, targetKeypointId];

    // Final position: cell origin in strided coordinates plus the offset read for that cell.
    var cellOrigin = new Vector2(x: cellX * outputStride, y: cellY * outputStride);
    var refinementVector = new Vector2(x: refinement.X, y: refinement.Y);
    var targetPosition = AddVectors(cellOrigin, refinementVector);

    return new Keypoint(targetScore, targetPosition, partNames[targetKeypointId]);
}
/// <summary>
/// Copies <paramref name="input"/> into <paramref name="output"/> with the two trailing
/// axes mirrored and the two leading axes swapped:
/// <c>output[j, i, H - 1 - k, W - 1 - l] = input[i, j, k, l]</c>.
/// </summary>
/// <param name="input">
/// Input matrix.
/// </param>
/// <param name="output">
/// Output matrix; its first two axis lengths must be those of the input in swapped
/// order, and its last two must equal the input's.
/// </param>
/// <exception cref="ArgumentNullException">
/// Thrown when one of the matrices is null.
/// </exception>
/// <exception cref="MatrixException">
/// Thrown when the matrix dimensions do not support this operation.
/// </exception>
public static void Flip(float[,,,] input, float[,,,] output)
{
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }

    if (output == null)
    {
        throw new ArgumentNullException(nameof(output));
    }

    int outerLength = input.GetLength(0);
    int innerLength = input.GetLength(1);
    int rowCount = input.GetLength(2);
    int columnCount = input.GetLength(3);

    bool shapesAgree = output.GetLength(1) == outerLength
        && output.GetLength(0) == innerLength
        && output.GetLength(2) == rowCount
        && output.GetLength(3) == columnCount;
    if (!shapesAgree)
    {
        throw new MatrixException("Matrices dimensions do not support this operation.");
    }

    int lastRow = rowCount - 1;
    int lastColumn = columnCount - 1;

    for (int outer = 0; outer < outerLength; outer++)
    {
        for (int inner = 0; inner < innerLength; inner++)
        {
            for (int row = 0; row < rowCount; row++)
            {
                int mirroredRow = lastRow - row;
                for (int column = 0; column < columnCount; column++)
                {
                    output[inner, outer, mirroredRow, lastColumn - column] = input[outer, inner, row, column];
                }
            }
        }
    }
}
/// <summary>
/// Receives an image from the camera, runs the detection network on it, draws a green
/// box for every detection with a score above 0.4 and shows the frame.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Camera_ImageGrabbed(object sender, EventArgs e)
{
    camera.Retrieve(frame);
    //CvInvoke.Flip(frame, frame, Emgu.CV.CvEnum.FlipType.Horizontal);

    // The blob and the network output are native matrices; release them every frame.
    using (Mat blobs = DnnInvoke.BlobFromImage(frame, 1.0, new System.Drawing.Size(detectionSize, detectionSize)))
    {
        net.SetInput(blobs);
        using (Mat detections = net.Forward())
        {
            float[,,,] detectionsArrayInFloats = (float[,,,])detections.GetData();
            for (int i = 0; i < detectionsArrayInFloats.GetLength(2); i++)
            {
                // The elements are already floats, so no conversion is needed.
                if (detectionsArrayInFloats[0, 0, i, 2] > 0.4)
                {
                    // Columns 3..6 are scaled to the detection size and then by the display rate.
                    float Xstart = detectionsArrayInFloats[0, 0, i, 3] * detectionSize * xRate;
                    float Ystart = detectionsArrayInFloats[0, 0, i, 4] * detectionSize * yRate;
                    float Xend = detectionsArrayInFloats[0, 0, i, 5] * detectionSize * xRate;
                    float Yend = detectionsArrayInFloats[0, 0, i, 6] * detectionSize * yRate;

                    System.Drawing.Rectangle rect = new System.Drawing.Rectangle
                    {
                        X = (int)Xstart,
                        Y = (int)Ystart,
                        Height = (int)(Yend - Ystart),
                        Width = (int)(Xend - Xstart)
                    };
                    frame.Draw(rect, new Bgr(0, 255, 0), 2);
                }
            }
        }
    }

    Dispatcher.Invoke(new Action(() =>
    {
        img.Source = frame.Bitmap.BitmapToBitmapSource();
    }));
}
/// <summary>
/// Runs the session on <paramref name="image"/> and converts the graph outputs
/// "detection_boxes", "detection_scores", "detection_classes", "num_detections"
/// and "detection_masks" into one <see cref="RecognitionResult"/> per detection.
/// </summary>
/// <param name="image">Tensor fed to the graph node "image_tensor".</param>
/// <returns>
/// One result per detection reported by "num_detections". Only batch index 0 of
/// every output tensor is read.
/// </returns>
public RecognitionResult[] Recognize(Tensor image)
{
    Output input = _graph["image_tensor"];
    Output[] outputs = new Output[]
    {
        _graph["detection_boxes"],
        _graph["detection_scores"],
        _graph["detection_classes"],
        _graph["num_detections"],
        _graph["detection_masks"]
    };

    Tensor[] finalTensor = _session.Run(new Output[] { input }, new Tensor[] { image }, outputs);
    // NOTE(review): the returned tensors are never disposed here — confirm whether the caller
    // or the session owns them.

    int numDetections = (int)(finalTensor[3].Data as float[])[0];
    float[,,] detectionBoxes = finalTensor[0].JaggedData as float[,,];
    float[,] detectionScores = finalTensor[1].JaggedData as float[,];
    float[,] detectionClasses = finalTensor[2].JaggedData as float[,];
    float[,,,] detectionMask = finalTensor[4].JaggedData as float[,,,];

    List<RecognitionResult> results = new List<RecognitionResult>();
    for (int i = 0; i < numDetections; i++)
    {
        // Copy the two trailing axes of the mask tensor for this detection.
        float[,] mask = new float[detectionMask.GetLength(2), detectionMask.GetLength(3)];
        for (int j = 0; j < mask.GetLength(0); j++)
        {
            for (int k = 0; k < mask.GetLength(1); k++)
            {
                mask[j, k] = detectionMask[0, i, j, k];
            }
        }

        RecognitionResult r = new RecognitionResult();
        r.Class = (int)detectionClasses[0, i];
        // Labels is indexed with Class - 1, i.e. class ids are treated as 1-based.
        r.Label = Labels[r.Class - 1];
        r.Probability = detectionScores[0, i];
        r.Region = new float[]
        {
            detectionBoxes[0, i, 0],
            detectionBoxes[0, i, 1],
            detectionBoxes[0, i, 2],
            detectionBoxes[0, i, 3]
        };
        r.Mask = mask;

        // Add only once the result is fully populated, so the stored entry always carries its mask.
        results.Add(r);
    }

    return results.ToArray();
}
/// <summary>
/// Sets up the page: labels the button and registers the handler that runs Mask R-CNN
/// on the first loaded image, draws a box, a label and a blended colour mask for each
/// detection scoring above 0.5, and then shows the image together with a timing message.
/// </summary>
public DnnPage() : base()
{
    var button = this.GetButton();
    button.Text = "Perform Mask-rcnn Detection";
    button.Clicked += OnButtonClicked;
    OnImagesLoaded += async(sender, image) =>
    {
        // Nothing to do without a first image.
        if (image == null || image[0] == null)
        {
            return;
        }
        SetMessage("Please wait...");
        SetImage(null);
        // The detection is wrapped in a task so the handler can await it.
        Task <Tuple <Mat, String, long> > t = new Task <Tuple <Mat, String, long> >(
            () =>
            {
                InitDetector();
                String msg = String.Empty;
                using (Mat blob = DnnInvoke.BlobFromImage(image[0]))
                using (VectorOfMat tensors = new VectorOfMat())
                {
                    _maskRcnnDetector.SetInput(blob, "image_tensor");
                    // Only the forward pass is timed.
                    Stopwatch watch = Stopwatch.StartNew();
                    _maskRcnnDetector.Forward(tensors, new string[] { "detection_out_final", "detection_masks" });
                    watch.Stop();
                    msg = String.Format("Mask RCNN inception completed in {0} milliseconds.", watch.ElapsedMilliseconds);
                    // tensors[0] is the "detection_out_final" output, tensors[1] the "detection_masks" output.
                    using (Mat boxes = tensors[0])
                    using (Mat masks = tensors[1])
                    {
                        System.Drawing.Size imgSize = image[0].Size;
                        float[,,,] boxesData = boxes.GetData(true) as float[, , , ];
                        int numDetections = boxesData.GetLength(2);
                        for (int i = 0; i < numDetections; i++)
                        {
                            // Column 2 is the score, column 1 the class id.
                            float score = boxesData[0, 0, i, 2];
                            if (score > 0.5)
                            {
                                int classId = (int)boxesData[0, 0, i, 1];
                                String label = _labels[classId];
                                MCvScalar color = _colors[classId];
                                // Columns 3..6 are multiplied by the image size to obtain pixel corners.
                                float left = boxesData[0, 0, i, 3] * imgSize.Width;
                                float top = boxesData[0, 0, i, 4] * imgSize.Height;
                                float right = boxesData[0, 0, i, 5] * imgSize.Width;
                                float bottom = boxesData[0, 0, i, 6] * imgSize.Height;
                                RectangleF rectF = new RectangleF(left, top, right - left, bottom - top);
                                Rectangle rect = Rectangle.Round(rectF);
                                // Clip the box to the image before using it as a sub-region.
                                // NOTE(review): a box entirely outside the image would leave an empty
                                // rect here — confirm the calls below tolerate that.
                                rect.Intersect(new Rectangle(Point.Empty, imgSize));
                                CvInvoke.Rectangle(image[0], rect, new MCvScalar(0, 0, 0, 0), 1);
                                CvInvoke.PutText(image[0], label, rect.Location, FontFace.HersheyComplex, 1.0, new MCvScalar(0, 0, 255), 2);
                                int[] masksDim = masks.SizeOfDimension;
                                // Wraps the mask of detection i / class classId in place: the header
                                // points into the data of `masks`, using its last two dimensions as size.
                                using (Mat mask = new Mat(
                                           masksDim[2],
                                           masksDim[3],
                                           DepthType.Cv32F,
                                           1,
                                           masks.GetDataPointer(i, classId),
                                           masksDim[3] * masks.ElementSize))
                                using (Mat maskLarge = new
                                       Mat())
                                using (Mat maskLargeInv = new Mat())
                                using (Mat subRegion = new Mat(image[0], rect))
                                using (Mat largeColor = new Mat(subRegion.Size, Emgu.CV.CvEnum.DepthType.Cv8U, 3))
                                {
                                    // Scale the mask to the size of the clipped box.
                                    CvInvoke.Resize(mask, maskLarge, rect.Size);
                                    // Cap the mask weights at 0.7 so the mask keeps at least 30% transparency.
                                    using (ScalarArray sa = new ScalarArray(0.7))
                                        CvInvoke.Min(sa, maskLarge, maskLarge);
                                    // Create the inverse mask (1 - mask), used as the weight of the original image.
                                    using (ScalarArray sa = new ScalarArray(1.0))
                                        CvInvoke.Subtract(sa, maskLarge, maskLargeInv);
                                    // Solid image filled with the class colour.
                                    largeColor.SetTo(color);
                                    if (subRegion.NumberOfChannels == 4)
                                    {
                                        // Four-channel image: blend on a BGR copy, then convert back to BGRA.
                                        using (Mat bgrSubRegion = new Mat())
                                        {
                                            CvInvoke.CvtColor(subRegion, bgrSubRegion, ColorConversion.Bgra2Bgr);
                                            CvInvoke.BlendLinear(largeColor, bgrSubRegion, maskLarge, maskLargeInv, bgrSubRegion);
                                            CvInvoke.CvtColor(bgrSubRegion, subRegion, ColorConversion.Bgr2Bgra);
                                        }
                                    }
                                    else
                                    {
                                        CvInvoke.BlendLinear(largeColor, subRegion, maskLarge, maskLargeInv, subRegion);
                                    }
                                }
                            }
                        }
                    }
                }
                // The third tuple item is always 0; the elapsed time is reported through msg.
                long time = 0;
                return(new Tuple <Mat, String, long>(image[0], msg, time));
            });
        t.Start();
        var result = await t;
        SetImage(t.Result.Item1);
        //String computeDevice = CvInvoke.UseOpenCL ? "OpenCL: " + Ocl.Device.Default.Name : "CPU";
        SetMessage(t.Result.Item2);
    };
}
/// <summary>
/// Detects faces in <paramref name="image"/>, fits facial landmarks to every detection
/// with a confidence above 0.5 and draws both the face boxes and the landmarks onto
/// the image.
/// </summary>
/// <param name="image">The image to analyse; it is drawn on in place.</param>
private void DetectAndRender(Mat image)
{
    const int networkInputSize = 300;
    const float minimumConfidence = 0.5f;

    MCvScalar channelMeans = new MCvScalar(104, 177, 123);
    Size frameSize = image.Size;

    using (Mat inputBlob = DnnInvoke.BlobFromImage(
               image, 1.0, new Size(networkInputSize, networkInputSize), channelMeans, false, false))
    {
        _faceDetector.SetInput(inputBlob, "data");
    }

    using (Mat detection = _faceDetector.Forward("detection_out"))
    {
        List<Rectangle> faces = new List<Rectangle>();
        float[,,,] rows = detection.GetData(true) as float[,,,];
        int rowCount = rows.GetLength(2);

        for (int row = 0; row < rowCount; row++)
        {
            // Column 2 holds the confidence of the detection.
            if (!(rows[0, 0, row, 2] > minimumConfidence))
            {
                continue;
            }

            // Columns 3..6 are multiplied by the image size to obtain pixel corners.
            float left = rows[0, 0, row, 3] * frameSize.Width;
            float top = rows[0, 0, row, 4] * frameSize.Height;
            float right = rows[0, 0, row, 5] * frameSize.Width;
            float bottom = rows[0, 0, row, 6] * frameSize.Height;

            RectangleF region = new RectangleF(left, top, right - left, bottom - top);
            faces.Add(Rectangle.Round(region));
        }

        using (VectorOfRect faceVector = new VectorOfRect(faces.ToArray()))
        using (VectorOfVectorOfPointF landmarks = new VectorOfVectorOfPointF())
        {
            _facemark.Fit(image, faceVector, landmarks);

            MCvScalar boxColor = new MCvScalar(0, 255, 0);
            for (int f = 0; f < faces.Count; f++)
            {
                CvInvoke.Rectangle(image, faces[f], boxColor);
            }

            int landmarkSetCount = landmarks.Size;
            for (int set = 0; set < landmarkSetCount; set++)
            {
                using (VectorOfPointF points = landmarks[set])
                {
                    FaceInvoke.DrawFacemarks(image, points, new MCvScalar(255, 0, 0));
                }
            }
        }
    }
}
/// <summary>
/// Recognizes the objects in the image tensor.
/// </summary>
/// <param name="image">The image tensor; its first axis is treated as the image (batch) axis.</param>
/// <returns>
/// One array of recognition results per image, in batch order. Each image's results
/// are read from that image's own slice of the output tensors.
/// </returns>
public RecognitionResult[][] Recognize(Tensor image)
{
    // Use this from command line to find out the input tensor name:
    // saved_model_cli show --dir SavedModel --tag_set serve --signature_def serving_default
    // inputs['input_tensor'] tensor_info:
    //     dtype: DT_UINT8
    //     shape: (1, -1, -1, 3)
    //     name: serving_default_input_tensor: 0
    Output input = _session.Graph["serving_default_input_tensor"];
    Output[] outputs = new Output[]
    {
        // detection_boxes
        new Output(_session.Graph["StatefulPartitionedCall"], 4),
        // detection_scores
        new Output(_session.Graph["StatefulPartitionedCall"], 8),
        // detection_classes
        new Output(_session.Graph["StatefulPartitionedCall"], 5),
        // num_detections
        new Output(_session.Graph["StatefulPartitionedCall"], 12),
        // detection_masks
        new Output(_session.Graph["StatefulPartitionedCall"], 6),
    };

    Tensor[] finalTensor = _session.Run(new Output[] { input }, new Tensor[] { image }, outputs);

    float[,,] detectionBoxes = finalTensor[0].JaggedData as float[,,];
    float[,] detectionScores = finalTensor[1].JaggedData as float[,];
    float[,] detectionClasses = finalTensor[2].JaggedData as float[,];
    float[,,,] detectionMask = finalTensor[4].JaggedData as float[,,,];
    // assumes num_detections holds one count per image — TODO confirm against the model signature
    float[] detectionCounts = finalTensor[3].Data as float[];

    int imageCount = detectionScores.GetLength(0);
    RecognitionResult[][] allResults = new RecognitionResult[imageCount][];

    for (int idx = 0; idx < imageCount; idx++)
    {
        List<RecognitionResult> results = new List<RecognitionResult>();

        // Everything below is read from slice idx, so each image gets its own detections
        // instead of a copy of the first image's.
        int numDetections = (int)detectionCounts[idx];
        for (int i = 0; i < numDetections; i++)
        {
            RecognitionResult r = new RecognitionResult();
            r.Class = (int)detectionClasses[idx, i];
            // Labels is indexed with Class - 1, i.e. class ids are treated as 1-based.
            r.Label = Labels[r.Class - 1];
            r.Probability = detectionScores[idx, i];
            r.Region = new float[]
            {
                detectionBoxes[idx, i, 0],
                detectionBoxes[idx, i, 1],
                detectionBoxes[idx, i, 2],
                detectionBoxes[idx, i, 3]
            };
            results.Add(r);

            // Copy the two trailing axes of the mask tensor for this detection.
            float[,] m = new float[detectionMask.GetLength(2), detectionMask.GetLength(3)];
            for (int j = 0; j < m.GetLength(0); j++)
            {
                for (int k = 0; k < m.GetLength(1); k++)
                {
                    m[j, k] = detectionMask[idx, i, j, k];
                }
            }
            r.Mask = m;
        }

        allResults[idx] = results.ToArray();
    }

    return allResults;
}
/// <summary>
/// Perform detection on the input image and return the results
/// </summary>
/// <param name="m">The input image</param>
/// <param name="matchScoreThreshold">A threshold used to filter boxes by score.</param>
/// <param name="nmsThreshold">A threshold used in non maximum suppression.</param>
/// <returns>The detected objects that survive non maximum suppression, each with the mask of its own detection.</returns>
public MaskedObject[] Detect(IInputArray m, float matchScoreThreshold = 0.5f, float nmsThreshold = 0.4f)
{
    using (InputArray iaM = m.GetInputArray())
    using (Mat blob = DnnInvoke.BlobFromImage(m))
    using (VectorOfMat tensors = new VectorOfMat())
    {
        _maskRcnnDetector.SetInput(blob, "image_tensor");
        _maskRcnnDetector.Forward(tensors, new string[] { "detection_out_final", "detection_masks" });

        // tensors[0] is the "detection_out_final" output, tensors[1] the "detection_masks" output.
        using (Mat boxes = tensors[0])
        using (Mat masks = tensors[1])
        {
            System.Drawing.Size imgSize = iaM.GetSize();
            float[,,,] boxesData = boxes.GetData(true) as float[,,,];
            int numDetections = boxesData.GetLength(2);

            // Parallel lists describing the candidates handed to non maximum suppression.
            List<int> classIds = new List<int>();
            List<Rectangle> regions = new List<Rectangle>();
            List<float> scores = new List<float>();
            // Row of each candidate in the network output; needed to find its mask later.
            List<int> detectionIndices = new List<int>();

            for (int i = 0; i < numDetections; i++)
            {
                int classId = (int)boxesData[0, 0, i, 1];
                if (_objectsOfInterest == null || _objectsOfInterest.Contains(_labels[classId]))
                {
                    float score = boxesData[0, 0, i, 2];
                    Rectangle rect = DetectedObject.GetRectangle(
                        boxesData[0, 0, i, 3],
                        boxesData[0, 0, i, 4],
                        boxesData[0, 0, i, 5],
                        boxesData[0, 0, i, 6],
                        imgSize.Width,
                        imgSize.Height);
                    rect.Intersect(new Rectangle(Point.Empty, imgSize));

                    regions.Add(rect);
                    scores.Add(score);
                    classIds.Add(classId);
                    detectionIndices.Add(i);
                }
            }

            int[] validIdx = DnnInvoke.NMSBoxes(regions.ToArray(), scores.ToArray(), matchScoreThreshold, nmsThreshold);
            int[] masksDim = masks.SizeOfDimension;

            List<MaskedObject> maskedObjects = new List<MaskedObject>();
            for (int i = 0; i < validIdx.Length; i++)
            {
                int idx = validIdx[i];
                int classId = classIds[idx];
                Rectangle rect = regions[idx];
                float score = scores[idx];

                // The mask is addressed by the detection's row in the network output, not by
                // the position in validIdx: filtering and suppression both change positions.
                int detectionIndex = detectionIndices[idx];

                using (Mat mask = new Mat(
                           masksDim[2],
                           masksDim[3],
                           DepthType.Cv32F,
                           1,
                           masks.GetDataPointer(detectionIndex, classId),
                           masksDim[3] * masks.ElementSize))
                {
                    // NOTE(review): `mask` only wraps memory owned by `masks` and is disposed
                    // right after this call — confirm MaskedObject copies the data.
                    MaskedObject mo = new MaskedObject(classId, _labels[classId], score, rect, mask);
                    maskedObjects.Add(mo);
                }
            }

            return maskedObjects.ToArray();
        }
    }
}
/// <summary>
/// Create the face landmark detection page: configures the button and registers the
/// handler that runs face detection and landmark fitting when images are loaded.
/// </summary>
public FaceLandmarkDetectionPage()
    : base()
{
    var button = this.GetButton();
    button.Text = "Perform Face Landmark Detection";
    button.Clicked += OnButtonClicked;

    OnImagesLoaded += async (sender, image) =>
    {
        if (image == null || image[0] == null)
        {
            return;
        }

        SetMessage("Please wait...");
        SetImage(null);

        Task<Tuple<IInputArray, long>> t = new Task<Tuple<IInputArray, long>>(
            () =>
            {
                InitFaceDetector();
                InitFacemark();

                int imgDim = 300;
                MCvScalar meanVal = new MCvScalar(104, 177, 123);
                Stopwatch watch = Stopwatch.StartNew();
                Size imageSize = image[0].Size;

                using (Mat inputBlob = DnnInvoke.BlobFromImage(
                    image[0],
                    1.0,
                    new Size(imgDim, imgDim),
                    meanVal,
                    false,
                    false))
                {
                    _faceDetector.SetInput(inputBlob, "data");
                }

                using (Mat detection = _faceDetector.Forward("detection_out"))
                {
                    float confidenceThreshold = 0.5f;
                    List<Rectangle> faceRegions = new List<Rectangle>();

                    float[,,,] values = detection.GetData(true) as float[,,,];
                    for (int i = 0; i < values.GetLength(2); i++)
                    {
                        float confident = values[0, 0, i, 2];
                        if (confident <= confidenceThreshold)
                        {
                            continue;
                        }

                        // Entries 3..6 are scaled by the image width/height to obtain
                        // the two corners of the face box in pixels.
                        float xLeftBottom = values[0, 0, i, 3] * imageSize.Width;
                        float yLeftBottom = values[0, 0, i, 4] * imageSize.Height;
                        float xRightTop = values[0, 0, i, 5] * imageSize.Width;
                        float yRightTop = values[0, 0, i, 6] * imageSize.Height;

                        RectangleF objectRegion = new RectangleF(
                            xLeftBottom,
                            yLeftBottom,
                            xRightTop - xLeftBottom,
                            yRightTop - yLeftBottom);
                        faceRegions.Add(Rectangle.Round(objectRegion));
                    }

                    using (VectorOfRect vr = new VectorOfRect(faceRegions.ToArray()))
                    using (VectorOfVectorOfPointF landmarks = new VectorOfVectorOfPointF())
                    {
                        _facemark.Fit(image[0], vr, landmarks);

                        // Draw the face boxes in green ...
                        foreach (Rectangle face in faceRegions)
                        {
                            CvInvoke.Rectangle(image[0], face, new MCvScalar(0, 255, 0));
                        }

                        // ... and the landmarks of each face in blue (BGR order).
                        for (int i = 0; i < landmarks.Size; i++)
                        {
                            using (VectorOfPointF vpf = landmarks[i])
                            {
                                FaceInvoke.DrawFacemarks(image[0], vpf, new MCvScalar(255, 0, 0));
                            }
                        }
                    }

                    watch.Stop();
                    return new Tuple<IInputArray, long>(image[0], watch.ElapsedMilliseconds);
                }
            });
        t.Start();

        // Item1 is the annotated image, Item2 the elapsed time in milliseconds.
        Tuple<IInputArray, long> result = await t;
        SetImage(result.Item1);
        SetMessage(String.Format("Detected in {0} milliseconds.", result.Item2));
    };
}
/// <summary>
/// Create the Mask-rcnn demo page: configures the button and registers the handler
/// that runs the network on the loaded image and blends each detected mask onto it.
/// </summary>
public DnnPage()
    : base()
{
    var button = this.GetButton();
    button.Text = "Perform Mask-rcnn Detection";
    button.Clicked += OnButtonClicked;

    OnImagesLoaded += async (sender, image) =>
    {
        if (image == null || image[0] == null)
        {
            return;
        }

        SetMessage("Please wait...");
        SetImage(null);

        Task<Tuple<Mat, String, long>> t = new Task<Tuple<Mat, String, long>>(
            () =>
            {
                String configFile = "mask_rcnn_inception_v2_coco_2018_01_28.pbtxt";
#if __ANDROID__
                String path = System.IO.Path.Combine(
                    Android.OS.Environment.ExternalStorageDirectory.AbsolutePath,
                    Android.OS.Environment.DirectoryDownloads,
                    "dnn_data");
                FileInfo configFileInfo = AndroidFileAsset.WritePermanantFileAsset(
                    Android.App.Application.Context,
                    configFile,
                    "dnn_data",
                    AndroidFileAsset.OverwriteMethod.AlwaysOverwrite);
                configFile = configFileInfo.FullName;
#else
                String path = "./dnn_data/";
#endif

                String graphFile = DnnDownloadFile(path, "frozen_inference_graph.pb");
                String lookupFile = DnnDownloadFile(path, "coco-labels-paper.txt");

                string[] labels = File.ReadAllLines(lookupFile);

                // The network, the input blob and the output vector all wrap native
                // resources; dispose them deterministically once the image is processed.
                using (Emgu.CV.Dnn.Net net = Emgu.CV.Dnn.DnnInvoke.ReadNetFromTensorflow(graphFile, configFile))
                using (Mat blob = DnnInvoke.BlobFromImage(image[0]))
                using (VectorOfMat tensors = new VectorOfMat())
                {
                    net.SetInput(blob, "image_tensor");
                    net.Forward(tensors, new string[] { "detection_out_final", "detection_masks" });

                    using (Mat boxes = tensors[0])
                    using (Mat masks = tensors[1])
                    {
                        System.Drawing.Size imgSize = image[0].Size;
                        float[,,,] boxesData = boxes.GetData(true) as float[,,,];
                        int numDetections = boxesData.GetLength(2);

                        for (int i = 0; i < numDetections; i++)
                        {
                            float score = boxesData[0, 0, i, 2];
                            if (!(score > 0.5))
                            {
                                continue;
                            }

                            int classId = (int)boxesData[0, 0, i, 1];
                            String label = labels[classId];

                            // Entries 3..6 are scaled by the image size to get pixel coordinates.
                            float left = boxesData[0, 0, i, 3] * imgSize.Width;
                            float top = boxesData[0, 0, i, 4] * imgSize.Height;
                            float right = boxesData[0, 0, i, 5] * imgSize.Width;
                            float bottom = boxesData[0, 0, i, 6] * imgSize.Height;

                            RectangleF rectF = new RectangleF(left, top, right - left, bottom - top);
                            Rectangle rect = Rectangle.Round(rectF);

                            // Keep the box inside the image so it can be used as a sub-region below.
                            rect.Intersect(new Rectangle(Point.Empty, imgSize));

                            CvInvoke.Rectangle(image[0], rect, new MCvScalar(0, 0, 0, 0), 1);
                            CvInvoke.PutText(
                                image[0],
                                label,
                                rect.Location,
                                FontFace.HersheyComplex,
                                1.0,
                                new MCvScalar(0, 0, 255),
                                2);

                            int[] masksDim = masks.SizeOfDimension;

                            // mask is a header over the (i, classId) plane of the mask tensor:
                            // masksDim[2] rows by masksDim[3] columns of 32-bit floats.
                            using (Mat mask = new Mat(
                                masksDim[2],
                                masksDim[3],
                                DepthType.Cv32F,
                                1,
                                masks.GetDataPointer(i, classId),
                                masksDim[3] * masks.ElementSize))
                            using (Mat maskLarge = new Mat())
                            using (Mat maskLargeInv = new Mat())
                            using (Mat subRegion = new Mat(image[0], rect))
                            using (Mat largeColor = new Mat(subRegion.Size, Emgu.CV.CvEnum.DepthType.Cv8U, 3))
                            {
                                CvInvoke.Resize(mask, maskLarge, rect.Size);

                                //give the mask at least 30% transparency
                                using (ScalarArray sa = new ScalarArray(0.7))
                                {
                                    CvInvoke.Min(sa, maskLarge, maskLarge);
                                }

                                //Create the inverse mask for the original image
                                using (ScalarArray sa = new ScalarArray(1.0))
                                {
                                    CvInvoke.Subtract(sa, maskLarge, maskLargeInv);
                                }

                                //The mask color
                                largeColor.SetTo(new Emgu.CV.Structure.MCvScalar(255, 0, 0));

                                if (subRegion.NumberOfChannels == 4)
                                {
                                    // Blend on a 3-channel copy, then write the result back
                                    // into the 4-channel sub-region of the image.
                                    using (Mat bgrSubRegion = new Mat())
                                    {
                                        CvInvoke.CvtColor(subRegion, bgrSubRegion, ColorConversion.Bgra2Bgr);
                                        CvInvoke.BlendLinear(largeColor, bgrSubRegion, maskLarge, maskLargeInv, bgrSubRegion);
                                        CvInvoke.CvtColor(bgrSubRegion, subRegion, ColorConversion.Bgr2Bgra);
                                    }
                                }
                                else
                                {
                                    CvInvoke.BlendLinear(largeColor, subRegion, maskLarge, maskLargeInv, subRegion);
                                }
                            }
                        }
                    }
                }

                // No timing is measured here; the message is left null and the time 0.
                long time = 0;
                return new Tuple<Mat, String, long>(image[0], null, time);
            });
        t.Start();

        Tuple<Mat, String, long> result = await t;
        SetImage(result.Item1);
        SetMessage(result.Item2);
    };
}
/// <summary>
/// Convolves every input channel with every filter and writes the results to
/// <paramref name="output"/>.
/// </summary>
/// <param name="input">Input indexed as [inputDepth, inputHeight, inputWidth].</param>
/// <param name="filter">Filters indexed as [filterCount, kernelHeight, kernelWidth].</param>
/// <param name="output">
/// Destination indexed as [filterCount, inputDepth, outputHeight, outputWidth].
/// </param>
/// <exception cref="ArgumentException">
/// Thrown when any argument's dimensions differ from the sizes this instance was configured with.
/// </exception>
public void Convolve(float[,,] input, float[,,] filter, float[,,,] output)
{
    // Validate the three arguments in order; the first mismatch wins.
    bool inputMatches =
        input.GetLength(0) == this.inputDepth
        && input.GetLength(1) == this.inputHeight
        && input.GetLength(2) == this.inputWidth;
    if (!inputMatches)
    {
        throw new ArgumentException("Wrong input size.", nameof(input));
    }

    bool filterMatches =
        filter.GetLength(0) == this.filterCount
        && filter.GetLength(1) == this.kernelHeight
        && filter.GetLength(2) == this.kernelWidth;
    if (!filterMatches)
    {
        throw new ArgumentException("Wrong input size.", nameof(filter));
    }

    bool outputMatches =
        output.GetLength(0) == this.filterCount
        && output.GetLength(1) == this.inputDepth
        && output.GetLength(2) == this.outputHeight
        && output.GetLength(3) == this.outputWidth;
    if (!outputMatches)
    {
        throw new ArgumentException("Wrong input size.", nameof(output));
    }

    // Stage the input channels into the per-channel 2-D buffers.
    for (var channel = 0; channel < this.inputDepth; ++channel)
    {
        var channelPlane = this.inputJagged[channel];
        for (var row = 0; row < this.inputHeight; ++row)
        {
            for (var col = 0; col < this.inputWidth; ++col)
            {
                channelPlane[row, col] = input[channel, row, col];
            }
        }
    }

    // Stage the filters into the per-filter 2-D buffers.
    for (var f = 0; f < this.filterCount; ++f)
    {
        var kernelPlane = this.filterJagged[f];
        for (var row = 0; row < this.kernelHeight; ++row)
        {
            for (var col = 0; col < this.kernelWidth; ++col)
            {
                kernelPlane[row, col] = filter[f, row, col];
            }
        }
    }

    // Run the 2-D convolution for every (filter, channel) pair.
    for (var f = 0; f < this.filterCount; ++f)
    {
        for (var channel = 0; channel < this.inputDepth; ++channel)
        {
            this.convolution.Convolve(
                this.inputJagged[channel],
                this.filterJagged[f],
                this.outputJagged[f, channel]);
        }
    }

    // Copy the per-pair results into the caller's 4-D output array.
    for (var f = 0; f < this.filterCount; ++f)
    {
        for (var channel = 0; channel < this.inputDepth; ++channel)
        {
            var resultPlane = this.outputJagged[f, channel];
            for (var row = 0; row < this.outputHeight; ++row)
            {
                for (var col = 0; col < this.outputWidth; ++col)
                {
                    output[f, channel, row, col] = resultPlane[row, col];
                }
            }
        }
    }
}
/// <summary>
/// Detect vehicle from the given image
/// </summary>
/// <param name="image">The image</param>
/// <returns>The detected vehicles.</returns>
/// <remarks>
/// Detected regions are clamped to the image bounds before they are used to crop the
/// image; detections whose clamped region has no area are skipped.
/// </remarks>
public Vehicle[] Detect(Mat image)
{
    int imgDim = 300;
    int vehicleAttrSize = 72;
    MCvScalar meanVal = new MCvScalar();
    double scale = 1.0;
    float vehicleConfidenceThreshold = 0.5f;
    float licensePlateConfidenceThreshold = 0.5f;

    Size imageSize = image.Size;
    Rectangle imageBounds = new Rectangle(0, 0, imageSize.Width, imageSize.Height);

    using (Mat inputBlob = DnnInvoke.BlobFromImage(
        image,
        scale,
        new Size(imgDim, imgDim),
        meanVal,
        false,
        false,
        DepthType.Cv32F))
    {
        _vehicleLicensePlateDetector.SetInput(inputBlob, "Placeholder");
    }

    List<Vehicle> vehicles = new List<Vehicle>();
    List<LicensePlate> plates = new List<LicensePlate>();

    using (Mat detection = _vehicleLicensePlateDetector.Forward("DetectionOutput_"))
    {
        float[,,,] values = detection.GetData(true) as float[,,,];
        for (int i = 0; i < values.GetLength(2); i++)
        {
            //if label == 1, it is a vehicle; if label == 2, it is a license plate
            float label = values[0, 0, i, 1];
            float confident = values[0, 0, i, 2];

            float xLeftBottom = values[0, 0, i, 3] * imageSize.Width;
            float yLeftBottom = values[0, 0, i, 4] * imageSize.Height;
            float xRightTop = values[0, 0, i, 5] * imageSize.Width;
            float yRightTop = values[0, 0, i, 6] * imageSize.Height;

            RectangleF objectRegion = new RectangleF(
                xLeftBottom,
                yLeftBottom,
                xRightTop - xLeftBottom,
                yRightTop - yLeftBottom);
            Rectangle region = Rectangle.Round(objectRegion);

            // The region is used below as a sub-matrix of the image, which requires
            // it to lie inside the image and to have a positive area.
            region.Intersect(imageBounds);
            if (region.Width <= 0 || region.Height <= 0)
            {
                continue;
            }

            if (label == 1 && confident > vehicleConfidenceThreshold)
            {
                //this is a vehicle
                Vehicle v = new Vehicle();
                v.Region = region;

                #region find out the type and color of the vehicle
                using (Mat vehicle = new Mat(image, region))
                using (Mat vehicleBlob = DnnInvoke.BlobFromImage(
                    vehicle,
                    scale,
                    new Size(vehicleAttrSize, vehicleAttrSize),
                    meanVal,
                    false,
                    false,
                    DepthType.Cv32F))
                {
                    _vehicleAttrRecognizer.SetInput(vehicleBlob, "input");

                    using (VectorOfMat vm = new VectorOfMat(2))
                    {
                        _vehicleAttrRecognizer.Forward(vm, new string[] { "color", "type" });
                        using (Mat vehicleColorMat = vm[0])
                        using (Mat vehicleTypeMat = vm[1])
                        {
                            // Pick the entry with the highest value from each output.
                            float[] vehicleColorData = vehicleColorMat.GetData(false) as float[];
                            float maxProbColor = vehicleColorData.Max();
                            int maxIdxColor = Array.IndexOf(vehicleColorData, maxProbColor);
                            v.Color = _colorName[maxIdxColor];

                            float[] vehicleTypeData = vehicleTypeMat.GetData(false) as float[];
                            float maxProbType = vehicleTypeData.Max();
                            int maxIdxType = Array.IndexOf(vehicleTypeData, maxProbType);
                            v.Type = _vehicleType[maxIdxType];
                        }
                    }
                }
                #endregion

                vehicles.Add(v);
            }

            if (label == 2 && confident > licensePlateConfidenceThreshold)
            {
                //this is a license plate
                LicensePlate p = new LicensePlate();
                p.Region = region;

                #region OCR on license plate
                using (Mat plate = new Mat(image, region))
                using (Mat plateBlob = DnnInvoke.BlobFromImage(
                    plate,
                    scale,
                    new Size(94, 24),
                    meanVal,
                    false,
                    false,
                    DepthType.Cv32F))
                using (Mat seqInd = new Mat(new Size(1, 88), DepthType.Cv32F, 1))
                {
                    _ocr.SetInput(plateBlob, "data");

                    // The "seq_ind" input is filled with 0 for the first element
                    // and 1 for every following element.
                    float[] seqIndValues = new float[seqInd.Width * seqInd.Height];
                    seqIndValues[0] = 0.0f;
                    for (int j = 1; j < seqIndValues.Length; j++)
                    {
                        seqIndValues[j] = 1.0f;
                    }

                    seqInd.SetTo(seqIndValues);
                    _ocr.SetInput(seqInd, "seq_ind");

                    using (Mat output = _ocr.Forward("decode"))
                    {
                        float[] plateValue = output.GetData(false) as float[];
                        StringBuilder licensePlateStringBuilder = new StringBuilder();

                        // Each non-negative value (truncated to int) is an index
                        // into _plateText; negative values are ignored.
                        foreach (int j in plateValue)
                        {
                            if (j >= 0)
                            {
                                licensePlateStringBuilder.Append(_plateText[j]);
                            }
                        }

                        p.Text = licensePlateStringBuilder.ToString();
                    }
                }
                #endregion

                plates.Add(p);
            }
        }

        // Attach each plate to the first vehicle that contains it.
        foreach (LicensePlate p in plates)
        {
            foreach (Vehicle v in vehicles)
            {
                if (v.ContainsPlate(p))
                {
                    v.LicensePlate = p;
                    break;
                }
            }
        }
    }

    return vehicles.ToArray();
}