/// <summary>
/// Decodes face proposals for one feature level from the score, box-delta and landmark blobs
/// and appends every proposal whose score reaches <paramref name="probThreshold"/> to
/// <paramref name="faceObjects"/>.
/// </summary>
/// <param name="anchors">Base anchors, one per row, read as (x0, y0, x1, y1).</param>
/// <param name="featStride">Offset added to the anchor position for each step along the score map's width and height.</param>
/// <param name="scoreBlob">Score blob; the score of anchor <c>q</c> is read from channel <c>q + anchors.H</c>.</param>
/// <param name="bboxBlob">Box deltas; anchor <c>q</c> uses the four channels starting at <c>q * 4</c> (dx, dy, dw, dh).</param>
/// <param name="landmarkBlob">Landmark deltas; anchor <c>q</c> uses the ten channels starting at <c>q * 10</c> (x/y pairs for five points).</param>
/// <param name="probThreshold">Minimum score (inclusive) for a proposal to be emitted.</param>
/// <param name="faceObjects">Destination list; proposals are appended, existing entries are kept.</param>
private static void GenerateProposals(Mat anchors, int featStride, Mat scoreBlob, Mat bboxBlob, Mat landmarkBlob, float probThreshold, IList<FaceObject> faceObjects)
{
    var w = scoreBlob.W;
    var h = scoreBlob.H;

    // generate face proposal from bbox deltas and shifted anchors
    var numAnchors = anchors.H;

    for (var q = 0; q < numAnchors; q++)
    {
        var anchor = anchors.Row(q);

        using var score = scoreBlob.Channel(q + numAnchors);
        using var bbox = bboxBlob.ChannelRange(q * 4, 4);
        using var landmark = landmarkBlob.ChannelRange(q * 10, 10);

        // The channel views depend only on the anchor index, so they are created once per
        // anchor instead of once per accepted proposal inside the innermost loop.
        using var dxChannel = bbox.Channel(0);
        using var dyChannel = bbox.Channel(1);
        using var dwChannel = bbox.Channel(2);
        using var dhChannel = bbox.Channel(3);

        using var landmark0 = landmark.Channel(0);
        using var landmark1 = landmark.Channel(1);
        using var landmark2 = landmark.Channel(2);
        using var landmark3 = landmark.Channel(3);
        using var landmark4 = landmark.Channel(4);
        using var landmark5 = landmark.Channel(5);
        using var landmark6 = landmark.Channel(6);
        using var landmark7 = landmark.Channel(7);
        using var landmark8 = landmark.Channel(8);
        using var landmark9 = landmark.Channel(9);

        // Even entries hold the x delta and odd entries the y delta of each landmark.
        // The array only references the views; the using declarations above own them.
        var landmarkChannels = new[]
        {
            landmark0, landmark1, landmark2, landmark3, landmark4,
            landmark5, landmark6, landmark7, landmark8, landmark9
        };
        var landmarkCount = landmarkChannels.Length / 2;

        // shifted anchor
        var anchorY = anchor[1];

        var anchorW = anchor[2] - anchor[0];
        var anchorH = anchor[3] - anchor[1];

        for (var i = 0; i < h; i++)
        {
            var anchorX = anchor[0];

            for (var j = 0; j < w; j++)
            {
                var index = i * w + j;

                var prob = score[index];

                if (prob >= probThreshold)
                {
                    // apply center size
                    var dx = dxChannel[index];
                    var dy = dyChannel[index];
                    var dw = dwChannel[index];
                    var dh = dhChannel[index];

                    var cx = anchorX + anchorW * 0.5f;
                    var cy = anchorY + anchorH * 0.5f;

                    var pbCx = cx + anchorW * dx;
                    var pbCy = cy + anchorH * dy;

                    var pbW = anchorW * (float)Math.Exp(dw);
                    var pbH = anchorH * (float)Math.Exp(dh);

                    var x0 = pbCx - pbW * 0.5f;
                    var y0 = pbCy - pbH * 0.5f;
                    var x1 = pbCx + pbW * 0.5f;
                    var y1 = pbCy + pbH * 0.5f;

                    var obj = new FaceObject();
                    obj.Rect.X = x0;
                    obj.Rect.Y = y0;
                    obj.Rect.Width = x1 - x0 + 1;
                    obj.Rect.Height = y1 - y0 + 1;

                    // Landmarks are offsets from the anchor center scaled by (anchor size + 1).
                    for (var k = 0; k < landmarkCount; k++)
                    {
                        obj.Landmark[k].X = cx + (anchorW + 1) * landmarkChannels[2 * k][index];
                        obj.Landmark[k].Y = cy + (anchorH + 1) * landmarkChannels[2 * k + 1][index];
                    }

                    obj.Prob = prob;

                    faceObjects.Add(obj);
                }

                anchorX += featStride;
            }

            anchorY += featStride;
        }
    }
}
/// <summary>
/// Runs the pose network on <paramref name="bgr"/> and fills <paramref name="keyPoints"/>
/// with one key point per output heatmap channel, located at the channel's maximum response
/// and scaled back to the input image size.
/// </summary>
/// <param name="bgr">Input image; its pixel data is converted from BGR to RGB and resized to 192x256.</param>
/// <param name="keyPoints">Destination list; it is cleared before the new key points are added.</param>
/// <returns>Always 0.</returns>
private static int DetectPoseNet(NcnnDotNet.OpenCV.Mat bgr, List<KeyPoint> keyPoints)
{
    using (var poseNet = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            poseNet.Opt.UseVulkanCompute = true;
        }

        // the simple baseline human pose estimation from gluon-cv
        // https://gluon-cv.mxnet.io/build/examples_pose/demo_simple_pose.html
        // mxnet model exported via
        //      pose_net.hybridize()
        //      pose_net.export('pose')
        // then mxnet2ncnn
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        poseNet.LoadParam("pose.param");
        poseNet.LoadModel("pose.bin");

        var w = bgr.Cols;
        var h = bgr.Rows;

        using var @in = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr2Rgb, bgr.Cols, bgr.Rows, 192, 256);

        // transforms.ToTensor(),
        // transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        // R' = (R / 255 - 0.485) / 0.229 = (R - 0.485 * 255) / 0.229 / 255
        // G' = (G / 255 - 0.456) / 0.224 = (G - 0.456 * 255) / 0.224 / 255
        // B' = (B / 255 - 0.406) / 0.225 = (B - 0.406 * 255) / 0.225 / 255
        var meanVals = new[] { 0.485f * 255.0f, 0.456f * 255.0f, 0.406f * 255.0f };
        var normVals = new[] { 1 / 0.229f / 255.0f, 1 / 0.224f / 255.0f, 1 / 0.225f / 255.0f };
        @in.SubstractMeanNormalize(meanVals, normVals);

        using var ex = poseNet.CreateExtractor();

        ex.Input("data", @in);

        using var @out = new Mat();
        ex.Extract("conv3_fwd", @out);

        // resolve point from heatmap
        keyPoints.Clear();

        // One heatmap per channel: the loop bound must be the channel count (C), not the
        // heatmap height (H), because each iteration reads @out.Channel(p).
        for (var p = 0; p < @out.C; p++)
        {
            using var m = @out.Channel(p);

            // Only strictly positive responses can win; otherwise the point stays at (0, 0)
            // with probability 0.
            var maxProb = 0f;
            var maxX = 0;
            var maxY = 0;
            for (var y = 0; y < @out.H; y++)
            {
                var ptr = m.Row(y);
                for (var x = 0; x < @out.W; x++)
                {
                    var prob = ptr[x];
                    if (prob > maxProb)
                    {
                        maxProb = prob;
                        maxX = x;
                        maxY = y;
                    }
                }
            }

            // Map the heatmap cell back to coordinates in the original image.
            var keyPoint = new KeyPoint
            {
                P = new Point<float>(maxX * w / (float)@out.W, maxY * h / (float)@out.H),
                Prob = maxProb
            };

            keyPoints.Add(keyPoint);
        }
    }

    return 0;
}
/// <summary>
/// Runs the ZF Faster R-CNN model on <paramref name="bgr"/> in two stages (feature/ROI
/// extraction, then per-ROI classification and box regression), applies per-class NMS and
/// stores the surviving detections, sorted by the file's descending sort helper, in
/// <paramref name="objects"/>.
/// </summary>
/// <param name="bgr">Input image in BGR order.</param>
/// <param name="objects">Destination list; it is cleared before detections are added and holds at most 100 entries afterwards.</param>
/// <returns>Always 0.</returns>
private static int DetectFasterRCNN(NcnnDotNet.OpenCV.Mat bgr, List<Object> objects)
{
    // hyper parameters taken from
    // py-faster-rcnn/lib/fast_rcnn/config.py
    // py-faster-rcnn/lib/fast_rcnn/test.py
    const int targetSize = 600; // __C.TEST.SCALES
    const int maxPerImage = 100;
    const float confidenceThresh = 0.05f;
    const float nmsThreshold = 0.3f; // __C.TEST.NMS

    // Clamps a coordinate into [0, upper].
    static float Clamp(float value, int upper) => Math.Max(Math.Min(value, upper), 0.0f);

    using var net = new Net();

    if (Ncnn.IsSupportVulkan)
    {
        net.Opt.UseVulkanCompute = true;
    }

    // original pretrained model from https://github.com/rbgirshick/py-faster-rcnn
    // py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt
    // https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0
    // ZF_faster_rcnn_final.caffemodel
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    net.LoadParam("ZF_faster_rcnn_final.param");
    net.LoadModel("ZF_faster_rcnn_final.bin");

    // Scale so that the shorter image side becomes targetSize.
    var imageW = bgr.Cols;
    var imageH = bgr.Rows;
    int netW;
    int netH;
    float scale;
    if (imageW < imageH)
    {
        scale = (float)targetSize / imageW;
        netW = targetSize;
        netH = (int)(imageH * scale);
    }
    else
    {
        scale = (float)targetSize / imageH;
        netH = targetSize;
        netW = (int)(imageW * scale);
    }

    using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, imageW, imageH, netW, netH);

    var channelMeans = new[] { 102.9801f, 115.9465f, 122.7717f };
    input.SubstractMeanNormalize(channelMeans, null);

    using var imInfo = new Mat(3);
    imInfo[0] = netH;
    imInfo[1] = netW;
    imInfo[2] = scale;

    // Stage 1: shared feature map and the full set of ROIs.
    using var stage1 = net.CreateExtractor();
    stage1.Input("data", input);
    stage1.Input("im_info", imInfo);

    using var conv5Relu5 = new Mat(); // feature
    using var rois = new Mat();       // all rois
    stage1.Extract("conv5_relu5", conv5Relu5);
    stage1.Extract("rois", rois);

    // Stage 2: classify and regress every ROI on its own extractor.
    var perClass = new List<List<Object>>();
    var maxX = imageW - 1;
    var maxY = imageH - 1;

    for (var roiIndex = 0; roiIndex < rois.C; roiIndex++)
    {
        using var stage2 = net.CreateExtractor();
        using var roi = rois.Channel(roiIndex); // get single roi

        stage2.Input("conv5_relu5", conv5Relu5);
        stage2.Input("rois", roi);

        using var bboxPred = new Mat();
        using var clsProb = new Mat();
        stage2.Extract("bbox_pred", bboxPred);
        stage2.Extract("cls_prob", clsProb);

        var numClass = clsProb.W;

        // Resize is a helper defined elsewhere in this file (stand-in for std::vector::resize).
        Resize(perClass, numClass);

        // Pick the class with the highest score.
        var bestLabel = 0;
        var bestScore = 0.0f;
        for (var c = 0; c < numClass; c++)
        {
            var candidateScore = clsProb[c];
            if (candidateScore > bestScore)
            {
                bestLabel = c;
                bestScore = candidateScore;
            }
        }

        // Label 0 is treated as background; weak detections are dropped as well.
        if (bestLabel == 0 || bestScore <= confidenceThresh)
        {
            continue;
        }

        // ROI corners mapped back to the original image size.
        var roiX1 = roi[0] / scale;
        var roiY1 = roi[1] / scale;
        var roiX2 = roi[2] / scale;
        var roiY2 = roi[3] / scale;

        var roiW = roiX2 - roiX1 + 1;
        var roiH = roiY2 - roiY1 + 1;

        // Class-specific regression deltas: four values per label.
        var deltaBase = bestLabel * 4;
        var dx = bboxPred[deltaBase];
        var dy = bboxPred[deltaBase + 1];
        var dw = bboxPred[deltaBase + 2];
        var dh = bboxPred[deltaBase + 3];

        var roiCx = roiX1 + roiW * 0.5f;
        var roiCy = roiY1 + roiH * 0.5f;

        var boxCx = roiCx + roiW * dx;
        var boxCy = roiCy + roiH * dy;

        // Math.Exp yields double; the half extents stay in double until the final cast.
        var halfW = roiW * Math.Exp(dw) * 0.5f;
        var halfH = roiH * Math.Exp(dh) * 0.5f;

        var left = Clamp((float)(boxCx - halfW), maxX);
        var top = Clamp((float)(boxCy - halfH), maxY);
        var right = Clamp((float)(boxCx + halfW), maxX);
        var bottom = Clamp((float)(boxCy + halfH), maxY);

        perClass[bestLabel].Add(new Object
        {
            Rect = new Rect<float>(left, top, right - left + 1, bottom - top + 1),
            Label = bestLabel,
            Prob = bestScore
        });
    }

    // Post process: per-class sort + NMS, then a global sort and truncation.
    objects.Clear();
    foreach (var candidates in perClass)
    {
        QsortDescentInplace(candidates);

        var picked = new List<int>();
        NmsSortedBBoxes(candidates, picked, nmsThreshold);

        foreach (var keep in picked)
        {
            objects.Add(candidates[keep]);
        }
    }

    QsortDescentInplace(objects);

    if (maxPerImage > 0 && maxPerImage < objects.Count)
    {
        Resize(objects, maxPerImage);
    }

    return 0;
}
/// <summary>
/// Runs the R-FCN model on <paramref name="bgr"/> in two stages (position-sensitive maps and
/// ROIs, then per-ROI classification and box regression), applies per-class NMS and stores
/// the surviving detections, sorted by the file's descending sort helper, in
/// <paramref name="objects"/>.
/// </summary>
/// <param name="bgr">Input image in BGR order.</param>
/// <param name="objects">Destination list; it is cleared before detections are added and holds at most 100 entries afterwards.</param>
/// <returns>Always 0.</returns>
private static int DetectRFCN(NcnnDotNet.OpenCV.Mat bgr, List<Object> objects)
{
    const int targetSize = 224;
    const int maxPerImage = 100;
    const float confidenceThresh = 0.6f; // CONF_THRESH
    const float nmsThreshold = 0.3f;     // NMS_THRESH

    // Clamps a coordinate into [0, upper].
    static float Clamp(float value, int upper) => Math.Max(Math.Min(value, upper), 0.0f);

    using var net = new Net();

    if (Ncnn.IsSupportVulkan)
    {
        net.Opt.UseVulkanCompute = true;
    }

    // original pretrained model from https://github.com/YuwenXiong/py-R-FCN
    // https://github.com/YuwenXiong/py-R-FCN/blob/master/models/pascal_voc/ResNet-50/rfcn_end2end/test_agnostic.prototxt
    // https://1drv.ms/u/s!AoN7vygOjLIQqUWHpY67oaC7mopf
    // resnet50_rfcn_final.caffemodel
    net.LoadParam("rfcn_end2end.param");
    net.LoadModel("rfcn_end2end.bin");

    // Scale so that the shorter image side becomes targetSize.
    var imageW = bgr.Cols;
    var imageH = bgr.Rows;
    int netW;
    int netH;
    float scale;
    if (imageW < imageH)
    {
        scale = (float)targetSize / imageW;
        netW = targetSize;
        netH = (int)(imageH * scale);
    }
    else
    {
        scale = (float)targetSize / imageH;
        netH = targetSize;
        netW = (int)(imageW * scale);
    }

    using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, imageW, imageH, netW, netH);

    var channelMeans = new[] { 102.9801f, 115.9465f, 122.7717f };
    input.SubstractMeanNormalize(channelMeans, null);

    using var imInfo = new Mat(3);
    imInfo[0] = netH;
    imInfo[1] = netW;
    imInfo[2] = scale;

    // Stage 1: class maps, box maps and the full set of ROIs.
    using var stage1 = net.CreateExtractor();
    stage1.Input("data", input);
    stage1.Input("im_info", imInfo);

    using var rfcnCls = new Mat();
    using var rfcnBBox = new Mat();
    using var rois = new Mat(); // all rois
    stage1.Extract("rfcn_cls", rfcnCls);
    stage1.Extract("rfcn_bbox", rfcnBBox);
    stage1.Extract("rois", rois);

    // Stage 2: classify and regress every ROI on its own extractor.
    var perClass = new List<List<Object>>();
    var maxX = imageW - 1;
    var maxY = imageH - 1;

    for (var roiIndex = 0; roiIndex < rois.C; roiIndex++)
    {
        using var stage2 = net.CreateExtractor();
        using var roi = rois.Channel(roiIndex); // get single roi

        stage2.Input("rfcn_cls", rfcnCls);
        stage2.Input("rfcn_bbox", rfcnBBox);
        stage2.Input("rois", roi);

        using var bboxPred = new Mat();
        using var clsProb = new Mat();
        stage2.Extract("bbox_pred", bboxPred);
        stage2.Extract("cls_prob", clsProb);

        var numClass = clsProb.W;

        // Resize is a helper defined elsewhere in this file (stand-in for std::vector::resize).
        Resize(perClass, numClass);

        // Pick the class with the highest score.
        var bestLabel = 0;
        var bestScore = 0.0f;
        for (var c = 0; c < numClass; c++)
        {
            var candidateScore = clsProb[c];
            if (candidateScore > bestScore)
            {
                bestLabel = c;
                bestScore = candidateScore;
            }
        }

        // Label 0 is treated as background; weak detections are dropped as well.
        if (bestLabel == 0 || bestScore <= confidenceThresh)
        {
            continue;
        }

        // ROI corners mapped back to the original image size.
        var roiX1 = roi[0] / scale;
        var roiY1 = roi[1] / scale;
        var roiX2 = roi[2] / scale;
        var roiY2 = roi[3] / scale;

        var roiW = roiX2 - roiX1 + 1;
        var roiH = roiY2 - roiY1 + 1;

        // The deltas are always read from elements 4..7, independent of the label
        // (the model is loaded from the "test_agnostic" prototxt referenced above).
        var dx = bboxPred[4];
        var dy = bboxPred[4 + 1];
        var dw = bboxPred[4 + 2];
        var dh = bboxPred[4 + 3];

        var roiCx = roiX1 + roiW * 0.5f;
        var roiCy = roiY1 + roiH * 0.5f;

        var boxCx = roiCx + roiW * dx;
        var boxCy = roiCy + roiH * dy;

        // Math.Exp yields double; the half extents stay in double until the final cast.
        var halfW = roiW * Math.Exp(dw) * 0.5f;
        var halfH = roiH * Math.Exp(dh) * 0.5f;

        var left = Clamp((float)(boxCx - halfW), maxX);
        var top = Clamp((float)(boxCy - halfH), maxY);
        var right = Clamp((float)(boxCx + halfW), maxX);
        var bottom = Clamp((float)(boxCy + halfH), maxY);

        perClass[bestLabel].Add(new Object
        {
            Rect = new Rect<float>(left, top, right - left + 1, bottom - top + 1),
            Label = bestLabel,
            Prob = bestScore
        });
    }

    // Post process: per-class sort + NMS, then a global sort and truncation.
    objects.Clear();
    foreach (var candidates in perClass)
    {
        QsortDescentInplace(candidates);

        var picked = new List<int>();
        NmsSortedBBoxes(candidates, picked, nmsThreshold);

        foreach (var keep in picked)
        {
            objects.Add(candidates[keep]);
        }
    }

    QsortDescentInplace(objects);

    if (maxPerImage > 0 && maxPerImage < objects.Count)
    {
        Resize(objects, maxPerImage);
    }

    return 0;
}