/// <summary>
/// Loads the image at <paramref name="imagePath"/>, runs the CenterFace detector on it
/// and returns the detected faces as rectangles paired with <c>DrawPen</c>.
/// </summary>
/// <param name="imagePath">Path of the image file to read.</param>
/// <returns>List of detected <see cref="Face"/> instances (possibly empty).</returns>
public List <Face> DetectFaces(string imagePath)
{
    const float tolerance = 0.5f;
    var detected = new List <Face>();

    using (NcnnDotNet.OpenCV.Mat frame = NcnnDotNet.OpenCV.Cv2.ImRead(imagePath))
    using (NcnnDotNet.Mat inMat = NcnnDotNet.Mat.FromPixels(frame.Data, NcnnDotNet.PixelType.Bgr2Rgb, frame.Cols, frame.Rows))
    {
        foreach (FaceInfo info in _centerFaceDetector.Detect(inMat, frame.Cols, frame.Rows, tolerance))
        {
            var left = (int)info.X1;
            var top = (int)info.Y1;
            var width = (int)info.X2 - left;
            var height = (int)info.Y2 - top;
            detected.Add(new Face(left, top, width, height, DrawPen));
        }
    }

    return detected;
}
/// <summary>
/// Classifies <paramref name="bgr"/> with SqueezeNet v1.1 and appends the per-class
/// probabilities from the "prob" blob to <paramref name="clsScores"/>.
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectSqueezeNet(NcnnDotNet.OpenCV.Mat bgr, List <float> clsScores)
{
    using (var squeezeNet = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            squeezeNet.Opt.UseVulkanCompute = true;
        }

        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        squeezeNet.LoadParam("squeezenet_v1.1.param");
        squeezeNet.LoadModel("squeezenet_v1.1.bin");

        using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, 227, 227);

        // mean subtraction only, no per-channel scaling
        var mean = new[] { 104f, 117f, 123f };
        input.SubstractMeanNormalize(mean, null);

        using var extractor = squeezeNet.CreateExtractor();
        extractor.Input("data", input);

        using var prob = new Mat();
        extractor.Extract("prob", prob);

        clsScores.Capacity = prob.W;
        for (var i = 0; i < prob.W; i++)
        {
            clsScores.Add(prob[i]);
        }
    }

    return 0;
}
/// <summary>
/// Runs MobileNetV2-YOLOv3 on <paramref name="bgr"/> and replaces the contents of
/// <paramref name="objects"/> with the detections scaled back to image coordinates.
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectYoloV3(NcnnDotNet.OpenCV.Mat bgr, List <Object> objects)
{
    using (var yolov3 = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            yolov3.Opt.UseVulkanCompute = true;
        }

        // original pretrained model from https://github.com/eric612/MobileNet-YOLO
        // param : https://drive.google.com/open?id=1V9oKHP6G6XvXZqhZbzNKL6FI_clRWdC-
        // bin : https://drive.google.com/open?id=1DBcuFCr-856z3FRQznWL_S5h-Aj3RawA
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        yolov3.LoadParam("mobilenetv2_yolov3.param");
        yolov3.LoadModel("mobilenetv2_yolov3.bin");

        const int targetSize = 352;
        var width = bgr.Cols;
        var height = bgr.Rows;

        using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);

        var mean = new[] { 127.5f, 127.5f, 127.5f };
        var norm = new[] { 0.007843f, 0.007843f, 0.007843f };
        input.SubstractMeanNormalize(mean, norm);

        using var extractor = yolov3.CreateExtractor();
        extractor.SetNumThreads(4);
        extractor.Input("data", input);

        using var detection = new Mat();
        extractor.Extract("detection_out", detection);

        objects.Clear();

        // each detection row: label, prob, then box coordinates scaled by image size
        for (var row = 0; row < detection.H; row++)
        {
            var values = detection.Row(row);
            var x = values[2] * width;
            var y = values[3] * height;

            var item = new Object();
            item.Label = (int)values[0];
            item.Prob = values[1];
            item.Rect.X = x;
            item.Rect.Y = y;
            item.Rect.Width = values[4] * width - x;
            item.Rect.Height = values[5] * height - y;
            objects.Add(item);
        }
    }

    return 0;
}
/// <summary>
/// Runs MobileNetV3-SSDLite (VOC) on <paramref name="bgr"/> and replaces the contents
/// of <paramref name="objects"/> with the detections, boxes clamped to the 300x300
/// detection grid and then mapped back to image coordinates.
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectMobileNetV3(NcnnDotNet.OpenCV.Mat bgr, List <Object> objects)
{
    using (var mobilenetV3 = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            mobilenetV3.Opt.UseVulkanCompute = true;
        }

        // converted ncnn model from https://github.com/ujsyehao/mobilenetv3-ssd
        mobilenetV3.LoadParam("mobilenetv3_ssdlite_voc.param");
        mobilenetV3.LoadModel("mobilenetv3_ssdlite_voc.bin");

        const int targetSize = 300;
        var width = bgr.Cols;
        var height = bgr.Rows;

        using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr2Rgb, bgr.Cols, bgr.Rows, targetSize, targetSize);

        var mean = new[] { 123.675f, 116.28f, 103.53f };
        var norm = new[] { 1.0f, 1.0f, 1.0f };
        input.SubstractMeanNormalize(mean, norm);

        using var extractor = mobilenetV3.CreateExtractor();
        extractor.SetLiteMode(true);
        extractor.SetNumThreads(4);
        extractor.Input("input", input);

        using var detection = new Mat();
        extractor.Extract("detection_out", detection);

        objects.Clear();

        for (var row = 0; row < detection.H; row++)
        {
            var values = detection.Row(row);

            // filter out cross-boundary coordinates before scaling to image size
            var left = Clamp(values[2] * targetSize, 0.0f, targetSize - 1) / targetSize * width;
            var top = Clamp(values[3] * targetSize, 0.0f, targetSize - 1) / targetSize * height;
            var right = Clamp(values[4] * targetSize, 0.0f, targetSize - 1) / targetSize * width;
            var bottom = Clamp(values[5] * targetSize, 0.0f, targetSize - 1) / targetSize * height;

            var item = new Object();
            item.Label = (int)values[0];
            item.Prob = values[1];
            item.Rect.X = left;
            item.Rect.Y = top;
            item.Rect.Width = right - left;
            item.Rect.Height = bottom - top;
            objects.Add(item);
        }
    }

    return 0;
}
/// <summary>
/// Runs MobileNet-YOLOv2 on <paramref name="bgr"/> and replaces the contents of
/// <paramref name="objects"/> with the detections scaled back to image coordinates.
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectYoloV2(NcnnDotNet.OpenCV.Mat bgr, List <Object> objects)
{
    using (var yolov2 = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            yolov2.Opt.UseVulkanCompute = true;
        }

        // original pretrained model from https://github.com/eric612/MobileNet-YOLO
        // https://github.com/eric612/MobileNet-YOLO/blob/master/models/yolov2/mobilenet_yolo_deploy.prototxt
        // https://github.com/eric612/MobileNet-YOLO/blob/master/models/yolov2/mobilenet_yolo_deploy_iter_80000.caffemodel
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        yolov2.LoadParam("mobilenet_yolo.param");
        yolov2.LoadModel("mobilenet_yolo.bin");

        const int targetSize = 416;
        var width = bgr.Cols;
        var height = bgr.Rows;

        using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);

        // the Caffe-YOLOv2-Windows style: X' = X * scale - mean,
        // applied as two separate passes (scale first, then mean subtraction)
        var mean = new[] { 1.0f, 1.0f, 1.0f };
        var norm = new[] { 0.007843f, 0.007843f, 0.007843f };
        input.SubstractMeanNormalize(null, norm);
        input.SubstractMeanNormalize(mean, null);

        using var extractor = yolov2.CreateExtractor();
        extractor.SetNumThreads(4);
        extractor.Input("data", input);

        using var detection = new Mat();
        extractor.Extract("detection_out", detection);

        objects.Clear();

        for (var row = 0; row < detection.H; row++)
        {
            var values = detection.Row(row);
            var x = values[2] * width;
            var y = values[3] * height;

            var item = new Object();
            item.Label = (int)values[0];
            item.Prob = values[1];
            item.Rect.X = x;
            item.Rect.Y = y;
            item.Rect.Width = values[4] * width - x;
            item.Rect.Height = values[5] * height - y;
            objects.Add(item);
        }
    }

    return 0;
}
/// <summary>
/// Runs PeleeNet on <paramref name="bgr"/>: fills <paramref name="objects"/> with the
/// detections and writes the bilinearly upscaled "sigmoid" segmentation blob into
/// <paramref name="resized"/> at the original image size.
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectPeleeNet(NcnnDotNet.OpenCV.Mat bgr, List <Object> objects, NcnnDotNet.Mat resized)
{
    using (var peleenet = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            peleenet.Opt.UseVulkanCompute = true;
        }

        // model is converted from https://github.com/eric612/MobileNet-YOLO
        // and can be downloaded from https://drive.google.com/open?id=1Wt6jKv13sBRMHgrGAJYlOlRF-o80pC0g
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        peleenet.LoadParam("pelee.param");
        peleenet.LoadModel("pelee.bin");

        const int targetSize = 300;
        var width = bgr.Cols;
        var height = bgr.Rows;

        using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);

        var mean = new[] { 103.9f, 116.7f, 123.6f };
        var norm = new[] { 0.017f, 0.017f, 0.017f };
        input.SubstractMeanNormalize(mean, norm);

        using var extractor = peleenet.CreateExtractor();
        extractor.Input("data", input);

        using var detection = new Mat();
        extractor.Extract("detection_out", detection);

        objects.Clear();

        for (var row = 0; row < detection.H; row++)
        {
            var values = detection.Row(row);
            var x = values[2] * width;
            var y = values[3] * height;

            var item = new Object();
            item.Label = (int)values[0];
            item.Prob = values[1];
            item.Rect.X = x;
            item.Rect.Y = y;
            item.Rect.Width = values[4] * width - x;
            item.Rect.Height = values[5] * height - y;
            objects.Add(item);
        }

        // segmentation head: upscale the raw heatmap back to image resolution
        using var segOut = new Mat();
        extractor.Extract("sigmoid", segOut);
        Ncnn.ResizeBilinear(segOut, resized, width, height);
    }

    return 0;
}
/// <summary>
/// Runs MobileNetV2-SSDLite (VOC) on <paramref name="bgr"/> and replaces the contents
/// of <paramref name="objects"/> with the detections scaled back to image coordinates.
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectMobileNetV2(NcnnDotNet.OpenCV.Mat bgr, List <Object> objects)
{
    using (var mobilenetV2 = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            mobilenetV2.Opt.UseVulkanCompute = true;
        }

        // ToDo: Support Custom Layer from C#
        //mobilenetV2.register_custom_layer("Silence", Noop_layer_creator);

        // original pretrained model from https://github.com/chuanqi305/MobileNetv2-SSDLite
        // https://github.com/chuanqi305/MobileNetv2-SSDLite/blob/master/ssdlite/voc/deploy.prototxt
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        mobilenetV2.LoadParam("mobilenetv2_ssdlite_voc.param");
        mobilenetV2.LoadModel("mobilenetv2_ssdlite_voc.bin");

        const int targetSize = 300;
        var width = bgr.Cols;
        var height = bgr.Rows;

        using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);

        var mean = new[] { 127.5f, 127.5f, 127.5f };
        var norm = new[] { (float)(1.0 / 127.5), (float)(1.0 / 127.5), (float)(1.0 / 127.5) };
        input.SubstractMeanNormalize(mean, norm);

        using var extractor = mobilenetV2.CreateExtractor();
        extractor.SetLiteMode(true);
        extractor.SetNumThreads(4);
        extractor.Input("data", input);

        using var detection = new Mat();
        extractor.Extract("detection_out", detection);

        objects.Clear();

        for (var row = 0; row < detection.H; row++)
        {
            var values = detection.Row(row);
            var x = values[2] * width;
            var y = values[3] * height;

            var item = new Object();
            item.Label = (int)values[0];
            item.Prob = values[1];
            item.Rect.X = x;
            item.Rect.Y = y;
            item.Rect.Width = values[4] * width - x;
            item.Rect.Height = values[5] * height - y;
            objects.Add(item);
        }
    }

    return 0;
}
/// <summary>
/// Runs SqueezeNet-SSD (VOC) on <paramref name="bgr"/> and replaces the contents of
/// <paramref name="objects"/> with the detections scaled back to image coordinates.
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectSqueezeNet(NcnnDotNet.OpenCV.Mat bgr, List <Object> objects)
{
    using (var squeezenet = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            squeezenet.Opt.UseVulkanCompute = true;
        }

        // original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD
        // squeezenet_ssd_voc_deploy.prototxt
        // https://drive.google.com/open?id=0B3gersZ2cHIxdGpyZlZnbEQ5Snc
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        squeezenet.LoadParam("squeezenet_ssd_voc.param");
        squeezenet.LoadModel("squeezenet_ssd_voc.bin");

        const int targetSize = 300;
        var width = bgr.Cols;
        var height = bgr.Rows;

        using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);

        // mean subtraction only, no per-channel scaling
        var mean = new[] { 104f, 117f, 123f };
        input.SubstractMeanNormalize(mean, null);

        using var extractor = squeezenet.CreateExtractor();
        extractor.SetNumThreads(4);
        extractor.Input("data", input);

        using var detection = new Mat();
        extractor.Extract("detection_out", detection);

        objects.Clear();

        for (var row = 0; row < detection.H; row++)
        {
            var values = detection.Row(row);
            var x = values[2] * width;
            var y = values[3] * height;

            var item = new Object();
            item.Label = (int)values[0];
            item.Prob = values[1];
            item.Rect.X = x;
            item.Rect.Y = y;
            item.Rect.Width = values[4] * width - x;
            item.Rect.Height = values[5] * height - y;
            objects.Add(item);
        }
    }

    return 0;
}
/// <summary>
/// Runs MobileNet-SSD (VOC) on <paramref name="bgr"/> and replaces the contents of
/// <paramref name="objects"/> with the detections scaled back to image coordinates.
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectMobileNet(NcnnDotNet.OpenCV.Mat bgr, List <Object> objects)
{
    using (var mobilenet = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            mobilenet.Opt.UseVulkanCompute = true;
        }

        // model is converted from https://github.com/chuanqi305/MobileNet-SSD
        // and can be downloaded from https://drive.google.com/open?id=0ByaKLD9QaPtucWk0Y0dha1VVY0U
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        mobilenet.LoadParam("mobilenet_ssd_voc_ncnn.param");
        mobilenet.LoadModel("mobilenet_ssd_voc_ncnn.bin");

        const int targetSize = 300;
        var width = bgr.Cols;
        var height = bgr.Rows;

        using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);

        var mean = new[] { 127.5f, 127.5f, 127.5f };
        var norm = new[] { (float)(1.0 / 127.5), (float)(1.0 / 127.5), (float)(1.0 / 127.5) };
        input.SubstractMeanNormalize(mean, norm);

        using var extractor = mobilenet.CreateExtractor();
        extractor.Input("data", input);

        using var detection = new Mat();
        extractor.Extract("detection_out", detection);

        objects.Clear();

        for (var row = 0; row < detection.H; row++)
        {
            var values = detection.Row(row);
            var x = values[2] * width;
            var y = values[3] * height;

            var item = new Object();
            item.Label = (int)values[0];
            item.Prob = values[1];
            item.Rect.X = x;
            item.Rect.Y = y;
            item.Rect.Width = values[4] * width - x;
            item.Rect.Height = values[5] * height - y;
            objects.Add(item);
        }
    }

    return 0;
}
/// <summary>
/// Classifies <paramref name="bgr"/> with ShuffleNetV2 x0.5, applies a softmax to the
/// raw "fc" output manually, and appends the resulting probabilities to
/// <paramref name="clsScores"/>.
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectShuffleNetV2(NcnnDotNet.OpenCV.Mat bgr, List <float> clsScores)
{
    using (var shuffleNetV2 = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            shuffleNetV2.Opt.UseVulkanCompute = true;
        }

        // https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe
        // models can be downloaded from https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe/releases
        shuffleNetV2.LoadParam("shufflenet_v2_x0.5.param");
        shuffleNetV2.LoadModel("shufflenet_v2_x0.5.bin");

        using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, 224, 224);

        // scale to [0,1], no mean subtraction
        var norm = new[] { 1 / 255.0f, 1 / 255.0f, 1 / 255.0f };
        input.SubstractMeanNormalize(null, norm);

        using var extractor = shuffleNetV2.CreateExtractor();
        extractor.Input("data", input);

        using var fc = new Mat();
        extractor.Extract("fc", fc);

        // manually call softmax on the fc output to convert it into probabilities
        // (skip this step if your model already ends with a softmax operation)
        {
            using var softmax = Ncnn.CreateLayer("Softmax");
            using var pd = new ParamDict();
            softmax.LoadParam(pd);
            softmax.ForwardInplace(fc, shuffleNetV2.Opt);
        }

        using var flattened = fc.Reshape(fc.W * fc.H * fc.C);
        clsScores.Capacity = flattened.W;
        for (var i = 0; i < flattened.W; i++)
        {
            clsScores.Add(flattened[i]);
        }
    }

    return 0;
}
// Port of ncnn's anchor generation from src/layer/proposal.cpp.
// Produces one 4-float row (x0, y0, x1, y1) per ratio/scale combination,
// all centred on (baseSize/2, baseSize/2).
private static Mat GenerateAnchors(int baseSize, Mat ratios, Mat scales)
{
    var ratioCount = ratios.W;
    var scaleCount = scales.W;

    var anchors = new Mat();
    anchors.Create(4, ratioCount * scaleCount);

    var centerX = baseSize * 0.5f;
    var centerY = baseSize * 0.5f;

    for (var r = 0; r < ratioCount; r++)
    {
        var ar = ratios[r];

        // width is rounded first; height is derived from the *rounded* width,
        // mirroring the reference implementation //round(baseSize * sqrt(ar));
        var ratioW = (int)Math.Round(baseSize / Math.Sqrt(ar));
        var ratioH = (int)Math.Round(ratioW * ar);

        for (var s = 0; s < scaleCount; s++)
        {
            var scale = scales[s];
            var halfW = ratioW * scale * 0.5f;
            var halfH = ratioH * scale * 0.5f;

            var anchor = anchors.Row(r * scaleCount + s);
            anchor[0] = centerX - halfW;
            anchor[1] = centerY - halfH;
            anchor[2] = centerX + halfW;
            anchor[3] = centerY + halfH;
        }
    }

    return anchors;
}
/// <summary>
/// Decodes an in-memory image and runs the UltraFace detector on it.
/// </summary>
/// <param name="file">Raw encoded image bytes (e.g. JPEG/PNG).</param>
/// <returns>A <see cref="DetectResult"/> with the image size and detected faces.</returns>
/// <exception cref="NotSupportedException">The bytes could not be decoded as an image.</exception>
public DetectResult Detect(byte[] file)
{
    // BUG FIX: decode as a 3-channel colour image. Mat.FromPixels below is told the
    // buffer is 3-channel BGR (PixelType.Bgr2Rgb); a grayscale decode yields a
    // single-channel buffer, so the pixel data would be misinterpreted.
    using var frame = Cv2.ImDecode(file, CvLoadImage.Color);
    if (frame.IsEmpty)
    {
        throw new NotSupportedException("This file is not supported!!");
    }

    if (Ncnn.IsSupportVulkan)
    {
        Ncnn.CreateGpuInstance();
    }

    using var inMat = Mat.FromPixels(frame.Data, NcnnDotNet.PixelType.Bgr2Rgb, frame.Cols, frame.Rows);
    var faceInfos = this._UltraFace.Detect(inMat).ToArray();

    if (Ncnn.IsSupportVulkan)
    {
        Ncnn.DestroyGpuInstance();
    }

    return new DetectResult(frame.Cols, frame.Rows, faceInfos);
}
/// <summary>
/// Two-stage R-FCN (ResNet-50) detection: stage 1 extracts features and region
/// proposals, stage 2 scores/regresses each RoI; results are NMS-filtered per class
/// and written into <paramref name="objects"/> in original image coordinates.
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectRFCN(NcnnDotNet.OpenCV.Mat bgr, List <Object> objects)
{
    using (var rfcn = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            rfcn.Opt.UseVulkanCompute = true;
        }

        // original pretrained model from https://github.com/YuwenXiong/py-R-FCN
        // https://github.com/YuwenXiong/py-R-FCN/blob/master/models/pascal_voc/ResNet-50/rfcn_end2end/test_agnostic.prototxt
        // https://1drv.ms/u/s!AoN7vygOjLIQqUWHpY67oaC7mopf
        // resnet50_rfcn_final.caffemodel
        rfcn.LoadParam("rfcn_end2end.param");
        rfcn.LoadModel("rfcn_end2end.bin");

        const int targetSize = 224;
        const int maxPerImage = 100;
        const float confidenceThresh = 0.6f; // CONF_THRESH
        const float nmsThreshold = 0.3f; // NMS_THRESH

        // scale so the shorter side becomes targetSize; remember the factor to map back
        var w = bgr.Cols;
        var h = bgr.Rows;
        float scale;
        if (w < h)
        {
            scale = (float)targetSize / w;
            w = targetSize;
            h = (int)(h * scale);
        }
        else
        {
            scale = (float)targetSize / h;
            h = targetSize;
            w = (int)(w * scale);
        }

        using var @in = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, w, h);
        var meanVals = new[] { 102.9801f, 115.9465f, 122.7717f };
        @in.SubstractMeanNormalize(meanVals, null);

        // im_info = (height, width, scale) for the proposal layer
        using var im_info = new Mat(3);
        im_info[0] = h;
        im_info[1] = w;
        im_info[2] = scale;

        // step1, extract feature and all rois
        using var ex1 = rfcn.CreateExtractor();
        ex1.Input("data", @in);
        ex1.Input("im_info", im_info);

        using var rfcnCls = new Mat();
        using var rfcnBBox = new Mat();
        using var rois = new Mat(); // all rois
        ex1.Extract("rfcn_cls", rfcnCls);
        ex1.Extract("rfcn_bbox", rfcnBBox);
        ex1.Extract("rois", rois);

        // step2, extract bbox and score for each roi
        var classCandidates = new List <List <Object> >();
        for (var i = 0; i < rois.C; i++)
        {
            using var ex2 = rfcn.CreateExtractor();
            using var roi = rois.Channel(i); // get single roi
            ex2.Input("rfcn_cls", rfcnCls);
            ex2.Input("rfcn_bbox", rfcnBBox);
            ex2.Input("rois", roi);

            using var bboxPred = new Mat();
            using var clsProb = new Mat();
            ex2.Extract("bbox_pred", bboxPred);
            ex2.Extract("cls_prob", clsProb);

            var numClass = clsProb.W;
            // There is no equivalent to std::vector::resize in C#
            Resize(classCandidates, numClass);

            // find class id with highest score
            var label = 0;
            var score = 0.0f;
            for (var j = 0; j < numClass; j++)
            {
                var classScore = clsProb[j];
                if (classScore > score)
                {
                    label = j;
                    score = classScore;
                }
            }

            // ignore background or low score
            if (label == 0 || score <= confidenceThresh)
            {
                continue;
            }

            // fprintf(stderr, "%d = %f\n", label, score);

            // unscale to image size
            var x1 = roi[0] / scale;
            var y1 = roi[1] / scale;
            var x2 = roi[2] / scale;
            var y2 = roi[3] / scale;

            var pbW = x2 - x1 + 1;
            var pbH = y2 - y1 + 1;

            // apply bbox regression; NOTE(review): the fixed offset 4 (not label * 4)
            // matches the class-agnostic regression head of test_agnostic.prototxt
            var dx = bboxPred[4];
            var dy = bboxPred[4 + 1];
            var dw = bboxPred[4 + 2];
            var dh = bboxPred[4 + 3];

            var cx = x1 + pbW * 0.5f;
            var cy = y1 + pbH * 0.5f;

            var objCx = cx + pbW * dx;
            var objCy = cy + pbH * dy;

            var objW = pbW * Math.Exp(dw);
            var objH = pbH * Math.Exp(dh);

            var objX1 = (float)(objCx - objW * 0.5f);
            var objY1 = (float)(objCy - objH * 0.5f);
            var objX2 = (float)(objCx + objW * 0.5f);
            var objY2 = (float)(objCy + objH * 0.5f);

            // clip to image bounds
            objX1 = Math.Max(Math.Min(objX1, bgr.Cols - 1), 0.0f);
            objY1 = Math.Max(Math.Min(objY1, bgr.Rows - 1), 0.0f);
            objX2 = Math.Max(Math.Min(objX2, bgr.Cols - 1), 0.0f);
            objY2 = Math.Max(Math.Min(objY2, bgr.Rows - 1), 0.0f);

            // append object
            var obj = new Object
            {
                Rect = new Rect <float>(objX1, objY1, objX2 - objX1 + 1, objY2 - objY1 + 1),
                Label = label,
                Prob = score
            };
            classCandidates[label].Add(obj);
        }

        // post process: per-class NMS, then a global score sort capped at maxPerImage
        objects.Clear();
        for (var i = 0; i < (int)classCandidates.Count; i++)
        {
            var candidates = classCandidates[i];

            QsortDescentInplace(candidates);

            var picked = new List <int>();
            NmsSortedBBoxes(candidates, picked, nmsThreshold);

            for (var j = 0; j < picked.Count; j++)
            {
                var z = picked[j];
                objects.Add(candidates[z]);
            }
        }

        QsortDescentInplace(objects);

        if (maxPerImage > 0 && maxPerImage < objects.Count)
        {
            Resize(objects, maxPerImage);
        }
    }

    return(0);
}
/// <summary>
/// Two-stage Faster R-CNN (ZF) detection: stage 1 extracts the conv5 feature map and
/// region proposals, stage 2 scores/regresses each RoI; results are NMS-filtered per
/// class and written into <paramref name="objects"/> in original image coordinates.
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectFasterRCNN(NcnnDotNet.OpenCV.Mat bgr, List <Object> objects)
{
    using (var fasterRcnn = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            fasterRcnn.Opt.UseVulkanCompute = true;
        }

        // original pretrained model from https://github.com/rbgirshick/py-faster-rcnn
        // py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt
        // https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0
        // ZF_faster_rcnn_final.caffemodel
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        fasterRcnn.LoadParam("ZF_faster_rcnn_final.param");
        fasterRcnn.LoadModel("ZF_faster_rcnn_final.bin");

        // hyper parameters taken from
        // py-faster-rcnn/lib/fast_rcnn/config.py
        // py-faster-rcnn/lib/fast_rcnn/test.py
        const int targetSize = 600; // __C.TEST.SCALES
        const int maxPerImage = 100;
        const float confidenceThresh = 0.05f;
        const float nmsThreshold = 0.3f;// __C.TEST.NMS

        // scale so the shorter side becomes targetSize; remember the factor to map back
        var w = bgr.Cols;
        var h = bgr.Rows;
        float scale;
        if (w < h)
        {
            scale = (float)targetSize / w;
            w = targetSize;
            h = (int)(h * scale);
        }
        else
        {
            scale = (float)targetSize / h;
            h = targetSize;
            w = (int)(w * scale);
        }

        using var @in = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, w, h);
        var meanVals = new[] { 102.9801f, 115.9465f, 122.7717f };
        @in.SubstractMeanNormalize(meanVals, null);

        // im_info = (height, width, scale) for the proposal layer
        using var im_info = new Mat(3);
        im_info[0] = h;
        im_info[1] = w;
        im_info[2] = scale;

        // step1, extract feature and all rois
        using var ex1 = fasterRcnn.CreateExtractor();
        ex1.Input("data", @in);
        ex1.Input("im_info", im_info);

        using var conv5Relu5 = new Mat(); // feature
        using var rois = new Mat(); // all rois
        ex1.Extract("conv5_relu5", conv5Relu5);
        ex1.Extract("rois", rois);

        // step2, extract bbox and score for each roi
        var classCandidates = new List <List <Object> >();
        for (var i = 0; i < rois.C; i++)
        {
            using var ex2 = fasterRcnn.CreateExtractor();
            using var roi = rois.Channel(i); // get single roi
            ex2.Input("conv5_relu5", conv5Relu5);
            ex2.Input("rois", roi);

            using var bboxPred = new Mat();
            using var clsProb = new Mat();
            ex2.Extract("bbox_pred", bboxPred);
            ex2.Extract("cls_prob", clsProb);

            var numClass = clsProb.W;
            // There is no equivalent to std::vector::resize in C#
            Resize(classCandidates, numClass);

            // find class id with highest score
            var label = 0;
            var score = 0.0f;
            for (var j = 0; j < numClass; j++)
            {
                var classScore = clsProb[j];
                if (classScore > score)
                {
                    label = j;
                    score = classScore;
                }
            }

            // ignore background or low score
            if (label == 0 || score <= confidenceThresh)
            {
                continue;
            }

            // fprintf(stderr, "%d = %f\n", label, score);

            // unscale to image size
            var x1 = roi[0] / scale;
            var y1 = roi[1] / scale;
            var x2 = roi[2] / scale;
            var y2 = roi[3] / scale;

            var pbW = x2 - x1 + 1;
            var pbH = y2 - y1 + 1;

            // apply bbox regression (per-class deltas at offset label * 4)
            var dx = bboxPred[label * 4];
            var dy = bboxPred[label * 4 + 1];
            var dw = bboxPred[label * 4 + 2];
            var dh = bboxPred[label * 4 + 3];

            var cx = x1 + pbW * 0.5f;
            var cy = y1 + pbH * 0.5f;

            var objCx = cx + pbW * dx;
            var objCy = cy + pbH * dy;

            var objW = pbW * Math.Exp(dw);
            var objH = pbH * Math.Exp(dh);

            var objX1 = (float)(objCx - objW * 0.5f);
            var objY1 = (float)(objCy - objH * 0.5f);
            var objX2 = (float)(objCx + objW * 0.5f);
            var objY2 = (float)(objCy + objH * 0.5f);

            // clip to image bounds
            objX1 = Math.Max(Math.Min(objX1, bgr.Cols - 1), 0.0f);
            objY1 = Math.Max(Math.Min(objY1, bgr.Rows - 1), 0.0f);
            objX2 = Math.Max(Math.Min(objX2, bgr.Cols - 1), 0.0f);
            objY2 = Math.Max(Math.Min(objY2, bgr.Rows - 1), 0.0f);

            // append object
            var obj = new Object
            {
                Rect = new Rect <float>(objX1, objY1, objX2 - objX1 + 1, objY2 - objY1 + 1),
                Label = label,
                Prob = score
            };
            classCandidates[label].Add(obj);
        }

        // post process: per-class NMS, then a global score sort capped at maxPerImage
        objects.Clear();
        for (var i = 0; i < (int)classCandidates.Count; i++)
        {
            var candidates = classCandidates[i];

            QsortDescentInplace(candidates);

            var picked = new List <int>();
            NmsSortedBBoxes(candidates, picked, nmsThreshold);

            for (var j = 0; j < picked.Count; j++)
            {
                var z = picked[j];
                objects.Add(candidates[z]);
            }
        }

        QsortDescentInplace(objects);

        if (maxPerImage > 0 && maxPerImage < objects.Count)
        {
            Resize(objects, maxPerImage);
        }
    }

    return(0);
}
/// <summary>
/// Detects faces with RetinaFace (mnet.25): generates proposals at strides 32/16/8,
/// merges them, sorts by score, applies NMS, and appends the surviving faces (clipped
/// to the image bounds) to <paramref name="faceObjects"/>.
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectRetinaFace(NcnnDotNet.OpenCV.Mat bgr, List <FaceObject> faceObjects)
{
    using (var retinaFace = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            retinaFace.Opt.UseVulkanCompute = true;
        }

        // model is converted from
        // https://github.com/deepinsight/insightface/tree/master/RetinaFace#retinaface-pretrained-models
        // https://github.com/deepinsight/insightface/issues/669
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        // retinaface.load_param("retinaface-R50.param");
        // retinaface.load_model("retinaface-R50.bin");
        retinaFace.LoadParam("mnet.25-opt.param");
        retinaFace.LoadModel("mnet.25-opt.bin");

        const float probThreshold = 0.8f;
        const float nmsThreshold = 0.4f;

        var imgW = bgr.Cols;
        var imgH = bgr.Rows;

        using var @in = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr2Rgb, bgr.Cols, bgr.Rows, imgW, imgH);

        using var ex = retinaFace.CreateExtractor();
        ex.Input("data", @in);

        var faceProposals = new List <FaceObject>();

        // stride 32
        {
            using var scoreBlob = new Mat();
            using var bboxBlob = new Mat();
            using var landmarkBlob = new Mat();
            ex.Extract("face_rpn_cls_prob_reshape_stride32", scoreBlob);
            ex.Extract("face_rpn_bbox_pred_stride32", bboxBlob);
            ex.Extract("face_rpn_landmark_pred_stride32", landmarkBlob);

            const int baseSize = 16;
            const int featStride = 32;
            using var ratios = new Mat(1);
            ratios[0] = 1.0f;
            using var scales = new Mat(2);
            scales[0] = 32.0f;
            scales[1] = 16.0f;
            using var anchors = GenerateAnchors(baseSize, ratios, scales);

            var faceObjects32 = new List <FaceObject>();
            GenerateProposals(anchors, featStride, scoreBlob, bboxBlob, landmarkBlob, probThreshold, faceObjects32);
            faceProposals.AddRange(faceObjects32);
        }

        // stride 16
        {
            using var scoreBlob = new Mat();
            using var bboxBlob = new Mat();
            using var landmarkBlob = new Mat();
            ex.Extract("face_rpn_cls_prob_reshape_stride16", scoreBlob);
            ex.Extract("face_rpn_bbox_pred_stride16", bboxBlob);
            ex.Extract("face_rpn_landmark_pred_stride16", landmarkBlob);

            const int baseSize = 16;
            const int featStride = 16;
            using var ratios = new Mat(1);
            ratios[0] = 1.0f;
            using var scales = new Mat(2);
            scales[0] = 8.0f;
            scales[1] = 4.0f;
            using var anchors = GenerateAnchors(baseSize, ratios, scales);

            var faceObjects16 = new List <FaceObject>();
            GenerateProposals(anchors, featStride, scoreBlob, bboxBlob, landmarkBlob, probThreshold, faceObjects16);
            faceProposals.AddRange(faceObjects16);
        }

        // stride 8
        {
            using var scoreBlob = new Mat();
            using var bboxBlob = new Mat();
            using var landmarkBlob = new Mat();
            ex.Extract("face_rpn_cls_prob_reshape_stride8", scoreBlob);
            ex.Extract("face_rpn_bbox_pred_stride8", bboxBlob);
            ex.Extract("face_rpn_landmark_pred_stride8", landmarkBlob);

            const int baseSize = 16;
            const int featStride = 8;
            using var ratios = new Mat(1);
            ratios[0] = 1.0f;
            using var scales = new Mat(2);
            scales[0] = 2.0f;
            scales[1] = 1.0f;
            using var anchors = GenerateAnchors(baseSize, ratios, scales);

            var faceObjects8 = new List <FaceObject>();
            GenerateProposals(anchors, featStride, scoreBlob, bboxBlob, landmarkBlob, probThreshold, faceObjects8);
            faceProposals.AddRange(faceObjects8);
        }

        // sort all proposals by score from highest to lowest
        QsortDescentInplace(faceProposals);

        // apply nms with nms_threshold
        var picked = new List <int>();
        NmsSortedBBoxes(faceProposals, picked, nmsThreshold);

        var faceCount = picked.Count;

        faceObjects.AddRange(new FaceObject[faceCount]);
        for (var i = 0; i < faceCount; i++)
        {
            faceObjects[i] = faceProposals[picked[i]];

            // clip to image size
            // BUG FIX: the clip previously read faceProposals[i] — the i-th raw
            // (unpicked) proposal — so kept faces were clamped against unrelated
            // rectangles. Clip the picked face itself, as in ncnn's retinaface example.
            var x0 = faceObjects[i].Rect.X;
            var y0 = faceObjects[i].Rect.Y;
            var x1 = x0 + faceObjects[i].Rect.Width;
            var y1 = y0 + faceObjects[i].Rect.Height;

            x0 = Math.Max(Math.Min(x0, (float)imgW - 1), 0.0f);
            y0 = Math.Max(Math.Min(y0, (float)imgH - 1), 0.0f);
            x1 = Math.Max(Math.Min(x1, (float)imgW - 1), 0.0f);
            y1 = Math.Max(Math.Min(y1, (float)imgH - 1), 0.0f);

            faceObjects[i].Rect.X = x0;
            faceObjects[i].Rect.Y = y0;
            faceObjects[i].Rect.Width = x1 - x0;
            faceObjects[i].Rect.Height = y1 - y0;
        }
    }

    return(0);
}
/// <summary>
/// Walks every anchor over the score/bbox/landmark feature maps, decodes the anchors
/// whose face score meets <paramref name="probThreshold"/> into face rectangles with
/// 5-point landmarks, and appends them to <paramref name="faceObjects"/>.
/// </summary>
private static void GenerateProposals(Mat anchors, int featStride, Mat scoreBlob, Mat bboxBlob, Mat landmarkBlob, float probThreshold, IList <FaceObject> faceObjects)
{
    var w = scoreBlob.W;
    var h = scoreBlob.H;

    // generate face proposal from bbox deltas and shifted anchors
    var numAnchors = anchors.H;

    for (var q = 0; q < numAnchors; q++)
    {
        var anchor = anchors.Row(q);

        // foreground scores live in the second half of the channel axis (offset numAnchors);
        // bbox has 4 channels per anchor, landmark has 10 (5 points x/y) per anchor
        using var score = scoreBlob.Channel(q + numAnchors);
        using var bbox = bboxBlob.ChannelRange(q * 4, 4);
        using var landmark = landmarkBlob.ChannelRange(q * 10, 10);

        // shifted anchor: start at the anchor's own top-left, advance by featStride per cell
        var anchorY = anchor[1];

        var anchorW = anchor[2] - anchor[0];
        var anchorH = anchor[3] - anchor[1];

        for (var i = 0; i < h; i++)
        {
            var anchorX = anchor[0];

            for (var j = 0; j < w; j++)
            {
                var index = i * w + j;

                var prob = score[index];
                if (prob >= probThreshold)
                {
                    // apply center size
                    using var mat0 = bbox.Channel(0);
                    using var mat1 = bbox.Channel(1);
                    using var mat2 = bbox.Channel(2);
                    using var mat3 = bbox.Channel(3);
                    var dx = mat0[index];
                    var dy = mat1[index];
                    var dw = mat2[index];
                    var dh = mat3[index];

                    var cx = anchorX + anchorW * 0.5f;
                    var cy = anchorY + anchorH * 0.5f;

                    var pbCx = cx + anchorW * dx;
                    var pbCy = cy + anchorH * dy;

                    var pbW = anchorW * (float)Math.Exp(dw);
                    var pbH = anchorH * (float)Math.Exp(dh);

                    var x0 = pbCx - pbW * 0.5f;
                    var y0 = pbCy - pbH * 0.5f;
                    var x1 = pbCx + pbW * 0.5f;
                    var y1 = pbCy + pbH * 0.5f;

                    var obj = new FaceObject();
                    obj.Rect.X = x0;
                    obj.Rect.Y = y0;
                    obj.Rect.Width = x1 - x0 + 1;
                    obj.Rect.Height = y1 - y0 + 1;

                    // decode the five landmark points relative to the anchor centre
                    using var landmarkMat0 = landmark.Channel(0);
                    using var landmarkMat1 = landmark.Channel(1);
                    using var landmarkMat2 = landmark.Channel(2);
                    using var landmarkMat3 = landmark.Channel(3);
                    using var landmarkMat4 = landmark.Channel(4);
                    using var landmarkMat5 = landmark.Channel(5);
                    using var landmarkMat6 = landmark.Channel(6);
                    using var landmarkMat7 = landmark.Channel(7);
                    using var landmarkMat8 = landmark.Channel(8);
                    using var landmarkMat9 = landmark.Channel(9);
                    obj.Landmark[0].X = cx + (anchorW + 1) * landmarkMat0[index];
                    obj.Landmark[0].Y = cy + (anchorH + 1) * landmarkMat1[index];
                    obj.Landmark[1].X = cx + (anchorW + 1) * landmarkMat2[index];
                    obj.Landmark[1].Y = cy + (anchorH + 1) * landmarkMat3[index];
                    obj.Landmark[2].X = cx + (anchorW + 1) * landmarkMat4[index];
                    obj.Landmark[2].Y = cy + (anchorH + 1) * landmarkMat5[index];
                    obj.Landmark[3].X = cx + (anchorW + 1) * landmarkMat6[index];
                    obj.Landmark[3].Y = cy + (anchorH + 1) * landmarkMat7[index];
                    obj.Landmark[4].X = cx + (anchorW + 1) * landmarkMat8[index];
                    obj.Landmark[4].Y = cy + (anchorH + 1) * landmarkMat9[index];

                    obj.Prob = prob;

                    faceObjects.Add(obj);
                }

                anchorX += featStride;
            }

            anchorY += featStride;
        }
    }
}
/// <summary>
/// Runs the simple-baseline pose network on <paramref name="bgr"/> and replaces the
/// contents of <paramref name="keyPoints"/> with one keypoint per heatmap channel
/// (argmax position mapped back to image coordinates, plus its probability).
/// </summary>
/// <returns>Always 0.</returns>
private static int DetectPoseNet(NcnnDotNet.OpenCV.Mat bgr, List <KeyPoint> keyPoints)
{
    using (var poseNet = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            poseNet.Opt.UseVulkanCompute = true;
        }

        // the simple baseline human pose estimation from gluon-cv
        // https://gluon-cv.mxnet.io/build/examples_pose/demo_simple_pose.html
        // mxnet model exported via
        //   pose_net.hybridize()
        //   pose_net.export('pose')
        // then mxnet2ncnn
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        poseNet.LoadParam("pose.param");
        poseNet.LoadModel("pose.bin");

        var w = bgr.Cols;
        var h = bgr.Rows;

        using var @in = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr2Rgb, bgr.Cols, bgr.Rows, 192, 256);

        // transforms.ToTensor(),
        // transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        // R' = (R / 255 - 0.485) / 0.229 = (R - 0.485 * 255) / 0.229 / 255
        // G' = (G / 255 - 0.456) / 0.224 = (G - 0.456 * 255) / 0.224 / 255
        // B' = (B / 255 - 0.406) / 0.225 = (B - 0.406 * 255) / 0.225 / 255
        var meanVals = new[] { 0.485f * 255.0f, 0.456f * 255.0f, 0.406f * 255.0f };
        var normVals = new[] { 1 / 0.229f / 255.0f, 1 / 0.224f / 255.0f, 1 / 0.225f / 255.0f };
        @in.SubstractMeanNormalize(meanVals, normVals);

        using var ex = poseNet.CreateExtractor();
        ex.Input("data", @in);

        using var @out = new Mat();
        ex.Extract("conv3_fwd", @out);

        // resolve point from heatmap: one heatmap per keypoint lives on each CHANNEL
        // BUG FIX: the loop previously iterated @out.H (heatmap height) while indexing
        // @out.Channel(p); iterate @out.C so every keypoint channel is visited exactly once.
        keyPoints.Clear();
        for (var p = 0; p < @out.C; p++)
        {
            using var m = @out.Channel(p);

            // argmax over the channel's 2D heatmap
            var maxProb = 0f;
            var maxX = 0;
            var maxY = 0;
            for (var y = 0; y < @out.H; y++)
            {
                var ptr = m.Row(y);
                for (var x = 0; x < @out.W; x++)
                {
                    var prob = ptr[x];
                    if (prob > maxProb)
                    {
                        maxProb = prob;
                        maxX = x;
                        maxY = y;
                    }
                }
            }

            var keyPoint = new KeyPoint
            {
                // map heatmap cell back to original image coordinates
                P = new Point <float>(maxX * w / (float)@out.W, maxY * h / (float)@out.H),
                Prob = maxProb
            };
            keyPoints.Add(keyPoint);
        }
    }

    return(0);
}