private static int DetectSqueezeNet(NcnnDotNet.OpenCV.Mat bgr, List<float> clsScores)
{
    // Classify the image with SqueezeNet v1.1 and append the raw "prob"
    // layer outputs (one confidence per class) to clsScores.
    using var squeezeNet = new Net();

    if (Ncnn.IsSupportVulkan)
    {
        squeezeNet.Opt.UseVulkanCompute = true;
    }

    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    squeezeNet.LoadParam("squeezenet_v1.1.param");
    squeezeNet.LoadModel("squeezenet_v1.1.bin");

    // SqueezeNet v1.1 takes a 227x227 BGR input with per-channel mean subtraction only.
    using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, 227, 227);
    input.SubstractMeanNormalize(new[] { 104f, 117f, 123f }, null);

    using var extractor = squeezeNet.CreateExtractor();
    extractor.Input("data", input);

    using var prob = new Mat();
    extractor.Extract("prob", prob);

    clsScores.Capacity = prob.W;
    for (var index = 0; index < prob.W; index++)
    {
        clsScores.Add(prob[index]);
    }

    return 0;
}
private static int DetectYoloV3(NcnnDotNet.OpenCV.Mat bgr, List<Object> objects)
{
    // MobileNetV2-YOLOv3 object detection; replaces the contents of
    // `objects` with one entry per row of the "detection_out" blob.
    using var yolov3 = new Net();

    if (Ncnn.IsSupportVulkan)
    {
        yolov3.Opt.UseVulkanCompute = true;
    }

    // original pretrained model from https://github.com/eric612/MobileNet-YOLO
    // param : https://drive.google.com/open?id=1V9oKHP6G6XvXZqhZbzNKL6FI_clRWdC-
    // bin : https://drive.google.com/open?id=1DBcuFCr-856z3FRQznWL_S5h-Aj3RawA
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    yolov3.LoadParam("mobilenetv2_yolov3.param");
    yolov3.LoadModel("mobilenetv2_yolov3.bin");

    const int targetSize = 352;

    var imgW = bgr.Cols;
    var imgH = bgr.Rows;

    using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);

    // Normalize to roughly [-1, 1]: (x - 127.5) * 0.007843.
    input.SubstractMeanNormalize(new[] { 127.5f, 127.5f, 127.5f }, new[] { 0.007843f, 0.007843f, 0.007843f });

    using var extractor = yolov3.CreateExtractor();
    extractor.SetNumThreads(4);
    extractor.Input("data", input);

    using var detection = new Mat();
    extractor.Extract("detection_out", detection);

    objects.Clear();
    for (var row = 0; row < detection.H; row++)
    {
        // Each row: [label, prob, x1, y1, x2, y2] with coordinates in [0, 1].
        var values = detection.Row(row);
        var detected = new Object();
        detected.Label = (int)values[0];
        detected.Prob = values[1];
        detected.Rect.X = values[2] * imgW;
        detected.Rect.Y = values[3] * imgH;
        detected.Rect.Width = values[4] * imgW - detected.Rect.X;
        detected.Rect.Height = values[5] * imgH - detected.Rect.Y;
        objects.Add(detected);
    }

    return 0;
}
private static int DetectMobileNetV3(NcnnDotNet.OpenCV.Mat bgr, List<Object> objects)
{
    // MobileNetV3-SSDLite (VOC) object detection; replaces the contents of
    // `objects` with one entry per row of the "detection_out" blob.
    using var mobilenetV3 = new Net();

    if (Ncnn.IsSupportVulkan)
    {
        mobilenetV3.Opt.UseVulkanCompute = true;
    }

    // converted ncnn model from https://github.com/ujsyehao/mobilenetv3-ssd
    mobilenetV3.LoadParam("mobilenetv3_ssdlite_voc.param");
    mobilenetV3.LoadModel("mobilenetv3_ssdlite_voc.bin");

    const int targetSize = 300;

    var imgW = bgr.Cols;
    var imgH = bgr.Rows;

    // This model expects RGB channel order at 300x300.
    using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr2Rgb, bgr.Cols, bgr.Rows, targetSize, targetSize);

    // Mean subtraction only (the 1.0 norm factor leaves values unscaled).
    input.SubstractMeanNormalize(new[] { 123.675f, 116.28f, 103.53f }, new[] { 1.0f, 1.0f, 1.0f });

    using var extractor = mobilenetV3.CreateExtractor();
    extractor.SetLiteMode(true);
    extractor.SetNumThreads(4);
    extractor.Input("input", input);

    using var detection = new Mat();
    extractor.Extract("detection_out", detection);

    objects.Clear();
    for (var row = 0; row < detection.H; row++)
    {
        // Each row: [label, prob, x1, y1, x2, y2] with coordinates in [0, 1].
        var values = detection.Row(row);
        var detected = new Object();
        detected.Label = (int)values[0];
        detected.Prob = values[1];

        // filter out cross-boundary coordinates before mapping back to image space
        var x1 = Clamp(values[2] * targetSize, 0.0f, targetSize - 1) / targetSize * imgW;
        var y1 = Clamp(values[3] * targetSize, 0.0f, targetSize - 1) / targetSize * imgH;
        var x2 = Clamp(values[4] * targetSize, 0.0f, targetSize - 1) / targetSize * imgW;
        var y2 = Clamp(values[5] * targetSize, 0.0f, targetSize - 1) / targetSize * imgH;

        detected.Rect.X = x1;
        detected.Rect.Y = y1;
        detected.Rect.Width = x2 - x1;
        detected.Rect.Height = y2 - y1;

        objects.Add(detected);
    }

    return 0;
}
private static int DetectYoloV2(NcnnDotNet.OpenCV.Mat bgr, List<Object> objects)
{
    // MobileNet-YOLOv2 object detection; replaces the contents of
    // `objects` with one entry per row of the "detection_out" blob.
    using var yolov2 = new Net();

    if (Ncnn.IsSupportVulkan)
    {
        yolov2.Opt.UseVulkanCompute = true;
    }

    // original pretrained model from https://github.com/eric612/MobileNet-YOLO
    // https://github.com/eric612/MobileNet-YOLO/blob/master/models/yolov2/mobilenet_yolo_deploy.prototxt
    // https://github.com/eric612/MobileNet-YOLO/blob/master/models/yolov2/mobilenet_yolo_deploy_iter_80000.caffemodel
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    yolov2.LoadParam("mobilenet_yolo.param");
    yolov2.LoadModel("mobilenet_yolo.bin");

    const int targetSize = 416;

    var imgW = bgr.Cols;
    var imgH = bgr.Rows;

    using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);

    // the Caffe-YOLOv2-Windows style
    // X' = X * scale - mean
    // NOTE: order matters — scale first, then subtract the mean of 1.0.
    input.SubstractMeanNormalize(null, new[] { 0.007843f, 0.007843f, 0.007843f });
    input.SubstractMeanNormalize(new[] { 1.0f, 1.0f, 1.0f }, null);

    using var extractor = yolov2.CreateExtractor();
    extractor.SetNumThreads(4);
    extractor.Input("data", input);

    using var detection = new Mat();
    extractor.Extract("detection_out", detection);

    objects.Clear();
    for (var row = 0; row < detection.H; row++)
    {
        // Each row: [label, prob, x1, y1, x2, y2] with coordinates in [0, 1].
        var values = detection.Row(row);
        var detected = new Object();
        detected.Label = (int)values[0];
        detected.Prob = values[1];
        detected.Rect.X = values[2] * imgW;
        detected.Rect.Y = values[3] * imgH;
        detected.Rect.Width = values[4] * imgW - detected.Rect.X;
        detected.Rect.Height = values[5] * imgH - detected.Rect.Y;
        objects.Add(detected);
    }

    return 0;
}
private static int DetectPeleeNet(NcnnDotNet.OpenCV.Mat bgr, List<Object> objects, NcnnDotNet.Mat resized)
{
    // PeleeNet-SSD detection plus a segmentation head: fills `objects` from
    // "detection_out" and bilinearly upsamples the "sigmoid" segmentation
    // map into `resized` at the original image resolution.
    using var peleenet = new Net();

    if (Ncnn.IsSupportVulkan)
    {
        peleenet.Opt.UseVulkanCompute = true;
    }

    // model is converted from https://github.com/eric612/MobileNet-YOLO
    // and can be downloaded from https://drive.google.com/open?id=1Wt6jKv13sBRMHgrGAJYlOlRF-o80pC0g
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    peleenet.LoadParam("pelee.param");
    peleenet.LoadModel("pelee.bin");

    const int targetSize = 300;

    var imgW = bgr.Cols;
    var imgH = bgr.Rows;

    using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);
    input.SubstractMeanNormalize(new[] { 103.9f, 116.7f, 123.6f }, new[] { 0.017f, 0.017f, 0.017f });

    using var extractor = peleenet.CreateExtractor();
    //extractor.SetNumThreads(4);
    extractor.Input("data", input);

    using var detection = new Mat();
    extractor.Extract("detection_out", detection);

    objects.Clear();
    for (var row = 0; row < detection.H; row++)
    {
        // Each row: [label, prob, x1, y1, x2, y2] with coordinates in [0, 1].
        var values = detection.Row(row);
        var detected = new Object();
        detected.Label = (int)values[0];
        detected.Prob = values[1];
        detected.Rect.X = values[2] * imgW;
        detected.Rect.Y = values[3] * imgH;
        detected.Rect.Width = values[4] * imgW - detected.Rect.X;
        detected.Rect.Height = values[5] * imgH - detected.Rect.Y;
        objects.Add(detected);
    }

    // Segmentation branch: upsample the sigmoid map back to image size.
    using var segOut = new Mat();
    extractor.Extract("sigmoid", segOut);
    Ncnn.ResizeBilinear(segOut, resized, imgW, imgH);

    return 0;
}
private static int DetectMobileNetV2(NcnnDotNet.OpenCV.Mat bgr, List<Object> objects)
{
    // MobileNetV2-SSDLite (VOC) object detection; replaces the contents of
    // `objects` with one entry per row of the "detection_out" blob.
    using var mobilenetV2 = new Net();

    if (Ncnn.IsSupportVulkan)
    {
        mobilenetV2.Opt.UseVulkanCompute = true;
    }

    // ToDo: Support Custom Layer from C#
    //mobilenetV2.register_custom_layer("Silence", Noop_layer_creator);

    // original pretrained model from https://github.com/chuanqi305/MobileNetv2-SSDLite
    // https://github.com/chuanqi305/MobileNetv2-SSDLite/blob/master/ssdlite/voc/deploy.prototxt
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    mobilenetV2.LoadParam("mobilenetv2_ssdlite_voc.param");
    mobilenetV2.LoadModel("mobilenetv2_ssdlite_voc.bin");

    const int targetSize = 300;

    var imgW = bgr.Cols;
    var imgH = bgr.Rows;

    using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);

    // Normalize to roughly [-1, 1]: (x - 127.5) / 127.5.
    input.SubstractMeanNormalize(
        new[] { 127.5f, 127.5f, 127.5f },
        new[] { (float)(1.0 / 127.5), (float)(1.0 / 127.5), (float)(1.0 / 127.5) });

    using var extractor = mobilenetV2.CreateExtractor();
    extractor.SetLiteMode(true);
    extractor.SetNumThreads(4);
    extractor.Input("data", input);

    using var detection = new Mat();
    extractor.Extract("detection_out", detection);

    objects.Clear();
    for (var row = 0; row < detection.H; row++)
    {
        // Each row: [label, prob, x1, y1, x2, y2] with coordinates in [0, 1].
        var values = detection.Row(row);
        var detected = new Object();
        detected.Label = (int)values[0];
        detected.Prob = values[1];
        detected.Rect.X = values[2] * imgW;
        detected.Rect.Y = values[3] * imgH;
        detected.Rect.Width = values[4] * imgW - detected.Rect.X;
        detected.Rect.Height = values[5] * imgH - detected.Rect.Y;
        objects.Add(detected);
    }

    return 0;
}
private static int DetectSqueezeNet(NcnnDotNet.OpenCV.Mat bgr, List<Object> objects)
{
    // SqueezeNet-SSD (VOC) object detection; replaces the contents of
    // `objects` with one entry per row of the "detection_out" blob.
    using var squeezenet = new Net();

    if (Ncnn.IsSupportVulkan)
    {
        squeezenet.Opt.UseVulkanCompute = true;
    }

    // original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD
    // squeezenet_ssd_voc_deploy.prototxt
    // https://drive.google.com/open?id=0B3gersZ2cHIxdGpyZlZnbEQ5Snc
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    squeezenet.LoadParam("squeezenet_ssd_voc.param");
    squeezenet.LoadModel("squeezenet_ssd_voc.bin");

    const int targetSize = 300;

    var imgW = bgr.Cols;
    var imgH = bgr.Rows;

    using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);

    // Mean subtraction only; no scaling for this model.
    input.SubstractMeanNormalize(new[] { 104f, 117f, 123f }, null);

    using var extractor = squeezenet.CreateExtractor();
    extractor.SetNumThreads(4);
    extractor.Input("data", input);

    using var detection = new Mat();
    extractor.Extract("detection_out", detection);

    objects.Clear();
    for (var row = 0; row < detection.H; row++)
    {
        // Each row: [label, prob, x1, y1, x2, y2] with coordinates in [0, 1].
        var values = detection.Row(row);
        var detected = new Object();
        detected.Label = (int)values[0];
        detected.Prob = values[1];
        detected.Rect.X = values[2] * imgW;
        detected.Rect.Y = values[3] * imgH;
        detected.Rect.Width = values[4] * imgW - detected.Rect.X;
        detected.Rect.Height = values[5] * imgH - detected.Rect.Y;
        objects.Add(detected);
    }

    return 0;
}
private static int DetectMobileNet(NcnnDotNet.OpenCV.Mat bgr, List<Object> objects)
{
    // MobileNet-SSD (VOC) object detection; replaces the contents of
    // `objects` with one entry per row of the "detection_out" blob.
    using var mobilenet = new Net();

    if (Ncnn.IsSupportVulkan)
    {
        mobilenet.Opt.UseVulkanCompute = true;
    }

    // model is converted from https://github.com/chuanqi305/MobileNet-SSD
    // and can be downloaded from https://drive.google.com/open?id=0ByaKLD9QaPtucWk0Y0dha1VVY0U
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    mobilenet.LoadParam("mobilenet_ssd_voc_ncnn.param");
    mobilenet.LoadModel("mobilenet_ssd_voc_ncnn.bin");

    const int targetSize = 300;

    var imgW = bgr.Cols;
    var imgH = bgr.Rows;

    using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);

    // Normalize to roughly [-1, 1]: (x - 127.5) / 127.5.
    input.SubstractMeanNormalize(
        new[] { 127.5f, 127.5f, 127.5f },
        new[] { (float)(1.0 / 127.5), (float)(1.0 / 127.5), (float)(1.0 / 127.5) });

    using var extractor = mobilenet.CreateExtractor();
    //extractor.SetNumThreads(4);
    extractor.Input("data", input);

    using var detection = new Mat();
    extractor.Extract("detection_out", detection);

    objects.Clear();
    for (var row = 0; row < detection.H; row++)
    {
        // Each row: [label, prob, x1, y1, x2, y2] with coordinates in [0, 1].
        var values = detection.Row(row);
        var detected = new Object();
        detected.Label = (int)values[0];
        detected.Prob = values[1];
        detected.Rect.X = values[2] * imgW;
        detected.Rect.Y = values[3] * imgH;
        detected.Rect.Width = values[4] * imgW - detected.Rect.X;
        detected.Rect.Height = values[5] * imgH - detected.Rect.Y;
        objects.Add(detected);
    }

    return 0;
}
private static int DetectShuffleNetV2(NcnnDotNet.OpenCV.Mat bgr, List<float> clsScores)
{
    // Classify the image with ShuffleNetV2 x0.5. The exported model ends at
    // the "fc" layer, so a Softmax layer is applied manually before the
    // per-class probabilities are appended to clsScores.
    using var shuffleNetV2 = new Net();

    if (Ncnn.IsSupportVulkan)
    {
        shuffleNetV2.Opt.UseVulkanCompute = true;
    }

    // https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe
    // models can be downloaded from https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe/releases
    shuffleNetV2.LoadParam("shufflenet_v2_x0.5.param");
    shuffleNetV2.LoadModel("shufflenet_v2_x0.5.bin");

    using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, 224, 224);

    // Scale pixels to [0, 1]; no mean subtraction for this model.
    input.SubstractMeanNormalize(null, new[] { 1 / 255.0f, 1 / 255.0f, 1 / 255.0f });

    using var extractor = shuffleNetV2.CreateExtractor();
    extractor.Input("data", input);

    using var fc = new Mat();
    extractor.Extract("fc", fc);

    // manually call softmax on the fc output
    // convert result into probability
    // skip if your model already has softmax operation
    {
        using var softmax = Ncnn.CreateLayer("Softmax");
        using var pd = new ParamDict();
        softmax.LoadParam(pd);
        softmax.ForwardInplace(fc, shuffleNetV2.Opt);
    }

    // Flatten whatever shape came out of the net into a single row.
    using var flattened = fc.Reshape(fc.W * fc.H * fc.C);

    clsScores.Capacity = flattened.W;
    for (var index = 0; index < flattened.W; index++)
    {
        clsScores.Add(flattened[index]);
    }

    return 0;
}
private static int DetectRetinaFace(NcnnDotNet.OpenCV.Mat bgr, List<FaceObject> faceObjects)
{
    // RetinaFace (mnet.25) face detection: generates anchor-based proposals
    // at strides 32/16/8, merges and score-sorts them, applies NMS, and
    // writes the surviving faces (clipped to the image bounds) into
    // faceObjects. Always returns 0.
    //
    // FIX: the previous version clipped faceProposals[i] (the i-th *raw*
    // proposal) instead of the i-th *picked* face, so the stored faces got
    // another proposal's clipped rectangle. It also pre-filled faceObjects
    // with AddRange(new FaceObject[faceCount]) and then wrote indices
    // 0..faceCount-1, stomping any pre-existing entries; the list is now
    // cleared and rebuilt, matching the upstream ncnn C++ example.
    using (var retinaFace = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            retinaFace.Opt.UseVulkanCompute = true;
        }

        // model is converted from
        // https://github.com/deepinsight/insightface/tree/master/RetinaFace#retinaface-pretrained-models
        // https://github.com/deepinsight/insightface/issues/669
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        // retinaface.load_param("retinaface-R50.param");
        // retinaface.load_model("retinaface-R50.bin");
        retinaFace.LoadParam("mnet.25-opt.param");
        retinaFace.LoadModel("mnet.25-opt.bin");

        const float probThreshold = 0.8f;
        const float nmsThreshold = 0.4f;

        var imgW = bgr.Cols;
        var imgH = bgr.Rows;

        // The network runs at the original image resolution (no resize), RGB order.
        using var @in = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr2Rgb, bgr.Cols, bgr.Rows, imgW, imgH);

        using var ex = retinaFace.CreateExtractor();
        ex.Input("data", @in);

        var faceProposals = new List<FaceObject>();

        // stride 32 — largest anchors (scales 32/16 on a 16px base)
        {
            using var scoreBlob = new Mat();
            using var bboxBlob = new Mat();
            using var landmarkBlob = new Mat();
            ex.Extract("face_rpn_cls_prob_reshape_stride32", scoreBlob);
            ex.Extract("face_rpn_bbox_pred_stride32", bboxBlob);
            ex.Extract("face_rpn_landmark_pred_stride32", landmarkBlob);

            const int baseSize = 16;
            const int featStride = 32;
            using var ratios = new Mat(1);
            ratios[0] = 1.0f;
            using var scales = new Mat(2);
            scales[0] = 32.0f;
            scales[1] = 16.0f;
            using var anchors = GenerateAnchors(baseSize, ratios, scales);

            var faceObjects32 = new List<FaceObject>();
            GenerateProposals(anchors, featStride, scoreBlob, bboxBlob, landmarkBlob, probThreshold, faceObjects32);
            faceProposals.AddRange(faceObjects32);
        }

        // stride 16 — medium anchors (scales 8/4)
        {
            using var scoreBlob = new Mat();
            using var bboxBlob = new Mat();
            using var landmarkBlob = new Mat();
            ex.Extract("face_rpn_cls_prob_reshape_stride16", scoreBlob);
            ex.Extract("face_rpn_bbox_pred_stride16", bboxBlob);
            ex.Extract("face_rpn_landmark_pred_stride16", landmarkBlob);

            const int baseSize = 16;
            const int featStride = 16;
            using var ratios = new Mat(1);
            ratios[0] = 1.0f;
            using var scales = new Mat(2);
            scales[0] = 8.0f;
            scales[1] = 4.0f;
            using var anchors = GenerateAnchors(baseSize, ratios, scales);

            var faceObjects16 = new List<FaceObject>();
            GenerateProposals(anchors, featStride, scoreBlob, bboxBlob, landmarkBlob, probThreshold, faceObjects16);
            faceProposals.AddRange(faceObjects16);
        }

        // stride 8 — smallest anchors (scales 2/1)
        {
            using var scoreBlob = new Mat();
            using var bboxBlob = new Mat();
            using var landmarkBlob = new Mat();
            ex.Extract("face_rpn_cls_prob_reshape_stride8", scoreBlob);
            ex.Extract("face_rpn_bbox_pred_stride8", bboxBlob);
            ex.Extract("face_rpn_landmark_pred_stride8", landmarkBlob);

            const int baseSize = 16;
            const int featStride = 8;
            using var ratios = new Mat(1);
            ratios[0] = 1.0f;
            using var scales = new Mat(2);
            scales[0] = 2.0f;
            scales[1] = 1.0f;
            using var anchors = GenerateAnchors(baseSize, ratios, scales);

            var faceObjects8 = new List<FaceObject>();
            GenerateProposals(anchors, featStride, scoreBlob, bboxBlob, landmarkBlob, probThreshold, faceObjects8);
            faceProposals.AddRange(faceObjects8);
        }

        // sort all proposals by score from highest to lowest
        QsortDescentInplace(faceProposals);

        // apply nms with nms_threshold
        var picked = new List<int>();
        NmsSortedBBoxes(faceProposals, picked, nmsThreshold);

        var faceCount = picked.Count;

        faceObjects.Clear();
        for (var i = 0; i < faceCount; i++)
        {
            // The face NMS actually kept — not faceProposals[i].
            var face = faceProposals[picked[i]];

            // clip to image size
            var x0 = Math.Max(Math.Min(face.Rect.X, (float)imgW - 1), 0.0f);
            var y0 = Math.Max(Math.Min(face.Rect.Y, (float)imgH - 1), 0.0f);
            var x1 = Math.Max(Math.Min(face.Rect.X + face.Rect.Width, (float)imgW - 1), 0.0f);
            var y1 = Math.Max(Math.Min(face.Rect.Y + face.Rect.Height, (float)imgH - 1), 0.0f);

            face.Rect.X = x0;
            face.Rect.Y = y0;
            face.Rect.Width = x1 - x0;
            face.Rect.Height = y1 - y0;

            faceObjects.Add(face);
        }
    }

    return 0;
}
private static int DetectFasterRCNN(NcnnDotNet.OpenCV.Mat bgr, List<Object> objects)
{
    // Two-stage Faster R-CNN (ZF backbone) detection:
    //   step 1 extracts the shared conv feature map plus region proposals ("rois"),
    //   step 2 re-runs the head once per roi to get class scores and bbox regression,
    //   then per-class NMS and a global top-K cap produce the final `objects`.
    // Always returns 0.
    using (var fasterRcnn = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            fasterRcnn.Opt.UseVulkanCompute = true;
        }

        // original pretrained model from https://github.com/rbgirshick/py-faster-rcnn
        // py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt
        // https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0
        // ZF_faster_rcnn_final.caffemodel
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        fasterRcnn.LoadParam("ZF_faster_rcnn_final.param");
        fasterRcnn.LoadModel("ZF_faster_rcnn_final.bin");

        // hyper parameters taken from
        // py-faster-rcnn/lib/fast_rcnn/config.py
        // py-faster-rcnn/lib/fast_rcnn/test.py
        const int targetSize = 600; // __C.TEST.SCALES
        const int maxPerImage = 100;
        const float confidenceThresh = 0.05f;
        const float nmsThreshold = 0.3f;// __C.TEST.NMS

        // scale to target detect size: shorter side becomes targetSize,
        // the other side scales proportionally
        var w = bgr.Cols;
        var h = bgr.Rows;
        float scale;
        if (w < h)
        {
            scale = (float)targetSize / w;
            w = targetSize;
            h = (int)(h * scale);
        }
        else
        {
            scale = (float)targetSize / h;
            h = targetSize;
            w = (int)(w * scale);
        }

        using var @in = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, w, h);

        var meanVals = new[] { 102.9801f, 115.9465f, 122.7717f };
        @in.SubstractMeanNormalize(meanVals, null);

        // im_info = [height, width, scale] tells the RPN the scaled input geometry
        using var im_info = new Mat(3);
        im_info[0] = h;
        im_info[1] = w;
        im_info[2] = scale;

        // step1, extract feature and all rois
        using var ex1 = fasterRcnn.CreateExtractor();
        ex1.Input("data", @in);
        ex1.Input("im_info", im_info);
        using var conv5Relu5 = new Mat(); // feature
        using var rois = new Mat(); // all rois
        ex1.Extract("conv5_relu5", conv5Relu5);
        ex1.Extract("rois", rois);

        // step2, extract bbox and score for each roi
        // candidates bucketed by class label so NMS can run per class
        var classCandidates = new List<List<Object>>();
        for (var i = 0; i < rois.C; i++)
        {
            // a fresh extractor per roi, reusing the already-computed feature map
            using var ex2 = fasterRcnn.CreateExtractor();
            using var roi = rois.Channel(i); // get single roi
            ex2.Input("conv5_relu5", conv5Relu5);
            ex2.Input("rois", roi);
            using var bboxPred = new Mat();
            using var clsProb = new Mat();
            ex2.Extract("bbox_pred", bboxPred);
            ex2.Extract("cls_prob", clsProb);

            var numClass = clsProb.W;
            // There is no equivalent to std::vector::resize in C#
            Resize(classCandidates, numClass);

            // find class id with highest score
            var label = 0;
            var score = 0.0f;
            for (var j = 0; j < numClass; j++)
            {
                var classScore = clsProb[j];
                if (classScore > score)
                {
                    label = j;
                    score = classScore;
                }
            }

            // ignore background or low score
            if (label == 0 || score <= confidenceThresh)
            {
                continue;
            }

            // fprintf(stderr, "%d = %f\n", label, score);

            // unscale to image size (roi coordinates are in the resized input)
            var x1 = roi[0] / scale;
            var y1 = roi[1] / scale;
            var x2 = roi[2] / scale;
            var y2 = roi[3] / scale;

            var pbW = x2 - x1 + 1;
            var pbH = y2 - y1 + 1;

            // apply bbox regression: per-class deltas at offset label*4
            // refine the roi via center/size parameterization
            var dx = bboxPred[label * 4];
            var dy = bboxPred[label * 4 + 1];
            var dw = bboxPred[label * 4 + 2];
            var dh = bboxPred[label * 4 + 3];

            var cx = x1 + pbW * 0.5f;
            var cy = y1 + pbH * 0.5f;

            var objCx = cx + pbW * dx;
            var objCy = cy + pbH * dy;

            var objW = pbW * Math.Exp(dw);
            var objH = pbH * Math.Exp(dh);

            var objX1 = (float)(objCx - objW * 0.5f);
            var objY1 = (float)(objCy - objH * 0.5f);
            var objX2 = (float)(objCx + objW * 0.5f);
            var objY2 = (float)(objCy + objH * 0.5f);

            // clip to the original image bounds
            objX1 = Math.Max(Math.Min(objX1, bgr.Cols - 1), 0.0f);
            objY1 = Math.Max(Math.Min(objY1, bgr.Rows - 1), 0.0f);
            objX2 = Math.Max(Math.Min(objX2, bgr.Cols - 1), 0.0f);
            objY2 = Math.Max(Math.Min(objY2, bgr.Rows - 1), 0.0f);

            // append object
            var obj = new Object
            {
                Rect = new Rect<float>(objX1, objY1, objX2 - objX1 + 1, objY2 - objY1 + 1),
                Label = label,
                Prob = score
            };

            classCandidates[label].Add(obj);
        }

        // post process: per-class NMS on score-sorted candidates
        objects.Clear();
        for (var i = 0; i < (int)classCandidates.Count; i++)
        {
            var candidates = classCandidates[i];

            QsortDescentInplace(candidates);

            var picked = new List<int>();
            NmsSortedBBoxes(candidates, picked, nmsThreshold);

            for (var j = 0; j < picked.Count; j++)
            {
                var z = picked[j];
                objects.Add(candidates[z]);
            }
        }

        // global sort, then keep at most maxPerImage highest-scoring detections
        QsortDescentInplace(objects);

        if (maxPerImage > 0 && maxPerImage < objects.Count)
        {
            Resize(objects, maxPerImage);
        }
    }

    return (0);
}
private static int DetectPoseNet(NcnnDotNet.OpenCV.Mat bgr, List<KeyPoint> keyPoints)
{
    // Simple-baseline human pose estimation (gluon-cv). The network emits
    // one heatmap channel per body joint; each keypoint is the argmax of
    // its heatmap, mapped back to original-image coordinates.
    using (var poseNet = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            poseNet.Opt.UseVulkanCompute = true;
        }

        // the simple baseline human pose estimation from gluon-cv
        // https://gluon-cv.mxnet.io/build/examples_pose/demo_simple_pose.html
        // mxnet model exported via
        // pose_net.hybridize()
        // pose_net.export('pose')
        // then mxnet2ncnn
        // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
        poseNet.LoadParam("pose.param");
        poseNet.LoadModel("pose.bin");

        var w = bgr.Cols;
        var h = bgr.Rows;

        using var @in = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr2Rgb, bgr.Cols, bgr.Rows, 192, 256);

        // transforms.ToTensor(),
        // transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        // R' = (R / 255 - 0.485) / 0.229 = (R - 0.485 * 255) / 0.229 / 255
        // G' = (G / 255 - 0.456) / 0.224 = (G - 0.456 * 255) / 0.224 / 255
        // B' = (B / 255 - 0.406) / 0.225 = (B - 0.406 * 255) / 0.225 / 255
        var meanVals = new[] { 0.485f * 255.0f, 0.456f * 255.0f, 0.406f * 255.0f };
        var normVals = new[] { 1 / 0.229f / 255.0f, 1 / 0.224f / 255.0f, 1 / 0.225f / 255.0f };
        @in.SubstractMeanNormalize(meanVals, normVals);

        using var ex = poseNet.CreateExtractor();
        ex.Input("data", @in);

        using var @out = new Mat();
        ex.Extract("conv3_fwd", @out);

        // resolve point from heatmap
        keyPoints.Clear();
        // FIX: iterate over channels (@out.C — one heatmap per joint). The
        // previous code looped to @out.H (heatmap height), which walks the
        // wrong dimension and yields the wrong number of keypoints.
        for (var p = 0; p < @out.C; p++)
        {
            using var m = @out.Channel(p);

            // argmax over the p-th heatmap
            var maxProb = 0f;
            var maxX = 0;
            var maxY = 0;
            for (var y = 0; y < @out.H; y++)
            {
                var ptr = m.Row(y);
                for (var x = 0; x < @out.W; x++)
                {
                    var prob = ptr[x];
                    if (prob > maxProb)
                    {
                        maxProb = prob;
                        maxX = x;
                        maxY = y;
                    }
                }
            }

            // map heatmap coordinates back to the original image size
            var keyPoint = new KeyPoint
            {
                P = new Point<float>(maxX * w / (float)@out.W, maxY * h / (float)@out.H),
                Prob = maxProb
            };

            keyPoints.Add(keyPoint);
        }
    }

    return 0;
}
private static int DetectRFCN(NcnnDotNet.OpenCV.Mat bgr, List<Object> objects)
{
    // Two-stage R-FCN (ResNet-50) detection:
    //   step 1 extracts position-sensitive score/bbox maps plus region
    //   proposals ("rois"),
    //   step 2 re-runs the head once per roi to get class scores and a
    //   class-agnostic bbox regression,
    //   then per-class NMS and a global top-K cap produce the final `objects`.
    // Always returns 0.
    using (var rfcn = new Net())
    {
        if (Ncnn.IsSupportVulkan)
        {
            rfcn.Opt.UseVulkanCompute = true;
        }

        // original pretrained model from https://github.com/YuwenXiong/py-R-FCN
        // https://github.com/YuwenXiong/py-R-FCN/blob/master/models/pascal_voc/ResNet-50/rfcn_end2end/test_agnostic.prototxt
        // https://1drv.ms/u/s!AoN7vygOjLIQqUWHpY67oaC7mopf
        // resnet50_rfcn_final.caffemodel
        rfcn.LoadParam("rfcn_end2end.param");
        rfcn.LoadModel("rfcn_end2end.bin");

        const int targetSize = 224;

        const int maxPerImage = 100;
        const float confidenceThresh = 0.6f; // CONF_THRESH

        const float nmsThreshold = 0.3f; // NMS_THRESH

        // scale to target detect size: shorter side becomes targetSize,
        // the other side scales proportionally
        var w = bgr.Cols;
        var h = bgr.Rows;
        float scale;
        if (w < h)
        {
            scale = (float)targetSize / w;
            w = targetSize;
            h = (int)(h * scale);
        }
        else
        {
            scale = (float)targetSize / h;
            h = targetSize;
            w = (int)(w * scale);
        }

        using var @in = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, w, h);

        var meanVals = new[] { 102.9801f, 115.9465f, 122.7717f };
        @in.SubstractMeanNormalize(meanVals, null);

        // im_info = [height, width, scale] tells the RPN the scaled input geometry
        using var im_info = new Mat(3);
        im_info[0] = h;
        im_info[1] = w;
        im_info[2] = scale;

        // step1, extract feature and all rois
        using var ex1 = rfcn.CreateExtractor();
        ex1.Input("data", @in);
        ex1.Input("im_info", im_info);
        using var rfcnCls = new Mat();
        using var rfcnBBox = new Mat();
        using var rois = new Mat();// all rois
        ex1.Extract("rfcn_cls", rfcnCls);
        ex1.Extract("rfcn_bbox", rfcnBBox);
        ex1.Extract("rois", rois);

        // step2, extract bbox and score for each roi
        // candidates bucketed by class label so NMS can run per class
        var classCandidates = new List<List<Object>>();
        for (var i = 0; i < rois.C; i++)
        {
            // a fresh extractor per roi, reusing the position-sensitive maps
            using var ex2 = rfcn.CreateExtractor();
            using var roi = rois.Channel(i); // get single roi
            ex2.Input("rfcn_cls", rfcnCls);
            ex2.Input("rfcn_bbox", rfcnBBox);
            ex2.Input("rois", roi);
            using var bboxPred = new Mat();
            using var clsProb = new Mat();
            ex2.Extract("bbox_pred", bboxPred);
            ex2.Extract("cls_prob", clsProb);

            var numClass = clsProb.W;
            // There is no equivalent to std::vector::resize in C#
            Resize(classCandidates, numClass);

            // find class id with highest score
            var label = 0;
            var score = 0.0f;
            for (var j = 0; j < numClass; j++)
            {
                var classScore = clsProb[j];
                if (classScore > score)
                {
                    label = j;
                    score = classScore;
                }
            }

            // ignore background or low score
            if (label == 0 || score <= confidenceThresh)
            {
                continue;
            }

            // fprintf(stderr, "%d = %f\n", label, score);

            // unscale to image size (roi coordinates are in the resized input)
            var x1 = roi[0] / scale;
            var y1 = roi[1] / scale;
            var x2 = roi[2] / scale;
            var y2 = roi[3] / scale;

            var pbW = x2 - x1 + 1;
            var pbH = y2 - y1 + 1;

            // apply bbox regression: class-agnostic model, so the foreground
            // deltas always sit at fixed offset 4 (unlike per-class label*4)
            var dx = bboxPred[4];
            var dy = bboxPred[4 + 1];
            var dw = bboxPred[4 + 2];
            var dh = bboxPred[4 + 3];

            var cx = x1 + pbW * 0.5f;
            var cy = y1 + pbH * 0.5f;

            var objCx = cx + pbW * dx;
            var objCy = cy + pbH * dy;

            var objW = pbW * Math.Exp(dw);
            var objH = pbH * Math.Exp(dh);

            var objX1 = (float)(objCx - objW * 0.5f);
            var objY1 = (float)(objCy - objH * 0.5f);
            var objX2 = (float)(objCx + objW * 0.5f);
            var objY2 = (float)(objCy + objH * 0.5f);

            // clip to the original image bounds
            objX1 = Math.Max(Math.Min(objX1, bgr.Cols - 1), 0.0f);
            objY1 = Math.Max(Math.Min(objY1, bgr.Rows - 1), 0.0f);
            objX2 = Math.Max(Math.Min(objX2, bgr.Cols - 1), 0.0f);
            objY2 = Math.Max(Math.Min(objY2, bgr.Rows - 1), 0.0f);

            // append object
            var obj = new Object
            {
                Rect = new Rect<float>(objX1, objY1, objX2 - objX1 + 1, objY2 - objY1 + 1),
                Label = label,
                Prob = score
            };

            classCandidates[label].Add(obj);
        }

        // post process: per-class NMS on score-sorted candidates
        objects.Clear();
        for (var i = 0; i < (int)classCandidates.Count; i++)
        {
            var candidates = classCandidates[i];

            QsortDescentInplace(candidates);

            var picked = new List<int>();
            NmsSortedBBoxes(candidates, picked, nmsThreshold);

            for (var j = 0; j < picked.Count; j++)
            {
                var z = picked[j];
                objects.Add(candidates[z]);
            }
        }

        // global sort, then keep at most maxPerImage highest-scoring detections
        QsortDescentInplace(objects);

        if (maxPerImage > 0 && maxPerImage < objects.Count)
        {
            Resize(objects, maxPerImage);
        }
    }

    return (0);
}