/// <summary>
        /// Marks this object as disposed and calls <c>Ncnn.DestroyGpuInstance()</c>.
        /// Calls made after the first one return immediately.
        /// </summary>
        /// <param name="disposing">Indicates whether the call originates from <see cref="IDisposable.Dispose"/>. The flag is not consulted: there is no managed state to release here.</param>
        private void Dispose(bool disposing)
        {
            if (!this.IsDisposed)
            {
                // The flag is raised before the native call, so it stays set even if that call fails.
                this.IsDisposed = true;

                // unmanaged dispose
                Ncnn.DestroyGpuInstance();
            }
        }
 /// <summary>
 /// Calls <c>Ncnn.CreateGpuInstance()</c> when <c>Ncnn.IsSupportVulkan</c> is true; otherwise does nothing.
 /// </summary>
 public GlobalGpuInstance()
 {
     if (!Ncnn.IsSupportVulkan)
     {
         return;
     }

     Ncnn.CreateGpuInstance();
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Loads "pelee.param"/"pelee.bin", feeds <paramref name="bgr"/> (resized to 300x300 and normalized) to the
        /// network, converts every row of the "detection_out" blob into an entry of <paramref name="objects"/>, and
        /// writes the "sigmoid" blob, bilinearly resized to the source image size, into <paramref name="resized"/>.
        /// </summary>
        /// <param name="bgr">Source image; its pixel buffer is handed to ncnn as <c>PixelType.Bgr</c>.</param>
        /// <param name="objects">Cleared first, then filled with one entry per output row.</param>
        /// <param name="resized">Receives the resized "sigmoid" output.</param>
        /// <returns>Always 0.</returns>
        private static int DetectPeleeNet(NcnnDotNet.OpenCV.Mat bgr, List <Object> objects, NcnnDotNet.Mat resized)
        {
            const int targetSize = 300;

            using var net = new Net();
            if (Ncnn.IsSupportVulkan)
            {
                net.Opt.UseVulkanCompute = true;
            }

            // model is converted from https://github.com/eric612/MobileNet-YOLO
            // and can be downloaded from https://drive.google.com/open?id=1Wt6jKv13sBRMHgrGAJYlOlRF-o80pC0g
            // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
            net.LoadParam("pelee.param");
            net.LoadModel("pelee.bin");

            var imgW = bgr.Cols;
            var imgH = bgr.Rows;

            using var input = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, targetSize, targetSize);
            var meanVals = new[] { 103.9f, 116.7f, 123.6f };
            var normVals = new[] { 0.017f, 0.017f, 0.017f };
            input.SubstractMeanNormalize(meanVals, normVals);

            using var extractor = net.CreateExtractor();
            extractor.Input("data", input);

            using var detections = new Mat();
            extractor.Extract("detection_out", detections);

            objects.Clear();
            for (var row = 0; row < detections.H; row++)
            {
                // Row layout used below: [0] label, [1] probability, [2..5] box corners scaled by the image size.
                var values = detections.Row(row);

                var detected = new Object();
                detected.Label = (int)values[0];
                detected.Prob  = values[1];

                detected.Rect.X = values[2] * imgW;
                detected.Rect.Y = values[3] * imgH;

                // Width/height are derived from the second corner minus the stored origin.
                detected.Rect.Width  = values[4] * imgW - detected.Rect.X;
                detected.Rect.Height = values[5] * imgH - detected.Rect.Y;

                objects.Add(detected);
            }

            using var segmentation = new Mat();
            extractor.Extract("sigmoid", segmentation);
            Ncnn.ResizeBilinear(segmentation, resized, imgW, imgH);

            return 0;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Records the image size and scale factors on this instance, resizes <paramref name="image"/> to
        /// <c>_DW</c> x <c>_DH</c> after calling <c>DynamicScale</c>, runs the network and decodes the
        /// "537"/"538"/"539"/"540" output blobs into <paramref name="faces"/>.
        /// </summary>
        /// <param name="image">Input image; an empty image is rejected.</param>
        /// <param name="resizedWidth">Width passed to <c>DynamicScale</c> and used as the divisor for <c>_ScaleW</c>.</param>
        /// <param name="resizedHeight">Height passed to <c>DynamicScale</c> and used as the divisor for <c>_ScaleH</c>.</param>
        /// <param name="scoreThreshold">Forwarded to <c>Decode</c>.</param>
        /// <param name="nmsThreshold">Forwarded to <c>Decode</c>.</param>
        /// <param name="faces">Collection handed to <c>Decode</c> and <c>SquareBox</c>.</param>
        /// <returns>0 on success, -1 when <paramref name="image"/> is empty.</returns>
        private int Detect(Mat image,
                           int resizedWidth,
                           int resizedHeight,
                           float scoreThreshold,
                           float nmsThreshold,
                           List <FaceInfo> faces)
        {
            if (image.IsEmpty)
            {
                Console.WriteLine("image is empty ,please check!");
                return -1;
            }

            this._ImageH = image.H;
            this._ImageW = image.W;

            this._ScaleW = (float)this._ImageW / resizedWidth;
            this._ScaleH = (float)this._ImageH / resizedHeight;

            using (var input = new Mat())
            {
                // scale
                this.DynamicScale(resizedWidth, resizedHeight);
                Ncnn.ResizeBilinear(image, input, this._DW, this._DH);

                using (var extractor = this._Net.CreateExtractor())
                {
                    extractor.Input("input.1", input);

                    // The four output mats are created in the same order as before and released in reverse.
                    using (var heatMap = new Mat())
                    using (var scale = new Mat())
                    using (var offset = new Mat())
                    using (var landmarks = new Mat())
                    {
                        extractor.Extract("537", heatMap);
                        extractor.Extract("538", scale);
                        extractor.Extract("539", offset);
                        extractor.Extract("540", landmarks);

                        this.Decode(heatMap, scale, offset, landmarks, faces, scoreThreshold, nmsThreshold);
                        this.SquareBox(faces);
                    }
                }
            }

            return 0;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Loads "shufflenet_v2_x0.5", runs it on <paramref name="bgr"/> (resized to 224x224 and scaled by 1/255),
        /// applies a Softmax layer to the "fc" output and appends the flattened scores to <paramref name="clsScores"/>.
        /// </summary>
        /// <param name="bgr">Source image; its pixel buffer is handed to ncnn as <c>PixelType.Bgr</c>.</param>
        /// <param name="clsScores">List the scores are appended to. Existing entries are kept.</param>
        /// <returns>Always 0.</returns>
        private static int DetectShuffleNetV2(NcnnDotNet.OpenCV.Mat bgr, List <float> clsScores)
        {
            using (var shuffleNetV2 = new Net())
            {
                if (Ncnn.IsSupportVulkan)
                {
                    shuffleNetV2.Opt.UseVulkanCompute = true;
                }

                // https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe
                // models can be downloaded from https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe/releases
                shuffleNetV2.LoadParam("shufflenet_v2_x0.5.param");
                shuffleNetV2.LoadModel("shufflenet_v2_x0.5.bin");

                using var @in = Mat.FromPixelsResize(bgr.Data, PixelType.Bgr, bgr.Cols, bgr.Rows, 224, 224);
                var normVals = new[] { 1 / 255.0f, 1 / 255.0f, 1 / 255.0f };
                @in.SubstractMeanNormalize(null, normVals);

                using var ex = shuffleNetV2.CreateExtractor();
                ex.Input("data", @in);

                using var @out = new Mat();
                ex.Extract("fc", @out);

                // manually call softmax on the fc output
                // convert result into probability
                // skip if your model already has softmax operation
                {
                    using var softmax = Ncnn.CreateLayer("Softmax");

                    using var pd = new ParamDict();
                    softmax.LoadParam(pd);

                    softmax.ForwardInplace(@out, shuffleNetV2.Opt);
                }

                using var @out2 = @out.Reshape(@out.W * @out.H * @out.C);

                // Reserve room for the appended scores. Capacity is only ever grown: assigning a value
                // below Count throws ArgumentOutOfRangeException, which the previous unconditional
                // assignment could do for a list that already held more items than the score count.
                var scoreCount = @out2.W;
                var requiredCapacity = clsScores.Count + scoreCount;
                if (clsScores.Capacity < requiredCapacity)
                {
                    clsScores.Capacity = requiredCapacity;
                }

                for (var j = 0; j < scoreCount; j++)
                {
                    clsScores.Add(@out2[j]);
                }
            }

            return(0);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Classifies "goldfish.jpg" with <c>DetectSqueezeNet</c> and fails the test unless the top result
        /// has a score of at least 0.9 and class index 1.
        /// </summary>
        public void CheckClassification()
        {
            const string image = "goldfish.jpg";

            using (var m = NcnnDotNet.OpenCV.Cv2.ImRead(image, NcnnDotNet.OpenCV.CvLoadImage.Color))
            {
                if (m.IsEmpty)
                {
                    Assert.False(true, $"Failed to load {image}.");
                }

                var clsScores = new List <float>();

                // The GPU instance is torn down in a finally block so that an exception thrown by the
                // detector cannot leave it alive for the tests that run afterwards.
                var useGpu = Ncnn.IsSupportVulkan;
                if (useGpu)
                {
                    Ncnn.CreateGpuInstance();
                }

                try
                {
                    DetectSqueezeNet(m, clsScores);
                }
                finally
                {
                    if (useGpu)
                    {
                        Ncnn.DestroyGpuInstance();
                    }
                }

                if (clsScores.Count == 0)
                {
                    Assert.False(true, $"Failed to classify {image}. Reason: No classification");
                }

                var top = PrintTop(clsScores);
                if (top.Item1 < 0.9)
                {
                    Assert.False(true, $"Failed to classify {image}. Reason: Low Score");
                }
                if (top.Item2 != 1)
                {
                    Assert.False(true, $"Failed to classify {image}. Reason: Wrong class");
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Resizes <paramref name="img"/> to <c>_InW</c> x <c>_InH</c>, normalizes it, runs the network and passes
        /// the "scores"/"boxes" outputs through <c>GenerateBBox</c> and <c>NonMaximumSuppression</c>.
        /// </summary>
        /// <param name="img">Input image; an empty image is rejected.</param>
        /// <param name="faceList">Collection handed to <c>NonMaximumSuppression</c> as its output argument.</param>
        /// <returns>0 on success, -1 when <paramref name="img"/> is empty.</returns>
        private int Detect(Mat img, ICollection <FaceInfo> faceList)
        {
            if (img.IsEmpty)
            {
                Console.WriteLine("image is empty ,please check!");
                return -1;
            }

            this._ImageH = img.H;
            this._ImageW = img.W;

            using (var resized = new Mat())
            {
                Ncnn.ResizeBilinear(img, resized, this._InW, this._InH);
                resized.SubstractMeanNormalize(this._MeanVals, this._NormVals);

                // Raw candidates produced by GenerateBBox, before suppression.
                var candidates = new List <FaceInfo>();

                using (var extractor = this._UltraFace.CreateExtractor())
                {
                    extractor.SetNumThreads(this._NumThread);
                    extractor.Input("input", resized);

                    using (var scores = new Mat())
                    using (var boxes = new Mat())
                    {
                        extractor.Extract("scores", scores);
                        extractor.Extract("boxes", boxes);

                        GenerateBBox(candidates, scores, boxes, this._ScoreThreshold, this._NumAnchors);
                        NonMaximumSuppression(candidates, faceList);
                    }
                }
            }

            return 0;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Entry point: loads the image named by the single command-line argument, runs
        /// <c>DetectPeleeNet</c> on it and passes the results to <c>DrawObjects</c>.
        /// </summary>
        /// <param name="args">Exactly one element: the image path.</param>
        /// <returns>0 on success; -1 when the argument count is wrong or the image cannot be read.</returns>
        private static int Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine($"Usage: {nameof(PeleeNetSSDSeg)} [imagepath]");
                return(-1);
            }

            var imagepath = args[0];

            // Load a 3-channel colour image: DetectPeleeNet hands the pixel buffer to ncnn as
            // PixelType.Bgr, so a single-channel (grayscale) buffer would not match that layout.
            using (var m = Cv2.ImRead(imagepath, CvLoadImage.Color))
            {
                if (m.IsEmpty)
                {
                    Console.WriteLine($"cv::imread {imagepath} failed");
                    return(-1);
                }

                var useGpu = Ncnn.IsSupportVulkan;
                if (useGpu)
                {
                    Ncnn.CreateGpuInstance();
                }

                var objects = new List <Object>();
                using var segOut = new NcnnDotNet.Mat();

                try
                {
                    DetectPeleeNet(m, objects, segOut);
                }
                finally
                {
                    // Release the GPU instance even when detection throws.
                    if (useGpu)
                    {
                        Ncnn.DestroyGpuInstance();
                    }
                }

                DrawObjects(m, objects, segOut);
            }

            return(0);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Entry point: loads the image named by the single command-line argument, classifies it with
        /// <c>DetectShuffleNetV2</c> and prints the top three scores via <c>PrintTopK</c>.
        /// </summary>
        /// <param name="args">Exactly one element: the image path.</param>
        /// <returns>0 on success; -1 when the argument count is wrong or the image cannot be read.</returns>
        private static int Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine($"Usage: {nameof(ShuffleNetV2)} [imagepath]");
                return(-1);
            }

            var imagepath = args[0];

            // Load a 3-channel colour image: DetectShuffleNetV2 hands the pixel buffer to ncnn as
            // PixelType.Bgr, so a single-channel (grayscale) buffer would not match that layout.
            using (var m = Cv2.ImRead(imagepath, CvLoadImage.Color))
            {
                if (m.IsEmpty)
                {
                    Console.WriteLine($"cv::imread {imagepath} failed");
                    return(-1);
                }

                var useGpu = Ncnn.IsSupportVulkan;
                if (useGpu)
                {
                    Ncnn.CreateGpuInstance();
                }

                var clsScores = new List <float>();

                try
                {
                    DetectShuffleNetV2(m, clsScores);
                }
                finally
                {
                    // Release the GPU instance even when classification throws.
                    if (useGpu)
                    {
                        Ncnn.DestroyGpuInstance();
                    }
                }

                PrintTopK(clsScores, 3);
            }

            return(0);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Entry point: loads the image named by the single command-line argument, runs
        /// <c>DetectRetinaFace</c> on it and passes the results to <c>DrawFaceObject</c>.
        /// </summary>
        /// <param name="args">Exactly one element: the image path.</param>
        /// <returns>0 on success; -1 when the argument count is wrong or the image cannot be read.</returns>
        private static int Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine($"Usage: {nameof(RetinaFace)} [imagepath]");
                return(-1);
            }

            var imagepath = args[0];

            // NOTE(review): the image is loaded with CvLoadImage.Grayscale. DetectRetinaFace is not
            // visible here; confirm it does not expect a 3-channel BGR buffer.
            using (var m = Cv2.ImRead(imagepath, CvLoadImage.Grayscale))
            {
                if (m.IsEmpty)
                {
                    Console.WriteLine($"cv::imread {imagepath} failed");
                    return(-1);
                }

                var useGpu = Ncnn.IsSupportVulkan;
                if (useGpu)
                {
                    Ncnn.CreateGpuInstance();
                }

                var keyPoints = new List <FaceObject>();

                try
                {
                    DetectRetinaFace(m, keyPoints);
                }
                finally
                {
                    // Release the GPU instance even when detection throws.
                    if (useGpu)
                    {
                        Ncnn.DestroyGpuInstance();
                    }
                }

                DrawFaceObject(m, keyPoints);
            }

            return(0);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Decodes an encoded image and runs the UltraFace detector on it.
        /// </summary>
        /// <param name="file">Encoded image bytes.</param>
        /// <returns>The decoded image size together with the detected faces.</returns>
        /// <exception cref="NotSupportedException">The bytes could not be decoded into an image.</exception>
        public DetectResult Detect(byte[] file)
        {
            // Decode to a 3-channel colour image: the buffer is converted below with
            // PixelType.Bgr2Rgb, which a single-channel (grayscale) buffer would not match.
            using var frame = Cv2.ImDecode(file, CvLoadImage.Color);
            if (frame.IsEmpty)
            {
                throw new NotSupportedException("This file is not supported!!");
            }

            var useGpu = Ncnn.IsSupportVulkan;
            if (useGpu)
            {
                Ncnn.CreateGpuInstance();
            }

            try
            {
                using var inMat = Mat.FromPixels(frame.Data, NcnnDotNet.PixelType.Bgr2Rgb, frame.Cols, frame.Rows);

                var faceInfos = this._UltraFace.Detect(inMat).ToArray();

                return(new DetectResult(frame.Cols, frame.Rows, faceInfos));
            }
            finally
            {
                // Release the GPU instance even when detection throws.
                if (useGpu)
                {
                    Ncnn.DestroyGpuInstance();
                }
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Decodes an encoded image and runs <c>DetectYoloV3</c> on it.
        /// </summary>
        /// <param name="file">Encoded image bytes.</param>
        /// <returns>The decoded image size together with the detected objects.</returns>
        /// <exception cref="NotSupportedException">The bytes could not be decoded into an image.</exception>
        public DetectResult Detect(byte[] file)
        {
            // NOTE(review): the image is decoded with CvLoadImage.Grayscale. DetectYoloV3 is not
            // visible here; confirm it does not expect a 3-channel BGR buffer.
            using var m = Cv2.ImDecode(file, CvLoadImage.Grayscale);
            if (m.IsEmpty)
            {
                throw new NotSupportedException("This file is not supported!!");
            }

            var useGpu = Ncnn.IsSupportVulkan;
            if (useGpu)
            {
                Ncnn.CreateGpuInstance();
            }

            var objects = new List <Object>();

            try
            {
                DetectYoloV3(m, objects);
            }
            finally
            {
                // Release the GPU instance even when detection throws.
                if (useGpu)
                {
                    Ncnn.DestroyGpuInstance();
                }
            }

            return(new DetectResult(m.Cols, m.Rows, objects));
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Benchmark entry point. Parses the optional arguments, configures a shared <c>Option</c> and runs
        /// <c>Benchmark</c> once per model with a freshly allocated input of the matching size.
        /// </summary>
        /// <param name="args">
        /// Optional positional arguments: [0] loop count (default 4), [1] thread count (default: logical
        /// processor count), [2] power-save mode (default 0), [3] GPU device index (default -1, which
        /// disables Vulkan compute).
        /// </param>
        /// <returns>Always 0.</returns>
        private static int Main(string[] args)
        {
            var loopCount  = 4;
            // Default worker-thread count. The GPU count used previously is 0 on machines without a
            // GPU, which would have configured the benchmark with zero threads.
            var numThreads = Environment.ProcessorCount;
            var powerSave  = 0;
            var gpuDevice  = -1;

            if (args.Length >= 1)
            {
                loopCount = int.Parse(args[0]);
            }
            if (args.Length >= 2)
            {
                numThreads = int.Parse(args[1]);
            }
            if (args.Length >= 3)
            {
                powerSave = int.Parse(args[2]);
            }
            if (args.Length >= 4)
            {
                gpuDevice = int.Parse(args[3]);
            }

            var useVulkanCompute = gpuDevice != -1;

            g_loop_count = loopCount;

            g_blob_pool_allocator.SetSizeCompareRatio(0.0f);
            g_workspace_pool_allocator.SetSizeCompareRatio(0.5f);

            if (useVulkanCompute)
            {
                g_warmup_loop_count   = 10;
                g_vkdev               = Ncnn.GetGpuDevice(gpuDevice);
                g_blob_vkallocator    = new VkBlobBufferAllocator(g_vkdev);
                g_staging_vkallocator = new VkStagingBufferAllocator(g_vkdev);
            }

            try
            {
                // default option
                using var opt          = new Option();
                opt.LightMode          = true;
                opt.NumThreads         = numThreads;
                opt.BlobAllocator      = g_blob_pool_allocator;
                opt.WorkspaceAllocator = g_workspace_pool_allocator;
                if (Ncnn.IsSupportVulkan)
                {
                    opt.BlobVkAllocator      = g_blob_vkallocator;
                    opt.WorkspaceVkAllocator = g_blob_vkallocator;
                    opt.StagingVkAllocator   = g_staging_vkallocator;
                }
                opt.UseWinogradConvolution = true;
                opt.UseSgemmConvolution    = true;
                opt.UseInt8Inference       = true;
                opt.UseVulkanCompute       = useVulkanCompute;
                opt.UseFP16Packed          = true;
                opt.UseFP16Storage         = true;
                opt.UseFP16Arithmetic      = true;
                opt.UseInt8Storage         = true;
                opt.UseInt8Arithmetic      = true;
                opt.UsePackingLayout       = true;

                Ncnn.SetCpuPowerSave((PowerSave)powerSave);

                Ncnn.SetOmpDynamic(0);
                Ncnn.SetOmpNumThreads(numThreads);

                Console.WriteLine($"loop_count = {loopCount}");
                Console.WriteLine($"num_threads = {numThreads}");
                // NOTE(review): the parameterless SetCpuPowerSave() is used here to read the value back; confirm.
                Console.WriteLine($"powersave = {(int)Ncnn.SetCpuPowerSave()}");
                Console.WriteLine($"gpu_device = {gpuDevice}");

                // Runs one model on a square 3-channel input and disposes the input afterwards
                // (the inputs were previously allocated inline and never released).
                void Run(string model, int size)
                {
                    using var input = new Mat(size, size, 3);
                    Benchmark(model, input, opt);
                }

                // int8 variants are only run when Vulkan compute is not in use.
                void RunInt8(string model, int size)
                {
                    if (!Ncnn.IsSupportVulkan || !useVulkanCompute)
                    {
                        Run(model, size);
                    }
                }

                // run
                Run("squeezenet", 227);
                RunInt8("squeezenet_int8", 227);

                Run("mobilenet", 224);
                RunInt8("mobilenet_int8", 224);

                // mobilenet_v2_int8 was disabled in the original list and is still not run.
                Run("mobilenet_v2", 224);

                Run("mobilenet_v3", 224);

                Run("shufflenet", 224);

                Run("shufflenet_v2", 224);

                Run("mnasnet", 224);

                Run("proxylessnasnet", 224);

                Run("googlenet", 224);
                RunInt8("googlenet_int8", 224);

                Run("resnet18", 224);
                RunInt8("resnet18_int8", 224);

                Run("alexnet", 227);

                Run("vgg16", 224);
                RunInt8("vgg16_int8", 224);

                Run("resnet50", 224);
                RunInt8("resnet50_int8", 224);

                Run("squeezenet_ssd", 300);
                RunInt8("squeezenet_ssd_int8", 300);

                Run("mobilenet_ssd", 300);
                RunInt8("mobilenet_ssd_int8", 300);

                Run("mobilenet_yolo", 416);

                Run("mobilenetv2_yolov3", 352);
            }
            finally
            {
                // Release the Vulkan allocators even when a benchmark throws.
                if (Ncnn.IsSupportVulkan)
                {
                    g_blob_vkallocator?.Dispose();
                    g_staging_vkallocator?.Dispose();
                }
            }

            return(0);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Loads "<paramref name="comment"/>.param" with empty weights, runs <c>g_warmup_loop_count</c> warm-up
        /// passes followed by <c>g_loop_count</c> timed passes, and prints the min/max/average time per pass.
        /// </summary>
        /// <param name="comment">Model name; also the label printed in the result line.</param>
        /// <param name="in">Input blob; it is filled with 0.01 before use.</param>
        /// <param name="opt">Options assigned to the network.</param>
        private static void Benchmark(string comment, Mat @in, Option opt)
        {
            @in.Fill(0.01f);

            using var net = new Net();
            net.Opt = opt;

            if (Ncnn.IsSupportVulkan && net.Opt.UseVulkanCompute)
            {
                net.SetVulkanDevice(g_vkdev);
            }

            net.LoadParam($"{comment}.param");

            using var emptyReader = new DataReaderFromEmpty();
            net.LoadModel(emptyReader);

            g_blob_pool_allocator.Clear();
            g_workspace_pool_allocator.Clear();

            if (net.Opt.UseVulkanCompute)
            {
                g_blob_vkallocator.Clear();
                g_staging_vkallocator.Clear();
            }

            // Fixed 10 second pause before measuring.
            Thread.Sleep(10 * 1000);

            using var output = new Mat();

            // warm up
            for (var pass = 0; pass < g_warmup_loop_count; pass++)
            {
                using (var extractor = net.CreateExtractor())
                {
                    extractor.Input("data", @in);
                    extractor.Extract("output", output);
                }
            }

            var fastest = double.MaxValue;
            var slowest = -double.MaxValue;
            var total   = 0d;

            for (var pass = 0; pass < g_loop_count; pass++)
            {
                double begin = Ncnn.GetCurrentTime();

                // The extractor is disposed inside the timed region, as before.
                using (var extractor = net.CreateExtractor())
                {
                    extractor.Input("data", @in);
                    extractor.Extract("output", output);
                }

                double finish = Ncnn.GetCurrentTime();

                var elapsed = finish - begin;

                fastest = Math.Min(fastest, elapsed);
                slowest = Math.Max(slowest, elapsed);
                total  += elapsed;
            }

            var average = total / g_loop_count;

            // Label right-aligned to 20 columns, each figure right-aligned to 7 columns with two decimals.
            var label = comment.PadLeft(20);
            Console.WriteLine($"{label}  min = {fastest,7:F2}  max = {slowest,7:F2}  avg = {average,7:F2}");
        }
Ejemplo n.º 15
0
 /// <summary>
 /// Resolves <paramref name="layerType"/> through <c>Ncnn.LayerToIndex</c> and forwards to the
 /// index-based <c>TestLayer</c> overload with the remaining arguments unchanged.
 /// </summary>
 /// <returns>Whatever the index-based overload returns.</returns>
 public static int TestLayer <T>(string layerType, ParamDict pd, ModelBin mb, Option opt, Mat a, float epsilon = 0.001f)
     where T : Layer, new()
 {
     var typeIndex = Ncnn.LayerToIndex(layerType);

     return TestLayer <T>(typeIndex, pd, mb, opt, a, epsilon);
 }
Ejemplo n.º 16
0
 /// <summary>
 /// Calls <c>Ncnn.CreateGpuInstance()</c> unconditionally on construction.
 /// </summary>
 public GlobalGpuInstance() => Ncnn.CreateGpuInstance();
Ejemplo n.º 17
0
        /// <summary>
        /// Creates the layer registered under <paramref name="typeIndex"/>, runs it on <paramref name="a"/>
        /// three ways and compares the results: <c>b</c> is produced by calling <c>Forward</c> through a
        /// <typeparamref name="T"/>-typed reference, <c>c</c> by calling it on the created layer (with a pack-4
        /// round trip when packing is enabled), and <c>d</c> by the Vulkan path when it is available and enabled.
        /// </summary>
        /// <param name="typeIndex">Layer type index passed to <c>Ncnn.CreateLayer</c>.</param>
        /// <param name="pd">Layer parameters.</param>
        /// <param name="mb">Layer weights.</param>
        /// <param name="opt">
        /// Options used for every pass. This method modifies it: packing and FP16 flags may be switched off and
        /// the Vulkan allocators are replaced by allocators that are disposed before the method returns.
        /// </param>
        /// <param name="a">Input blob.</param>
        /// <param name="epsilon">Tolerance passed to <c>CompareMat</c>.</param>
        /// <returns>0 when all comparisons pass; -1 when the CPU or the GPU result differs from <c>b</c>.</returns>
        public static int TestLayer <T>(int typeIndex, ParamDict pd, ModelBin mb, Option opt, Mat a, float epsilon = 0.001f)
            where T : Layer, new()
        {
            using (var op = Ncnn.CreateLayer <T>(typeIndex))
            {
                if (!op.SupportPacking)
                {
                    opt.UsePackingLayout = false;
                }

                VulkanDevice                   vkDev = null;
                VkWeightBufferAllocator        gWeightVkAllocator        = null;
                VkWeightStagingBufferAllocator gWeightStagingVkAllocator = null;
                VkBlobBufferAllocator          gBlobVkAllocator          = null;
                VkStagingBufferAllocator       gStagingVkAllocator       = null;

                if (Ncnn.IsSupportVulkan)
                {
                    vkDev = Ncnn.GetGpuDevice();

                    gWeightVkAllocator        = new VkWeightBufferAllocator(vkDev);
                    gWeightStagingVkAllocator = new VkWeightStagingBufferAllocator(vkDev);

                    gBlobVkAllocator    = new VkBlobBufferAllocator(vkDev);
                    gStagingVkAllocator = new VkStagingBufferAllocator(vkDev);

                    opt.BlobVkAllocator      = gBlobVkAllocator;
                    opt.WorkspaceVkAllocator = gBlobVkAllocator;
                    opt.StagingVkAllocator   = gStagingVkAllocator;

                    // Fall back to FP32 for features the device does not report.
                    if (!vkDev.Info.SupportFP16Storage)
                    {
                        opt.UseFP16Storage = false;
                    }
                    if (!vkDev.Info.SupportFP16Packed)
                    {
                        opt.UseFP16Packed = false;
                    }

                    op.VkDev = vkDev;
                }

                op.LoadParam(pd);

                op.LoadModel(mb);

                op.CreatePipeline(opt);

                if (Ncnn.IsSupportVulkan)
                {
                    if (opt.UseVulkanCompute)
                    {
                        using var cmd = new VkTransfer(vkDev)
                              {
                                  WeightVkAllocator  = gWeightVkAllocator,
                                  StagingVkAllocator = gWeightStagingVkAllocator
                              };

                        op.UploadModel(cmd, opt);

                        cmd.SubmitAndWait();

                        gWeightStagingVkAllocator?.Clear();
                    }
                }

                // b: baseline every other result is compared against.
                using var b = new Mat();
                ((T)op).Forward(a, b, opt);

                Mat c = null;
                Mat d = null;

                try
                {
                    // c: CPU result, computed on a pack-4 copy of the input when packing is enabled.
                    c = new Mat();
                    {
                        Mat a4 = null; // packed copy of a; stays null when packing is disabled
                        Mat c4 = null;

                        try
                        {
                            if (opt.UsePackingLayout)
                            {
                                a4 = new Mat();
                                Ncnn.ConvertPacking(a, a4, 4, opt);
                            }

                            c4 = new Mat();
                            op.Forward(a4 ?? a, c4, opt);

                            if (opt.UsePackingLayout)
                            {
                                Ncnn.ConvertPacking(c4, c, 1, opt);
                            }
                            else
                            {
                                // c4 already is the final result: hand ownership over to c.
                                c.Dispose();
                                c  = c4;
                                c4 = null;
                            }
                        }
                        finally
                        {
                            // Releases the temporaries on every path; the packed input copy was
                            // previously never disposed.
                            c4?.Dispose();
                            a4?.Dispose();
                        }
                    }

                    if (Ncnn.IsSupportVulkan)
                    {
                        d = new Mat();

                        if (opt.UseVulkanCompute)
                        {
                            using var a4 = new Mat();
                            Mat a4_fp16 = null;

                            try
                            {
                                // pack
                                Ncnn.ConvertPacking(a, a4, 4, opt);

                                // fp16
                                if (opt.UseFP16Storage || a4.ElemPack == 4 && opt.UseFP16Packed)
                                {
                                    a4_fp16 = new Mat();
                                    Ncnn.CastFloat32ToFloat16(a4, a4_fp16, opt);
                                }
                                else
                                {
                                    a4_fp16 = a4;
                                }

                                // upload
                                using var a4_fp16_gpu = new VkMat();
                                a4_fp16_gpu.CreateLike(a4_fp16, gBlobVkAllocator, gStagingVkAllocator);
                                a4_fp16_gpu.PrepareStagingBuffer();
                                a4_fp16_gpu.Upload(a4_fp16);

                                // forward
                                using var cmd = new VkCompute(vkDev);

                                cmd.RecordUpload(a4_fp16_gpu);

                                using var d4_fp16_gpu = new VkMat();
                                op.Forward(a4_fp16_gpu, d4_fp16_gpu, cmd, opt);

                                d4_fp16_gpu.PrepareStagingBuffer();

                                cmd.RecordDownload(d4_fp16_gpu);

                                cmd.SubmitAndWait();

                                // download
                                using var d4_fp16 = new Mat();
                                d4_fp16.CreateLike(d4_fp16_gpu);
                                d4_fp16_gpu.Download(d4_fp16);

                                // fp32
                                Mat d4 = null;

                                try
                                {
                                    if (opt.UseFP16Storage || d4_fp16.ElemPack == 4 && opt.UseFP16Packed)
                                    {
                                        d4 = new Mat();
                                        Ncnn.CastFloat16ToFloat32(d4_fp16, d4, opt);
                                    }
                                    else
                                    {
                                        d4 = d4_fp16;
                                    }

                                    // unpack
                                    Ncnn.ConvertPacking(d4, d, 1, opt);
                                }
                                finally
                                {
                                    // d4_fp16 is released by its using declaration; dispose d4 only
                                    // when it is a separate mat, to avoid a double dispose.
                                    if (!ReferenceEquals(d4, d4_fp16))
                                    {
                                        d4?.Dispose();
                                    }
                                }
                            }
                            finally
                            {
                                // a4 is released by its using declaration; dispose a4_fp16 only
                                // when it is a separate mat, to avoid a double dispose.
                                if (!ReferenceEquals(a4_fp16, a4))
                                {
                                    a4_fp16?.Dispose();
                                }
                            }
                        }
                    }

                    op.DestroyPipeline(opt);

                    // Must dispose here!!
                    // The layer is released before the allocators below are cleared and disposed.
                    op.Dispose();

                    if (Ncnn.IsSupportVulkan)
                    {
                        gBlobVkAllocator.Clear();
                        gStagingVkAllocator.Clear();
                        gWeightVkAllocator.Clear();

                        gBlobVkAllocator?.Dispose();
                        gStagingVkAllocator?.Dispose();
                        gWeightVkAllocator?.Dispose();
                        gWeightStagingVkAllocator?.Dispose();
                    }

                    if (CompareMat(b, c, epsilon) != 0)
                    {
                        Console.Error.WriteLine("test_layer failed cpu");
                        return(-1);
                    }

                    if (Ncnn.IsSupportVulkan)
                    {
                        if (opt.UseVulkanCompute && CompareMat(b, d, epsilon) != 0)
                        {
                            Console.Error.WriteLine("test_layer failed gpu");
                            return(-1);
                        }
                    }
                }
                finally
                {
                    c?.Dispose();
                    d?.Dispose();
                }
            }

            return(0);
        }