/// <summary>
/// Runs a layer of type <typeparamref name="T"/> over <paramref name="a"/> several ways and checks
/// that the results agree: a baseline pass through <c>((T)op).Forward</c>, a second CPU pass that
/// optionally repacks the input to elempack 4, and (when Vulkan is available and enabled in
/// <paramref name="opt"/>) a GPU pass.
/// </summary>
/// <typeparam name="T">Layer type to create via <c>Ncnn.CreateLayer</c>.</typeparam>
/// <param name="typeIndex">Layer type index passed to <c>Ncnn.CreateLayer</c>.</param>
/// <param name="pd">Parameters loaded into the layer with <c>LoadParam</c>.</param>
/// <param name="mb">Weights loaded into the layer with <c>LoadModel</c>.</param>
/// <param name="opt">
/// Execution options. NOTE: this instance is modified: <c>UsePackingLayout</c> is cleared when the
/// layer does not support packing, the Vulkan allocators are assigned, and the FP16 flags are
/// cleared when the device does not report support for them.
/// </param>
/// <param name="a">Input blob fed to every forward pass. It is not disposed by this method.</param>
/// <param name="epsilon">Tolerance forwarded to <c>CompareMat</c>.</param>
/// <returns>
/// <c>0</c> when all compared outputs match (i.e. <c>CompareMat</c> returned 0); <c>-1</c> when the
/// CPU or GPU output differs from the baseline, after writing a message to standard error.
/// </returns>
public static int TestLayer<T>(int typeIndex, ParamDict pd, ModelBin mb, Option opt, Mat a, float epsilon = 0.001f)
    where T : Layer, new()
{
    using (var op = Ncnn.CreateLayer<T>(typeIndex))
    {
        if (!op.SupportPacking)
        {
            opt.UsePackingLayout = false;
        }

        VulkanDevice vkDev = null;
        VkWeightBufferAllocator gWeightVkAllocator = null;
        VkWeightStagingBufferAllocator gWeightStagingVkAllocator = null;
        VkBlobBufferAllocator gBlobVkAllocator = null;
        VkStagingBufferAllocator gStagingVkAllocator = null;

        if (Ncnn.IsSupportVulkan)
        {
            vkDev = Ncnn.GetGpuDevice();

            gWeightVkAllocator = new VkWeightBufferAllocator(vkDev);
            gWeightStagingVkAllocator = new VkWeightStagingBufferAllocator(vkDev);
            gBlobVkAllocator = new VkBlobBufferAllocator(vkDev);
            gStagingVkAllocator = new VkStagingBufferAllocator(vkDev);

            opt.BlobVkAllocator = gBlobVkAllocator;
            opt.WorkspaceVkAllocator = gBlobVkAllocator;
            opt.StagingVkAllocator = gStagingVkAllocator;

            // Do not request fp16 modes the device does not report support for.
            if (!vkDev.Info.SupportFP16Storage)
            {
                opt.UseFP16Storage = false;
            }

            if (!vkDev.Info.SupportFP16Packed)
            {
                opt.UseFP16Packed = false;
            }

            op.VkDev = vkDev;
        }

        op.LoadParam(pd);
        op.LoadModel(mb);
        op.CreatePipeline(opt);

        if (Ncnn.IsSupportVulkan)
        {
            if (opt.UseVulkanCompute)
            {
                using var cmd = new VkTransfer(vkDev)
                {
                    WeightVkAllocator = gWeightVkAllocator,
                    StagingVkAllocator = gWeightStagingVkAllocator
                };

                op.UploadModel(cmd, opt);
                cmd.SubmitAndWait();

                gWeightStagingVkAllocator?.Clear();
            }
        }

        // Baseline output: every other result is compared against b.
        using var b = new Mat();
        ((T)op).Forward(a, b, opt);

        // Second CPU pass, going through pack-4 conversion when packing is enabled.
        var c = new Mat();
        {
            // Only set when a packed copy is allocated here; otherwise the caller's
            // input is used directly and must not be disposed by us.
            Mat packedInput = null;
            try
            {
                Mat a4;
                if (opt.UsePackingLayout)
                {
                    packedInput = new Mat();
                    Ncnn.ConvertPacking(a, packedInput, 4, opt);
                    a4 = packedInput;
                }
                else
                {
                    a4 = a;
                }

                var c4 = new Mat();
                op.Forward(a4, c4, opt);

                if (opt.UsePackingLayout)
                {
                    Ncnn.ConvertPacking(c4, c, 1, opt);
                    c4.Dispose();
                }
                else
                {
                    // c4 becomes the result; release the placeholder it replaces.
                    c?.Dispose();
                    c = c4;
                }
            }
            finally
            {
                // Previously this packed temporary was never released.
                packedInput?.Dispose();
            }
        }

        Mat d = null;

        try
        {
            if (Ncnn.IsSupportVulkan)
            {
                d = new Mat();

                if (opt.UseVulkanCompute)
                {
                    using var a4 = new Mat();

                    // Tracks the separately allocated fp16 copy, if any. When no cast is
                    // needed the fp16 view aliases a4, which is already owned by its using.
                    Mat ownedInputFp16 = null;

                    try
                    {
                        // pack
                        Ncnn.ConvertPacking(a, a4, 4, opt);

                        // fp16
                        Mat a4_fp16;
                        if (opt.UseFP16Storage || (a4.ElemPack == 4 && opt.UseFP16Packed))
                        {
                            ownedInputFp16 = new Mat();
                            Ncnn.CastFloat32ToFloat16(a4, ownedInputFp16, opt);
                            a4_fp16 = ownedInputFp16;
                        }
                        else
                        {
                            a4_fp16 = a4;
                        }

                        // upload
                        using var a4_fp16_gpu = new VkMat();
                        a4_fp16_gpu.CreateLike(a4_fp16, gBlobVkAllocator, gStagingVkAllocator);
                        a4_fp16_gpu.PrepareStagingBuffer();
                        a4_fp16_gpu.Upload(a4_fp16);

                        // forward
                        using var cmd = new VkCompute(vkDev);
                        cmd.RecordUpload(a4_fp16_gpu);

                        using var d4_fp16_gpu = new VkMat();
                        op.Forward(a4_fp16_gpu, d4_fp16_gpu, cmd, opt);

                        d4_fp16_gpu.PrepareStagingBuffer();
                        cmd.RecordDownload(d4_fp16_gpu);
                        cmd.SubmitAndWait();

                        // download
                        using var d4_fp16 = new Mat();
                        d4_fp16.CreateLike(d4_fp16_gpu);
                        d4_fp16_gpu.Download(d4_fp16);

                        // fp32
                        // Same ownership rule as above: only the freshly allocated fp32
                        // copy is disposed here; d4_fp16 is released by its own using.
                        Mat ownedOutputFp32 = null;
                        try
                        {
                            Mat d4;
                            if (opt.UseFP16Storage || (d4_fp16.ElemPack == 4 && opt.UseFP16Packed))
                            {
                                ownedOutputFp32 = new Mat();
                                Ncnn.CastFloat16ToFloat32(d4_fp16, ownedOutputFp32, opt);
                                d4 = ownedOutputFp32;
                            }
                            else
                            {
                                d4 = d4_fp16;
                            }

                            // unpack
                            Ncnn.ConvertPacking(d4, d, 1, opt);
                        }
                        finally
                        {
                            ownedOutputFp32?.Dispose();
                        }
                    }
                    finally
                    {
                        ownedInputFp16?.Dispose();
                    }
                }
            }

            op.DestroyPipeline(opt);

            // Must dispose here!!
            // The layer is released explicitly before the Vulkan allocators below are
            // cleared and disposed; the enclosing using disposes it again on scope exit.
            op.Dispose();

            if (Ncnn.IsSupportVulkan)
            {
                gBlobVkAllocator.Clear();
                gStagingVkAllocator.Clear();
                gWeightVkAllocator.Clear();

                gBlobVkAllocator?.Dispose();
                gStagingVkAllocator?.Dispose();
                gWeightVkAllocator?.Dispose();
                gWeightStagingVkAllocator?.Dispose();
            }

            if (CompareMat(b, c, epsilon) != 0)
            {
                Console.Error.WriteLine("test_layer failed cpu");
                return -1;
            }

            if (Ncnn.IsSupportVulkan)
            {
                if (opt.UseVulkanCompute && CompareMat(b, d, epsilon) != 0)
                {
                    Console.Error.WriteLine("test_layer failed gpu");
                    return -1;
                }
            }
        }
        finally
        {
            c?.Dispose();
            d?.Dispose();
        }
    }

    return 0;
}