/// <summary>
/// Benchmark entry point: configures the shared allocators and inference options,
/// then runs <c>Benchmark</c> for a fixed list of models.
/// </summary>
/// <param name="args">
/// Optional positional arguments, each parsed with <see cref="int.Parse(string)"/>:
/// [0] loop count (default 4), [1] number of threads (default: CPU count),
/// [2] CPU power-save mode (default 0), [3] GPU device index (default -1, meaning
/// Vulkan compute is not used).
/// </param>
/// <returns>Always 0.</returns>
private static int Main(string[] args)
{
    var loopCount = 4;
    // The thread count must default to the number of CPU cores; the GPU count is
    // unrelated to CPU threading and is 0 on machines without a GPU.
    var numThreads = Ncnn.GetCpuCount();
    var powerSave = 0;
    var gpuDevice = -1;

    if (args.Length >= 1)
    {
        loopCount = int.Parse(args[0]);
    }

    if (args.Length >= 2)
    {
        numThreads = int.Parse(args[1]);
    }

    if (args.Length >= 3)
    {
        powerSave = int.Parse(args[2]);
    }

    if (args.Length >= 4)
    {
        gpuDevice = int.Parse(args[3]);
    }

    var useVulkanCompute = gpuDevice != -1;

    g_loop_count = loopCount;

    g_blob_pool_allocator.SetSizeCompareRatio(0.0f);
    g_workspace_pool_allocator.SetSizeCompareRatio(0.5f);

    // Only touch Vulkan objects when the build supports Vulkan. This matches the
    // guards used below when the allocators are attached to the options and disposed.
    if (Ncnn.IsSupportVulkan && useVulkanCompute)
    {
        g_warmup_loop_count = 10;

        g_vkdev = Ncnn.GetGpuDevice(gpuDevice);

        g_blob_vkallocator = new VkBlobBufferAllocator(g_vkdev);
        g_staging_vkallocator = new VkStagingBufferAllocator(g_vkdev);
    }

    // default option
    using var opt = new Option();
    opt.LightMode = true;
    opt.NumThreads = numThreads;
    opt.BlobAllocator = g_blob_pool_allocator;
    opt.WorkspaceAllocator = g_workspace_pool_allocator;
    if (Ncnn.IsSupportVulkan)
    {
        opt.BlobVkAllocator = g_blob_vkallocator;
        opt.WorkspaceVkAllocator = g_blob_vkallocator;
        opt.StagingVkAllocator = g_staging_vkallocator;
    }

    opt.UseWinogradConvolution = true;
    opt.UseSgemmConvolution = true;
    opt.UseInt8Inference = true;
    opt.UseVulkanCompute = useVulkanCompute;
    opt.UseFP16Packed = true;
    opt.UseFP16Storage = true;
    opt.UseFP16Arithmetic = true;
    opt.UseInt8Storage = true;
    opt.UseInt8Arithmetic = true;
    opt.UsePackingLayout = true;

    Ncnn.SetCpuPowerSave((PowerSave)powerSave);

    Ncnn.SetOmpDynamic(0);
    Ncnn.SetOmpNumThreads(numThreads);

    Console.WriteLine($"loop_count = {loopCount}");
    Console.WriteLine($"num_threads = {numThreads}");
    // Report the mode that is actually in effect by reading it back.
    Console.WriteLine($"powersave = {(int)Ncnn.GetCpuPowerSave()}");
    Console.WriteLine($"gpu_device = {gpuDevice}");

    // The int8 models are only benchmarked when Vulkan compute is not in use.
    var runInt8 = !Ncnn.IsSupportVulkan || !useVulkanCompute;

    // Benchmarks one model on a square, 3-channel input and releases the input afterwards.
    // NOTE(review): Benchmark is not visible here; disposing afterwards assumes it does not
    // keep the Mat and that a repeated Mat.Dispose is harmless if it already released it.
    void Run(string name, int size)
    {
        using (var input = new Mat(size, size, 3))
        {
            Benchmark(name, input, opt);
        }
    }

    // Same as Run, but skipped while Vulkan compute is active.
    void RunInt8(string name, int size)
    {
        if (runInt8)
        {
            Run(name, size);
        }
    }

    // run
    Run("squeezenet", 227);
    RunInt8("squeezenet_int8", 227);

    Run("mobilenet", 224);
    RunInt8("mobilenet_int8", 224);

    Run("mobilenet_v2", 224);
    // The mobilenet_v2_int8 benchmark is disabled.
    // RunInt8("mobilenet_v2_int8", 224);

    Run("mobilenet_v3", 224);
    Run("shufflenet", 224);
    Run("shufflenet_v2", 224);
    Run("mnasnet", 224);
    Run("proxylessnasnet", 224);

    Run("googlenet", 224);
    RunInt8("googlenet_int8", 224);

    Run("resnet18", 224);
    RunInt8("resnet18_int8", 224);

    Run("alexnet", 227);

    Run("vgg16", 224);
    RunInt8("vgg16_int8", 224);

    Run("resnet50", 224);
    RunInt8("resnet50_int8", 224);

    Run("squeezenet_ssd", 300);
    RunInt8("squeezenet_ssd_int8", 300);

    Run("mobilenet_ssd", 300);
    RunInt8("mobilenet_ssd_int8", 300);

    Run("mobilenet_yolo", 416);
    Run("mobilenetv2_yolov3", 352);

    if (Ncnn.IsSupportVulkan)
    {
        g_blob_vkallocator?.Dispose();
        g_staging_vkallocator?.Dispose();
    }

    return 0;
}
/// <summary>
/// Creates a layer of type <typeparamref name="T"/>, runs it on <paramref name="a"/> and checks
/// that the (optionally packed) CPU path and, when Vulkan compute is enabled, the GPU path
/// produce the same output as the plain forward pass, using <c>CompareMat</c>.
/// </summary>
/// <typeparam name="T">Layer type under test.</typeparam>
/// <param name="typeIndex">Layer type index passed to <c>Ncnn.CreateLayer</c>.</param>
/// <param name="pd">Parameters loaded into the layer.</param>
/// <param name="mb">Model weights loaded into the layer.</param>
/// <param name="opt">
/// Options used for every step. This method modifies it: packing is switched off when the layer
/// does not support it and, on Vulkan builds, the Vulkan allocators are replaced and the FP16
/// flags are cleared when the device lacks support.
/// </param>
/// <param name="a">Input blob.</param>
/// <param name="epsilon">Tolerance forwarded to <c>CompareMat</c>.</param>
/// <returns>0 when all outputs match; -1 when the CPU or the GPU comparison fails.</returns>
public static int TestLayer<T>(int typeIndex, ParamDict pd, ModelBin mb, Option opt, Mat a, float epsilon = 0.001f)
    where T : Layer, new()
{
    using (var op = Ncnn.CreateLayer<T>(typeIndex))
    {
        if (!op.SupportPacking)
        {
            opt.UsePackingLayout = false;
        }

        VulkanDevice vkDev = null;
        VkWeightBufferAllocator weightVkAllocator = null;
        VkWeightStagingBufferAllocator weightStagingVkAllocator = null;
        VkBlobBufferAllocator blobVkAllocator = null;
        VkStagingBufferAllocator stagingVkAllocator = null;

        if (Ncnn.IsSupportVulkan)
        {
            vkDev = Ncnn.GetGpuDevice();

            weightVkAllocator = new VkWeightBufferAllocator(vkDev);
            weightStagingVkAllocator = new VkWeightStagingBufferAllocator(vkDev);
            blobVkAllocator = new VkBlobBufferAllocator(vkDev);
            stagingVkAllocator = new VkStagingBufferAllocator(vkDev);

            opt.BlobVkAllocator = blobVkAllocator;
            opt.WorkspaceVkAllocator = blobVkAllocator;
            opt.StagingVkAllocator = stagingVkAllocator;

            if (!vkDev.Info.SupportFP16Storage)
            {
                opt.UseFP16Storage = false;
            }

            if (!vkDev.Info.SupportFP16Packed)
            {
                opt.UseFP16Packed = false;
            }

            op.VkDev = vkDev;
        }

        op.LoadParam(pd);
        op.LoadModel(mb);
        op.CreatePipeline(opt);

        if (Ncnn.IsSupportVulkan && opt.UseVulkanCompute)
        {
            using var transfer = new VkTransfer(vkDev)
            {
                WeightVkAllocator = weightVkAllocator,
                StagingVkAllocator = weightStagingVkAllocator
            };

            op.UploadModel(transfer, opt);
            transfer.SubmitAndWait();

            weightStagingVkAllocator?.Clear();
        }

        // Reference output.
        using var b = new Mat();
        ((T)op).Forward(a, b, opt);

        Mat c = null;
        Mat d = null;
        try
        {
            // CPU path, going through the pack-4 layout when packing is enabled.
            {
                var usePacking = opt.UsePackingLayout;
                Mat packedInput = null; // only allocated when packing; released in finally
                Mat c4 = null;
                try
                {
                    if (usePacking)
                    {
                        packedInput = new Mat();
                        Ncnn.ConvertPacking(a, packedInput, 4, opt);
                    }

                    c4 = new Mat();
                    op.Forward(packedInput ?? a, c4, opt);

                    if (usePacking)
                    {
                        c = new Mat();
                        Ncnn.ConvertPacking(c4, c, 1, opt);
                    }
                    else
                    {
                        // The raw output is the result; hand ownership over to c.
                        c = c4;
                        c4 = null;
                    }
                }
                finally
                {
                    c4?.Dispose();
                    packedInput?.Dispose();
                }
            }

            if (Ncnn.IsSupportVulkan)
            {
                d = new Mat();

                if (opt.UseVulkanCompute)
                {
                    // pack
                    using var a4 = new Mat();
                    Ncnn.ConvertPacking(a, a4, 4, opt);

                    // Set only when a separate fp16 copy is allocated, so that a4 (owned by
                    // its using declaration) is never disposed a second time through an alias.
                    Mat ownedA4Fp16 = null;
                    try
                    {
                        // fp16
                        Mat a4Fp16;
                        if (opt.UseFP16Storage || (a4.ElemPack == 4 && opt.UseFP16Packed))
                        {
                            ownedA4Fp16 = new Mat();
                            Ncnn.CastFloat32ToFloat16(a4, ownedA4Fp16, opt);
                            a4Fp16 = ownedA4Fp16;
                        }
                        else
                        {
                            a4Fp16 = a4;
                        }

                        // upload
                        using var a4Fp16Gpu = new VkMat();
                        a4Fp16Gpu.CreateLike(a4Fp16, blobVkAllocator, stagingVkAllocator);
                        a4Fp16Gpu.PrepareStagingBuffer();
                        a4Fp16Gpu.Upload(a4Fp16);

                        // forward
                        using var compute = new VkCompute(vkDev);
                        compute.RecordUpload(a4Fp16Gpu);

                        using var d4Fp16Gpu = new VkMat();
                        op.Forward(a4Fp16Gpu, d4Fp16Gpu, compute, opt);

                        d4Fp16Gpu.PrepareStagingBuffer();
                        compute.RecordDownload(d4Fp16Gpu);
                        compute.SubmitAndWait();

                        // download
                        using var d4Fp16 = new Mat();
                        d4Fp16.CreateLike(d4Fp16Gpu);
                        d4Fp16Gpu.Download(d4Fp16);

                        // fp32 (same ownership rule as for the input above)
                        Mat ownedD4 = null;
                        try
                        {
                            Mat d4;
                            if (opt.UseFP16Storage || (d4Fp16.ElemPack == 4 && opt.UseFP16Packed))
                            {
                                ownedD4 = new Mat();
                                Ncnn.CastFloat16ToFloat32(d4Fp16, ownedD4, opt);
                                d4 = ownedD4;
                            }
                            else
                            {
                                d4 = d4Fp16;
                            }

                            // unpack
                            Ncnn.ConvertPacking(d4, d, 1, opt);
                        }
                        finally
                        {
                            ownedD4?.Dispose();
                        }
                    }
                    finally
                    {
                        ownedA4Fp16?.Dispose();
                    }
                }
            }

            op.DestroyPipeline(opt);

            // Must dispose here!! The layer is released before the allocators below.
            op.Dispose();

            if (Ncnn.IsSupportVulkan)
            {
                blobVkAllocator.Clear();
                stagingVkAllocator.Clear();
                weightVkAllocator.Clear();

                blobVkAllocator?.Dispose();
                stagingVkAllocator?.Dispose();
                weightVkAllocator?.Dispose();
                weightStagingVkAllocator?.Dispose();
            }

            if (CompareMat(b, c, epsilon) != 0)
            {
                Console.Error.WriteLine("test_layer failed cpu");
                return -1;
            }

            if (Ncnn.IsSupportVulkan && opt.UseVulkanCompute && CompareMat(b, d, epsilon) != 0)
            {
                Console.Error.WriteLine("test_layer failed gpu");
                return -1;
            }
        }
        finally
        {
            c?.Dispose();
            d?.Dispose();
        }
    }

    return 0;
}