/// <summary>
/// Looks up <paramref name="kernelName"/> in the Convolution compute asset, binds the
/// shape/indexer constants and the four tensor buffers, and dispatches the kernel.
/// </summary>
/// <param name="kernelName">Name of the kernel to find in the Convolution compute asset.</param>
/// <param name="input">Tensor bound as "Input"; its Shape[2] is asserted to equal filter.Shape[2].</param>
/// <param name="filter">Tensor bound as "Filter"; Shape[0] and Shape[1] are asserted to be 4, and Shape[3] is used as the output channel count.</param>
/// <param name="bias">Tensor bound as "Bias"; Shape[0] is asserted to equal the output channel count.</param>
/// <param name="output">Tensor bound as "Output"; Shape[1] and Shape[0] provide the Y and Z thread-group counts.</param>
static void InvokeConv2DInternal (string kernelName, Tensor input, Tensor filter, Tensor bias, Tensor output) {
    // Filter layout checks: first two dimensions are fixed at 4 (presumably the spatial
    // kernel size - confirm against the shader), third must agree with the input.
    Debug.Assert(filter.Shape[0] == 4);
    Debug.Assert(filter.Shape[1] == 4);
    Debug.Assert(filter.Shape[2] == input.Shape[2]);

    var outputChannelCount = filter.Shape[3];
    Debug.Assert(bias.Shape[0] == outputChannelCount);

    var shader = _instance._computeAssets.Convolution;
    var kernelIndex = shader.FindKernel(kernelName);
    var groupSize = shader.GetThreadGroupSizeVector(kernelIndex);

    // Output channels must fill whole thread groups along X; 3 channels is the one
    // explicitly tolerated exception.
    Debug.Assert(outputChannelCount % groupSize.x == 0 || outputChannelCount == 3);

    // Shape constants.
    shader.SetInts("InputShape", input.Shape);
    shader.SetInts("FilterShape", filter.Shape);
    shader.SetInts("OutputShape", output.Shape);

    // Indexer constants derived from each shape by GpuBackendHelper.Indexer.
    shader.SetInts("InputIndexer", GpuBackendHelper.Indexer(input.Shape));
    shader.SetInts("FilterIndexer", GpuBackendHelper.Indexer(filter.Shape));
    shader.SetInts("OutputIndexer", GpuBackendHelper.Indexer(output.Shape));

    // Buffers are bound per kernel.
    shader.SetBuffer(kernelIndex, "Input", input.Buffer);
    shader.SetBuffer(kernelIndex, "Filter", filter.Buffer);
    shader.SetBuffer(kernelIndex, "Bias", bias.Buffer);
    shader.SetBuffer(kernelIndex, "Output", output.Buffer);

    // Clamp to at least one group along X so the dispatch is never issued with a
    // zero group count when the division rounds down to 0.
    var groupsX = Mathf.Max(1, outputChannelCount / groupSize.x);
    shader.Dispatch(kernelIndex, groupsX, output.Shape[1], output.Shape[0]);
}
/// <summary>
/// Binds <paramref name="input"/> and <paramref name="output"/> to the "ReorderWeights"
/// kernel of the Setup compute asset and dispatches it directly on the compute shader.
/// </summary>
/// <param name="input">Source tensor, bound as "Input". Its Shape[0] and Shape[1] are used as the X and Y thread-group counts.</param>
/// <param name="output">
/// Destination tensor, bound as "Output". Asserted to have the same Shape[0] and Shape[1]
/// as <paramref name="input"/>, with Shape[2] and Shape[3] swapped.
/// </param>
internal static void InvokeReorderWeights(Tensor input, Tensor output) {
    var shader = _instance._computeAssets.Setup;
    var kernelIndex = shader.FindKernel("ReorderWeights");

    var sourceShape = input.Shape;
    var targetShape = output.Shape;

    // The first two dimensions carry over unchanged; the last two trade places.
    Debug.Assert(sourceShape[0] == targetShape[0]);
    Debug.Assert(sourceShape[1] == targetShape[1]);
    Debug.Assert(sourceShape[2] == targetShape[3]);
    Debug.Assert(sourceShape[3] == targetShape[2]);

    // Shape and indexer constants.
    shader.SetInts("InputShape", sourceShape);
    shader.SetInts("OutputShape", targetShape);
    shader.SetInts("InputIndexer", GpuBackendHelper.Indexer(sourceShape));
    shader.SetInts("OutputIndexer", GpuBackendHelper.Indexer(targetShape));

    // Buffers are bound per kernel.
    shader.SetBuffer(kernelIndex, "Input", input.Buffer);
    shader.SetBuffer(kernelIndex, "Output", output.Buffer);

    // One thread group per (Shape[0], Shape[1]) pair, a single group along Z.
    shader.Dispatch(kernelIndex, sourceShape[0], sourceShape[1], 1);
}
/// <summary>
/// Binds <paramref name="input"/> and <paramref name="output"/> to the "ReorderWeights"
/// kernel of the Setup compute asset and issues the dispatch through
/// <c>SharedCommandBuffer</c> rather than on the compute shader directly.
/// </summary>
/// <param name="input">Source tensor, bound as "Input". Its Shape[0] and Shape[1] are used as the X and Y thread-group counts.</param>
/// <param name="output">
/// Destination tensor, bound as "Output". Asserted to have the same Shape[0] and Shape[1]
/// as <paramref name="input"/>, with Shape[2] and Shape[3] swapped.
/// </param>
internal static void InvokeReorderWeights(Tensor input, Tensor output) {
    var shader = _instance._computeAssets.Setup;
    var kernelIndex = shader.FindKernel("ReorderWeights");

    var sourceShape = input.Shape;
    var targetShape = output.Shape;

    // The first two dimensions carry over unchanged; the last two trade places.
    Debug.Assert(sourceShape[0] == targetShape[0]);
    Debug.Assert(sourceShape[1] == targetShape[1]);
    Debug.Assert(sourceShape[2] == targetShape[3]);
    Debug.Assert(sourceShape[3] == targetShape[2]);

    // NOTE(review): presumably these calls are recorded and only run when the shared
    // command buffer is executed elsewhere - confirm where it is submitted.
    var commands = SharedCommandBuffer;

    // Shape and indexer constants.
    commands.SetComputeShapeAsIntParams(shader, "InputShape", sourceShape);
    commands.SetComputeShapeAsIntParams(shader, "OutputShape", targetShape);
    commands.SetComputeIntParams(shader, "InputIndexer", GpuBackendHelper.Indexer(sourceShape));
    commands.SetComputeIntParams(shader, "OutputIndexer", GpuBackendHelper.Indexer(targetShape));

    // Buffers are bound per kernel.
    commands.SetComputeBufferParam(shader, kernelIndex, "Input", input.Buffer);
    commands.SetComputeBufferParam(shader, kernelIndex, "Output", output.Buffer);

    // One thread group per (Shape[0], Shape[1]) pair, a single group along Z.
    commands.DispatchCompute(shader, kernelIndex, sourceShape[0], sourceShape[1], 1);
}