Beispiel #1
0
        /// <summary>
        /// Configures the kernel named <paramref name="kernelName"/> on the
        /// Convolution compute asset with the given tensors and dispatches it.
        /// </summary>
        /// <param name="kernelName">Name passed to FindKernel on the Convolution compute asset.</param>
        /// <param name="input">Tensor bound to the "Input" buffer.</param>
        /// <param name="filter">Tensor bound to the "Filter" buffer; asserted to have 4 as its first two dimensions.</param>
        /// <param name="bias">Tensor bound to the "Bias" buffer; its first dimension is asserted to equal filter.Shape[3].</param>
        /// <param name="output">Tensor bound to the "Output" buffer; its first two dimensions size the dispatch.</param>
        static void InvokeConv2DInternal
            (string kernelName, Tensor input, Tensor filter, Tensor bias, Tensor output)
        {
            var filterShape = filter.Shape;

            // Shape contract of the filter: first two dimensions are 4
            // (presumably a 4x4 window -- confirm against the shader) and the
            // third one must agree with input.Shape[2].
            Debug.Assert(filterShape[0] == 4);
            Debug.Assert(filterShape[1] == 4);
            Debug.Assert(filterShape[2] == input.Shape[2]);

            var channelCount = filterShape[3];

            Debug.Assert(bias.Shape[0] == channelCount);

            var shader      = _instance._computeAssets.Convolution;
            var kernelIndex = shader.FindKernel(kernelName);
            var groupSize   = shader.GetThreadGroupSizeVector(kernelIndex);

            // The channel count has to be a multiple of the group width; 3 is
            // the single tolerated exception (presumably an RGB output layer --
            // TODO confirm).
            Debug.Assert(channelCount % groupSize.x == 0 || channelCount == 3);

            // Shape constants.
            shader.SetInts("InputShape", input.Shape);
            shader.SetInts("FilterShape", filterShape);
            shader.SetInts("OutputShape", output.Shape);

            // Indexer constants derived from each shape.
            var inputIndexer = GpuBackendHelper.Indexer(input.Shape);
            shader.SetInts("InputIndexer", inputIndexer);

            var filterIndexer = GpuBackendHelper.Indexer(filterShape);
            shader.SetInts("FilterIndexer", filterIndexer);

            var outputIndexer = GpuBackendHelper.Indexer(output.Shape);
            shader.SetInts("OutputIndexer", outputIndexer);

            // Data buffers for this kernel.
            shader.SetBuffer(kernelIndex, "Input", input.Buffer);
            shader.SetBuffer(kernelIndex, "Filter", filter.Buffer);
            shader.SetBuffer(kernelIndex, "Bias", bias.Buffer);
            shader.SetBuffer(kernelIndex, "Output", output.Buffer);

            // Always dispatch at least one group along X, which covers the
            // case where the quotient would otherwise be zero.
            var groupsX = Mathf.Max(1, channelCount / groupSize.x);
            var groupsY = output.Shape[1];
            var groupsZ = output.Shape[0];

            shader.Dispatch(kernelIndex, groupsX, groupsY, groupsZ);
        }
Beispiel #2
0
        /// <summary>
        /// Runs the "ReorderWeights" kernel of the Setup compute asset, reading
        /// from <paramref name="input"/> and writing to <paramref name="output"/>.
        /// </summary>
        /// <param name="input">Tensor bound to the "Input" buffer; its first two dimensions size the dispatch.</param>
        /// <param name="output">
        /// Tensor bound to the "Output" buffer. Its shape is asserted to equal the
        /// input shape with dimensions 2 and 3 exchanged.
        /// </param>
        internal static void InvokeReorderWeights(Tensor input, Tensor output)
        {
            var shader      = _instance._computeAssets.Setup;
            var kernelIndex = shader.FindKernel("ReorderWeights");

            // Dimensions 0 and 1 are kept as they are; dimensions 2 and 3 trade places.
            Debug.Assert(input.Shape[0] == output.Shape[0]);
            Debug.Assert(input.Shape[1] == output.Shape[1]);
            Debug.Assert(input.Shape[2] == output.Shape[3]);
            Debug.Assert(input.Shape[3] == output.Shape[2]);

            // Shape constants.
            shader.SetInts("InputShape", input.Shape);
            shader.SetInts("OutputShape", output.Shape);

            // Indexer constants derived from each shape.
            var inputIndexer = GpuBackendHelper.Indexer(input.Shape);
            shader.SetInts("InputIndexer", inputIndexer);

            var outputIndexer = GpuBackendHelper.Indexer(output.Shape);
            shader.SetInts("OutputIndexer", outputIndexer);

            // Data buffers for this kernel.
            shader.SetBuffer(kernelIndex, "Input", input.Buffer);
            shader.SetBuffer(kernelIndex, "Output", output.Buffer);

            // One thread group per (dimension 0, dimension 1) pair of the input.
            var groupsX = input.Shape[0];
            var groupsY = input.Shape[1];

            shader.Dispatch(kernelIndex, groupsX, groupsY, 1);
        }
Beispiel #3
0
        /// <summary>
        /// Issues the "ReorderWeights" kernel of the Setup compute asset through
        /// SharedCommandBuffer, reading from <paramref name="input"/> and writing
        /// to <paramref name="output"/>.
        /// </summary>
        /// <param name="input">Tensor bound to the "Input" buffer; its first two dimensions size the dispatch.</param>
        /// <param name="output">
        /// Tensor bound to the "Output" buffer. Its shape is asserted to equal the
        /// input shape with dimensions 2 and 3 exchanged.
        /// </param>
        internal static void InvokeReorderWeights(Tensor input, Tensor output)
        {
            var shader      = _instance._computeAssets.Setup;
            var kernelIndex = shader.FindKernel("ReorderWeights");

            // Dimensions 0 and 1 are kept as they are; dimensions 2 and 3 trade places.
            Debug.Assert(input.Shape[0] == output.Shape[0]);
            Debug.Assert(input.Shape[1] == output.Shape[1]);
            Debug.Assert(input.Shape[2] == output.Shape[3]);
            Debug.Assert(input.Shape[3] == output.Shape[2]);

            // All parameter setup and the dispatch go through the shared command
            // buffer (presumably executed later by its owner -- confirm).
            var commands = SharedCommandBuffer;

            // Shape constants.
            commands.SetComputeShapeAsIntParams(shader, "InputShape", input.Shape);
            commands.SetComputeShapeAsIntParams(shader, "OutputShape", output.Shape);

            // Indexer constants derived from each shape.
            var inputIndexer = GpuBackendHelper.Indexer(input.Shape);
            commands.SetComputeIntParams(shader, "InputIndexer", inputIndexer);

            var outputIndexer = GpuBackendHelper.Indexer(output.Shape);
            commands.SetComputeIntParams(shader, "OutputIndexer", outputIndexer);

            // Data buffers for this kernel.
            commands.SetComputeBufferParam(shader, kernelIndex, "Input", input.Buffer);
            commands.SetComputeBufferParam(shader, kernelIndex, "Output", output.Buffer);

            // One thread group per (dimension 0, dimension 1) pair of the input.
            var groupsX = input.Shape[0];
            var groupsY = input.Shape[1];

            commands.DispatchCompute(shader, kernelIndex, groupsX, groupsY, 1);
        }