Ejemplo n.º 1
0
        /// <summary>
        /// Convolves <paramref name="X"/> with <paramref name="K"/> and bias <paramref name="B"/> in two
        /// compute passes: "KernelWinograd_3x3" first rewrites the weights, then
        /// "Conv2DWinograd_2x2_3x3" consumes the rewritten weights.
        /// </summary>
        /// <param name="X">Input tensor; its channel count must equal the kernel depth of <paramref name="K"/>.</param>
        /// <param name="K">Convolution weights (presumably 3x3, judging by the kernel names - confirm with callers).</param>
        /// <param name="B">Bias; must hold exactly one value per kernel in <paramref name="K"/>.</param>
        /// <param name="O">Only its shape, width and height are read here, to size the second dispatch.</param>
        /// <param name="stride">Must have 2 elements. The values themselves are not read by this method.</param>
        /// <param name="pad">Must have 4 elements; uploaded to the shader as "_Pad".</param>
        /// <returns>The tensor produced by the second dispatch.</returns>
        Tensor Conv2DWinograd(Tensor X, Tensor K, Tensor B, Tensor O, int[] stride, int[] pad)
        {
            Assert.AreEqual(X.channels, K.kernelDepth);
            Assert.AreEqual(K.kernelCount, B.flatWidth);
            Assert.AreEqual(B.flatWidth, B.length);
            Assert.AreEqual(stride.Length, 2);
            Assert.AreEqual(pad.Length, 4);

            // Pass 1: transform the weights. The transformed tensor is one element larger
            // than K along both the batch and height axes.
            var transformedShape = new TensorShape(K.batch + 1, K.height + 1, K.width, K.channels);
            var transformFunc    = new ComputeFunc(m_Kernels, "KernelWinograd_3x3");
            transformFunc.SetTensor("X", K.shape, Pin(K).buffer);
            var transformedKernel = Dispatch(transformFunc, transformedShape, K.kernelCount, X.channels, 1);

            // Pass 2: the convolution itself, reading the transformed weights.
            var convFunc = new ComputeFunc(m_Kernels, "Conv2DWinograd_2x2_3x3");
            SetTensor(convFunc, "X", X);
            SetTensor(convFunc, "K", transformedKernel);
            SetTensor(convFunc, "B", B);
            convFunc.shader.SetInts("_Pad", pad);

            // The dispatch covers half of the output width/height (via IDivC) in each direction.
            var tilesAcross = IDivC(O.width, 2);
            var tilesDown   = IDivC(O.height, 2);

            return Dispatch(convFunc, O.shape, transformedKernel.kernelCount, tilesAcross, tilesDown);
        }
        /// <summary>
        /// Picks the entry with the lowest <c>CalculateEntryScore</c> and wraps it in a <c>ComputeKernel</c>.
        /// </summary>
        /// <param name="kernels">Compute shaders handed to <c>ComputeFunc</c> for kernel lookup.</param>
        /// <param name="entrees">Candidate entries; must contain at least one element.</param>
        /// <param name="verbose">When true, logs the name of the chosen entry (and is forwarded to scoring).</param>
        /// <returns>
        /// A kernel for the winning entry. If no entry scores below <c>long.MaxValue</c>,
        /// the first entry is used. On equal scores the earliest entry wins.
        /// </returns>
        public static ComputeKernel BestKernel(ComputeShader[] kernels, ComputeKernelLibrary.Entry[] entrees, bool verbose)
        {
            var winner      = entrees[0];
            var lowestScore = long.MaxValue;

            foreach (var candidate in entrees)
            {
                var candidateScore = CalculateEntryScore(kernels, candidate, verbose);
                if (candidateScore >= lowestScore)
                {
                    continue;
                }

                lowestScore = candidateScore;
                winner      = candidate;
            }

            if (verbose)
            {
                D.Log(winner.name);
            }

            var func = new ComputeFunc(kernels, winner.name);

            if (winner.loopStride > 0)
            {
                // Looping kernels get a fixed 1D dispatch of loopStride * threadGroupSizeX,
                // and the shader is told the same value through "_LoopStride".
                int preferedDispatch = (int)winner.loopStride * (int)func.threadGroupSizeX;
                var loopingKernel    = new ComputeKernel(func, new[] { preferedDispatch, 1, 1 });
                loopingKernel.shader.SetInt("_LoopStride", preferedDispatch);
                return loopingKernel;
            }

            return new ComputeKernel(func, winner.dispatch);
        }
        /// <summary>
        /// Scores a kernel entry; a lower score is better.
        /// </summary>
        /// <param name="kernels">Compute shaders handed to <c>ComputeFunc</c> for kernel lookup.</param>
        /// <param name="entry">The candidate to score.</param>
        /// <param name="verbose">When true, emits a warning explaining why an entry was skipped.</param>
        /// <returns>
        /// <c>entry.bigO</c> times the total number of threads launched (thread groups per axis
        /// multiplied together, times <c>threadGroupSize</c>), or <c>long.MaxValue</c> when the
        /// entry cannot be used or an <see cref="ArgumentException"/> was raised while evaluating it.
        /// </returns>
        internal static long CalculateEntryScore(ComputeShader[] kernels, ComputeKernelLibrary.Entry entry, bool verbose)
        {
            const long InvalidEntry        = long.MaxValue;
            const long MaxGroupsPerAxis    = 65535;
            long       score               = InvalidEntry;

            try
            {
                if (!entry.valid)
                {
                    return InvalidEntry;
                }

                // @TODO: @OPTIMIZE: cache threadGroupSize instead of creating ComputeFunc and querying every time
                var func = new ComputeFunc(kernels, entry.name);

                // Reject kernels whose thread group is larger than the device allows.
                var threadsPerGroup = func.threadGroupSizeX * func.threadGroupSizeY * func.threadGroupSizeZ;
                if (threadsPerGroup > ComputeInfo.maxComputeWorkGroupSize)
                {
                    return InvalidEntry;
                }

                // Strict entries require the dispatch to be an exact multiple of the group size on every axis.
                bool strictButMisaligned = entry.strict &&
                                           (entry.dispatch[0] % func.threadGroupSizeX != 0 ||
                                            entry.dispatch[1] % func.threadGroupSizeY != 0 ||
                                            entry.dispatch[2] % func.threadGroupSizeZ != 0);
                if (strictButMisaligned)
                {
                    return InvalidEntry;
                }

                var groupsX = (long)ComputeFunc.IntDivCeil(entry.dispatch[0], (int)func.threadGroupSizeX);
                var groupsY = (long)ComputeFunc.IntDivCeil(entry.dispatch[1], (int)func.threadGroupSizeY);
                var groupsZ = (long)ComputeFunc.IntDivCeil(entry.dispatch[2], (int)func.threadGroupSizeZ);

                // Entries with a loop stride are exempt from the per-axis group count limit.
                bool tooManyGroups = groupsX > MaxGroupsPerAxis || groupsY > MaxGroupsPerAxis || groupsZ > MaxGroupsPerAxis;
                if (tooManyGroups && entry.loopStride == 0)
                {
                    if (verbose)
                    {
                        D.LogWarning($"Kernel {entry.name} dispatch arguments out of range (any [{groupsX},{groupsY},{groupsZ}] > 65535), skipping..");
                    }

                    return InvalidEntry;
                }

                score = groupsX * groupsY * groupsZ;
                score = score * (int)func.threadGroupSize;
                score = (long)(entry.bigO * score);
            }
            catch (ArgumentException)
            {
                // Best effort: a failing kernel is reported (if verbose) and scored as-is.
                if (verbose)
                {
                    D.LogWarning($"Kernel processing failed, skipping {entry.name}");
                }
            }

            return score;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Picks the best-scoring entry, preferring entries flagged with <c>devicePriority</c>,
        /// and wraps it in a <c>ComputeKernel</c>.
        /// </summary>
        /// <remarks>
        /// Entries scoring <c>InvalidEntry</c> are ignored. As soon as one valid priority entry has
        /// been seen, only priority entries compete; before that, non-priority entries compete
        /// among themselves. On equal scores the later entry wins. If no entry is valid, the
        /// first entry is used.
        /// </remarks>
        /// <param name="kernels">Compute shaders handed to <c>ComputeFunc</c> for kernel lookup.</param>
        /// <param name="entrees">Candidate entries; must contain at least one element.</param>
        /// <param name="verbose">When true, logs the name of the chosen entry (and is forwarded to scoring).</param>
        /// <returns>A kernel built from the selected entry.</returns>
        public static ComputeKernel BestKernel(ComputeShader[] kernels, ComputeKernelLibrary.Entry[] entrees, bool verbose)
        {
            var  winner             = entrees[0];
            var  winningScore       = InvalidEntry;
            bool priorityTierActive = false;

            foreach (var candidate in entrees)
            {
                var candidateScore = CalculateEntryScore(kernels, candidate, verbose);
                if (candidateScore == InvalidEntry)
                {
                    continue;
                }

                bool hasPriority = candidate.devicePriority;

                bool takeCandidate;
                if (hasPriority && !priorityTierActive)
                {
                    // First valid priority kernel: it replaces whatever was chosen so far.
                    takeCandidate = true;
                }
                else if (hasPriority == priorityTierActive)
                {
                    // Same tier as the current winner: compare scores.
                    takeCandidate = candidateScore <= winningScore;
                }
                else
                {
                    // Non-priority kernel after a priority one was found: never competes.
                    takeCandidate = false;
                }

                if (takeCandidate)
                {
                    winningScore = candidateScore;
                    winner       = candidate;
                }

                priorityTierActive |= hasPriority;
            }

            if (verbose)
            {
                D.Log(winner.name);
            }

            var func = new ComputeFunc(kernels, winner.name);

            if (winner.loopStride > 0)
            {
                // Looping kernels get a fixed 1D dispatch of loopStride * threadGroupSizeX,
                // and the shader is told the same value through "_LoopStride".
                int preferedDispatch = (int)winner.loopStride * (int)func.threadGroupSizeX;
                var loopingKernel    = new ComputeKernel(func, new[] { preferedDispatch, 1, 1 });
                loopingKernel.shader.SetInt("_LoopStride", preferedDispatch);
                return loopingKernel;
            }

            return new ComputeKernel(func, winner.dispatch);
        }
        /// <summary>
        /// Builds a <c>DispatchInfo</c> describing a dispatch of <paramref name="computeFunc"/>.
        /// </summary>
        /// <param name="computeFunc">Supplies the kernel name and the shader context.</param>
        /// <param name="x">Forwarded unchanged as the first dispatch value.</param>
        /// <param name="y">Forwarded unchanged as the second dispatch value.</param>
        /// <param name="z">Forwarded unchanged as the third dispatch value.</param>
        /// <returns>
        /// A <c>DispatchInfo</c> tagged "REF" when the function's context is
        /// <c>ComputeShaderContext.Reference</c>, and "OPT" for any other context.
        /// </returns>
        internal static DispatchInfo CreateFromComputeFunc(ComputeFunc computeFunc, int x, int y, int z)
        {
            string backendTag;
            if (computeFunc.computeShaderContext == ComputeShaderContext.Reference)
            {
                backendTag = "REF";
            }
            else
            {
                backendTag = "OPT";
            }

            return new DispatchInfo(backendTag, computeFunc.kernelName, x, y, z);
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Creates a kernel from a compute function and its dispatch values.
 /// </summary>
 /// <param name="func_">Stored in <c>func</c>.</param>
 /// <param name="dispatch_">Stored in <c>dispatch</c> by reference; the array is not copied.</param>
 public ComputeKernel(ComputeFunc func_, int[] dispatch_)
 {
     this.func     = func_;
     this.dispatch = dispatch_;
 }