Beispiel #1
0
        /// <summary>
        /// Runs the "NormalizationTail" kernel on <paramref name="X"/>, feeding it the result of
        /// <c>GlobalAvgVariancePool2D(X)</c>, and then passes the output through <c>ScaleBias</c>
        /// together with <paramref name="S"/> and <paramref name="B"/>.
        /// </summary>
        /// <param name="X">Input tensor.</param>
        /// <param name="S">Scale tensor forwarded to <c>ScaleBias</c>.</param>
        /// <param name="B">Bias tensor forwarded to <c>ScaleBias</c>.</param>
        /// <param name="pool">Values &lt;= 0 are replaced by <c>X.batch</c>; a resulting value above 1 is rejected.</param>
        /// <param name="axis">Only 3 and -1 are accepted.</param>
        /// <param name="epsilon">Passed to the shader as <c>_Epsilon</c>.</param>
        /// <returns>The tensor returned by <c>ScaleBias</c>.</returns>
        /// <exception cref="NotImplementedException">Thrown for an unsupported axis or pool value.</exception>
        public override Tensor Normalization(Tensor X, Tensor S, Tensor B, int pool, int axis, float epsilon)
        {
            bool axisSupported = axis == 3 || axis == -1;
            if (!axisSupported)
            {
                throw new NotImplementedException();
            }

            // A non-positive pool means "use the batch size".
            int effectivePool = pool <= 0 ? X.batch : pool;

            // Currently supported only pool=1 (InstanceNormalization)
            if (effectivePool > 1)
            {
                throw new NotImplementedException(); // @TODO: support other types of Normalization at test time
            }

            var stats = GlobalAvgVariancePool2D(X);

            var normalized = NewTensor(X.shape);
            var tailKernel = BestKernel(ComputeKernelLibrary.NormalizationTail(X.shape, normalized.shape));

            tailKernel.SetTensor("X", X.shape, Pin(X).buffer);
            tailKernel.SetTensor("O", normalized.shape, Pin(normalized).buffer);
            tailKernel.SetTensor("W", stats.shape, Pin(stats).buffer);
            tailKernel.shader.SetFloat("_Epsilon", epsilon);

            tailKernel.Dispatch();

            return ScaleBias(normalized, S, B);
        }
Beispiel #2
0
        /// <summary>
        /// Dispatches the 2D pooling kernel named <paramref name="kernelName"/> over <paramref name="X"/>.
        /// When all four pad values are zero, the "_NoPads" variant of the kernel is selected instead.
        /// </summary>
        /// <param name="kernelName">Base name of the pooling kernel.</param>
        /// <param name="X">Input tensor.</param>
        /// <param name="pool">Pool size; must contain exactly 2 values.</param>
        /// <param name="stride">Stride; must contain exactly 2 values.</param>
        /// <param name="pad">Padding; must contain exactly 4 values.</param>
        /// <returns>A new tensor shaped by <c>X.shape.ApplyPool(pool, stride, pad)</c>.</returns>
        protected override Tensor Pool2D(string kernelName, Tensor X, int[] pool, int[] stride, int[] pad)
        {
            Assert.AreEqual(pool.Length, 2);
            Assert.AreEqual(stride.Length, 2);
            // pad[0..3] are read below; validate the length up front, as the other
            // padded operations in this class do, instead of failing on an index access.
            Assert.AreEqual(pad.Length, 4);

            if (pad[0] == 0 && pad[1] == 0 && pad[2] == 0 && pad[3] == 0)
            {
                kernelName += "_NoPads";
            }

            var O  = NewTensor(X.shape.ApplyPool(pool, stride, pad));
            var fn = BestKernel(ComputeKernelLibrary.Pool2D(X.shape, O.shape, kernelName));

            if (printKernels)
            {
                D.Log($"{fn.func.kernelName}: {O.shape} = {X.shape} ^ pool: {pool[0]},{pool[1]} stride: {stride[0]},{stride[1]} pad:{pad[0]},{pad[1]}");
            }

            fn.SetTensor("X", X.shape, Pin(X).buffer);
            fn.SetTensor("O", O.shape, Pin(O).buffer);

            fn.shader.SetInts("_Pool", pool);
            fn.shader.SetInts("_Stride", stride);
            fn.shader.SetInts("_Pad", pad);

            fn.Dispatch();
            return O;
        }
Beispiel #3
0
        /// <summary>
        /// Depthwise 2D convolution of <paramref name="X"/> with kernel <paramref name="K"/> and bias
        /// <paramref name="B"/>. Kernels whose <c>kernelDepth</c> is not 1 are delegated to the base
        /// implementation.
        /// </summary>
        /// <param name="X">Input tensor; its channel count must equal <c>K.kernelCount</c>.</param>
        /// <param name="K">Kernel tensor.</param>
        /// <param name="B">Bias tensor; must share a buffer with <paramref name="K"/>.</param>
        /// <param name="stride">Stride; must contain exactly 2 values.</param>
        /// <param name="pad">Padding; must contain exactly 4 values.</param>
        /// <returns>A new tensor shaped by <c>X.shape.ApplyKernel(K.shape, stride, pad)</c>.</returns>
        public override Tensor DepthwiseConv2D(Tensor X, Tensor K, Tensor B, int[] stride, int[] pad)
        {
            // Only depth-1 kernels are handled here; everything else goes to the base class.
            if (K.kernelDepth != 1)
            {
                return base.DepthwiseConv2D(X, K, B, stride, pad);
            }

            Assert.AreEqual(K.kernelDepth, 1);
            Assert.AreEqual(K.kernelCount, X.channels);
            Assert.AreEqual(K.kernelCount, B.flatWidth);
            Assert.AreEqual(B.flatWidth, B.length);
            Assert.AreEqual(stride.Length, 2);
            Assert.AreEqual(pad.Length, 4);

            var output     = NewTensor(X.shape.ApplyKernel(K.shape, stride, pad));
            var convKernel = BestKernel(ComputeKernelLibrary.DepthwiseConv2D(X.shape, K.shape, output.shape));

            if (printKernels)
            {
                Debug.Log($"{convKernel.func.kernelName}: {output.shape} = {X.shape} ∆ {K.shape} stride: {stride[0]},{stride[1]} pad:{pad[0]},{pad[1]}");
            }

            convKernel.SetTensor("X", X.shape, Pin(X).buffer);
            convKernel.SetTensor("O", output.shape, Pin(output).buffer);

            // K and B are declared by offset only and bound through one shared "WBK" buffer,
            // so both must be pinned to the same buffer.
            convKernel.SetTensorDecl("K", K.shape, Pin(K).offset);
            convKernel.SetTensorDecl("B", B.shape, Pin(B).offset);
            Assert.AreEqual(Pin(K).buffer, Pin(B).buffer);
            convKernel.SetTensorBuffer("WBK", Pin(K).buffer);

            convKernel.shader.SetInts("_Stride", stride);
            convKernel.shader.SetInts("_Pad", pad);

            convKernel.Dispatch();
            return output;
        }
Beispiel #4
0
        /// <summary>
        /// Dispatches the padding kernel named <paramref name="kernelName"/> on <paramref name="X"/>.
        /// For the "Border2D" kernel the cropped input size and the fill constant are also uploaded.
        /// </summary>
        /// <param name="X">Input tensor.</param>
        /// <param name="pad">Padding; must contain exactly 4 values.</param>
        /// <param name="kernelName">Name of the padding kernel.</param>
        /// <param name="constant">Uploaded as <c>_Beta</c> when the kernel is "Border2D".</param>
        /// <returns>A new tensor shaped by <c>X.shape.ApplyBorder(pad)</c>.</returns>
        protected override Tensor ApplyPadding(Tensor X, int[] pad, string kernelName, float constant = 0.0f)
        {
            Assert.AreEqual(pad.Length, 4);

            var padded    = NewTensor(X.shape.ApplyBorder(pad));
            var padKernel = BestKernel(ComputeKernelLibrary.Padding(X.shape, padded.shape, kernelName));

            padKernel.SetTensor("X", X.shape, Pin(X).buffer);
            padKernel.SetTensor("O", padded.shape, Pin(padded).buffer);

            padKernel.shader.SetInts("_Pad", pad);

            if (kernelName == "Border2D")
            {
                // NOTE: negative "pad" variable will crop X tensor
                var croppedSize = new int[]
                {
                    X.width - Math.Max(0, -pad[2]),
                    X.height - Math.Max(0, -pad[3])
                };

                // The cropped size is passed through the "_Pool" shader variable.
                padKernel.shader.SetInts("_Pool", croppedSize);
                padKernel.shader.SetFloat("_Beta", constant);
            }

            padKernel.Dispatch();
            return padded;
        }
Beispiel #5
0
        // ---------------------------------------------------------------------------------
        /// <summary>
        /// Dispatches the Dense kernel for input <paramref name="X"/>, weights <paramref name="W"/>
        /// and bias <paramref name="B"/>.
        /// </summary>
        /// <param name="X">Input tensor; <c>X.flatWidth</c> must equal <c>W.flatHeight</c>.</param>
        /// <param name="W">Weight tensor with at most 2 dimensions.</param>
        /// <param name="B">Bias tensor; must share a buffer with <paramref name="W"/>.</param>
        /// <returns>A new tensor created as <c>NewTensor(X.flatHeight, W.flatWidth)</c>.</returns>
        public override Tensor Dense(Tensor X, Tensor W, Tensor B)
        {
            Assert.IsTrue(W.dimensions <= 2);
            Assert.AreEqual(B.flatWidth, B.length);
            Assert.AreEqual(X.flatWidth, W.flatHeight);

            var output = NewTensor(X.flatHeight, W.flatWidth);

            const int itemSize = 4; // @TODO: itemSizeInBytes == 2 | float16
            var denseKernel = BestKernel(ComputeKernelLibrary.Dense(X.shape, W.shape, output.shape, itemSize >> 2));

            if (printKernels)
            {
                Debug.Log($"{denseKernel.func.kernelName}: {output.shape} = {X.shape} * {W.shape}");
            }

            denseKernel.SetTensor("X", X.shape, Pin(X).buffer);
            denseKernel.SetTensor("O", output.shape, Pin(output).buffer);

            // W and B are declared by offset only and bound through one shared "WBK" buffer.
            denseKernel.SetTensorDecl("W", W.shape, Pin(W).offset);
            denseKernel.SetTensorDecl("B", B.shape, Pin(B).offset);
            Assert.AreEqual(Pin(W).buffer, Pin(B).buffer);
            denseKernel.SetTensorBuffer("WBK", Pin(W).buffer);

            denseKernel.Dispatch();
            return output;
        }
Beispiel #6
0
        /// <summary>
        /// Folds <paramref name="tensors"/> left to right with the broadcast kernel
        /// <paramref name="kernelName"/>: the running result is combined with each following tensor
        /// in turn. Two output tensors are alternated so a dispatch never writes to the tensor it reads.
        /// </summary>
        /// <param name="kernelName">Name of the broadcast kernel.</param>
        /// <param name="tensors">Operands; must not be empty.</param>
        /// <returns>
        /// The output of the last dispatch. NOTE: when <paramref name="tensors"/> holds exactly one
        /// element no dispatch happens and <c>null</c> is returned.
        /// </returns>
        public override Tensor ElementwiseWithBroadcast(string kernelName, Tensor[] tensors)
        {
            Assert.IsTrue(tensors.Length > 0);

            // The second buffer is only needed once more than one dispatch takes place.
            Tensor ping = NewTensor(TensorExtensions.MaxShape(tensors));
            Tensor pong = tensors.Length > 2 ? NewTensor(TensorExtensions.MaxShape(tensors)) : null;

            var accumulator     = tensors[0];
            var broadcastKernel = BestKernel(ComputeKernelLibrary.Broadcast(accumulator.shape, ping.shape, kernelName));

            Tensor result = null;

            for (int index = 1; index < tensors.Length; ++index)
            {
                var operand = tensors[index];

                // Odd steps write to the first buffer, even steps to the second.
                if (index % 2 == 1)
                {
                    result = ping;
                }
                else
                {
                    result = pong;
                }

                broadcastKernel.SetTensor("X", accumulator.shape, Pin(accumulator).buffer);
                broadcastKernel.SetTensor("O", result.shape, Pin(result).buffer);
                broadcastKernel.SetTensor("B", operand.shape, Pin(operand).buffer, Pin(operand).offset);

                broadcastKernel.Dispatch();

                accumulator = result;
            }

            return result;
        }
Beispiel #7
0
        /// <summary>
        /// Dispatches the "LogicalNot" activation kernel on <paramref name="X"/>.
        /// </summary>
        /// <param name="X">Input tensor.</param>
        /// <returns>A new tensor with the same shape as <paramref name="X"/>.</returns>
        public override Tensor LogicalNot(Tensor X)
        {
            var result    = NewTensor(X.shape);
            var notKernel = BestKernel(ComputeKernelLibrary.Activation(X.shape, result.shape, "LogicalNot"));

            notKernel.SetTensor("X", X.shape, Pin(X).buffer);
            notKernel.SetTensor("O", result.shape, Pin(result).buffer);
            notKernel.Dispatch();

            return result;
        }
Beispiel #8
0
        /// <summary>
        /// Dispatches the "GlobalAvgVariancePool2D" kernel on <paramref name="X"/>.
        /// </summary>
        /// <param name="X">Input tensor.</param>
        /// <returns>A new tensor created as <c>NewTensor(X.batch, 2, 1, X.channels)</c>.</returns>
        public override Tensor GlobalAvgVariancePool2D(Tensor X)
        {
            var stats      = NewTensor(X.batch, 2, 1, X.channels);
            var poolKernel = BestKernel(ComputeKernelLibrary.GlobalPool2D(X.shape, stats.shape, "GlobalAvgVariancePool2D"));

            poolKernel.SetTensor("X", X.shape, Pin(X).buffer);
            poolKernel.SetTensor("O", stats.shape, Pin(stats).buffer);
            poolKernel.Dispatch();

            return stats;
        }
Beispiel #9
0
        /// <summary>
        /// Dispatches the LogSoftmax kernel on <paramref name="X"/>.
        /// </summary>
        /// <param name="X">Input tensor.</param>
        /// <returns>A new tensor with the same shape as <paramref name="X"/>.</returns>
        public override Tensor LogSoftmax(Tensor X)
        {
            var result        = NewTensor(X.shape);
            var softmaxKernel = BestKernel(ComputeKernelLibrary.LogSoftmax(X.shape, result.shape));

            softmaxKernel.SetTensor("X", X.shape, Pin(X).buffer);
            softmaxKernel.SetTensor("O", result.shape, Pin(result).buffer);
            softmaxKernel.Dispatch();

            return result;
        }
Beispiel #10
0
        /// <summary>
        /// Dispatches the activation kernel named <paramref name="kernelName"/> on <paramref name="X"/>.
        /// </summary>
        /// <param name="kernelName">Name of the activation kernel.</param>
        /// <param name="X">Input tensor.</param>
        /// <param name="alpha">Uploaded to the shader as <c>_Alpha</c>.</param>
        /// <param name="beta">Uploaded to the shader as <c>_Beta</c>.</param>
        /// <returns>A new tensor with the same shape as <paramref name="X"/>.</returns>
        protected override Tensor Activation(string kernelName, Tensor X, float alpha = 0f, float beta = 0f)
        {
            var result           = NewTensor(X.shape);
            var activationKernel = BestKernel(ComputeKernelLibrary.Activation(X.shape, result.shape, kernelName));

            if (printKernels)
            {
                D.Log(activationKernel.func.kernelName);
            }

            activationKernel.SetTensor("X", X.shape, Pin(X).buffer);
            activationKernel.SetTensor("O", result.shape, Pin(result).buffer);

            activationKernel.shader.SetFloat("_Alpha", alpha);
            activationKernel.shader.SetFloat("_Beta", beta);

            activationKernel.Dispatch();

            return result;
        }
Beispiel #11
0
        /// <summary>
        /// Dispatches the PRelu kernel on <paramref name="X"/> with slope tensor <paramref name="S"/>.
        /// </summary>
        /// <param name="X">Input tensor.</param>
        /// <param name="S">Slope tensor; its <c>flatWidth</c> must be 1 or equal to <c>X.flatWidth</c>.</param>
        /// <returns>A new tensor with the same shape as <paramref name="X"/>.</returns>
        public override Tensor PRelu(Tensor X, Tensor S)
        {
            bool slopeIsScalar   = S.flatWidth == 1;
            bool slopeMatchesRow = X.flatWidth == S.flatWidth;
            Assert.IsTrue(slopeMatchesRow || slopeIsScalar);

            var result      = NewTensor(X.shape);
            var preluKernel = BestKernel(ComputeKernelLibrary.PRelu(X.shape, result.shape));

            if (printKernels)
            {
                D.Log(preluKernel.func.kernelName);
            }

            preluKernel.SetTensor("X", X.shape, Pin(X).buffer);
            preluKernel.SetTensor("O", result.shape, Pin(result).buffer);
            // The slope tensor is bound under the kernel's "W" name.
            preluKernel.SetTensor("W", S.shape, Pin(S).buffer);

            preluKernel.Dispatch();

            return result;
        }
Beispiel #12
0
        /// <summary>
        /// Dispatches the Upsample2D kernel on <paramref name="X"/>.
        /// </summary>
        /// <param name="X">Input tensor.</param>
        /// <param name="size">
        /// Exactly 2 scale factors: <c>size[0]</c> multiplies the width and <c>size[1]</c> the height.
        /// </param>
        /// <returns>A new tensor with the scaled height and width and unchanged batch and channels.</returns>
        public override Tensor Upsample2D(Tensor X, int[] size)
        {
            Assert.AreEqual(size.Length, 2);

            int scaledHeight = X.height * size[1];
            int scaledWidth  = X.width * size[0];

            var upsampled      = NewTensor(X.batch, scaledHeight, scaledWidth, X.channels);
            var upsampleKernel = BestKernel(ComputeKernelLibrary.Upsample2D(X.shape, upsampled.shape));

            if (printKernels)
            {
                D.Log($"{upsampleKernel.func.kernelName}: {upsampled.shape} = {X.shape} ^ size: {size[0]},{size[1]}");
            }

            upsampleKernel.SetTensor("X", X.shape, Pin(X).buffer);
            upsampleKernel.SetTensor("O", upsampled.shape, Pin(upsampled).buffer);

            // The scale factors are passed through the "_Pool" shader variable.
            upsampleKernel.shader.SetInts("_Pool", size);

            upsampleKernel.Dispatch();

            return upsampled;
        }
Beispiel #13
0
        /// <summary>
        /// Copies the data of <paramref name="X"/> into a new tensor allocated with
        /// <paramref name="newShape"/>. Both shapes must have the same <c>length</c>.
        /// </summary>
        /// <param name="X">Source tensor.</param>
        /// <param name="newShape">Shape of the returned tensor.</param>
        /// <returns>A new tensor of shape <paramref name="newShape"/>.</returns>
        protected override Tensor CopyAndReshape(Tensor X, TensorShape newShape)
        {
            var sourceShape = X.shape;
            Assert.AreEqual(sourceShape.length, newShape.length);

            // The "Copy" kernel preserves shape while copying. To reuse it for a reshape,
            // the destination is allocated with the new shape, but the kernel is told that
            // both source and destination have the source shape.
            var reshaped   = NewTensor(newShape);
            var copyKernel = BestKernel(ComputeKernelLibrary.Copy(sourceShape, sourceShape));

            copyKernel.SetTensor("X", sourceShape, Pin(X).buffer);
            copyKernel.SetTensor("O", sourceShape, Pin(reshaped).buffer);

            // All four "_Pad" values are zero.
            copyKernel.shader.SetInts("_Pad", new int[4]);

            copyKernel.Dispatch();

            return reshaped;
        }
Beispiel #14
0
        /// <summary>
        /// Dispatches the ScaleBias kernel on <paramref name="X"/> with scale <paramref name="S"/>
        /// and bias <paramref name="B"/>.
        /// </summary>
        /// <param name="X">Input tensor.</param>
        /// <param name="S">Scale tensor; its length and channel count must both equal <c>X.channels</c>.</param>
        /// <param name="B">Bias tensor; same constraints as <paramref name="S"/>, and it must share a buffer with it.</param>
        /// <returns>A new tensor with the same shape as <paramref name="X"/>.</returns>
        public override Tensor ScaleBias(Tensor X, Tensor S, Tensor B)
        {
            Assert.AreEqual(X.channels, B.channels);
            Assert.AreEqual(X.channels, S.channels);
            Assert.AreEqual(B.length, B.channels);
            Assert.AreEqual(S.length, S.channels);

            var result          = NewTensor(X.shape);
            var scaleBiasKernel = BestKernel(ComputeKernelLibrary.ScaleBias(X.shape, result.shape));

            if (printKernels)
            {
                D.Log(scaleBiasKernel.func.kernelName);
            }

            scaleBiasKernel.SetTensor("X", X.shape, Pin(X).buffer);
            scaleBiasKernel.SetTensor("O", result.shape, Pin(result).buffer);

            // S (declared as "W") and B are declared by offset only and bound through one shared "WBK" buffer.
            scaleBiasKernel.SetTensorDecl("W", S.shape, Pin(S).offset);
            scaleBiasKernel.SetTensorDecl("B", B.shape, Pin(B).offset);
            Assert.AreEqual(Pin(S).buffer, Pin(B).buffer);
            scaleBiasKernel.SetTensorBuffer("WBK", Pin(S).buffer);

            scaleBiasKernel.Dispatch();

            return result;
        }
Beispiel #15
0
        /// <summary>
        /// Global 2D pooling of <paramref name="X"/>. While the input has 256 or more spatial
        /// elements it is first reduced by repeated 4x4 pooling with <paramref name="smallKernelName"/>;
        /// the remainder is then reduced by <paramref name="globalKernelName"/>.
        /// </summary>
        /// <param name="smallKernelName">Kernel name passed to <c>Pool2D</c> for the reduction steps.</param>
        /// <param name="globalKernelName">Kernel name used for the final global pool.</param>
        /// <param name="X">Input tensor.</param>
        /// <returns>A new tensor created as <c>NewTensor(X.batch, 1, 1, X.channels)</c>.</returns>
        protected virtual Tensor GlobalPool2D(string smallKernelName, string globalKernelName, Tensor X)
        {
            // downsample with pyramid approach
            for (; X.height * X.width >= 256;)
            {
                var window  = new [] { 4, 4 };
                var step    = window; // stride equals the pool window (same array instance)
                var zeroPad = new int[4];

                var lengthBefore = X.length;
                X = Pool2D(smallKernelName, X, window, step, zeroPad);

                // Every step must shrink the tensor, otherwise the loop could not terminate.
                Assert.IsTrue(X.length < lengthBefore);
            }

            var pooled       = NewTensor(X.batch, 1, 1, X.channels);
            var globalKernel = BestKernel(ComputeKernelLibrary.GlobalPool2D(X.shape, pooled.shape, globalKernelName));

            globalKernel.SetTensor("X", X.shape, Pin(X).buffer);
            globalKernel.SetTensor("O", pooled.shape, Pin(pooled).buffer);
            globalKernel.Dispatch();

            return pooled;
        }
Beispiel #16
0
        /// <summary>
        /// Transposed 2D convolution of <paramref name="X"/> with kernel <paramref name="K"/> and
        /// bias <paramref name="B"/>.
        /// </summary>
        /// <param name="X">Input tensor; its channel count must equal <c>K.kernelDepth</c>.</param>
        /// <param name="K">Kernel tensor.</param>
        /// <param name="B">Bias tensor; must share a buffer with <paramref name="K"/>.</param>
        /// <param name="stride">Stride; must contain exactly 2 values.</param>
        /// <param name="pad">Padding; must contain exactly 4 values.</param>
        /// <param name="outputAdjustment">Forwarded to <c>ApplyKernelInverse</c> when computing the output shape.</param>
        /// <returns>A new tensor shaped by <c>X.shape.ApplyKernelInverse(K.shape, stride, pad, outputAdjustment)</c>.</returns>
        public override Tensor Conv2DTrans(Tensor X, Tensor K, Tensor B, int[] stride, int[] pad, int[] outputAdjustment)
        {
            Assert.AreEqual(X.channels, K.kernelDepth);
            Assert.AreEqual(K.kernelCount, B.flatWidth);
            Assert.AreEqual(B.flatWidth, B.length);
            Assert.AreEqual(stride.Length, 2);
            Assert.AreEqual(pad.Length, 4);

            var output      = NewTensor(X.shape.ApplyKernelInverse(K.shape, stride, pad, outputAdjustment));
            var transKernel = BestKernel(ComputeKernelLibrary.Conv2DTrans(X.shape, K.shape, output.shape));

            // The log shows the caller-supplied padding, not the converted one below.
            if (printKernels)
            {
                D.Log($"{transKernel.func.kernelName}: {output.shape} = {X.shape} @ {K.shape} stride: {stride[0]},{stride[1]} pad:{pad[0]},{pad[1]}");
            }

            // The shader receives (kernel size - pad - 1) for each of the four pad values.
            var shaderPad = new int[]
            {
                K.kernelWidth - pad[0] - 1, K.kernelHeight - pad[1] - 1,
                K.kernelWidth - pad[2] - 1, K.kernelHeight - pad[3] - 1
            };

            transKernel.SetTensor("X", X.shape, Pin(X).buffer);
            transKernel.SetTensor("O", output.shape, Pin(output).buffer);

            // K and B are declared by offset only and bound through one shared "WBK" buffer.
            transKernel.SetTensorDecl("K", K.shape, Pin(K).offset);
            transKernel.SetTensorDecl("B", B.shape, Pin(B).offset);
            Assert.AreEqual(Pin(K).buffer, Pin(B).buffer);
            transKernel.SetTensorBuffer("WBK", Pin(K).buffer);

            transKernel.shader.SetInts("_Pad", shaderPad);
            transKernel.shader.SetInts("_Stride", stride);

            transKernel.Dispatch();

            return output;
        }
Beispiel #17
0
        /// <summary>
        /// Concatenates <paramref name="tensors"/> along <paramref name="axis"/> by copying each
        /// one into the output at a running offset.
        /// </summary>
        /// <param name="tensors">Tensors to concatenate, in order.</param>
        /// <param name="axis">Concatenation axis; resolved through <c>O.shape.Axis(axis)</c> before use as an index.</param>
        /// <returns>A new tensor whose shape is given by <c>TensorExtensions.Concat</c>.</returns>
        public override Tensor Concat(Tensor[] tensors, int axis)
        {
            var inputShapes = tensors.Select(tensor => tensor.shape).ToArray();
            var output      = NewTensor(TensorExtensions.Concat(inputShapes, axis));

            // Write position in the output, advanced along the concat axis after every copy.
            var writeOffsets = new int[4];

            int resolvedAxis = output.shape.Axis(axis);

            for (int index = 0; index < tensors.Length; ++index)
            {
                var source     = tensors[index];
                var copyKernel = BestKernel(ComputeKernelLibrary.Copy(source.shape, output.shape));

                copyKernel.SetTensor("X", source.shape, Pin(source).buffer);
                copyKernel.SetTensor("O", output.shape, Pin(output).buffer);

                // The write offsets are passed through the "_Pad" shader variable.
                copyKernel.shader.SetInts("_Pad", writeOffsets);

                copyKernel.Dispatch();

                writeOffsets[resolvedAxis] += source.shape[resolvedAxis];
            }

            return output;
        }
Beispiel #18
0
        /// <summary>
        /// Selects a kernel for every layer of <paramref name="model"/> that has at least one input
        /// and stores it, together with the layer's output shape, in <c>m_CompiledLayers</c>.
        /// Nothing is done when the hash of the model and input shapes equals the cached hash.
        /// Layer types without a matching case are stored with a default-constructed
        /// <c>ComputeKernel</c>.
        /// </summary>
        /// <param name="model">Model whose layers are compiled.</param>
        /// <param name="inputShapes">Input shapes by name, used to derive every layer's tensor shapes.</param>
        public virtual void PrepareModel(Model model, IDictionary <string, TensorShape> inputShapes)
        {
            var currentHash = CalcModelWithInputsHashCode(model, inputShapes);
            if (currentHash == m_CachedModelHash)
            {
                return; // same model and inputs as last time
            }

            m_CachedModelHash = currentHash;
            m_CompiledLayers.Clear();

            IDictionary <string, TensorShape> shapesByName;
            ModelAnalyzer.ListTemporaryTensorShapes(model, inputShapes, out shapesByName);

            foreach (var layer in model.layers)
            {
                // Skip layers that are already compiled, and layers without inputs:
                // so far all of the latter are CPU only and need no kernel.
                if (m_CompiledLayers.ContainsKey(layer) || layer.inputs.Length == 0)
                {
                    continue;
                }

                var inShape  = shapesByName[layer.inputs[0]];
                var outShape = shapesByName[layer.name];

                ComputeKernel selected = new ComputeKernel();

                switch (layer.type)
                {
                    case Layer.Type.Dense:
                    {
                        var itemSize = 4; // @TODO: itemSizeInBytes == 2 | float16
                        selected = BestKernel(
                            ComputeKernelLibrary.Dense(inShape, layer.datasets[0].shape, outShape, itemSize >> 2));
                        break;
                    }

                    case Layer.Type.Conv2D:
                    {
                        Assert.IsNotNull(layer.stride);
                        Assert.IsNotNull(layer.pad);
                        selected = BestKernel(
                            ComputeKernelLibrary.Conv2D(inShape, layer.datasets[0].shape, outShape, layer.stride, layer.pad));
                        break;
                    }

                    case Layer.Type.DepthwiseConv2D:
                    {
                        selected = BestKernel(
                            ComputeKernelLibrary.DepthwiseConv2D(inShape, layer.datasets[0].shape, outShape));
                        break;
                    }

                    case Layer.Type.Conv2DTrans:
                    {
                        selected = BestKernel(
                            ComputeKernelLibrary.Conv2DTrans(inShape, layer.datasets[0].shape, outShape));
                        break;
                    }

                    case Layer.Type.Upsample2D:
                    {
                        selected = BestKernel(
                            ComputeKernelLibrary.Upsample2D(inShape, outShape));
                        break;
                    }

                    case Layer.Type.MaxPool2D:
                    case Layer.Type.AvgPool2D:
                    {
                        var poolKernelName = layer.type.ToString();

                        Assert.IsNotNull(layer.pool);
                        Assert.IsNotNull(layer.stride);
                        Assert.IsNotNull(layer.pad);

                        // An all-zero adjusted padding selects the "_NoPads" kernel variant.
                        var adjustedPad = inShape.AdjustPadToPool(layer.pool, layer.stride, layer.pad);
                        if (adjustedPad[0] == 0 && adjustedPad[1] == 0 && adjustedPad[2] == 0 && adjustedPad[3] == 0)
                        {
                            poolKernelName += "_NoPads";
                        }

                        selected = BestKernel(
                            ComputeKernelLibrary.Pool2D(inShape, outShape, poolKernelName));
                        break;
                    }

                    // @TODO: reimplement GlobalPools (GlobalMaxPool2D, GlobalAvgPool2D),
                    // currently require different kernels for each pyramid step

                    case Layer.Type.ScaleBias:
                    {
                        selected = BestKernel(
                            ComputeKernelLibrary.ScaleBias(inShape, outShape));
                        break;
                    }

                    // @TODO: reimplement Normalization, which became a multi-kernel operation after optimizations

                    case Layer.Type.Add:
                    case Layer.Type.Sub:
                    case Layer.Type.Mul:
                    case Layer.Type.Div:
                    case Layer.Type.Pow:
                    case Layer.Type.Min:
                    case Layer.Type.Max:
                    // Layer.Type.Mean is not handled here - @TODO: implement BroadcastMean
                    {
                        var broadcastKernelName = "Broadcast" + layer.type.ToString();
                        selected = BestKernel(
                            ComputeKernelLibrary.Broadcast(inShape, outShape, broadcastKernelName));
                        break;
                    }

                    // @TODO: implement Concat, currently might require different kernel for each tensor

                    // Activations
                    case Layer.Type.Activation:
                    {
                        switch (layer.activation)
                        {
                            case Layer.Activation.Softmax:
                                selected = BestKernel(
                                    ComputeKernelLibrary.Softmax(inShape, outShape));
                                break;

                            case Layer.Activation.LogSoftmax:
                                selected = BestKernel(
                                    ComputeKernelLibrary.LogSoftmax(inShape, outShape));
                                break;

                            case Layer.Activation.PRelu:
                                selected = BestKernel(
                                    ComputeKernelLibrary.PRelu(inShape, outShape));
                                break;

                            case Layer.Activation.None:
                                // no kernel needed; the default-constructed one is kept
                                break;

                            default:
                                // Every other activation maps to a kernel named after the enum value.
                                selected = BestKernel(
                                    ComputeKernelLibrary.Activation(inShape, outShape, layer.activation.ToString()));
                                break;
                        }
                        break;
                    }
                }

                m_CompiledLayers.Add(layer, new CompiledLayer {
                    kernel = selected, shape = outShape
                });
            }
        }