public override Tensor Normalization(Tensor X, Tensor S, Tensor B, int pool, int axis, float epsilon)
{
    if (axis != 3 && axis != -1)
        throw new NotImplementedException();

    if (pool <= 0)
        pool = X.batch;

    if (pool > 1)
        throw new NotImplementedException(); // @TODO: support other types of Normalization at test time

    // Currently only pool=1 (InstanceNormalization) is supported
    var meanVariance = GlobalAvgVariancePool2D(X);

    var O = NewTensor(X.shape);
    var fn = BestKernel(ComputeKernelLibrary.NormalizationTail(X.shape, O.shape));

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);
    fn.SetTensor("W", meanVariance.shape, Pin(meanVariance).buffer);
    fn.shader.SetFloat("_Epsilon", epsilon);

    fn.Dispatch();

    return ScaleBias(O, S, B);
}
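// Math sketch (informal restatement of the pipeline above, not a separate code path):
// with pool == 1 this is InstanceNormalization. GlobalAvgVariancePool2D yields per-channel
// mean m and variance v over H*W, and the NormalizationTail kernel computes
//     O[n,h,w,c] = (X[n,h,w,c] - m[n,c]) / sqrt(v[n,c] + epsilon)
// before ScaleBias applies the learned per-channel scale S and bias B.
// Usage sketch (hypothetical caller `ops` holding an instance of this class):
//     var y = ops.Normalization(x, scale, bias, pool: 1, axis: -1, epsilon: 1e-5f);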
protected override Tensor Pool2D(string kernelName, Tensor X, int[] pool, int[] stride, int[] pad)
{
    Assert.AreEqual(pool.Length, 2);
    Assert.AreEqual(stride.Length, 2);

    if (pad[0] == 0 && pad[1] == 0 && pad[2] == 0 && pad[3] == 0)
        kernelName += "_NoPads";

    var O = NewTensor(X.shape.ApplyPool(pool, stride, pad));
    var fn = BestKernel(ComputeKernelLibrary.Pool2D(X.shape, O.shape, kernelName));

    if (printKernels)
        D.Log($"{fn.func.kernelName}: {O.shape} = {X.shape} ^ pool: {pool[0]},{pool[1]} stride: {stride[0]},{stride[1]} pad:{pad[0]},{pad[1]}");

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);

    fn.shader.SetInts("_Pool", pool);
    fn.shader.SetInts("_Stride", stride);
    fn.shader.SetInts("_Pad", pad);

    fn.Dispatch();
    return O;
}
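// Shape sketch (standard pooling arithmetic, assuming ApplyPool follows the usual convention
// with pad = {left, top, right, bottom} and floor division):
//     outWidth  = (inWidth  + pad[0] + pad[2] - pool[0]) / stride[0] + 1
//     outHeight = (inHeight + pad[1] + pad[3] - pool[1]) / stride[1] + 1
// e.g. a 2x2 pool with stride 2 and no padding halves both spatial dimensions.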
public override Tensor DepthwiseConv2D(Tensor X, Tensor K, Tensor B, int[] stride, int[] pad)
{
    if (K.kernelDepth != 1)
        return base.DepthwiseConv2D(X, K, B, stride, pad);

    Assert.AreEqual(K.kernelDepth, 1);
    Assert.AreEqual(K.kernelCount, X.channels);
    Assert.AreEqual(K.kernelCount, B.flatWidth);
    Assert.AreEqual(B.flatWidth, B.length);
    Assert.AreEqual(stride.Length, 2);
    Assert.AreEqual(pad.Length, 4);

    var O = NewTensor(X.shape.ApplyKernel(K.shape, stride, pad));
    var fn = BestKernel(ComputeKernelLibrary.DepthwiseConv2D(X.shape, K.shape, O.shape));

    if (printKernels)
        Debug.Log($"{fn.func.kernelName}: {O.shape} = {X.shape} ∆ {K.shape} stride: {stride[0]},{stride[1]} pad:{pad[0]},{pad[1]}");

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);
    fn.SetTensorDecl("K", K.shape, Pin(K).offset);
    fn.SetTensorDecl("B", B.shape, Pin(B).offset);
    Assert.AreEqual(Pin(K).buffer, Pin(B).buffer);
    fn.SetTensorBuffer("WBK", Pin(K).buffer);

    fn.shader.SetInts("_Stride", stride);
    fn.shader.SetInts("_Pad", pad);

    fn.Dispatch();
    return O;
}
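// Semantics sketch (informal, index order illustrative): depthwise convolution applies one
// spatial filter per input channel, i.e. with kernelDepth == 1 and kernelCount == X.channels:
//     O[n,y,x,c] = B[c] + sum_{j,i} X[n, y*strideY + j - padTop, x*strideX + i - padLeft, c] * K[j,i,0,c]
// Channels never mix here; mixing them requires a following regular (pointwise) convolution.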
protected override Tensor ApplyPadding(Tensor X, int[] pad, string kernelName, float constant = 0.0f)
{
    Assert.AreEqual(pad.Length, 4);

    var O = NewTensor(X.shape.ApplyBorder(pad));
    var fn = BestKernel(ComputeKernelLibrary.Padding(X.shape, O.shape, kernelName));

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);

    fn.shader.SetInts("_Pad", pad);

    if (kernelName == "Border2D")
    {
        // NOTE: negative "pad" values will crop the X tensor
        int croppedWidth = X.width - Math.Max(0, -pad[2]);
        int croppedHeight = X.height - Math.Max(0, -pad[3]);
        var croppedSize = new int[] { croppedWidth, croppedHeight };

        fn.shader.SetInts("_Pool", croppedSize);
        fn.shader.SetFloat("_Beta", constant);
    }

    fn.Dispatch();
    return O;
}
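// Cropping sketch (informal, assuming the pad = {left, top, right, bottom} layout the code
// implies): positive entries grow a border filled with `constant`; negative entries crop
// instead. For example, a 10x10 input with pad = {0, 0, -2, -3} yields
//     croppedWidth = 10 - 2 = 8,  croppedHeight = 10 - 3 = 7
// so the Border2D kernel only reads the top-left 8x7 region of X.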
// ---------------------------------------------------------------------------------

public override Tensor Dense(Tensor X, Tensor W, Tensor B)
{
    Assert.IsTrue(W.dimensions <= 2);
    Assert.AreEqual(B.flatWidth, B.length);
    Assert.AreEqual(X.flatWidth, W.flatHeight);

    var O = NewTensor(X.flatHeight, W.flatWidth);

    var itemSize = 4; // @TODO: itemSizeInBytes == 2 | float16
    var fn = BestKernel(ComputeKernelLibrary.Dense(X.shape, W.shape, O.shape, itemSize >> 2));

    if (printKernels)
        Debug.Log($"{fn.func.kernelName}: {O.shape} = {X.shape} * {W.shape}");

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);
    fn.SetTensorDecl("W", W.shape, Pin(W).offset);
    fn.SetTensorDecl("B", B.shape, Pin(B).offset);
    Assert.AreEqual(Pin(W).buffer, Pin(B).buffer);
    fn.SetTensorBuffer("WBK", Pin(W).buffer);

    fn.Dispatch();
    return O;
}
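// Math sketch (informal): Dense is a batched affine map over flattened inputs,
//     O[n, j] = B[j] + sum_i X[n, i] * W[i, j]
// with X: [flatHeight, flatWidth], W: [X.flatWidth, W.flatWidth], B: [W.flatWidth].
// W and B are pinned into the shared "WBK" weights buffer and addressed via their offsets,
// which is why the assert on Pin(W).buffer == Pin(B).buffer must hold before dispatch.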
public override Tensor ElementwiseWithBroadcast(string kernelName, Tensor[] tensors)
{
    Assert.IsTrue(tensors.Length > 0);

    Tensor outputTensor1 = NewTensor(TensorExtensions.MaxShape(tensors));
    Tensor outputTensor2 = null;
    if (tensors.Length > 2)
        outputTensor2 = NewTensor(TensorExtensions.MaxShape(tensors));

    var X = tensors[0];
    var fn = BestKernel(ComputeKernelLibrary.Broadcast(X.shape, outputTensor1.shape, kernelName));

    Tensor O = null;
    for (int t = 1; t < tensors.Length; ++t)
    {
        var B = tensors[t];
        O = (t % 2 == 1) ? outputTensor1 : outputTensor2;

        fn.SetTensor("X", X.shape, Pin(X).buffer);
        fn.SetTensor("O", O.shape, Pin(O).buffer);
        fn.SetTensor("B", B.shape, Pin(B).buffer, Pin(B).offset);

        fn.Dispatch();

        X = O;
    }

    return O;
}
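// Ping-pong sketch (informal): the n-ary op is folded into a chain of binary dispatches,
// alternating between two output buffers so a kernel never reads and writes the same one.
// For tensors {A, B, C}: pass 1 computes out1 = A op B, pass 2 computes out2 = out1 op C.
// With exactly two inputs a single buffer suffices, hence outputTensor2 stays null. Note the
// method effectively assumes at least two tensors: with one input the loop never runs and
// null is returned.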
public override Tensor LogicalNot(Tensor X)
{
    var O = NewTensor(X.shape);
    var fn = BestKernel(ComputeKernelLibrary.Activation(X.shape, O.shape, "LogicalNot"));

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);

    fn.Dispatch();
    return O;
}
public override Tensor GlobalAvgVariancePool2D(Tensor X)
{
    var O = NewTensor(X.batch, 2, 1, X.channels);
    var fn = BestKernel(ComputeKernelLibrary.GlobalPool2D(X.shape, O.shape, "GlobalAvgVariancePool2D"));

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);

    fn.Dispatch();
    return O;
}
public override Tensor LogSoftmax(Tensor X)
{
    var O = NewTensor(X.shape);
    var fn = BestKernel(ComputeKernelLibrary.LogSoftmax(X.shape, O.shape));

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);

    fn.Dispatch();
    return O;
}
protected override Tensor Activation(string kernelName, Tensor X, float alpha = 0f, float beta = 0f)
{
    var O = NewTensor(X.shape);
    var fn = BestKernel(ComputeKernelLibrary.Activation(X.shape, O.shape, kernelName));

    if (printKernels)
        D.Log(fn.func.kernelName);

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);
    fn.shader.SetFloat("_Alpha", alpha);
    fn.shader.SetFloat("_Beta", beta);

    fn.Dispatch();
    return O;
}
public override Tensor PRelu(Tensor X, Tensor S)
{
    Assert.IsTrue((X.flatWidth == S.flatWidth) || (S.flatWidth == 1));

    var O = NewTensor(X.shape);
    var fn = BestKernel(ComputeKernelLibrary.PRelu(X.shape, O.shape));

    if (printKernels)
        D.Log(fn.func.kernelName);

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);
    fn.SetTensor("W", S.shape, Pin(S).buffer);

    fn.Dispatch();
    return O;
}
public override Tensor Upsample2D(Tensor X, int[] size)
{
    Assert.AreEqual(size.Length, 2);

    var O = NewTensor(X.batch, X.height * size[1], X.width * size[0], X.channels);
    var fn = BestKernel(ComputeKernelLibrary.Upsample2D(X.shape, O.shape));

    if (printKernels)
        D.Log($"{fn.func.kernelName}: {O.shape} = {X.shape} ^ size: {size[0]},{size[1]}");

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);

    fn.shader.SetInts("_Pool", size);

    fn.Dispatch();
    return O;
}
protected override Tensor CopyAndReshape(Tensor X, TensorShape newShape)
{
    var copyShape = X.shape;
    Assert.AreEqual(copyShape.length, newShape.length);

    // NOTE: the "Copy" kernel copies tensor data while preserving the shape.
    // Here in CopyAndReshape we want to both copy and change the shape, so to
    // piggyback on the "Copy" kernel we allocate the destination tensor with the
    // new shape, but bind both tensors with a shape identical to the source.
    var O = NewTensor(newShape);
    var fn = BestKernel(ComputeKernelLibrary.Copy(copyShape, copyShape));

    fn.SetTensor("X", copyShape, Pin(X).buffer);
    fn.SetTensor("O", copyShape, Pin(O).buffer);
    fn.shader.SetInts("_Pad", new int[] { 0, 0, 0, 0 });

    fn.Dispatch();
    return O;
}
public override Tensor ScaleBias(Tensor X, Tensor S, Tensor B)
{
    Assert.AreEqual(X.channels, B.channels);
    Assert.AreEqual(X.channels, S.channels);
    Assert.AreEqual(B.length, B.channels);
    Assert.AreEqual(S.length, S.channels);

    var O = NewTensor(X.shape);
    var fn = BestKernel(ComputeKernelLibrary.ScaleBias(X.shape, O.shape));

    if (printKernels)
        D.Log(fn.func.kernelName);

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);
    fn.SetTensorDecl("W", S.shape, Pin(S).offset);
    fn.SetTensorDecl("B", B.shape, Pin(B).offset);
    Assert.AreEqual(Pin(S).buffer, Pin(B).buffer);
    fn.SetTensorBuffer("WBK", Pin(S).buffer);

    fn.Dispatch();
    return O;
}
protected virtual Tensor GlobalPool2D(string smallKernelName, string globalKernelName, Tensor X)
{
    // downsample with pyramid approach
    while (X.height * X.width >= 256)
    {
        var pool = new[] { 4, 4 };
        var stride = pool;
        var noPad = new[] { 0, 0, 0, 0 };

        var lastLength = X.length;
        X = Pool2D(smallKernelName, X, pool, stride, noPad);
        Assert.IsTrue(X.length < lastLength);
    }

    var O = NewTensor(X.batch, 1, 1, X.channels);
    var fn = BestKernel(ComputeKernelLibrary.GlobalPool2D(X.shape, O.shape, globalKernelName));

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);

    fn.Dispatch();
    return O;
}
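// Worked example (arithmetic follows directly from the loop above): for a 224x224 input,
// 224*224 = 50176 >= 256, so a 4x4/stride-4 pool reduces it to 56x56; 56*56 = 3136 >= 256,
// so another pass gives 14x14; 14*14 = 196 < 256, so the final global kernel runs on 14x14.
// Pyramiding keeps each dispatch's reduction small instead of reducing all of H*W at once.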
public override Tensor Conv2DTrans(Tensor X, Tensor K, Tensor B, int[] stride, int[] pad, int[] outputAdjustment)
{
    Assert.AreEqual(X.channels, K.kernelDepth);
    Assert.AreEqual(K.kernelCount, B.flatWidth);
    Assert.AreEqual(B.flatWidth, B.length);
    Assert.AreEqual(stride.Length, 2);
    Assert.AreEqual(pad.Length, 4);

    var O = NewTensor(X.shape.ApplyKernelInverse(K.shape, stride, pad, outputAdjustment));
    var fn = BestKernel(ComputeKernelLibrary.Conv2DTrans(X.shape, K.shape, O.shape));

    if (printKernels)
        D.Log($"{fn.func.kernelName}: {O.shape} = {X.shape} @ {K.shape} stride: {stride[0]},{stride[1]} pad:{pad[0]},{pad[1]}");

    pad = new int[]
    {
        K.kernelWidth - pad[0] - 1, K.kernelHeight - pad[1] - 1,
        K.kernelWidth - pad[2] - 1, K.kernelHeight - pad[3] - 1
    };

    fn.SetTensor("X", X.shape, Pin(X).buffer);
    fn.SetTensor("O", O.shape, Pin(O).buffer);
    fn.SetTensorDecl("K", K.shape, Pin(K).offset);
    fn.SetTensorDecl("B", B.shape, Pin(B).offset);
    Assert.AreEqual(Pin(K).buffer, Pin(B).buffer);
    fn.SetTensorBuffer("WBK", Pin(K).buffer);

    fn.shader.SetInts("_Pad", pad);
    fn.shader.SetInts("_Stride", stride);

    fn.Dispatch();
    return O;
}
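// Pad-flip sketch (the standard transposed-convolution identity, stated informally): a
// transposed convolution can be dispatched as a regular convolution over the implicitly
// zero-stuffed input, which requires mirrored padding
//     pad'[i] = kernelSize[i] - pad[i] - 1
// and that is exactly the remapping performed above before _Pad is uploaded to the shader.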
public override Tensor Concat(Tensor[] tensors, int axis)
{
    var O = NewTensor(TensorExtensions.Concat(tensors.Select(t => t.shape).ToArray(), axis));

    var offsets = new int[] { 0, 0, 0, 0 };
    axis = O.shape.Axis(axis);

    foreach (var X in tensors)
    {
        var fn = BestKernel(ComputeKernelLibrary.Copy(X.shape, O.shape));

        fn.SetTensor("X", X.shape, Pin(X).buffer);
        fn.SetTensor("O", O.shape, Pin(O).buffer);
        fn.shader.SetInts("_Pad", offsets);

        fn.Dispatch();

        offsets[axis] += X.shape[axis];
    }

    return O;
}
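// Offsets sketch (informal): each input is copied into O at a running offset along `axis`,
// passed to the Copy kernel through its _Pad parameter. E.g. concatenating inputs with
// channel counts {3, 5, 4} along the channel axis writes them at channel offsets 0, 3 and 8,
// producing a 12-channel output.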
public virtual void PrepareModel(Model model, IDictionary<string, TensorShape> inputShapes)
{
    var modelHash = CalcModelWithInputsHashCode(model, inputShapes);
    if (modelHash == m_CachedModelHash)
        return;

    m_CachedModelHash = modelHash;
    m_CompiledLayers.Clear();

    IDictionary<string, TensorShape> shapesByName;
    ModelAnalyzer.ListTemporaryTensorShapes(model, inputShapes, out shapesByName);

    foreach (var l in model.layers)
    {
        if (m_CompiledLayers.ContainsKey(l))
            continue; // already compiled

        if (l.inputs.Length == 0)
            continue; // don't need to compile layers without inputs, so far all of them are CPU only

        var X = shapesByName[l.inputs[0]];
        var O = shapesByName[l.name];

        ComputeKernel kernel = new ComputeKernel();
        if (l.type == Layer.Type.Dense)
        {
            var itemSize = 4; // @TODO: itemSizeInBytes == 2 | float16
            kernel = BestKernel(
                ComputeKernelLibrary.Dense(X, l.datasets[0].shape, O, itemSize >> 2));
        }
        else if (l.type == Layer.Type.Conv2D)
        {
            Assert.IsNotNull(l.stride);
            Assert.IsNotNull(l.pad);
            kernel = BestKernel(
                ComputeKernelLibrary.Conv2D(X, l.datasets[0].shape, O, l.stride, l.pad));
        }
        else if (l.type == Layer.Type.DepthwiseConv2D)
        {
            kernel = BestKernel(
                ComputeKernelLibrary.DepthwiseConv2D(X, l.datasets[0].shape, O));
        }
        else if (l.type == Layer.Type.Conv2DTrans)
        {
            kernel = BestKernel(
                ComputeKernelLibrary.Conv2DTrans(X, l.datasets[0].shape, O));
        }
        else if (l.type == Layer.Type.Upsample2D)
        {
            kernel = BestKernel(
                ComputeKernelLibrary.Upsample2D(X, O));
        }
        else if (l.type == Layer.Type.MaxPool2D ||
                 l.type == Layer.Type.AvgPool2D)
        {
            var kernelName = l.type.ToString();

            Assert.IsNotNull(l.pool);
            Assert.IsNotNull(l.stride);
            Assert.IsNotNull(l.pad);
            var pad = X.AdjustPadToPool(l.pool, l.stride, l.pad);
            if (pad[0] == 0 && pad[1] == 0 && pad[2] == 0 && pad[3] == 0)
                kernelName += "_NoPads";

            kernel = BestKernel(
                ComputeKernelLibrary.Pool2D(X, O, kernelName));
        }
        // @TODO: reimplement GlobalPools, currently require different kernels for each pyramid step
        //else if (l.type == Layer.Type.GlobalMaxPool2D ||
        //         l.type == Layer.Type.GlobalAvgPool2D)
        //{
        //    var kernelName = l.type.ToString();
        //    kernel = BestKernel(
        //        ComputeKernelLibrary.GlobalPool2D(X, O, kernelName));
        //}
        else if (l.type == Layer.Type.ScaleBias)
        {
            kernel = BestKernel(
                ComputeKernelLibrary.ScaleBias(X, O));
        }
        // @TODO: reimplement Normalization, which became a multi-kernel operation after optimizations
        //else if (l.type == Layer.Type.Normalization)
        //{
        //    kernel = BestKernel(
        //        ComputeKernelLibrary.Normalization(X, O));
        //}
        else if (l.type == Layer.Type.Add ||
                 l.type == Layer.Type.Sub ||
                 l.type == Layer.Type.Mul ||
                 l.type == Layer.Type.Div ||
                 l.type == Layer.Type.Pow ||
                 l.type == Layer.Type.Min ||
                 l.type == Layer.Type.Max
                 // || l.type == Layer.Type.Mean @TODO: implement BroadcastMean
                 )
        {
            var kernelName = "Broadcast" + l.type;
            kernel = BestKernel(
                ComputeKernelLibrary.Broadcast(X, O, kernelName));
        }
        // @TODO: implement Concat, currently might require different kernel for each tensor
        //else if (l.type == Layer.Type.Concat) {}

        // Activations
        else if (l.type == Layer.Type.Activation)
        {
            if (l.activation == Layer.Activation.Softmax)
            {
                kernel = BestKernel(
                    ComputeKernelLibrary.Softmax(X, O));
            }
            else if (l.activation == Layer.Activation.LogSoftmax)
            {
                kernel = BestKernel(
                    ComputeKernelLibrary.LogSoftmax(X, O));
            }
            else if (l.activation == Layer.Activation.PRelu)
            {
                kernel = BestKernel(
                    ComputeKernelLibrary.PRelu(X, O));
            }
            else if (l.activation != Layer.Activation.None)
            {
                var kernelName = l.activation.ToString();
                kernel = BestKernel(
                    ComputeKernelLibrary.Activation(X, O, kernelName));
            }
        }

        m_CompiledLayers.Add(l, new CompiledLayer { kernel = kernel, shape = O });
    }
}
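// Usage sketch (hypothetical caller): PrepareModel pre-selects the best compute kernel per
// layer for a given model + input-shape combination and caches the result keyed by hash, so
// calling it again with identical shapes is a no-op; re-run it whenever input shapes change.
//     ops.PrepareModel(model, new Dictionary<string, TensorShape> { { "input", inputShape } });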