// ---------------------------------------------------------------------------------
/// <summary>
/// Picks a kernel for the given candidate <paramref name="entrees"/> by delegating
/// to <see cref="ComputeKernel.BestKernel"/> with this backend's shader cache
/// (<c>m_Kernels</c>) and verbosity flag (<c>m_Verbose</c>).
/// </summary>
/// <param name="entrees">Candidate kernel entries to choose from.</param>
/// <returns>The kernel selected by <see cref="ComputeKernel.BestKernel"/>.</returns>
protected ComputeKernel BestKernel(ComputeKernelLibrary.Entry[] entrees) =>
    ComputeKernel.BestKernel(m_Kernels, entrees, m_Verbose);
/// <summary>
/// Pre-compiles a compute kernel for every GPU-dispatchable layer of
/// <paramref name="model"/>, given the shapes of its inputs. Results are stored in
/// <c>m_CompiledLayers</c>. A hash of (model, input shapes) is cached so that a
/// repeated call with an unchanged combination is a no-op.
/// </summary>
/// <param name="model">The model whose layers are compiled.</param>
/// <param name="inputShapes">Input tensor shapes, keyed by input name.</param>
public virtual void PrepareModel(Model model, IDictionary<string, TensorShape> inputShapes)
{
    // Skip all work when neither the model nor the input shapes have changed.
    var modelHash = CalcModelWithInputsHashCode(model, inputShapes);
    if (modelHash == m_CachedModelHash)
        return;

    m_CachedModelHash = modelHash;
    m_CompiledLayers.Clear();

    // Infer shapes of all intermediate tensors up front.
    IDictionary<string, TensorShape> shapesByName;
    ModelAnalyzer.ListTemporaryTensorShapes(model, inputShapes, out shapesByName);

    foreach (var layer in model.layers)
    {
        if (m_CompiledLayers.ContainsKey(layer))
            continue; // already compiled
        if (layer.inputs.Length == 0)
            continue; // don't need to compile layers without inputs, so far all of them are CPU only

        var X = shapesByName[layer.inputs[0]];
        var O = shapesByName[layer.name];

        // Layers with no matching case below keep this default kernel.
        var kernel = new ComputeKernel();

        switch (layer.type)
        {
            case Layer.Type.Dense:
            {
                var itemSize = 4; // @TODO: itemSizeInBytes == 2 | float16
                kernel = BestKernel(
                    ComputeKernelLibrary.Dense(X, layer.datasets[0].shape, O, itemSize >> 2));
                break;
            }
            case Layer.Type.Conv2D:
            {
                Assert.IsNotNull(layer.stride);
                Assert.IsNotNull(layer.pad);
                kernel = BestKernel(
                    ComputeKernelLibrary.Conv2D(X, layer.datasets[0].shape, O, layer.stride, layer.pad));
                break;
            }
            case Layer.Type.DepthwiseConv2D:
            {
                kernel = BestKernel(
                    ComputeKernelLibrary.DepthwiseConv2D(X, layer.datasets[0].shape, O));
                break;
            }
            case Layer.Type.Conv2DTrans:
            {
                kernel = BestKernel(
                    ComputeKernelLibrary.Conv2DTrans(X, layer.datasets[0].shape, O));
                break;
            }
            case Layer.Type.Upsample2D:
            {
                kernel = BestKernel(
                    ComputeKernelLibrary.Upsample2D(X, O));
                break;
            }
            case Layer.Type.MaxPool2D:
            case Layer.Type.AvgPool2D:
            {
                // Kernel is looked up by name; the "_NoPads" variant is chosen
                // when the (pool-adjusted) padding is zero on all four sides.
                var kernelName = layer.type.ToString();
                Assert.IsNotNull(layer.pool);
                Assert.IsNotNull(layer.stride);
                Assert.IsNotNull(layer.pad);
                var pad = X.AdjustPadToPool(layer.pool, layer.stride, layer.pad);
                if (pad[0] == 0 && pad[1] == 0 && pad[2] == 0 && pad[3] == 0)
                    kernelName += "_NoPads";
                kernel = BestKernel(
                    ComputeKernelLibrary.Pool2D(X, O, kernelName));
                break;
            }
            // @TODO: reimplement GlobalPools, currently require different kernels for each pyramid step
            // (GlobalMaxPool2D / GlobalAvgPool2D would dispatch via ComputeKernelLibrary.GlobalPool2D)
            case Layer.Type.ScaleBias:
            {
                kernel = BestKernel(
                    ComputeKernelLibrary.ScaleBias(X, O));
                break;
            }
            // @TODO: reimplement Normalization, which became a multi-kernel operation after optimizations
            // (would dispatch via ComputeKernelLibrary.Normalization)
            case Layer.Type.Add:
            case Layer.Type.Sub:
            case Layer.Type.Mul:
            case Layer.Type.Div:
            case Layer.Type.Pow:
            case Layer.Type.Min:
            case Layer.Type.Max:
            // case Layer.Type.Mean: @TODO: implement BroadcastMean
            {
                // Broadcast kernels are named "Broadcast" + op, e.g. "BroadcastAdd".
                var kernelName = "Broadcast" + layer.type;
                kernel = BestKernel(
                    ComputeKernelLibrary.Broadcast(X, O, kernelName));
                break;
            }
            // @TODO: implement Concat, currently might require different kernel for each tensor
            case Layer.Type.Activation:
            {
                // Softmax/LogSoftmax/PRelu have dedicated kernel sets; every other
                // activation (except None) resolves its kernel by enum name.
                if (layer.activation == Layer.Activation.Softmax)
                    kernel = BestKernel(
                        ComputeKernelLibrary.Softmax(X, O));
                else if (layer.activation == Layer.Activation.LogSoftmax)
                    kernel = BestKernel(
                        ComputeKernelLibrary.LogSoftmax(X, O));
                else if (layer.activation == Layer.Activation.PRelu)
                    kernel = BestKernel(
                        ComputeKernelLibrary.PRelu(X, O));
                else if (layer.activation != Layer.Activation.None)
                    kernel = BestKernel(
                        ComputeKernelLibrary.Activation(X, O, layer.activation.ToString()));
                break;
            }
        }

        m_CompiledLayers.Add(layer, new CompiledLayer { kernel = kernel, shape = O });
    }
}