internal static long[] FromBarracuda(Barracuda.TensorShape src)
{
    // Collapse singleton spatial dimensions into a rank-2 (batch, channels) shape
    if (src.height == 1 && src.width == 1)
    {
        return new long[2] {
            src.batch, src.channels
        };
    }

    return new long[4] {
        src.batch, src.height, src.width, src.channels
    };
}
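
        /// <summary>
        /// Schedules the model one layer per iteration; yield on the returned
        /// enumerator (e.g. from a coroutine) to spread inference across frames.
        /// </summary>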
        public virtual IEnumerator ExecuteAsync()
        {
            Profiler.BeginSample("Barracuda.Execute");

            // Release temporary allocations from the previous run if a reset was
            // requested, then prepare per-layer storage for this execution.
            ResetAllocatorIfRequested();
            m_Vars.PrepareStorage(m_Model, m_Ops, m_InputShapes);

            if (m_ModelCompiler != null)
            {
                m_ModelCompiler.PrepareModel(m_Model, m_InputShapes);
            }

            int idx = 0;

            foreach (var l in m_Model.layers)
            {
                idx++;

                m_Progress = idx / (float)m_Model.layers.Count;

                Profiler.BeginSample(l.name);
                var inputs = m_Vars.GatherInputs(l);

                // Default X to the layer's first input; input-less layers (e.g. Load)
                // start from an empty placeholder tensor.
                Tensor X = (inputs.Length > 0) ? inputs[0] : new Tensor();

                if (m_Verbose)
                {
                    D.Log("Layer: " + l.type + ((l.type == Layer.Type.Activation) ? ("." + l.activation) : "") + " " + l.name);
                }

                m_Vars.PrepareStorage(l);
                if (m_ModelCompiler != null)
                {
                    m_ModelCompiler.PreExecuteLayer(l, inputs);
                }

                // No operation, identity
                if (l.type == Layer.Type.Nop)
                {
                    Profiler.BeginSample("Barracuda.Nop");
                    X = X.ShallowCopy();
                }
                // Load const
                else if (l.type == Layer.Type.Load)
                {
                    Profiler.BeginSample("Barracuda.Load");
                }
                // GEMM: Y = X × W + B, with inputs = { X, weights, bias }
                else if (l.type == Layer.Type.Dense)
                {
                    Assert.AreEqual(inputs.Length, 3);
                    Profiler.BeginSample("Barracuda.Dense");
                    X = m_Ops.Dense(X, inputs[1], inputs[2]);
                }
                // 2D
                else if (l.type == Layer.Type.Conv2D)
                {
                    Assert.AreEqual(inputs.Length, 3);
                    Profiler.BeginSample("Barracuda.Conv2D");
                    var pad = X.AdjustPadToKernel(inputs[1], l.stride, l.pad);
                    X = m_Ops.Conv2D(X, inputs[1], inputs[2], l.stride, pad);
                }
                else if (l.type == Layer.Type.DepthwiseConv2D)
                {
                    Assert.AreEqual(inputs.Length, 3);
                    Profiler.BeginSample("Barracuda.DepthwiseConv2D");
                    var pad = X.AdjustPadToKernel(inputs[1], l.stride, l.pad);
                    X = m_Ops.DepthwiseConv2D(X, inputs[1], inputs[2], l.stride, pad);
                }
                else if (l.type == Layer.Type.Conv2DTrans)
                {
                    Assert.AreEqual(inputs.Length, 3);
                    Profiler.BeginSample("Barracuda.Conv2DTrans");
                    // pool size is treated as output_adjustment aka output_padding here
                    var outputAdjustment = l.pool;
                    var pad = X.AdjustPadToKernel(inputs[1], l.stride, l.pad);
                    X = m_Ops.Conv2DTrans(X, inputs[1], inputs[2], l.stride, pad, outputAdjustment);
                }
                else if (l.type == Layer.Type.Upsample2D)
                {
                    Profiler.BeginSample("Barracuda.Upsample2D");
                    // pool size is treated as upsample coefficient here
                    var size = l.pool;
                    X = m_Ops.Upsample2D(X, size);
                }
                else if (l.type == Layer.Type.MaxPool2D)
                {
                    Profiler.BeginSample("Barracuda.MaxPool2D");
                    var pad = X.AdjustPadToPool(l.pool, l.stride, l.pad);
                    X = m_Ops.MaxPool2D(X, l.pool, l.stride, pad);
                }
                else if (l.type == Layer.Type.AvgPool2D)
                {
                    Profiler.BeginSample("Barracuda.AvgPool2D");
                    var pad = X.AdjustPadToPool(l.pool, l.stride, l.pad);
                    X = m_Ops.AvgPool2D(X, l.pool, l.stride, pad);
                }
                else if (l.type == Layer.Type.GlobalMaxPool2D)
                {
                    Profiler.BeginSample("Barracuda.GlobalMaxPool2D");
                    X = m_Ops.GlobalMaxPool2D(X);
                }
                else if (l.type == Layer.Type.GlobalAvgPool2D)
                {
                    Profiler.BeginSample("Barracuda.GlobalAvgPool2D");
                    X = m_Ops.GlobalAvgPool2D(X);
                }
                else if (l.type == Layer.Type.Border2D)
                {
                    Profiler.BeginSample("Barracuda.Border2D");

                    Assert.IsNotNull(l.pad);
                    // NOTE: beta is used to retrieve the fill value
                    // because beta defaults to 0 (while alpha defaults to 1),
                    // and 0 is more in line with zero padding
                    float fillValue = l.beta;
                    X = m_Ops.Border2D(X, l.pad, fillValue);
                }
                else if (l.type == Layer.Type.Pad2DReflect)
                {
                    Profiler.BeginSample("Barracuda.Pad2DReflect");

                    Assert.IsNotNull(l.pad);
                    X = m_Ops.Pad2DReflect(X, l.pad);
                }
                else if (l.type == Layer.Type.Pad2DSymmetric)
                {
                    Profiler.BeginSample("Barracuda.Pad2DSymmetric");

                    Assert.IsNotNull(l.pad);
                    X = m_Ops.Pad2DSymmetric(X, l.pad);
                }
                else if (l.type == Layer.Type.Pad2DEdge)
                {
                    Profiler.BeginSample("Barracuda.Pad2DEdge");

                    Assert.IsNotNull(l.pad);
                    X = m_Ops.Pad2DEdge(X, l.pad);
                }
                // 3D
                else if (l.type == Layer.Type.Conv3D ||
                         l.type == Layer.Type.Conv3DTrans ||
                         l.type == Layer.Type.Upsample3D ||
                         l.type == Layer.Type.MaxPool3D ||
                         l.type == Layer.Type.AvgPool3D ||
                         l.type == Layer.Type.GlobalMaxPool3D ||
                         l.type == Layer.Type.GlobalAvgPool3D ||
                         l.type == Layer.Type.Border3D)
                {
                    throw new NotImplementedException("3D operations are not implemented yet!");
                }
                else if (l.type == Layer.Type.ScaleBias)
                {
                    Assert.AreEqual(inputs.Length, 3);
                    Profiler.BeginSample("Barracuda.ScaleBias");
                    X = m_Ops.ScaleBias(X, inputs[1], inputs[2]);
                }
                else if (l.type == Layer.Type.Normalization)
                {
                    Assert.AreEqual(inputs.Length, 3);
                    Profiler.BeginSample("Barracuda.Normalization");
                    // @TODO: support other types of Normalization at test time.
                    // Currently only pool=1 (InstanceNormalization) is supported.

                    // NOTE: beta is used to retrieve the epsilon value
                    // because beta defaults to 0 (while alpha defaults to 1),
                    // and 0 is more in line with a very small epsilon
                    var epsilon = l.beta;
                    if (epsilon == 0)
                    {
                        epsilon = Mathf.Epsilon; // safety check to prevent division by zero
                    }
                    X = m_Ops.Normalization(X, inputs[1], inputs[2], 1, l.axis, epsilon);
                }
                else if (l.type == Layer.Type.LRN)
                {
                    Profiler.BeginSample("Barracuda.LRN");

                    Assert.IsNotNull(l.pool);
                    Assert.AreEqual(l.pool.Length, 1);
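                    // pool[0] holds the local normalization window size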
                    int count = l.pool[0];
                    X = m_Ops.LRN(X, l.alpha, l.beta, 1.0f, count); // @TODO: bias
                }
                // Stochastic layers
                else if (l.type == Layer.Type.Dropout)
                {
                    Profiler.BeginSample("Barracuda.Dropout");

                    X = m_Ops.Dropout(X, l.alpha);
                }
                else if (l.type == Layer.Type.RandomNormal)
                {
                    Profiler.BeginSample("Barracuda.RandomNormal");

                    Assert.IsNotNull(l.pool);
                    // pool size is treated as shape constant, if not empty
                    // otherwise shape of the previous tensor is used
                    var shape = X.shape;
                    if (l.pool.Length > 0)
                    {
                        shape = new TensorShape(l.pool);
                    }

                    // pad, when non-empty, carries the random seed; otherwise use a fixed default
                    int   seed = (l.pad.Length > 0) ? l.pad[0] : 1337;
                    float scale = l.alpha, mean = l.beta;
                    X = m_Ops.RandomNormal(shape, mean, scale, seed);
                }
                else if (l.type == Layer.Type.RandomUniform)
                {
                    Profiler.BeginSample("Barracuda.RandomUniform");

                    Assert.IsNotNull(l.pool);
                    // pool size is treated as shape constant, if not empty
                    // otherwise shape of the previous tensor is used
                    var shape = X.shape;
                    if (l.pool.Length > 0)
                    {
                        shape = new TensorShape(l.pool);
                    }

                    // pad, when non-empty, carries the random seed; otherwise use a fixed default
                    int   seed = (l.pad.Length > 0) ? l.pad[0] : 1337;
                    float scale = l.alpha, mean = l.beta;
                    X = m_Ops.RandomUniform(shape, mean, scale, seed);
                }
                else if (l.type == Layer.Type.Multinomial)
                {
                    Profiler.BeginSample("Barracuda.Multinomial");

                    Assert.IsNotNull(l.pool);
                    Assert.AreEqual(l.pool.Length, 1);

                    // pool[0] holds the sample count; pad, when non-empty, carries the random seed
                    int count = l.pool[0];
                    int seed  = (l.pad.Length > 0) ? l.pad[0] : 1337;
                    X = m_Ops.Multinomial(X, count, seed);
                }
                else if (l.type == Layer.Type.OneHot)
                {
                    Profiler.BeginSample("Barracuda.OneHot");

                    Assert.IsNotNull(l.pool);
                    Assert.AreEqual(l.pool.Length, 1);
                    int   depth = l.pool[0];
                    float on = l.alpha, off = l.beta;
                    X = m_Ops.OneHot(X, depth, on, off);
                }
                // Broadcast layers
                else if (l.type == Layer.Type.Add)
                {
                    Profiler.BeginSample("Barracuda.Add");

                    X = m_Ops.Add(inputs);
                }
                else if (l.type == Layer.Type.Sub)
                {
                    Profiler.BeginSample("Barracuda.Sub");

                    X = m_Ops.Sub(inputs);
                }
                else if (l.type == Layer.Type.Mul)
                {
                    Profiler.BeginSample("Barracuda.Mul");

                    X = m_Ops.Mul(inputs);
                }
                else if (l.type == Layer.Type.Div)
                {
                    Profiler.BeginSample("Barracuda.Div");

                    X = m_Ops.Div(inputs);
                }
                else if (l.type == Layer.Type.Pow)
                {
                    Profiler.BeginSample("Barracuda.Pow");

                    X = m_Ops.Pow(inputs);
                }
                else if (l.type == Layer.Type.Min)
                {
                    Profiler.BeginSample("Barracuda.Min");

                    X = m_Ops.Min(inputs);
                }
                else if (l.type == Layer.Type.Max)
                {
                    Profiler.BeginSample("Barracuda.Max");

                    X = m_Ops.Max(inputs);
                }
                else if (l.type == Layer.Type.Mean)
                {
                    Profiler.BeginSample("Barracuda.Mean");

                    X = m_Ops.Mean(inputs);
                }
                // Reduction layers
                else if (l.type == Layer.Type.ReduceMax)
                {
                    Profiler.BeginSample("Barracuda.ReduceMax");

                    X = m_Ops.ReduceMax(X, l.axis);
                }
                else if (l.type == Layer.Type.ReduceMean)
                {
                    Profiler.BeginSample("Barracuda.ReduceMean");

                    X = m_Ops.ReduceMean(X, l.axis);
                }
                else if (l.type == Layer.Type.ReduceMin)
                {
                    Profiler.BeginSample("Barracuda.ReduceMin");

                    X = m_Ops.ReduceMin(X, l.axis);
                }
                else if (l.type == Layer.Type.ReduceProd)
                {
                    Profiler.BeginSample("Barracuda.ReduceProd");

                    X = m_Ops.ReduceProd(X, l.axis);
                }
                else if (l.type == Layer.Type.ReduceSum)
                {
                    Profiler.BeginSample("Barracuda.ReduceSum");

                    X = m_Ops.ReduceSum(X, l.axis);
                }
                else if (
                    l.type == Layer.Type.ReduceL1 ||
                    l.type == Layer.Type.ReduceL2 ||
                    l.type == Layer.Type.ReduceLogSum ||
                    l.type == Layer.Type.ReduceLogSumExp ||
                    l.type == Layer.Type.ReduceSumSquare)
                {
                    throw new NotImplementedException("This reduction operation is not implemented yet!");
                }
                // Logical operators with broadcast
                else if (l.type == Layer.Type.Greater)
                {
                    Assert.AreEqual(inputs.Length, 2);
                    Profiler.BeginSample("Barracuda.Greater");
                    X = m_Ops.Greater(X, inputs[1]);
                }
                else if (l.type == Layer.Type.GreaterEqual)
                {
                    Assert.AreEqual(inputs.Length, 2);
                    Profiler.BeginSample("Barracuda.GreaterEqual");
                    X = m_Ops.GreaterEqual(X, inputs[1]);
                }
                else if (l.type == Layer.Type.Less)
                {
                    Assert.AreEqual(inputs.Length, 2);
                    Profiler.BeginSample("Barracuda.Less");
                    X = m_Ops.Less(X, inputs[1]);
                }
                else if (l.type == Layer.Type.LessEqual)
                {
                    Assert.AreEqual(inputs.Length, 2);
                    Profiler.BeginSample("Barracuda.LessEqual");
                    X = m_Ops.LessEqual(X, inputs[1]);
                }
                else if (l.type == Layer.Type.Equal)
                {
                    Assert.AreEqual(inputs.Length, 2);
                    Profiler.BeginSample("Barracuda.Equal");
                    X = m_Ops.Equal(X, inputs[1]);
                }
                else if (l.type == Layer.Type.LogicalOr)
                {
                    Assert.AreEqual(inputs.Length, 2);
                    Profiler.BeginSample("Barracuda.LogicalOr");
                    X = m_Ops.LogicalOr(X, inputs[1]);
                }
                else if (l.type == Layer.Type.LogicalAnd)
                {
                    Assert.AreEqual(inputs.Length, 2);
                    Profiler.BeginSample("Barracuda.LogicalAnd");
                    X = m_Ops.LogicalAnd(X, inputs[1]);
                }
                else if (l.type == Layer.Type.LogicalXor)
                {
                    Assert.AreEqual(inputs.Length, 2);
                    Profiler.BeginSample("Barracuda.LogicalXor");
                    X = m_Ops.LogicalXor(X, inputs[1]);
                }
                else if (l.type == Layer.Type.LogicalNot)
                {
                    Profiler.BeginSample("Barracuda.LogicalNot");
                    X = m_Ops.LogicalNot(X);
                }
                // Shape affecting layers
                else if (l.type == Layer.Type.Flatten)
                {
                    Profiler.BeginSample("Barracuda.Flatten");
                    X = m_Ops.Flatten(X);
                }
                else if (l.type == Layer.Type.Reshape)
                {
                    Profiler.BeginSample("Barracuda.Reshape");

                    // pool size is treated as reshape coefficient, if not empty
                    // otherwise shape of the 2nd input tensor is used
                    var size = l.pool;

                    Assert.IsNotNull(size);
                    if (size.Length == 0 && inputs.Length > 1)
                    {
                        size = inputs[1].shape.ToArray();
                    }

                    var newShape = X.shape.Reshape(size);
                    X = m_Ops.Reshape(X, newShape);
                }
                else if (l.type == Layer.Type.Transpose)
                {
                    Profiler.BeginSample("Barracuda.Transpose");
                    X = m_Ops.Transpose(X);
                }
                else if (l.type == Layer.Type.Squeeze ||
                         l.type == Layer.Type.Unsqueeze)
                {
                    throw new NotImplementedException("Squeeze and Unsqueeze are not implemented yet!");
                }
                else if (l.type == Layer.Type.Concat)
                {
                    Profiler.BeginSample("Barracuda.Concat");

                    X = m_Ops.Concat(inputs, l.axis);
                }
                else if (l.type == Layer.Type.StridedSlice)
                {
                    Profiler.BeginSample("Barracuda.StridedSlice");

                    Assert.IsNotNull(l.pad);
                    Assert.IsNotNull(l.pool);
                    Assert.IsNotNull(l.stride);
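                    // pad, pool and stride are repurposed as the slice begin, end and step values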
                    X = m_Ops.StridedSlice(X, l.pad, l.pool, l.stride);
                }
                else if (l.type == Layer.Type.Tile)
                {
                    throw new NotImplementedException("Tile is not implemented yet!");
                }
                // Activations
                else if (l.type == Layer.Type.Activation)
                {
                    Profiler.BeginSample("Barracuda.Activation");

                    if (l.activation == Layer.Activation.Relu)
                    {
                        X = m_Ops.Relu(X);
                    }
                    else if (l.activation == Layer.Activation.Softmax)
                    {
                        X = m_Ops.Softmax(X);
                    }
                    else if (l.activation == Layer.Activation.LogSoftmax)
                    {
                        X = m_Ops.LogSoftmax(X);
                    }
                    else if (l.activation == Layer.Activation.Tanh)
                    {
                        X = m_Ops.Tanh(X);
                    }
                    else if (l.activation == Layer.Activation.Sigmoid)
                    {
                        X = m_Ops.Sigmoid(X);
                    }
                    else if (l.activation == Layer.Activation.Relu6)
                    {
                        X = m_Ops.Relu6(X);
                    }
                    else if (l.activation == Layer.Activation.Elu)
                    {
                        X = m_Ops.Elu(X, l.alpha);
                    }
                    else if (l.activation == Layer.Activation.LeakyRelu)
                    {
                        X = m_Ops.LeakyRelu(X, l.alpha);
                    }
                    else if (l.activation == Layer.Activation.Selu)
                    {
                        X = m_Ops.Selu(X, l.alpha, l.beta);
                    }
                    else if (l.activation == Layer.Activation.Swish)
                    {
                        X = m_Ops.Swish(X);
                    }
                    else if (l.activation == Layer.Activation.PRelu)
                    {
                        Assert.AreEqual(inputs.Length, 2);
                        Profiler.BeginSample("Barracuda.PRelu");
                        X = m_Ops.PRelu(X, inputs[1]);
                        Profiler.EndSample(); // close the nested PRelu sample
                    }
                    else if (
                        l.activation == Layer.Activation.Softplus ||
                        l.activation == Layer.Activation.Softsign ||
                        l.activation == Layer.Activation.Hardmax ||
                        l.activation == Layer.Activation.HardSigmoid)
                    {
                        throw new NotImplementedException("This activation function is not implemented yet!");
                    }
                    else if (l.activation == Layer.Activation.Abs)
                    {
                        X = m_Ops.Abs(X);
                    }
                    else if (l.activation == Layer.Activation.Neg)
                    {
                        X = m_Ops.Neg(X);
                    }
                    else if (l.activation == Layer.Activation.Ceil)
                    {
                        X = m_Ops.Ceil(X);
                    }
                    else if (l.activation == Layer.Activation.Clip)
                    {
                        X = m_Ops.Clip(X, l.alpha, l.beta);
                    }
                    else if (l.activation == Layer.Activation.Floor)
                    {
                        X = m_Ops.Floor(X);
                    }
                    else if (l.activation == Layer.Activation.Reciprocal)
                    {
                        X = m_Ops.Reciprocal(X);
                    }
                    else if (l.activation == Layer.Activation.Pow)
                    {
                        X = m_Ops.Pow(X, l.alpha);
                    }
                    else if (l.activation == Layer.Activation.Exp)
                    {
                        X = m_Ops.Exp(X);
                    }
                    else if (l.activation == Layer.Activation.Log)
                    {
                        X = m_Ops.Log(X);
                    }
                    else if (l.activation == Layer.Activation.Sqrt)
                    {
                        X = m_Ops.Sqrt(X);
                    }
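                    // Trig activations occupy a contiguous range in the enum,
                    // so a single bounds check covers them all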
                    else if ((int)l.activation >= (int)Layer.Activation.Acos &&
                             (int)l.activation <= (int)Layer.Activation.Tan)
                    {
                        throw new NotImplementedException("Trig functions are not implemented yet!");
                    }
                    else
                    {
                        X = X.ShallowCopy();
                    }
                }
                else
                {
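                    // Unrecognized layer type: X passes through unchanged,
                    // but the layer must not carry an activation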
                    Profiler.BeginSample("Barracuda.Dummy");
                    Assert.AreEqual(l.activation, Layer.Activation.None);
                }

                // Publish the result for downstream layers and remember it for synchronization
                m_Vars.Store(l, X);
                m_SyncTensor = X;

                // close the per-op-type sample opened above
                Profiler.EndSample();

                // close the per-layer sample opened with l.name
                Profiler.EndSample();

                yield return null;
            }

            // request ResetAllocator before next Execute() starts
            m_RequestResetAllocator = true;
            Profiler.EndSample();

            if (m_Verbose)
            {
                D.Log(m_Vars.GetAllocator());
            }
        }
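
        // Usage sketch (illustrative only; worker creation and input names are
        // assumptions that may vary across Barracuda versions):
        //
        //   var worker = WorkerFactory.CreateWorker(WorkerFactory.Type.Compute, model);
        //   worker.SetInput("input", inputTensor);           // hypothetical input name
        //   yield return StartCoroutine(worker.ExecuteAsync());
        //   var output = worker.PeekOutput();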