// Back-propagate the output error for a batch and compute the deltas/gradients used to update the network weights
        /// <summary>
        /// Runs the backward pass for one batch: fills <c>OutputError</c>, <c>OutputDelta</c>
        /// and the loss <c>L</c>, propagates deltas into every layer's <c>Delta</c>, and stores
        /// gradients in each convolution layer's <c>DeltaFeatureMap</c>/<c>DeltaBias</c> and in
        /// <c>WeightsDelta</c>/<c>BiasDelta</c>.
        /// </summary>
        /// <param name="batch">
        /// Expected outputs; element <c>k</c> is subtracted from <c>Output[k]</c>, and
        /// <c>batch.x</c> divides the summed squared error when computing the loss.
        /// </param>
        public void BackPropagation(ManagedArray batch)
        {
            var layerCount   = Layers.Count;
            var lastIndex    = layerCount - 1;
            var top          = Layers[lastIndex];
            var batchSize    = top.Activation.z;
            var inverseBatch = 1.0 / batchSize;

            // output error and output delta
            ManagedOps.Free(OutputDelta, OutputError);

            OutputError = new ManagedArray(Output, false);
            OutputDelta = new ManagedArray(Output, false);

            var outputLength = Output.Length();

            for (var k = 0; k < outputLength; k++)
            {
                var activation = Output[k];

                // difference between the produced and the expected value
                OutputError[k] = activation - batch[k];

                // error scaled by a * (1 - a)
                OutputDelta[k] = OutputError[k] * (activation * (1 - activation));
            }

            // loss: half of the summed squared error, divided by batch.x
            var squaredError = ManagedMatrix.SquareSum(OutputError);

            L = 0.5 * squaredError / batch.x;

            // feature vector delta: transposed weights multiplied by the output delta
            ManagedOps.Free(WeightsTransposed, FeatureVectorDelta);

            WeightsTransposed  = new ManagedArray(Weights, false);
            FeatureVectorDelta = new ManagedArray(FeatureVector, false);

            ManagedMatrix.Transpose(WeightsTransposed, Weights);
            ManagedMatrix.Multiply(FeatureVectorDelta, WeightsTransposed, OutputDelta);

            // the a * (1 - a) factor is applied only when the last layer is a convolution layer
            if (top.Type == LayerTypes.Convolution)
            {
                var vectorLength = FeatureVectorDelta.Length();

                for (var k = 0; k < vectorLength; k++)
                {
                    var feature = FeatureVector[k];

                    FeatureVectorDelta[k] = FeatureVectorDelta[k] * feature * (1 - feature);
                }
            }

            // copy the feature vector deltas back into the map layout of the last layer
            var mapX     = top.Activation.x;
            var mapY     = top.Activation.y;
            var mapCount = top.Activation.i;
            var mapSize  = mapX * mapY;

            ManagedOps.Free(top.Delta);
            top.Delta = new ManagedArray(top.Activation, false);

            var row   = new ManagedArray(1, mapSize, false);
            var map2D = new ManagedArray(mapX, mapY, false);

            for (var map = 0; map < mapCount; map++)
            {
                var offset = map * mapSize;

                for (var sample = 0; sample < batchSize; sample++)
                {
                    ManagedOps.Copy2D(row, FeatureVectorDelta, sample, offset);

                    // view the row as a 2D map, transpose it into place, then restore the row shape
                    row.Reshape(mapX, mapY);
                    ManagedMatrix.Transpose(map2D, row);
                    ManagedOps.Copy2D4D(top.Delta, map2D, sample, map);
                    row.Reshape(1, mapSize);
                }
            }

            ManagedOps.Free(row, map2D);

            // walk backwards through the remaining layers and fill in their deltas
            for (var index = lastIndex - 1; index >= 0; index--)
            {
                var layer = Layers[index];
                var upper = Layers[index + 1];

                if (layer.Type == LayerTypes.Convolution)
                {
                    ManagedOps.Free(layer.Delta);
                    layer.Delta = new ManagedArray(layer.Activation, false);

                    var scale     = upper.Scale;
                    var expandedX = scale * upper.Activation.x;
                    var expandedY = scale * upper.Activation.y;

                    var pooled      = new ManagedArray(upper.Activation.x, upper.Activation.y, false);
                    var expanded    = new ManagedArray(expandedX, expandedY, false);
                    var activations = new ManagedArray(expandedX, expandedY, false);
                    var deltas      = new ManagedArray(expandedX, expandedY, false);

                    // each expanded delta is divided by scale * scale
                    var spread = (1.0 / (scale * scale));

                    var mapTotal    = layer.Activation.i;
                    var deltaLength = deltas.Length();

                    for (var map = 0; map < mapTotal; map++)
                    {
                        for (var sample = 0; sample < batchSize; sample++)
                        {
                            ManagedOps.Copy4D2D(pooled, upper.Delta, sample, map);
                            ManagedMatrix.Expand(pooled, scale, scale, expanded);
                            ManagedOps.Copy4D2D(activations, layer.Activation, sample, map);

                            for (var k = 0; k < deltaLength; k++)
                            {
                                var value = activations[k];

                                deltas[k] = value * (1 - value) * expanded[k] * spread;
                            }

                            ManagedOps.Copy2D4D(layer.Delta, deltas, sample, map);
                        }
                    }

                    ManagedOps.Free(pooled, expanded, activations, deltas);
                }
                else if (layer.Type == LayerTypes.Subsampling)
                {
                    ManagedOps.Free(layer.Delta);
                    layer.Delta = new ManagedArray(layer.Activation, false);

                    var kernelSize = upper.KernelSize;

                    var upperDelta  = new ManagedArray(upper.Activation.x, upper.Activation.y, batchSize);
                    var kernel      = new ManagedArray(kernelSize, kernelSize, false);
                    var rotated     = new ManagedArray(kernelSize, kernelSize, false);
                    var accumulated = new ManagedArray(layer.Activation.x, layer.Activation.y, batchSize);
                    var partial     = new ManagedArray(layer.Activation.x, layer.Activation.y, batchSize, false);

                    var lowerMaps = layer.Activation.i;
                    var upperMaps = upper.Activation.i;

                    for (var i = 0; i < lowerMaps; i++)
                    {
                        // restart the running sum for this map
                        ManagedOps.Set(accumulated, 0.0);

                        for (var j = 0; j < upperMaps; j++)
                        {
                            // kernel (i, j) of the next layer, rotated by 180 degrees
                            ManagedOps.Copy4DIJ2D(kernel, upper.FeatureMap, i, j);
                            ManagedMatrix.Rotate180(rotated, kernel);

                            // full convolution of the next layer's delta j with the rotated kernel
                            ManagedOps.Copy4D3D(upperDelta, upper.Delta, j);
                            ManagedConvolution.Full(upperDelta, rotated, partial);
                            ManagedMatrix.Add(accumulated, partial);
                        }

                        ManagedOps.Copy3D4D(layer.Delta, accumulated, i);
                    }

                    ManagedOps.Free(upperDelta, kernel, rotated, accumulated, partial);
                }
            }

            // gradients for the feature maps and biases of the convolution layers
            for (var index = 1; index < layerCount; index++)
            {
                var layer = Layers[index];

                if (layer.Type != LayerTypes.Convolution)
                {
                    continue;
                }

                var lower = Layers[index - 1];

                ManagedOps.Free(layer.DeltaFeatureMap, layer.DeltaBias);

                layer.DeltaFeatureMap = new ManagedArray(layer.FeatureMap, false);
                layer.DeltaBias       = new ManagedArray(layer.OutputMaps, false);

                var kernelGradient = new ManagedArray(layer.FeatureMap.x, layer.FeatureMap.y, layer.FeatureMap.z, false);

                // delta of map j in this layer
                var deltaStack = new ManagedArray(layer.Activation.x, layer.Activation.y, batchSize, false);

                // activation of map i in the previous layer, and its flipped copy
                var inputStack = new ManagedArray(lower.Activation.x, lower.Activation.y, batchSize, false);
                var flipped    = new ManagedArray(lower.Activation.x, lower.Activation.y, batchSize, false);

                var currentMaps  = layer.Activation.i;
                var previousMaps = lower.Activation.i;

                for (var j = 0; j < currentMaps; j++)
                {
                    ManagedOps.Copy4D3D(deltaStack, layer.Delta, j);

                    for (var i = 0; i < previousMaps; i++)
                    {
                        ManagedOps.Copy4D3D(inputStack, lower.Activation, i);
                        ManagedMatrix.FlipAll(flipped, inputStack);
                        ManagedConvolution.Valid(flipped, deltaStack, kernelGradient);
                        ManagedMatrix.Multiply(kernelGradient, inverseBatch);

                        ManagedOps.Copy2D4DIJ(layer.DeltaFeatureMap, kernelGradient, i, j);
                    }

                    // bias gradient: sum of the delta for map j divided by the batch size
                    layer.DeltaBias[j] = ManagedMatrix.Sum(deltaStack) / batchSize;
                }

                ManagedOps.Free(kernelGradient, deltaStack, inputStack, flipped);
            }

            // gradients for the output weights and bias
            ManagedOps.Free(WeightsDelta, BiasDelta);

            WeightsDelta = new ManagedArray(Weights, false);
            BiasDelta    = new ManagedArray(Bias, false);

            var featureVectorTransposed = new ManagedArray(FeatureVector, false);

            ManagedMatrix.Transpose(featureVectorTransposed, FeatureVector);

            ManagedMatrix.Multiply(WeightsDelta, OutputDelta, featureVectorTransposed);
            ManagedMatrix.Multiply(WeightsDelta, inverseBatch);
            ManagedMatrix.Mean(BiasDelta, OutputDelta, 0);

            ManagedOps.Free(featureVectorTransposed);
        }