// Backward propagation
        public void BackPropagation(ManagedArray training)
        {
            // add bias column to input layer
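            // (a constant 1 per training example so the bias weights are learned like ordinary weights)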
            var InputBias = new ManagedArray(1, training.y);

            ManagedOps.Set(InputBias, 1.0);

            // x = cbind(array(1, c(nrow(training_set), 1)), training_set)
            var x = ManagedMatrix.CBind(InputBias, training);

            // compute intermediate delta values per layer

            // d3 = y_k - y_matrix
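            // output-layer error: network output Yk minus the target matrix Y_output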
            var D3 = ManagedMatrix.Diff(Yk, Y_output);

            //  d2 = d3 %*% w_kj[, 2:ncol(w_kj)] * nnet_dsigmoid(z_2)
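            // sWkj holds Wkj without its first (bias) column, i.e. w_kj[, 2:ncol(w_kj)]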
            var sWkj = new ManagedArray(Wkj.x - 1, Wkj.y);

            ManagedOps.Copy2DOffsetReverse(sWkj, Wkj, 1, 0);

            var D2 = new ManagedArray(sWkj.x, D3.y);

            ManagedMatrix.Multiply(D2, D3, sWkj);

            var DZ2 = ManagedMatrix.DSigm(Z2);
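            // DZ2 = nnet_dsigmoid(z_2); the in-place Multiply below applies it to D2 element-wise (the * above)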

            ManagedMatrix.Multiply(D2, DZ2);

            // dWji = (t(d2) %*% x)
            // dWkj = (t(d3) %*% a_2)
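            // transpose the deltas so the products below give gradients with the same shape as Wji and Wkj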
            var tD2 = new ManagedArray(D2.y, D2.x);
            var tD3 = new ManagedArray(D3.y, D3.x);

            ManagedMatrix.Transpose(tD2, D2);
            ManagedMatrix.Transpose(tD3, D3);

            DeltaWji = new ManagedArray(Wji.x, Wji.y);
            DeltaWkj = new ManagedArray(Wkj.x, Wkj.y);

            ManagedMatrix.Multiply(DeltaWji, tD2, x);
            ManagedMatrix.Multiply(DeltaWkj, tD3, A2);
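            // A2 is the hidden-layer activation (a_2) computed by the preceding Forward pass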

            // cost = sum(-y_matrix * log(y_k) - (1 - y_matrix) * log(1 - y_k))
            Cost = 0.0;
            L2   = 0.0;

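            // accumulate the cross-entropy cost and the squared output error (L2) over every output element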
            for (int i = 0; i < Y_output.Length(); i++)
            {
                L2   += D3[i] * D3[i];
                Cost += (-Y_output[i] * Math.Log(Yk[i]) - (1.0 - Y_output[i]) * Math.Log(1.0 - Yk[i]));
            }

            // cost = cost / m
            // dWji = dWji / m
            // dWkj = dWkj / m
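            // m = training.y, the number of training examples (rows)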
            Cost /= training.y;
            L2   /= training.y;

            ManagedMatrix.Multiply(DeltaWji, 1.0 / training.y);
            ManagedMatrix.Multiply(DeltaWkj, 1.0 / training.y);

            // cleanup
            ManagedOps.Free(D2, D3, DZ2, InputBias);
            ManagedOps.Free(sWkj, tD2, tD3, x);

            // cleanup of arrays allocated in Forward
            ManagedOps.Free(A2, Yk, Z2);
        }