// Backward propagation.
// Fills D (per-layer error terms, stored output layer first), Deltas (one
// gradient array per weight layer), Cost and L2 from the values cached in
// X, Z and Y, then releases D, X and Z.
// input is only read for input.y, which is the divisor used for averaging
// (presumably the number of training examples - confirm against the caller).
public void BackPropagation(ManagedArray input)
{
    var layerCount = Weights.GetLength(0);
    var outputIndex = layerCount - 1;

    // Error term of the output layer, built from Y and Y_true.
    D[0] = ManagedMatrix.Diff(Y, Y_true);

    // Walk from the last hidden layer back to the first one.
    // D[step] belongs to weight layer (outputIndex - step).
    for (var step = 1; step <= outputIndex; step++)
    {
        var layer = outputIndex - step;
        var upstream = step - 1;

        // Scratch copy of the following layer's weights, one entry narrower in x
        // (presumably this drops the bias column - confirm against Copy2D).
        var trimmed = new ManagedArray(Weights[layer + 1].x - 1, Weights[layer + 1].y, false);
        var slope = ManagedMatrix.DSigm(Z[layer]);

        D[step] = new ManagedArray(trimmed.x, D[upstream].y, false);

        ManagedOps.Copy2D(trimmed, Weights[layer + 1], 1, 0);

        // D[step] = (D[upstream] x trimmed), then combined with the sigmoid slope
        // (presumably matrix product followed by element-wise product).
        ManagedMatrix.Multiply(D[step], D[upstream], trimmed);
        ManagedMatrix.Product(D[step], slope);

        ManagedOps.Free(trimmed, slope);
    }

    // Gradient for each weight layer: transposed error term times that layer's
    // cached input X[layer], scaled by 1 / input.y.
    for (var layer = 0; layer < layerCount; layer++)
    {
        var errorT = ManagedMatrix.Transpose(D[outputIndex - layer]);

        Deltas[layer] = new ManagedArray(Weights[layer].x, Weights[layer].y, false);

        ManagedMatrix.Multiply(Deltas[layer], errorT, X[layer]);
        ManagedMatrix.Multiply(Deltas[layer], 1.0 / input.y);

        ManagedOps.Free(errorT);
    }

    // Cost accumulates the cross-entropy terms; L2 accumulates half the
    // squared output error. Both are divided by input.y afterwards.
    Cost = 0.0;
    L2 = 0.0;

    var outputs = Y_true.Length();

    for (var i = 0; i < outputs; i++)
    {
        var err = D[0][i];
        L2 += 0.5 * (err * err);

        var target = Y_true[i];
        var predicted = Y[i];
        Cost += (-target * Math.Log(predicted) - (1 - target) * Math.Log(1 - predicted));
    }

    Cost /= input.y;
    L2 /= input.y;

    // Cleanup of the per-layer arrays used during this pass
    for (var layer = 0; layer < layerCount; layer++)
    {
        ManagedOps.Free(D[layer], X[layer], Z[layer]);
    }
}
// Backward propagation for the three-layer network.
// Produces DeltaWji and DeltaWkj (gradients for Wji and Wkj), Cost and L2
// from the values left behind by Forward (A2, Z2, Yk), then frees those
// cached arrays along with every temporary created here.
public void BackPropagation(ManagedArray training)
{
    // m = nrow(training_set)
    var rows = training.y;

    // x = cbind(array(1, c(m, 1)), training_set): input with a leading column of ones
    var ones = new ManagedArray(1, rows);
    ManagedOps.Set(ones, 1.0);

    var biasedInput = ManagedMatrix.CBind(ones, training);

    // d3 = y_k - y_matrix
    var outputError = ManagedMatrix.Diff(Yk, Y_output);

    // d2 = d3 %*% w_kj[, 2:ncol(w_kj)] * nnet_dsigmoid(z_2)
    var wkjNoBias = new ManagedArray(Wkj.x - 1, Wkj.y);
    ManagedOps.Copy2D(wkjNoBias, Wkj, 1, 0);

    var hiddenError = new ManagedArray(wkjNoBias.x, outputError.y);
    ManagedMatrix.Multiply(hiddenError, outputError, wkjNoBias);

    var sigmoidSlope = ManagedMatrix.DSigm(Z2);
    ManagedMatrix.Product(hiddenError, sigmoidSlope);

    // dWji = t(d2) %*% x
    // dWkj = t(d3) %*% a_2
    var hiddenErrorT = new ManagedArray(hiddenError.y, hiddenError.x);
    var outputErrorT = new ManagedArray(outputError.y, outputError.x);

    ManagedMatrix.Transpose(hiddenErrorT, hiddenError);
    ManagedMatrix.Transpose(outputErrorT, outputError);

    DeltaWji = new ManagedArray(Wji.x, Wji.y);
    DeltaWkj = new ManagedArray(Wkj.x, Wkj.y);

    ManagedMatrix.Multiply(DeltaWji, hiddenErrorT, biasedInput);
    ManagedMatrix.Multiply(DeltaWkj, outputErrorT, A2);

    // cost = sum(-y_matrix * log(y_k) - (1 - y_matrix) * log(1 - y_k))
    // L2 accumulates half the squared output error alongside it.
    Cost = 0.0;
    L2 = 0.0;

    var outputs = Y_output.Length();

    for (var i = 0; i < outputs; i++)
    {
        var err = outputError[i];
        L2 += 0.5 * (err * err);

        var target = Y_output[i];
        var predicted = Yk[i];
        Cost += (-target * Math.Log(predicted) - (1 - target) * Math.Log(1 - predicted));
    }

    // Average everything over the m training rows.
    Cost /= rows;
    L2 /= rows;

    var scale = 1.0 / rows;
    ManagedMatrix.Multiply(DeltaWji, scale);
    ManagedMatrix.Multiply(DeltaWkj, scale);

    // Release the temporaries created in this method
    ManagedOps.Free(hiddenError, outputError, sigmoidSlope, ones);
    ManagedOps.Free(wkjNoBias, hiddenErrorT, outputErrorT, biasedInput);

    // Release the arrays allocated in Forward
    ManagedOps.Free(A2, Yk, Z2);
}