/// <summary>
/// Forward propagation through the input, hidden and output layers.
/// </summary>
/// <remarks>
/// Results are left in the fields <c>Z2</c> (hidden pre-activation),
/// <c>A2</c> (hidden activation with a leading column of ones) and
/// <c>Yk</c> (network output). Every other intermediate allocated here is
/// released before returning.
/// </remarks>
/// <param name="training">Input samples; a column of ones is bound in front of it.</param>
public void Forward(ManagedArray training)
{
    // t(w_ji) and t(w_kj): transposed weights used by the two matrix products below
    var hiddenWeightsT = new ManagedArray(Wji.y, Wji.x);
    ManagedMatrix.Transpose(hiddenWeightsT, Wji);

    var outputWeightsT = new ManagedArray(Wkj.y, Wkj.x);
    ManagedMatrix.Transpose(outputWeightsT, Wkj);

    // x = cbind(array(1, c(nrow(training_set), 1)), training_set)
    var inputOnes = new ManagedArray(1, training.y);
    ManagedOps.Set(inputOnes, 1.0);
    var augmentedInput = ManagedMatrix.CBind(inputOnes, training);

    // z_2 = x %*% t(w_ji)
    Z2 = new ManagedArray(hiddenWeightsT.x, augmentedInput.y);
    ManagedMatrix.Multiply(Z2, augmentedInput, hiddenWeightsT);

    // z_j = nnet_sigmoid(z_2)
    var hiddenActivation = ManagedMatrix.Sigm(Z2);

    // a_2 = cbind(array(1, c(nrow(z_j), 1)), z_j)
    var hiddenOnes = new ManagedArray(1, hiddenActivation.y);
    ManagedOps.Set(hiddenOnes, 1.0);
    A2 = ManagedMatrix.CBind(hiddenOnes, hiddenActivation);

    // y_k = nnet_sigmoid(a_2 %*% t(w_kj))
    var outputPreActivation = new ManagedArray(outputWeightsT.x, A2.y);
    ManagedMatrix.Multiply(outputPreActivation, A2, outputWeightsT);
    Yk = ManagedMatrix.Sigm(outputPreActivation);

    // release the temporaries; Z2, A2 and Yk stay alive for the backward pass
    ManagedOps.Free(outputPreActivation, hiddenOnes, inputOnes);
    ManagedOps.Free(outputWeightsT, hiddenWeightsT, augmentedInput, hiddenActivation);
}
/// <summary>
/// Forward propagation through every weight layer of the network.
/// </summary>
/// <remarks>
/// For each layer the bias-augmented input is stored in <c>X[layer]</c> and the
/// product with the transposed weights in <c>Z[layer]</c>. The sigmoid of the
/// last layer replaces <c>Y</c>; the sigmoid of every other layer goes into
/// <c>Activations[layer]</c>, and all entries of <c>Activations</c> are released
/// before returning.
/// </remarks>
/// <param name="input">Input samples fed to the first layer.</param>
public void Forward(ManagedArray input)
{
    var layerCount = Weights.GetLength(0);
    var outputLayer = layerCount - 1;

    // column of ones that is bound in front of every layer's input
    var ones = new ManagedArray(1, input.y, false);
    ManagedOps.Set(ones, 1.0);

    for (var layer = 0; layer < layerCount; layer++)
    {
        // the first layer consumes the raw input, later ones the previous activation
        var source = input;

        if (layer != 0)
        {
            source = Activations[layer - 1];
        }

        var augmented = ManagedMatrix.CBind(ones, source);
        var weightsT = ManagedMatrix.Transpose(Weights[layer]);
        var preActivation = ManagedMatrix.Multiply(augmented, weightsT);

        X[layer] = augmented;
        Z[layer] = preActivation;

        if (layer == outputLayer)
        {
            // network output: drop the previous result before storing the new one
            ManagedOps.Free(Y);
            Y = ManagedMatrix.Sigm(preActivation);
        }
        else
        {
            Activations[layer] = ManagedMatrix.Sigm(preActivation);
        }

        ManagedOps.Free(weightsT);
    }

    // hidden activations are only needed while propagating forward
    foreach (var activation in Activations)
    {
        ManagedOps.Free(activation);
    }

    ManagedOps.Free(ones);
}
/// <summary>
/// Backward propagation: computes the weight gradients <c>DeltaWji</c> and
/// <c>DeltaWkj</c>, the cross-entropy <c>Cost</c> and the squared-error
/// <c>L2</c> for the outputs produced by the preceding forward pass.
/// </summary>
/// <remarks>
/// Reads the fields <c>Yk</c>, <c>A2</c> and <c>Z2</c> and releases them at the
/// end, so a forward pass must have filled them before every call.
/// The probabilities passed to <see cref="Math.Log(double)"/> are clamped to
/// [1e-15, 1 - 1e-15]: a saturated sigmoid output of exactly 0 or 1 would
/// otherwise yield <c>0 * -Infinity = NaN</c> and poison <c>Cost</c>.
/// The gradients and <c>L2</c> use the unclamped values.
/// </remarks>
/// <param name="training">Input samples; <c>training.y</c> is used as the sample count m.</param>
public void BackPropagation(ManagedArray training)
{
    // smallest distance from 0 and 1 allowed inside the logarithms of the cost
    const double Epsilon = 1e-15;

    // add bias column to input layer
    var InputBias = new ManagedArray(1, training.y);
    ManagedOps.Set(InputBias, 1.0);

    // x = cbind(array(1, c(nrow(training_set), 1)), training_set)
    var x = ManagedMatrix.CBind(InputBias, training);

    // compute intermediate delta values per layer

    // d3 = y_k - y_matrix
    var D3 = ManagedMatrix.Diff(Yk, Y_output);

    // d2 = d3 %*% w_kj[, 2:ncol(w_kj)] * nnet_dsigmoid(z_2)
    // sWkj is w_kj without its first (bias) column
    var sWkj = new ManagedArray(Wkj.x - 1, Wkj.y);
    ManagedOps.Copy2D(sWkj, Wkj, 1, 0);

    var D2 = new ManagedArray(sWkj.x, D3.y);
    ManagedMatrix.Multiply(D2, D3, sWkj);

    var DZ2 = ManagedMatrix.DSigm(Z2);
    ManagedMatrix.Product(D2, DZ2);

    // dWji = (t(d2) %*% x)
    // dWkj = (t(d3) %*% a_2)
    var tD2 = new ManagedArray(D2.y, D2.x);
    var tD3 = new ManagedArray(D3.y, D3.x);
    ManagedMatrix.Transpose(tD2, D2);
    ManagedMatrix.Transpose(tD3, D3);

    DeltaWji = new ManagedArray(Wji.x, Wji.y);
    DeltaWkj = new ManagedArray(Wkj.x, Wkj.y);

    ManagedMatrix.Multiply(DeltaWji, tD2, x);
    ManagedMatrix.Multiply(DeltaWkj, tD3, A2);

    // cost = sum(-y_matrix * log(y_k) - (1 - y_matrix) * log(1 - y_k))
    Cost = 0.0;
    L2 = 0.0;

    var count = Y_output.Length();

    for (var i = 0; i < count; i++)
    {
        L2 += 0.5 * (D3[i] * D3[i]);

        // keep the probability strictly inside (0, 1) so neither log returns -Infinity
        var probability = Math.Min(Math.Max(Yk[i], Epsilon), 1.0 - Epsilon);

        Cost += (-Y_output[i] * Math.Log(probability) - (1 - Y_output[i]) * Math.Log(1 - probability));
    }

    // cost = cost / m
    // dWji = dWji / m
    // dWkj = dWkj / m
    Cost /= training.y;
    L2 /= training.y;

    ManagedMatrix.Multiply(DeltaWji, 1.0 / training.y);
    ManagedMatrix.Multiply(DeltaWkj, 1.0 / training.y);

    // cleanup
    ManagedOps.Free(D2, D3, DZ2, InputBias);
    ManagedOps.Free(sWkj, tD2, tD3, x);

    // cleanup of arrays allocated in Forward
    ManagedOps.Free(A2, Yk, Z2);
}
// Backward pass of the convolutional network: computes the output error, the
// per-layer deltas and the gradients for a batch.
//
// Although the original note spoke of updating the network weights, this method
// never assigns Weights, Bias or any layer's FeatureMap / Bias. It only fills
// the error and gradient buffers: OutputError, OutputDelta, L,
// FeatureVectorDelta, WeightsTransposed, Layers[*].Delta,
// Layers[*].DeltaFeatureMap, Layers[*].DeltaBias, WeightsDelta and BiasDelta.
// NOTE(review): presumably a separate step applies these gradients - confirm.
//
// batch: expected outputs, compared element by element with Output;
//        batch.x is the divisor of the loss L.
//
// Reads Output, FeatureVector and Layers[*].Activation.
// NOTE(review): presumably these are left by a preceding FeedForward call - confirm.
public void BackPropagation(ManagedArray batch)
{
    var n = Layers.Count;
    var last = n - 1;

    // third dimension of the last layer's activation, used below as the batch size
    var batchz = Layers[last].Activation.z;

    // backprop deltas
    // release the buffers of the previous call before allocating new ones
    ManagedOps.Free(OutputDelta, OutputError);

    OutputDelta = new ManagedArray(Output, false);
    OutputError = new ManagedArray(Output, false);

    for (var x = 0; x < Output.Length(); x++)
    {
        // error
        OutputError[x] = Output[x] - batch[x];

        // output delta: error times Output * (1 - Output), the sigmoid derivative
        // expressed through the sigmoid's own output
        OutputDelta[x] = OutputError[x] * (Output[x] * (1 - Output[x]));
    }

    // Loss Function: half the sum of squared errors, divided by batch.x
    L = 0.5 * ManagedMatrix.SquareSum(OutputError) / batch.x;

    ManagedOps.Free(WeightsTransposed, FeatureVectorDelta);

    FeatureVectorDelta = new ManagedArray(FeatureVector, false);
    WeightsTransposed = new ManagedArray(Weights, false);

    // feature vector delta = t(Weights) * OutputDelta
    // (assumes the first argument of Transpose / Multiply is the destination - TODO confirm)
    ManagedMatrix.Transpose(WeightsTransposed, Weights);
    ManagedMatrix.Multiply(FeatureVectorDelta, WeightsTransposed, OutputDelta);

    // only conv layers has sigm function, so the sigmoid derivative is applied
    // only when the last layer is a convolution layer
    if (Layers[last].Type == LayerTypes.Convolution)
    {
        for (var x = 0; x < FeatureVectorDelta.Length(); x++)
        {
            FeatureVectorDelta[x] = FeatureVectorDelta[x] * FeatureVector[x] * (1 - FeatureVector[x]);
        }
    }

    // reshape feature vector deltas into output map style
    var MapSize = Layers[last].Activation.x * Layers[last].Activation.y;
    var temp1D = new ManagedArray(1, MapSize, false);
    var temp2D = new ManagedArray(Layers[last].Activation.x, Layers[last].Activation.y, false);

    ManagedOps.Free(Layers[last].Delta);
    Layers[last].Delta = new ManagedArray(Layers[last].Activation, false);

    // j runs over the maps of the last layer, ii over the batch entries
    for (var j = 0; j < Layers[last].Activation.i; j++)
    {
        for (var ii = 0; ii < batchz; ii++)
        {
            // take MapSize values starting at offset j * MapSize for entry ii
            ManagedOps.Copy2D(temp1D, FeatureVectorDelta, ii, j * MapSize);

            // view them as a 2D map, transpose, and store as map (ii, j) of the delta
            temp1D.Reshape(Layers[last].Activation.x, Layers[last].Activation.y);
            ManagedMatrix.Transpose(temp2D, temp1D);
            ManagedOps.Copy2D4D(Layers[last].Delta, temp2D, ii, j);

            // restore the 1 x MapSize shape for the next Copy2D
            temp1D.Reshape(1, MapSize);
        }
    }

    ManagedOps.Free(temp1D, temp2D);

    // propagate the deltas from the layer before the last one down to layer 0
    for (var l = n - 2; l >= 0; l--)
    {
        var next = l + 1;

        if (Layers[l].Type == LayerTypes.Convolution)
        {
            ManagedOps.Free(Layers[l].Delta);
            Layers[l].Delta = new ManagedArray(Layers[l].Activation, false);

            // size of the next layer's map after expanding it by that layer's Scale
            var xx = Layers[next].Scale * Layers[next].Activation.x;
            var yy = Layers[next].Scale * Layers[next].Activation.y;

            // 2D scratch buffers reused for every (map, batch entry) pair
            var FeatureMap = new ManagedArray(Layers[next].Activation.x, Layers[next].Activation.y, false);
            var FeatureMapExpanded = new ManagedArray(xx, yy, false);
            var Activation = new ManagedArray(xx, yy, false);
            var Delta = new ManagedArray(xx, yy, false);

            // each expanded delta value is weighted by 1 / Scale^2
            var Scale = (1.0 / (Layers[next].Scale * Layers[next].Scale));

            for (var j = 0; j < Layers[l].Activation.i; j++)
            {
                for (var z = 0; z < batchz; z++)
                {
                    // expand the next layer's delta map (z, j) by Scale in both directions
                    ManagedOps.Copy4D2D(FeatureMap, Layers[next].Delta, z, j);
                    ManagedMatrix.Expand(FeatureMap, Layers[next].Scale, Layers[next].Scale, FeatureMapExpanded);
                    ManagedOps.Copy4D2D(Activation, Layers[l].Activation, z, j);

                    // delta = a * (1 - a) * expanded next delta / Scale^2
                    for (var x = 0; x < Delta.Length(); x++)
                    {
                        Delta[x] = Activation[x] * (1 - Activation[x]) * FeatureMapExpanded[x] * Scale;
                    }

                    ManagedOps.Copy2D4D(Layers[l].Delta, Delta, z, j);
                }
            }

            ManagedOps.Free(FeatureMap, FeatureMapExpanded, Activation, Delta);
        }
        else if (Layers[l].Type == LayerTypes.Subsampling)
        {
            ManagedOps.Free(Layers[l].Delta);
            Layers[l].Delta = new ManagedArray(Layers[l].Activation, false);

            // scratch buffers; the kernel buffers are sized by the next layer's KernelSize
            // NOTE(review): the trailing 'false' presumably skips initialisation of the
            // new array - confirm against the ManagedArray constructor
            var Delta = new ManagedArray(Layers[next].Activation.x, Layers[next].Activation.y, batchz);
            var FeatureMap = new ManagedArray(Layers[next].KernelSize, Layers[next].KernelSize, false);
            var rot180 = new ManagedArray(Layers[next].KernelSize, Layers[next].KernelSize, false);
            var z = new ManagedArray(Layers[l].Activation.x, Layers[l].Activation.y, batchz);
            var ztemp = new ManagedArray(Layers[l].Activation.x, Layers[l].Activation.y, batchz, false);

            // i runs over the maps of this layer, j over the maps of the next layer
            for (var i = 0; i < Layers[l].Activation.i; i++)
            {
                // z accumulates the contributions of every map j of the next layer
                ManagedOps.Set(z, 0.0);

                for (var j = 0; j < Layers[next].Activation.i; j++)
                {
                    // kernel (i, j) of the next layer, rotated by 180 degrees
                    ManagedOps.Copy4DIJ2D(FeatureMap, Layers[next].FeatureMap, i, j);
                    ManagedMatrix.Rotate180(rot180, FeatureMap);

                    // full convolution of the next layer's delta j with the rotated kernel
                    ManagedOps.Copy4D3D(Delta, Layers[next].Delta, j);
                    ManagedConvolution.Full(Delta, rot180, ztemp);
                    ManagedMatrix.Add(z, ztemp);
                }

                ManagedOps.Copy3D4D(Layers[l].Delta, z, i);
            }

            ManagedOps.Free(Delta, FeatureMap, rot180, z, ztemp);
        }
    }

    // calc gradients (only convolution layers get DeltaFeatureMap / DeltaBias)
    for (var l = 1; l < n; l++)
    {
        var prev = l - 1;

        if (Layers[l].Type == LayerTypes.Convolution)
        {
            ManagedOps.Free(Layers[l].DeltaFeatureMap, Layers[l].DeltaBias);

            Layers[l].DeltaFeatureMap = new ManagedArray(Layers[l].FeatureMap, false);
            Layers[l].DeltaBias = new ManagedArray(Layers[l].OutputMaps, false);

            // scratch buffer for the gradient of a single kernel (i, j)
            var FeatureMapDelta = new ManagedArray(Layers[l].FeatureMap.x, Layers[l].FeatureMap.y, Layers[l].FeatureMap.z, false);

            // d[j]
            var dtemp = new ManagedArray(Layers[l].Activation.x, Layers[l].Activation.y, batchz, false);

            // a[i] and flipped
            var atemp = new ManagedArray(Layers[prev].Activation.x, Layers[prev].Activation.y, batchz, false);
            var ftemp = new ManagedArray(Layers[prev].Activation.x, Layers[prev].Activation.y, batchz, false);

            for (var j = 0; j < Layers[l].Activation.i; j++)
            {
                ManagedOps.Copy4D3D(dtemp, Layers[l].Delta, j);

                for (var i = 0; i < Layers[prev].Activation.i; i++)
                {
                    // kernel gradient (i, j): valid convolution of the flipped previous
                    // activation i with delta j, divided by the batch size
                    ManagedOps.Copy4D3D(atemp, Layers[prev].Activation, i);
                    ManagedMatrix.FlipAll(ftemp, atemp);
                    ManagedConvolution.Valid(ftemp, dtemp, FeatureMapDelta);
                    ManagedMatrix.Multiply(FeatureMapDelta, 1.0 / batchz);

                    ManagedOps.Copy2D4DIJ(Layers[l].DeltaFeatureMap, FeatureMapDelta, i, j);
                }

                // bias gradient of map j: sum of its delta divided by the batch size
                Layers[l].DeltaBias[j] = ManagedMatrix.Sum(dtemp) / batchz;
            }

            ManagedOps.Free(FeatureMapDelta, dtemp, atemp, ftemp);
        }
    }

    // gradients of the output perceptrons:
    // WeightsDelta = OutputDelta * t(FeatureVector) / batchz
    var FeatureVectorTransposed = new ManagedArray(FeatureVector, false);
    ManagedMatrix.Transpose(FeatureVectorTransposed, FeatureVector);

    ManagedOps.Free(WeightsDelta, BiasDelta);

    WeightsDelta = new ManagedArray(Weights, false);
    BiasDelta = new ManagedArray(Bias, false);

    ManagedMatrix.Multiply(WeightsDelta, OutputDelta, FeatureVectorTransposed);
    ManagedMatrix.Multiply(WeightsDelta, 1.0 / batchz);

    // BiasDelta = mean of OutputDelta
    // NOTE(review): the meaning of the last argument (0) is not visible here - confirm
    ManagedMatrix.Mean(BiasDelta, OutputDelta, 0);

    ManagedOps.Free(FeatureVectorTransposed);
}
// Compute Forward Transform on 3D Input
//
// Runs the batch through every convolution / subsampling layer, flattens the
// last layer's maps into FeatureVector and stores the sigmoid of the output
// perceptrons in Output.
//
// batch: input data; batch.z is used throughout as the number of batch entries.
// pool:  selects how a subsampling layer stores its result: true calls
//        ManagedOps.Pool3D4D, false (the default) calls ManagedOps.Copy3D4D,
//        both with the layer's Scale.
//
// Writes Layers[*].Activation, FeatureVector and Output; the previous contents
// of each are released with ManagedOps.Free before being replaced.
public void FeedForward(ManagedArray batch, bool pool = false)
{
    var n = Layers.Count;
    var last = n - 1;

    // number of maps produced by the previous layer; the input counts as one map
    var InputMaps = 1;

    // layer 0 holds the input batch itself
    ManagedOps.Free(Layers[0].Activation);
    Layers[0].Activation = new ManagedArray(batch, false);

    ManagedOps.Copy4D3D(Layers[0].Activation, batch, 0);

    for (var l = 1; l < n; l++)
    {
        var prev = l - 1;

        if (Layers[l].Type == LayerTypes.Convolution)
        {
            // output size of a valid convolution: input size - kernel size + 1
            var zx = Layers[prev].Activation.x - Layers[l].KernelSize + 1;
            var zy = Layers[prev].Activation.y - Layers[l].KernelSize + 1;
            var zz = batch.z;

            ManagedOps.Free(Layers[l].Activation);
            Layers[l].Activation = new ManagedArray(zx, zy, zz, Layers[l].OutputMaps, 1, false);

            // scratch buffers: one input map of the previous layer and one kernel
            var Activation = new ManagedArray(Layers[prev].Activation.x, Layers[prev].Activation.y, batch.z, false);
            var FeatureMap = new ManagedArray(Layers[l].KernelSize, Layers[l].KernelSize, false);

            // create temp output map
            // NOTE(review): the trailing 'false' presumably skips initialisation of the
            // new array - confirm against the ManagedArray constructor
            var z = new ManagedArray(zx, zy, zz);
            var ztemp = new ManagedArray(zx, zy, zz, false);

            // !!below can probably be handled by insane matrix operations
            for (var j = 0; j < Layers[l].OutputMaps; j++) // for each output map
            {
                // z accumulates the convolutions of every input map i
                ManagedOps.Set(z, 0.0);

                for (var i = 0; i < InputMaps; i++)
                {
                    // copy Layers
                    ManagedOps.Copy4D3D(Activation, Layers[prev].Activation, i);
                    ManagedOps.Copy4DIJ2D(FeatureMap, Layers[l].FeatureMap, i, j);

                    // convolve with corresponding kernel and add to temp output map
                    ManagedConvolution.Valid(Activation, FeatureMap, ztemp);
                    ManagedMatrix.Add(z, ztemp);
                }

                // add bias, pass through nonlinearity
                ManagedMatrix.Add(z, Layers[l].Bias[j]);
                var sigm = ManagedMatrix.Sigm(z);
                ManagedOps.Copy3D4D(Layers[l].Activation, sigm, j);
                ManagedOps.Free(sigm);
            }

            ManagedOps.Free(Activation, FeatureMap, z, ztemp);

            // the following layer sees this layer's output maps as its inputs
            InputMaps = Layers[l].OutputMaps;
        }
        else if (Layers[l].Type == LayerTypes.Subsampling)
        {
            // downsample

            // generate downsampling kernel: Scale x Scale, every entry 1 / Scale^2
            var scale = (double)(Layers[l].Scale * Layers[l].Scale);
            var FeatureMap = new ManagedArray(Layers[l].Scale, Layers[l].Scale, false);
            ManagedOps.Set(FeatureMap, 1.0 / scale);

            // the map count is unchanged; x and y are divided by Scale
            ManagedOps.Free(Layers[l].Activation);
            Layers[l].Activation = new ManagedArray(Layers[prev].Activation.x / Layers[l].Scale, Layers[prev].Activation.y / Layers[l].Scale, batch.z, InputMaps, 1);

            var Activation = new ManagedArray(Layers[prev].Activation.x, Layers[prev].Activation.y, batch.z, false);

            // holds the valid convolution result before it is reduced by Scale
            var z = new ManagedArray(Layers[prev].Activation.x - Layers[l].Scale + 1, Layers[prev].Activation.y - Layers[l].Scale + 1, batch.z, false);

            for (var j = 0; j < InputMaps; j++)
            {
                // copy Layers
                ManagedOps.Copy4D3D(Activation, Layers[prev].Activation, j);

                // Subsample
                ManagedConvolution.Valid(Activation, FeatureMap, z);

                // store the result as map j, either pooled or copied, using Scale
                if (pool)
                {
                    ManagedOps.Pool3D4D(Layers[l].Activation, z, j, Layers[l].Scale);
                }
                else
                {
                    ManagedOps.Copy3D4D(Layers[l].Activation, z, j, Layers[l].Scale);
                }
            }

            ManagedOps.Free(Activation, FeatureMap, z);
        }
    }

    // number of values in one map of the last layer
    var MapSize = Layers[last].Activation.x * Layers[last].Activation.y;

    ManagedOps.Free(FeatureVector);
    FeatureVector = new ManagedArray(batch.z, MapSize * Layers[last].Activation.i);

    var temp1D = new ManagedArray(Layers[last].Activation.y, Layers[last].Activation.x, false);
    var temp2D = new ManagedArray(Layers[last].Activation.x, Layers[last].Activation.y, false);

    // concatenate all end layer feature maps into vector
    for (var j = 0; j < Layers[last].Activation.i; j++)
    {
        for (var ii = 0; ii < batch.z; ii++)
        {
            // Use Row-major in flattening the feature map
            ManagedOps.Copy4D2D(temp2D, Layers[last].Activation, ii, j);
            ManagedMatrix.Transpose(temp1D, temp2D);

            // NOTE(review): temp1D stays 1 x MapSize after the first pass and is never
            // reshaped back (BackPropagation restores its buffer every iteration);
            // presumably Transpose does not depend on the destination's shape - confirm
            temp1D.Reshape(1, MapSize);
            ManagedOps.Copy2DOffset(FeatureVector, temp1D, ii, j * MapSize);
        }
    }

    // Weights * FeatureVector
    // (assumes the first argument of Multiply is the destination - TODO confirm)
    var WeightsFeatureVector = new ManagedArray(FeatureVector.x, Weights.y, false);
    ManagedMatrix.Multiply(WeightsFeatureVector, Weights, FeatureVector);

    // expand Bias batch.z times so it can be added to every batch entry
    var repmat = new ManagedArray(batch.z, Bias.Length(), false);
    ManagedMatrix.Expand(Bias, batch.z, 1, repmat);
    ManagedMatrix.Add(WeightsFeatureVector, repmat);

    // feedforward into output perceptrons
    ManagedOps.Free(Output);
    Output = ManagedMatrix.Sigm(WeightsFeatureVector);

    ManagedOps.Free(WeightsFeatureVector, repmat, temp1D, temp2D);
}