// Back-propagates the output error through the network and stores the resulting
// deltas and gradients.
//
// Reads Output, FeatureVector and every Layers[*].Activation, all of which are
// assigned by FeedForward, so a forward pass is presumably expected to have run first.
//
// batch: target values. They are compared element-by-element with Output, and
//        batch.x is the divisor of the loss.
//
// Writes: OutputError, OutputDelta, L, FeatureVectorDelta, WeightsTransposed,
// Layers[*].Delta (last layer plus every earlier convolution/subsampling layer),
// DeltaFeatureMap/DeltaBias of convolution layers with index >= 1, WeightsDelta
// and BiasDelta. Weights, Bias and the layers' FeatureMap are only read here;
// applying the gradients is not part of this method.
public void BackPropagation(ManagedArray batch)
{
    var layerCount = Layers.Count;
    var lastIndex = layerCount - 1;
    var lastLayer = Layers[lastIndex];

    // Third dimension of the activation volumes (presumably the number of samples in the batch).
    var batchDepth = lastLayer.Activation.z;

    // ---- output error and output delta ----
    ManagedOps.Free(OutputDelta, OutputError);

    OutputDelta = new ManagedArray(Output, false);
    OutputError = new ManagedArray(Output, false);

    var outputCount = Output.Length();

    for (var k = 0; k < outputCount; k++)
    {
        var predicted = Output[k];

        // error = prediction - target
        OutputError[k] = predicted - batch[k];

        // delta = error * o * (1 - o)
        OutputDelta[k] = OutputError[k] * (predicted * (1 - predicted));
    }

    // Loss: half the squared error sum divided by batch.x
    L = 0.5 * ManagedMatrix.SquareSum(OutputError) / batch.x;

    // ---- feature vector delta: transpose(Weights) * OutputDelta ----
    ManagedOps.Free(WeightsTransposed, FeatureVectorDelta);

    FeatureVectorDelta = new ManagedArray(FeatureVector, false);
    WeightsTransposed = new ManagedArray(Weights, false);

    ManagedMatrix.Transpose(WeightsTransposed, Weights);
    ManagedMatrix.Multiply(FeatureVectorDelta, WeightsTransposed, OutputDelta);

    // Only convolution layers pass their output through the sigmoid, so the
    // f * (1 - f) factor is applied only when the last layer is one.
    if (lastLayer.Type == LayerTypes.Convolution)
    {
        var featureCount = FeatureVectorDelta.Length();

        for (var k = 0; k < featureCount; k++)
        {
            var feature = FeatureVector[k];

            FeatureVectorDelta[k] = FeatureVectorDelta[k] * feature * (1 - feature);
        }
    }

    // ---- reshape the feature vector deltas back into the last layer's map layout ----
    var mapX = lastLayer.Activation.x;
    var mapY = lastLayer.Activation.y;
    var mapSize = mapX * mapY;

    var flatMap = new ManagedArray(1, mapSize, false);
    var squareMap = new ManagedArray(mapX, mapY, false);

    ManagedOps.Free(lastLayer.Delta);
    lastLayer.Delta = new ManagedArray(lastLayer.Activation, false);

    var lastMapCount = lastLayer.Activation.i;

    for (var map = 0; map < lastMapCount; map++)
    {
        var offset = map * mapSize;

        for (var slice = 0; slice < batchDepth; slice++)
        {
            ManagedOps.Copy2D(flatMap, FeatureVectorDelta, slice, offset);

            // view the flat strip as a 2D map, transpose it, then store it
            flatMap.Reshape(mapX, mapY);
            ManagedMatrix.Transpose(squareMap, flatMap);
            ManagedOps.Copy2D4D(lastLayer.Delta, squareMap, slice, map);

            // restore the strip shape for the next Copy2D
            flatMap.Reshape(1, mapSize);
        }
    }

    ManagedOps.Free(flatMap, squareMap);

    // ---- propagate deltas from the second-to-last layer down to layer 0 ----
    for (var l = layerCount - 2; l >= 0; l--)
    {
        var layer = Layers[l];
        var nextLayer = Layers[l + 1];

        if (layer.Type == LayerTypes.Convolution)
        {
            ManagedOps.Free(layer.Delta);
            layer.Delta = new ManagedArray(layer.Activation, false);

            var poolScale = nextLayer.Scale;
            var nextX = nextLayer.Activation.x;
            var nextY = nextLayer.Activation.y;
            var expandedX = poolScale * nextX;
            var expandedY = poolScale * nextY;

            var pooledDelta = new ManagedArray(nextX, nextY, false);
            var expandedDelta = new ManagedArray(expandedX, expandedY, false);
            var activationMap = new ManagedArray(expandedX, expandedY, false);
            var deltaMap = new ManagedArray(expandedX, expandedY, false);

            // 1 / (scale * scale)
            var normalizer = (1.0 / (poolScale * poolScale));

            var mapCount = layer.Activation.i;

            for (var map = 0; map < mapCount; map++)
            {
                for (var slice = 0; slice < batchDepth; slice++)
                {
                    // expand the next layer's delta by the scale in both directions
                    ManagedOps.Copy4D2D(pooledDelta, nextLayer.Delta, slice, map);
                    ManagedMatrix.Expand(pooledDelta, poolScale, poolScale, expandedDelta);

                    ManagedOps.Copy4D2D(activationMap, layer.Activation, slice, map);

                    var cellCount = deltaMap.Length();

                    for (var k = 0; k < cellCount; k++)
                    {
                        var act = activationMap[k];

                        deltaMap[k] = act * (1 - act) * expandedDelta[k] * normalizer;
                    }

                    ManagedOps.Copy2D4D(layer.Delta, deltaMap, slice, map);
                }
            }

            ManagedOps.Free(pooledDelta, expandedDelta, activationMap, deltaMap);
        }
        else if (layer.Type == LayerTypes.Subsampling)
        {
            ManagedOps.Free(layer.Delta);
            layer.Delta = new ManagedArray(layer.Activation, false);

            var kernelSize = nextLayer.KernelSize;

            var nextDelta = new ManagedArray(nextLayer.Activation.x, nextLayer.Activation.y, batchDepth);
            var kernel = new ManagedArray(kernelSize, kernelSize, false);
            var rotatedKernel = new ManagedArray(kernelSize, kernelSize, false);
            var accumulated = new ManagedArray(layer.Activation.x, layer.Activation.y, batchDepth);
            var contribution = new ManagedArray(layer.Activation.x, layer.Activation.y, batchDepth, false);

            var inputMaps = layer.Activation.i;
            var outputMaps = nextLayer.Activation.i;

            for (var i = 0; i < inputMaps; i++)
            {
                ManagedOps.Set(accumulated, 0.0);

                for (var j = 0; j < outputMaps; j++)
                {
                    // full convolution of the next layer's delta with the 180-degree rotated kernel (i, j)
                    ManagedOps.Copy4DIJ2D(kernel, nextLayer.FeatureMap, i, j);
                    ManagedMatrix.Rotate180(rotatedKernel, kernel);

                    ManagedOps.Copy4D3D(nextDelta, nextLayer.Delta, j);
                    ManagedConvolution.Full(nextDelta, rotatedKernel, contribution);

                    ManagedMatrix.Add(accumulated, contribution);
                }

                ManagedOps.Copy3D4D(layer.Delta, accumulated, i);
            }

            ManagedOps.Free(nextDelta, kernel, rotatedKernel, accumulated, contribution);
        }
    }

    // ---- gradients of the convolution layers ----
    var inverseDepth = 1.0 / batchDepth;

    for (var l = 1; l < layerCount; l++)
    {
        var layer = Layers[l];

        if (layer.Type != LayerTypes.Convolution)
        {
            continue;
        }

        var prevLayer = Layers[l - 1];

        ManagedOps.Free(layer.DeltaFeatureMap, layer.DeltaBias);

        layer.DeltaFeatureMap = new ManagedArray(layer.FeatureMap, false);
        layer.DeltaBias = new ManagedArray(layer.OutputMaps, false);

        var kernelGradient = new ManagedArray(layer.FeatureMap.x, layer.FeatureMap.y, layer.FeatureMap.z, false);

        // delta volume of output map j
        var deltaVolume = new ManagedArray(layer.Activation.x, layer.Activation.y, batchDepth, false);

        // activation volume of input map i and its flipped copy
        var inputVolume = new ManagedArray(prevLayer.Activation.x, prevLayer.Activation.y, batchDepth, false);
        var flippedInput = new ManagedArray(prevLayer.Activation.x, prevLayer.Activation.y, batchDepth, false);

        var outputMaps = layer.Activation.i;
        var inputMaps = prevLayer.Activation.i;

        for (var j = 0; j < outputMaps; j++)
        {
            ManagedOps.Copy4D3D(deltaVolume, layer.Delta, j);

            for (var i = 0; i < inputMaps; i++)
            {
                ManagedOps.Copy4D3D(inputVolume, prevLayer.Activation, i);
                ManagedMatrix.FlipAll(flippedInput, inputVolume);

                // kernel gradient: valid convolution of the flipped input with the delta, scaled by 1 / depth
                ManagedConvolution.Valid(flippedInput, deltaVolume, kernelGradient);
                ManagedMatrix.Multiply(kernelGradient, inverseDepth);

                ManagedOps.Copy2D4DIJ(layer.DeltaFeatureMap, kernelGradient, i, j);
            }

            layer.DeltaBias[j] = ManagedMatrix.Sum(deltaVolume) / batchDepth;
        }

        ManagedOps.Free(kernelGradient, deltaVolume, inputVolume, flippedInput);
    }

    // ---- gradients of the output weights and bias ----
    var featureVectorT = new ManagedArray(FeatureVector, false);
    ManagedMatrix.Transpose(featureVectorT, FeatureVector);

    ManagedOps.Free(WeightsDelta, BiasDelta);

    WeightsDelta = new ManagedArray(Weights, false);
    BiasDelta = new ManagedArray(Bias, false);

    ManagedMatrix.Multiply(WeightsDelta, OutputDelta, featureVectorT);
    ManagedMatrix.Multiply(WeightsDelta, inverseDepth);
    ManagedMatrix.Mean(BiasDelta, OutputDelta, 0);

    ManagedOps.Free(featureVectorT);
}
// Runs the forward pass for one batch and stores the result in Output.
//
// batch: input volume. It is copied into Layers[0].Activation, and batch.z is
//        used as the third dimension of every activation volume (presumably
//        the number of samples in the batch).
// pool:  when true the subsampled maps are written with ManagedOps.Pool3D4D,
//        otherwise with ManagedOps.Copy3D4D.
//
// Writes: Layers[*].Activation, FeatureVector and Output. The previous contents
// of each are released with ManagedOps.Free before being replaced.
public void FeedForward(ManagedArray batch, bool pool = false)
{
    var layerCount = Layers.Count;
    var lastIndex = layerCount - 1;

    // number of maps produced by the previous layer; the input layer provides one
    var mapsIn = 1;

    // ---- input layer: copy of the batch ----
    ManagedOps.Free(Layers[0].Activation);
    Layers[0].Activation = new ManagedArray(batch, false);
    ManagedOps.Copy4D3D(Layers[0].Activation, batch, 0);

    var batchDepth = batch.z;

    for (var l = 1; l < layerCount; l++)
    {
        var layer = Layers[l];
        var prevLayer = Layers[l - 1];

        if (layer.Type == LayerTypes.Convolution)
        {
            var kernelSize = layer.KernelSize;
            var inX = prevLayer.Activation.x;
            var inY = prevLayer.Activation.y;

            // size of a "valid" convolution result
            var outX = inX - kernelSize + 1;
            var outY = inY - kernelSize + 1;

            var outputMaps = layer.OutputMaps;

            ManagedOps.Free(layer.Activation);
            layer.Activation = new ManagedArray(outX, outY, batchDepth, outputMaps, 1, false);

            var inputVolume = new ManagedArray(inX, inY, batchDepth, false);
            var kernel = new ManagedArray(kernelSize, kernelSize, false);

            // running sum for the current output map, and the per-input-map term added to it
            var accumulated = new ManagedArray(outX, outY, batchDepth);
            var term = new ManagedArray(outX, outY, batchDepth, false);

            for (var j = 0; j < outputMaps; j++)
            {
                ManagedOps.Set(accumulated, 0.0);

                for (var i = 0; i < mapsIn; i++)
                {
                    ManagedOps.Copy4D3D(inputVolume, prevLayer.Activation, i);
                    ManagedOps.Copy4DIJ2D(kernel, layer.FeatureMap, i, j);

                    // convolve input map i with kernel (i, j) and add it to the running sum
                    ManagedConvolution.Valid(inputVolume, kernel, term);
                    ManagedMatrix.Add(accumulated, term);
                }

                // add the bias of output map j, then apply the sigmoid
                ManagedMatrix.Add(accumulated, layer.Bias[j]);

                var activated = ManagedMatrix.Sigm(accumulated);
                ManagedOps.Copy3D4D(layer.Activation, activated, j);
                ManagedOps.Free(activated);
            }

            ManagedOps.Free(inputVolume, kernel, accumulated, term);

            mapsIn = outputMaps;
        }
        else if (layer.Type == LayerTypes.Subsampling)
        {
            var window = layer.Scale;

            // averaging kernel: every entry is 1 / (scale * scale)
            var area = (double)(window * window);
            var kernel = new ManagedArray(window, window, false);
            ManagedOps.Set(kernel, 1.0 / area);

            ManagedOps.Free(layer.Activation);

            var inX = prevLayer.Activation.x;
            var inY = prevLayer.Activation.y;

            layer.Activation = new ManagedArray(inX / window, inY / window, batchDepth, mapsIn, 1);

            var inputVolume = new ManagedArray(inX, inY, batchDepth, false);
            var averaged = new ManagedArray(inX - window + 1, inY - window + 1, batchDepth, false);

            for (var j = 0; j < mapsIn; j++)
            {
                ManagedOps.Copy4D3D(inputVolume, prevLayer.Activation, j);

                // average over every scale x scale window
                ManagedConvolution.Valid(inputVolume, kernel, averaged);

                if (!pool)
                {
                    ManagedOps.Copy3D4D(layer.Activation, averaged, j, window);
                }
                else
                {
                    ManagedOps.Pool3D4D(layer.Activation, averaged, j, window);
                }
            }

            ManagedOps.Free(inputVolume, kernel, averaged);
        }
    }

    // ---- flatten the last layer's maps into the feature vector ----
    var lastLayer = Layers[lastIndex];
    var mapX = lastLayer.Activation.x;
    var mapY = lastLayer.Activation.y;
    var mapSize = mapX * mapY;

    ManagedOps.Free(FeatureVector);
    FeatureVector = new ManagedArray(batchDepth, mapSize * lastLayer.Activation.i);

    var transposedMap = new ManagedArray(mapY, mapX, false);
    var currentMap = new ManagedArray(mapX, mapY, false);

    var lastMapCount = lastLayer.Activation.i;

    for (var map = 0; map < lastMapCount; map++)
    {
        var offset = map * mapSize;

        for (var slice = 0; slice < batchDepth; slice++)
        {
            // Each map is transposed before being flattened; the intent is a
            // row-major flattening (ManagedArray's storage order is not visible here).
            ManagedOps.Copy4D2D(currentMap, lastLayer.Activation, slice, map);
            ManagedMatrix.Transpose(transposedMap, currentMap);
            transposedMap.Reshape(1, mapSize);

            ManagedOps.Copy2DOffset(FeatureVector, transposedMap, slice, offset);
        }
    }

    // ---- output perceptrons: sigmoid(Weights * FeatureVector + Bias) ----
    var preActivation = new ManagedArray(FeatureVector.x, Weights.y, false);
    ManagedMatrix.Multiply(preActivation, Weights, FeatureVector);

    // expand the bias across the batch so it can be added element-wise
    var tiledBias = new ManagedArray(batchDepth, Bias.Length(), false);
    ManagedMatrix.Expand(Bias, batchDepth, 1, tiledBias);
    ManagedMatrix.Add(preActivation, tiledBias);

    ManagedOps.Free(Output);
    Output = ManagedMatrix.Sigm(preActivation);

    ManagedOps.Free(preActivation, tiledBias, transposedMap, currentMap);
}