// Update Network Weights based on computed errors.
//
// Runs the backward pass for the current batch, in this order:
//   1. output error, output delta and the loss L;
//   2. delta of the flattened feature vector;
//   3. that delta reshaped into the last layer's Delta maps;
//   4. deltas for every earlier layer, walking from the back to the front;
//   5. kernel/bias gradients for every convolution layer;
//   6. gradients for the output Weights and Bias.
//
// Note that this method only fills the delta/gradient members (OutputError,
// OutputDelta, FeatureVectorDelta, WeightsTransposed, each layer's Delta,
// DeltaFeatureMap and DeltaBias, WeightsDelta, BiasDelta) and L. Nothing here
// assigns to Weights, Bias or a layer's FeatureMap.
//
// batch: expected outputs; read element-for-element against Output, and
//        batch.x is the divisor of the loss.
public void BackPropagation(ManagedArray batch)
{
    var n = Layers.Count;
    var last = n - 1;

    // The z extent of the last layer's activation; used below as the divisor
    // when averaging gradients, i.e. it is treated as the batch size.
    var batchz = Layers[last].Activation.z;

    // backprop deltas
    ComputeOutputErrorAndDelta(batch);
    ComputeFeatureVectorDelta(last);
    ReshapeFeatureVectorDelta(last, batchz);

    // Walk backwards so that Layers[l + 1].Delta is already available when
    // layer l is processed.
    for (var l = n - 2; l >= 0; l--)
    {
        if (Layers[l].Type == LayerTypes.Convolution)
        {
            PropagateDeltaToConvolutionLayer(l, batchz);
        }
        else if (Layers[l].Type == LayerTypes.Subsampling)
        {
            PropagateDeltaToSubsamplingLayer(l, batchz);
        }
    }

    // calc gradients
    for (var l = 1; l < n; l++)
    {
        if (Layers[l].Type == LayerTypes.Convolution)
        {
            ComputeConvolutionGradients(l, batchz);
        }
    }

    ComputeOutputLayerGradients(batchz);
}

// Rebuilds OutputError and OutputDelta from Output and batch, then stores the loss in L.
private void ComputeOutputErrorAndDelta(ManagedArray batch)
{
    ManagedOps.Free(OutputDelta, OutputError);

    OutputDelta = new ManagedArray(Output, false);
    OutputError = new ManagedArray(Output, false);

    for (var x = 0; x < Output.Length(); x++)
    {
        // error
        OutputError[x] = Output[x] - batch[x];

        // output delta: error times Output * (1 - Output)
        OutputDelta[x] = OutputError[x] * (Output[x] * (1 - Output[x]));
    }

    // Loss Function: half of the squared-error sum, divided by batch.x
    L = 0.5 * ManagedMatrix.SquareSum(OutputError) / batch.x;
}

// Rebuilds WeightsTransposed and FeatureVectorDelta from Weights and OutputDelta.
private void ComputeFeatureVectorDelta(int last)
{
    ManagedOps.Free(WeightsTransposed, FeatureVectorDelta);

    FeatureVectorDelta = new ManagedArray(FeatureVector, false);
    WeightsTransposed = new ManagedArray(Weights, false);

    // feature vector delta
    ManagedMatrix.Transpose(WeightsTransposed, Weights);
    ManagedMatrix.Multiply(FeatureVectorDelta, WeightsTransposed, OutputDelta);

    // only conv layers has sigm function, so the extra
    // FeatureVector * (1 - FeatureVector) factor is applied only when the
    // last layer is a convolution layer
    if (Layers[last].Type == LayerTypes.Convolution)
    {
        for (var x = 0; x < FeatureVectorDelta.Length(); x++)
        {
            FeatureVectorDelta[x] = FeatureVectorDelta[x] * FeatureVector[x] * (1 - FeatureVector[x]);
        }
    }
}

// Reshapes FeatureVectorDelta into output-map style and stores it as Layers[last].Delta.
private void ReshapeFeatureVectorDelta(int last, int batchz)
{
    var mapX = Layers[last].Activation.x;
    var mapY = Layers[last].Activation.y;
    var mapSize = mapX * mapY;

    var temp1D = new ManagedArray(1, mapSize, false);
    var temp2D = new ManagedArray(mapX, mapY, false);

    ManagedOps.Free(Layers[last].Delta);
    Layers[last].Delta = new ManagedArray(Layers[last].Activation, false);

    var maps = Layers[last].Activation.i;

    for (var j = 0; j < maps; j++)
    {
        for (var ii = 0; ii < batchz; ii++)
        {
            // Pull one map-sized strip out of the feature vector delta
            // (offsets ii and j * mapSize), view it as mapX x mapY, transpose
            // it, and store it at (ii, j) of the layer delta.
            ManagedOps.Copy2D(temp1D, FeatureVectorDelta, ii, j * mapSize);
            temp1D.Reshape(mapX, mapY);
            ManagedMatrix.Transpose(temp2D, temp1D);
            ManagedOps.Copy2D4D(Layers[last].Delta, temp2D, ii, j);

            // Restore the 1 x mapSize shape for the next Copy2D.
            temp1D.Reshape(1, mapSize);
        }
    }

    ManagedOps.Free(temp1D, temp2D);
}

// Rebuilds Layers[l].Delta for a convolution layer from the delta of layer l + 1.
private void PropagateDeltaToConvolutionLayer(int l, int batchz)
{
    var next = l + 1;

    ManagedOps.Free(Layers[l].Delta);
    Layers[l].Delta = new ManagedArray(Layers[l].Activation, false);

    var scale = Layers[next].Scale;
    var nextX = Layers[next].Activation.x;
    var nextY = Layers[next].Activation.y;
    var xx = scale * nextX;
    var yy = scale * nextY;

    var featureMap = new ManagedArray(nextX, nextY, false);
    var featureMapExpanded = new ManagedArray(xx, yy, false);
    var activation = new ManagedArray(xx, yy, false);
    var delta = new ManagedArray(xx, yy, false);

    // Each expanded delta value is divided by scale * scale.
    var weight = (1.0 / (scale * scale));

    var maps = Layers[l].Activation.i;

    for (var j = 0; j < maps; j++)
    {
        for (var z = 0; z < batchz; z++)
        {
            ManagedOps.Copy4D2D(featureMap, Layers[next].Delta, z, j);
            ManagedMatrix.Expand(featureMap, scale, scale, featureMapExpanded);
            ManagedOps.Copy4D2D(activation, Layers[l].Activation, z, j);

            for (var x = 0; x < delta.Length(); x++)
            {
                delta[x] = activation[x] * (1 - activation[x]) * featureMapExpanded[x] * weight;
            }

            ManagedOps.Copy2D4D(Layers[l].Delta, delta, z, j);
        }
    }

    ManagedOps.Free(featureMap, featureMapExpanded, activation, delta);
}

// Rebuilds Layers[l].Delta for a subsampling layer from the delta and kernels of layer l + 1.
private void PropagateDeltaToSubsamplingLayer(int l, int batchz)
{
    var next = l + 1;

    ManagedOps.Free(Layers[l].Delta);
    Layers[l].Delta = new ManagedArray(Layers[l].Activation, false);

    var kernelSize = Layers[next].KernelSize;
    var layerX = Layers[l].Activation.x;
    var layerY = Layers[l].Activation.y;

    // NOTE(review): nextDelta and accumulated are created without the trailing
    // 'false' flag used for the other buffers - presumably that flag controls
    // initialisation of the storage; confirm against ManagedArray.
    var nextDelta = new ManagedArray(Layers[next].Activation.x, Layers[next].Activation.y, batchz);
    var featureMap = new ManagedArray(kernelSize, kernelSize, false);
    var rot180 = new ManagedArray(kernelSize, kernelSize, false);
    var accumulated = new ManagedArray(layerX, layerY, batchz);
    var ztemp = new ManagedArray(layerX, layerY, batchz, false);

    var maps = Layers[l].Activation.i;
    var nextMaps = Layers[next].Activation.i;

    for (var i = 0; i < maps; i++)
    {
        // Start each map of this layer from zero before adding up the
        // contributions of all maps j of the next layer.
        ManagedOps.Set(accumulated, 0.0);

        for (var j = 0; j < nextMaps; j++)
        {
            ManagedOps.Copy4DIJ2D(featureMap, Layers[next].FeatureMap, i, j);
            ManagedMatrix.Rotate180(rot180, featureMap);

            ManagedOps.Copy4D3D(nextDelta, Layers[next].Delta, j);
            ManagedConvolution.Full(nextDelta, rot180, ztemp);
            ManagedMatrix.Add(accumulated, ztemp);
        }

        ManagedOps.Copy3D4D(Layers[l].Delta, accumulated, i);
    }

    ManagedOps.Free(nextDelta, featureMap, rot180, accumulated, ztemp);
}

// Rebuilds Layers[l].DeltaFeatureMap and Layers[l].DeltaBias for a convolution layer.
private void ComputeConvolutionGradients(int l, int batchz)
{
    var prev = l - 1;

    ManagedOps.Free(Layers[l].DeltaFeatureMap, Layers[l].DeltaBias);

    Layers[l].DeltaFeatureMap = new ManagedArray(Layers[l].FeatureMap, false);
    Layers[l].DeltaBias = new ManagedArray(Layers[l].OutputMaps, false);

    var featureMapDelta = new ManagedArray(Layers[l].FeatureMap.x, Layers[l].FeatureMap.y, Layers[l].FeatureMap.z, false);

    // d[j]
    var dtemp = new ManagedArray(Layers[l].Activation.x, Layers[l].Activation.y, batchz, false);

    // a[i] and flipped
    var prevX = Layers[prev].Activation.x;
    var prevY = Layers[prev].Activation.y;
    var atemp = new ManagedArray(prevX, prevY, batchz, false);
    var ftemp = new ManagedArray(prevX, prevY, batchz, false);

    var maps = Layers[l].Activation.i;
    var prevMaps = Layers[prev].Activation.i;

    for (var j = 0; j < maps; j++)
    {
        ManagedOps.Copy4D3D(dtemp, Layers[l].Delta, j);

        for (var i = 0; i < prevMaps; i++)
        {
            ManagedOps.Copy4D3D(atemp, Layers[prev].Activation, i);
            ManagedMatrix.FlipAll(ftemp, atemp);
            ManagedConvolution.Valid(ftemp, dtemp, featureMapDelta);

            // Scale the result by 1 / batchz before storing it at (i, j).
            ManagedMatrix.Multiply(featureMapDelta, 1.0 / batchz);

            ManagedOps.Copy2D4DIJ(Layers[l].DeltaFeatureMap, featureMapDelta, i, j);
        }

        // Bias gradient for map j: sum of its delta divided by batchz.
        Layers[l].DeltaBias[j] = ManagedMatrix.Sum(dtemp) / batchz;
    }

    ManagedOps.Free(featureMapDelta, dtemp, atemp, ftemp);
}

// Rebuilds WeightsDelta and BiasDelta from OutputDelta and FeatureVector.
private void ComputeOutputLayerGradients(int batchz)
{
    var featureVectorTransposed = new ManagedArray(FeatureVector, false);
    ManagedMatrix.Transpose(featureVectorTransposed, FeatureVector);

    ManagedOps.Free(WeightsDelta, BiasDelta);

    WeightsDelta = new ManagedArray(Weights, false);
    BiasDelta = new ManagedArray(Bias, false);

    ManagedMatrix.Multiply(WeightsDelta, OutputDelta, featureVectorTransposed);
    ManagedMatrix.Multiply(WeightsDelta, 1.0 / batchz);
    ManagedMatrix.Mean(BiasDelta, OutputDelta, 0);

    ManagedOps.Free(featureVectorTransposed);
}