/// <summary>
/// Backward pass through the activation layer: multiplies the incoming
/// gradient element-wise with the derivative of the activation function
/// evaluated at the cached forward <c>activations</c>.
/// </summary>
/// <param name="input">Gradient flowing back from the following layer.</param>
/// <param name="startIndex">Unused; retained for interface compatibility.</param>
/// <param name="endIndex">Unused; retained for interface compatibility.</param>
/// <returns>Gradient with respect to this layer's input, same shape as <paramref name="input"/>.</returns>
public override double[][][][] BackwardPropagation(double[][][][] input, int startIndex = 0, int endIndex = 0)
{
    // NOTE(review): the original start/end-index bookkeeping was dead code —
    // numSamples was always 0 and none of the computed values were ever read —
    // so it has been removed. The parameters stay for signature compatibility.

    // Softmax is typically paired with a loss whose combined derivative is
    // already folded into `input`, so no extra element-wise step is needed.
    if (activationFunction.GetType() == typeof(Softmax))
    {
        return input;
    }

    // Element-wise product of the upstream gradient with f'(activations).
    // NOTE(review): `activations` is assumed to hold this layer's cached
    // forward output — confirm it is not overwritten between passes.
    return MatOp.Cwise(input, activationFunction.Derivate(activations));
}
/// <summary>
/// Applies one Adam optimization step to a learnable layer's weights and a
/// plain gradient-descent step to its biases.
/// </summary>
/// <param name="learnableLayer">Layer whose weights/biases are updated in place.</param>
/// <param name="iteration">1-based iteration counter used for bias correction.</param>
public override void UpdateWeights(ILearnable learnableLayer, int iteration = 1)
{
    double[][][][] weights = learnableLayer.Weights;
    double[][][][] gradients = learnableLayer.Dweights;

    // Dimensions of the (jagged) 4-D weight tensor, used to size the
    // per-layer moment accumulators.
    int d0 = weights.Length;
    int d1 = weights[0].Length;
    int d2 = weights[0][0].Length;
    int d3 = weights[0][0][0].Length;

    // Each layer keeps its own moment state, keyed by the layer index.
    int layerKey = ((AbstractLayer)learnableLayer).Index;

    // Lazily create the first (m) and second (r) moment accumulators the
    // first time this layer is seen.
    if (!m.ContainsKey(layerKey))
    {
        m.Add(layerKey, Utils.Init4dArr(d0, d1, d2, d3));
    }
    if (!r.ContainsKey(layerKey))
    {
        r.Add(layerKey, Utils.Init4dArr(d0, d1, d2, d3));
    }

    // Exponential moving averages of the gradient and its element-wise square.
    m[layerKey] = MatOp.Add(MatOp.MultiplyByConst(gradients, 1d - beta1), MatOp.MultiplyByConst(m[layerKey], beta1));
    r[layerKey] = MatOp.Add(MatOp.MultiplyByConst(MatOp.Cwise(gradients, gradients), 1d - beta2), MatOp.MultiplyByConst(r[layerKey], beta2));

    // Bias-corrected moment estimates (corrects the zero-initialization bias).
    double[][][][] mHat = MatOp.DivideByConst(m[layerKey], 1d - Math.Pow(beta1, iteration));
    double[][][][] rHat = MatOp.DivideByConst(r[layerKey], 1d - Math.Pow(beta2, iteration));

    // Adam step combining lr * mHat with (sqrt(rHat) + eps).
    // NOTE(review): `Mwise` is presumably element-wise division — the Adam
    // rule divides by sqrt(rHat) + eps — but its implementation is not
    // visible here; verify against MatOp before relying on this comment.
    var scaledMoment = MatOp.MultiplyByConst(mHat, learningRate);
    var denominator = MatOp.AddConst(MatOp.Sqrt(rHat), epsilon);
    var step = MatOp.Mwise(scaledMoment, denominator);
    learnableLayer.Weights = MatOp.Substract(weights, step);

    // Biases use plain SGD rather than Adam, mutated in place.
    double[] biases = learnableLayer.Biases;
    double[] biasGradients = learnableLayer.Dbiases;
    for (int i = 0; i < biases.Length; i++)
    {
        biases[i] -= learningRate * biasGradients[i];
    }
}