Example #1
        public override double[][][][] BackwardPropagation(double[][][][] input, int startIndex = 0, int endIndex = 0)
        {
            // set the index range for the batch being propagated;
            // the batch size is taken from the outermost dimension of the input
            int numSamples = input.Length;

            if (startIndex == DEFAULT_INDEX && endIndex == startIndex)
            {
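                // no explicit range given: propagate over the whole batch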
                startIndex = 0;
                endIndex   = numSamples;
            }
            else
            {
                numSamples = endIndex - startIndex;
            }

            // softmax is assumed to be paired with a cross-entropy loss, whose
            // combined derivative already arrives in the incoming gradient,
            // so one step can be skipped here
            if (activationFunction.GetType() == typeof(Softmax))
            {
                return input;
            }

            // note: the activations field is overwritten on every forward pass,
            // so it must still hold the values from the matching forward call
            double[][][][] currGrad = MatOp.Cwise(input, activationFunction.Derivate(activations));

            return currGrad;
        }
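
For reference, this is the textbook math the method above implements, assuming an element-wise activation f applied with pre-activations z and outputs a (the symbols below are not in the code):

    \[
    \frac{\partial L}{\partial z} \;=\; \frac{\partial L}{\partial a} \odot f'(z)
    \]

For softmax combined with a cross-entropy loss over predictions p and targets y, the two derivatives collapse to

    \[
    \frac{\partial L}{\partial z_i} \;=\; p_i - y_i,
    \]

which is assumed to be what the caller already passes in as input; that is why the Softmax branch returns the incoming gradient unchanged.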
Example #2
        public override void UpdateWeights(ILearnable learnableLayer, int iteration = 1)
        {
            // update all weights with the Adam rule (first/second gradient
            // moments with bias correction), not plain stochastic gradient descent
            double[][][][] weights  = learnableLayer.Weights;
            double[][][][] dWeights = learnableLayer.Dweights;

            int lx0 = weights.Length;
            int lx1 = weights[0].Length;
            int lx2 = weights[0][0].Length;
            int lx3 = weights[0][0][0].Length;

            int index = ((AbstractLayer)learnableLayer).Index;

            // on the first iteration for this layer, initialize its moment accumulators
            if (!m.ContainsKey(index))
            {
                m.Add(index, Utils.Init4dArr(lx0, lx1, lx2, lx3));
            }
            if (!r.ContainsKey(index))
            {
                r.Add(index, Utils.Init4dArr(lx0, lx1, lx2, lx3));
            }

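            // exponential moving averages of the gradient: first moment m and
            // (uncentered) second moment r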
            m[index] = MatOp.Add(MatOp.MultiplyByConst(dWeights, 1d - beta1), MatOp.MultiplyByConst(m[index], beta1));
            r[index] = MatOp.Add(MatOp.MultiplyByConst(MatOp.Cwise(dWeights, dWeights), 1d - beta2), MatOp.MultiplyByConst(r[index], beta2));

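            // bias-corrected moment estimates, compensating for the zero initialization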
            double[][][][] mExt = MatOp.DivideByConst(m[index], 1d - Math.Pow(beta1, iteration));
            double[][][][] rExt = MatOp.DivideByConst(r[index], 1d - Math.Pow(beta2, iteration));

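            // Adam step: learningRate * mHat / (sqrt(rHat) + epsilon);
            // Mwise is presumably the element-wise division here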
            var a = MatOp.MultiplyByConst(mExt, learningRate);
            var b = MatOp.AddConst(MatOp.Sqrt(rExt), epsilon);
            var c = MatOp.Mwise(a, b);

            learnableLayer.Weights = MatOp.Substract(weights, c);

            // biases are updated with a plain SGD step rather than Adam
            double[] biases  = learnableLayer.Biases;
            double[] dBiases = learnableLayer.Dbiases;

            for (int i = 0; i < biases.Length; i++)
            {
                biases[i] -= learningRate * dBiases[i];
            }
        }
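
For reference, the weight update above is the standard Adam optimizer rule rather than plain SGD. Mapping the code's m, r, mExt, rExt, learningRate, and epsilon onto the usual symbols, with g_t = dWeights and t = iteration:

    \[
    \begin{aligned}
    m_t &= \beta_1 m_{t-1} + (1-\beta_1)\, g_t \\
    r_t &= \beta_2 r_{t-1} + (1-\beta_2)\, g_t \odot g_t \\
    \hat m_t &= m_t / (1-\beta_1^{\,t}), \qquad \hat r_t = r_t / (1-\beta_2^{\,t}) \\
    w &\leftarrow w - \eta\, \hat m_t / (\sqrt{\hat r_t} + \epsilon)
    \end{aligned}
    \]

Since the MatOp helpers themselves are not shown, here is a minimal sketch of what the two element-wise operations used above presumably do, with signatures inferred from the call sites: Cwise is taken to be the element-wise (Hadamard) product, and Mwise the element-wise division, which is what the Adam step requires of the call Mwise(a, b). The class name and loop structure are illustrative assumptions, not the library's actual implementation.

    using System;

    // Sketch of the inferred MatOp element-wise helpers over jagged 4-D arrays.
    public static class MatOpSketch
    {
        // Combine two equally-shaped jagged 4-D arrays element by element.
        private static double[][][][] Zip(double[][][][] a, double[][][][] b,
                                          Func<double, double, double> op)
        {
            var result = new double[a.Length][][][];
            for (int i = 0; i < a.Length; i++)
            {
                result[i] = new double[a[i].Length][][];
                for (int j = 0; j < a[i].Length; j++)
                {
                    result[i][j] = new double[a[i][j].Length][];
                    for (int k = 0; k < a[i][j].Length; k++)
                    {
                        result[i][j][k] = new double[a[i][j][k].Length];
                        for (int l = 0; l < a[i][j][k].Length; l++)
                        {
                            result[i][j][k][l] = op(a[i][j][k][l], b[i][j][k][l]);
                        }
                    }
                }
            }
            return result;
        }

        // Cwise: element-wise product, as used for Cwise(dWeights, dWeights).
        public static double[][][][] Cwise(double[][][][] a, double[][][][] b)
            => Zip(a, b, (x, y) => x * y);

        // Mwise: element-wise division, assumed from its role in the Adam step.
        public static double[][][][] Mwise(double[][][][] a, double[][][][] b)
            => Zip(a, b, (x, y) => x / y);
    }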