/// <summary>
/// Adjusts the current parameters with the specified gradient and learning rate,
/// applying momentum to the raw deltas before delegating to the base update.
/// </summary>
/// <param name="node">The node.</param>
/// <param name="bDelta">The bias delta values.</param>
/// <param name="wDelta">The weight delta values.</param>
/// <param name="learningRate">The learning rate.</param>
public override void AdjustParameters(Core.Data.LinkedListNode<NnLayer> node, double[][] bDelta, double[][] wDelta, double learningRate)
{
    const int rowIndex = 0;

    for (var i = 0; i < _biasVelocities[node.Depth][rowIndex].Length; i++)
    {
        _biasVelocities[node.Depth][rowIndex][i] =
            _momentum * _biasVelocities[node.Depth][rowIndex][i] + bDelta[rowIndex][i];
    }

    for (var i = 0; i < _weightVelocities[node.Depth].Length; i++)
    {
        for (var j = 0; j < _weightVelocities[node.Depth][0].Length; j++)
        {
            _weightVelocities[node.Depth][i][j] =
                _momentum * _weightVelocities[node.Depth][i][j] + wDelta[i][j];
        }
    }

    base.AdjustParameters(node, _biasVelocities[node.Depth], _weightVelocities[node.Depth], learningRate);
}
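// Worked example (not from the original source): with _momentum = 0.9 and a
// constant per-step bias gradient of 0.1, the velocity update
//     v <- 0.9 * v + 0.1
// converges toward the fixed point 0.1 / (1 - 0.9) = 1.0. A persistent gradient
// is therefore amplified by up to 1 / (1 - momentum) relative to plain SGD,
// which is why momentum values close to 1 usually pair with a smaller learning rate.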
/// <summary>
/// Performs the backpropagation algorithm.
/// </summary>
/// <param name="predictions">The predictions.</param>
/// <param name="oneHots">The one hot values.</param>
public virtual void Backpropagate(double[][] predictions, int[] oneHots)
{
    ProcessCaches(oneHots);

    double[][] gradients = CalculateOutputDerivative(predictions, oneHots);

    for (var i = 0; i < predictions.Length; i++)
    {
        double[][] currentGradient = gradients[i].AsMatrix();
        Core.Data.LinkedListNode<NnLayer> currentLayer = _neuralNetwork.Layers.Last;

        while (currentLayer != null)
        {
            double[][] delta = currentGradient;

            if (PipelineSettings.Instance.CanPerformDropout && currentLayer.Next != null)
            {
                delta = delta.DotProduct(PipelineSettings.Instance.DropoutVectors[currentLayer.Depth]);
            }

            // We don't multiply by the weighted sum derivative for the last layer in the network.
            if (currentLayer.Depth < _neuralNetwork.Layers.Count - 1)
            {
                delta = delta.HadamardProduct(_weightedSumDerivatives[currentLayer.Depth][i].AsMatrix());
            }

            double[][] activation = _activations[currentLayer.Depth][i].AsMatrix();
            double[][] wDelta = activation.Transpose().Multiply(delta);

            AdjustParameters(currentLayer, delta, wDelta, _neuralNetwork.LearningRate);

            currentGradient = currentLayer.Value.BackpropagateError(delta);
            currentLayer = currentLayer.Previous;
        }
    }
}
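// Usage sketch (hypothetical; the forward-pass helper name below is illustrative
// only and is assumed to populate _neuralNetwork.ActivationCache before this
// method runs, with labels given as class indices):
//
//     double[][] predictions = network.FeedForward(trainingBatch); // hypothetical helper
//     optimizer.Backpropagate(predictions, labels);                // one call per batch
//
// Each call walks the layers from Layers.Last back to Layers.First, updating
// parameters layer by layer while BackpropagateError carries the error signal
// to the preceding layer.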
/// <summary>
/// Calculates the weighted sum derivatives and activation caches in advance.
/// </summary>
/// <param name="oneHots">The one hot values.</param>
private void ProcessCaches(int[] oneHots)
{
    var wSumDerivs = new List<double[][]>();
    var activationCaches = new List<double[][]>();

    Core.Data.LinkedListNode<NnLayer> currentLayer = _neuralNetwork.Layers.First;

    while (currentLayer != null)
    {
        double[][] wSumDeriv = WeightedSumDerivative(currentLayer.Value.ActivationFunction, currentLayer.Depth, oneHots);
        wSumDerivs.Add(wSumDeriv);

        double[][] activationCache = _neuralNetwork.ActivationCache[currentLayer.Depth];
        activationCaches.Add(activationCache);

        currentLayer = currentLayer.Next;
    }

    _weightedSumDerivatives = wSumDerivs;
    _activations = activationCaches;
}
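// Note: both caches are indexed as [layer depth][sample index]; this is the
// layout Backpropagate relies on when it reads
// _weightedSumDerivatives[currentLayer.Depth][i] and _activations[currentLayer.Depth][i].
// Precomputing them here keeps the per-sample backward loop free of redundant
// derivative evaluations.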
/// <summary>
/// Adjusts the current parameters with the specified gradient and learning rate.
/// </summary>
/// <param name="node">The node.</param>
/// <param name="bDelta">The bias delta values.</param>
/// <param name="wDelta">The weight delta values.</param>
/// <param name="learningRate">The learning rate.</param>
public virtual void AdjustParameters(Core.Data.LinkedListNode<NnLayer> node, double[][] bDelta, double[][] wDelta, double learningRate)
{
    node.Value.BiasVector.AdjustValue(bDelta, learningRate);
    node.Value.WeightMatrix.AdjustValue(wDelta, learningRate);
}
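// This base update corresponds to plain gradient descent,
//     theta <- theta - learningRate * delta,
// assuming AdjustValue applies the delta scaled by the learning rate (its body
// is not shown here). The momentum override above reuses this method by
// passing the accumulated velocities in place of the raw deltas.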