/// <summary>
/// Propagates the inputs of the context's vector through the network, one layer at a time,
/// from the first layer to the last. For every node the weighted sum of its inputs plus its
/// bias, and the activation derived from that sum, are stored in the caches of <paramref name="ctx"/>.
/// </summary>
/// <param name="network">The multilayer perceptron to evaluate</param>
/// <param name="ctx">The propagation context wrapping the vector being processed</param>
internal static void DoForwardPass(MultilayerPerceptron network, VectorPropagationContext ctx)
{
    for (int depth = 0; depth < network.Layers.Length; depth++)
    {
        Layer layer = network.Layers[depth];

        // Values feeding this layer: the raw vector inputs for the first layer,
        // otherwise the cached activations of the layer immediately before it.
        double[] feed;
        if (depth > 0)
        {
            Layer upstream = network.Layers[depth - 1];
            feed = upstream.Nodes.Select(prev => ctx.NodeActivationCache[prev.GetID()]).ToArray();
        }
        else
        {
            feed = ctx.Vector.Inputs;
        }

        foreach (Neuron neuron in layer.Nodes)
        {
            // Accumulate the weighted inputs first and add the bias last.
            double weightedSum = 0;
            for (int w = 0; w < neuron.Weights.Length; w++)
            {
                weightedSum = weightedSum + feed[w] * neuron.Weights[w].Value;
            }
            weightedSum += neuron.Bias.Value;

            ctx.NodeDotProductsCache[neuron.GetID()] = weightedSum;
            ctx.NodeActivationCache[neuron.GetID()] = ComputeActivation(layer, neuron, weightedSum);
        }
    }
}
/// <summary>
/// Runs a forward pass of the given vector through the network and returns the
/// activations of the nodes in the last layer.
/// </summary>
/// <param name="network">The multilayer perceptron to evaluate</param>
/// <param name="vector">The vector whose inputs are fed to the first layer</param>
/// <returns>One activation per node of the last layer, in node order</returns>
public static double[] ComputeNetworkOutput(MultilayerPerceptron network, Vector vector)
{
    // A fresh context keeps the caches of this evaluation separate from any other pass.
    VectorPropagationContext ctx = new VectorPropagationContext(vector);
    DoForwardPass(network, ctx);

    Layer outputLayer = network.Layers.Last();
    double[] result = new double[outputLayer.Nodes.Length];
    for (int i = 0; i < result.Length; i++)
    {
        result[i] = ctx.NodeActivationCache[outputLayer.Nodes[i].GetID()];
    }
    return result;
}
/// <summary>
/// Uses the node deltas cached in <paramref name="ctx"/> to compute the increment of every
/// weight and bias for the context's vector.
/// Unlike SGD, the increments are not applied to the network here: they are only stored in
/// the weight update cache of the context, so that they can be summed and applied after the
/// entire batch has been processed.
/// </summary>
/// <param name="ctx">
/// Propagation context of one vector. Its activation and delta caches are read and its
/// weight update cache is written.
/// </param>
private void ComputeWeightUpdates(VectorPropagationContext ctx)
{
    for (int layerindex = 0; layerindex < this.Perceptron.Layers.Length; layerindex++)
    {
        Layer layerCurrent = this.Perceptron.Layers[layerindex];

        // Values that flowed into this layer during the forward pass.
        double[] incomingvalues;
        if (layerindex == 0)
        {
            // First layer: fed directly by the vector inputs.
            incomingvalues = ctx.Vector.Inputs;
        }
        else
        {
            // Later layers: fed by the cached activations of the previous layer.
            Layer layerPrevious = this.Perceptron.Layers[layerindex - 1];
            incomingvalues = layerPrevious.Nodes.Select(nd => ctx.NodeActivationCache[nd.GetID()]).ToArray();
        }

        for (int nodeindex = 0; nodeindex < layerCurrent.Nodes.Length; nodeindex++)
        {
            Neuron node = layerCurrent.Nodes[nodeindex];
            double deltaAtNode = ctx.NodeDeltaCache[node.GetID()];

            for (int wtindex = 0; wtindex < node.Weights.Length; wtindex++)
            {
                Weight wt = node.Weights[wtindex];

                // Derivative of the error w.r.t. this weight: delta at the node times the
                // value that travelled through the weight.
                double derivative = deltaAtNode * incomingvalues[wtindex];
                ctx.WeightUpdateCache[wt.GetID()] = -this.LearningRate * derivative;
            }

            // The bias behaves like a weight whose incoming value is always 1,
            // so its derivative is the delta itself.
            ctx.WeightUpdateCache[node.Bias.GetID()] = -this.LearningRate * deltaAtNode;
        }
    }
}
/// <summary>
/// Runs batch training for at most <c>MaxEpochs</c> epochs.
/// In every epoch each training vector gets its own propagation context, is passed forward
/// and backward through the network, and has its weight increments computed. Progress is then
/// reported through <c>NotifyProgress</c>; when that call returns true training stops
/// immediately. Otherwise the increments of the whole batch are summed and applied to the
/// weights and biases before the next epoch starts.
/// </summary>
public void Train()
{
    _epochsElapsed = 0;
    for (int epochs = 0; epochs < this.MaxEpochs; epochs++)
    {
        _epochsElapsed++;

        List<VectorPropagationContext> wrappers = new List<VectorPropagationContext>();
        foreach (Vector vec in this.Vectors)
        {
            VectorPropagationContext ctx = new VectorPropagationContext(vec);
            wrappers.Add(ctx);
            core.Utils.DoForwardPass(this.Perceptron, ctx);
            core.Utils.DoBackwardPassComputeDeltas(this.Perceptron, ctx);
            this.ComputeWeightUpdates(ctx);
        }

        bool cancel = NotifyProgress(wrappers);
        if (cancel)
        {
            // Stop without touching the weights, so the network stays in the state
            // that was just evaluated and reported.
            return;
        }

        // The per-vector increments were only cached so far; without this step the
        // network would never change between epochs.
        ApplyBatchWeightUpdates(wrappers);
    }
}

/// <summary>
/// Adds to every weight and bias of the network the sum of the increments cached for it
/// in the contexts of the batch.
/// </summary>
/// <param name="batch">The propagation contexts of all vectors processed in the current epoch</param>
private void ApplyBatchWeightUpdates(List<VectorPropagationContext> batch)
{
    foreach (var layer in this.Perceptron.Layers)
    {
        foreach (var node in layer.Nodes)
        {
            foreach (var wt in node.Weights)
            {
                double wt_total = 0.0;
                foreach (VectorPropagationContext ctx in batch)
                {
                    wt_total += ctx.WeightUpdateCache[wt.GetID()];
                }
                wt.Value = wt.Value + wt_total;
            }

            double bias_total = 0.0;
            foreach (VectorPropagationContext ctx in batch)
            {
                bias_total += ctx.WeightUpdateCache[node.Bias.GetID()];
            }
            node.Bias.Value = node.Bias.Value + bias_total;
        }
    }
}
/// <summary>
/// Processes a single vector end to end: forward pass, backward pass that computes the
/// node deltas, and finally the computation of the weight updates.
/// </summary>
/// <param name="ctx">The propagation context wrapping the vector to process</param>
private void DoForwardAndBackProp(VectorPropagationContext ctx)
{
    // The order matters: each step consumes what the previous one cached in the context.
    core.Utils.DoForwardPass(Perceptron, ctx);
    core.Utils.DoBackwardPassComputeDeltas(Perceptron, ctx);
    ComputeWeightUpdates(ctx);
}
/// <summary>
/// Walks the network from the last layer back to the first and stores the delta of every
/// node in the delta cache of <paramref name="ctx"/>. The dot-product and activation caches
/// of the context are read, so they must already be populated.
/// While the last layer is processed, the actual outputs and half the sum of the squared
/// output errors are recorded on the context as well.
/// </summary>
/// <param name="network">The multilayer perceptron whose node deltas are computed</param>
/// <param name="ctx">The propagation context holding the vector and the caches</param>
internal static void DoBackwardPassComputeDeltas(MultilayerPerceptron network, VectorPropagationContext ctx)
{
    int lastIndex = network.Layers.Length - 1;
    for (int depth = lastIndex; depth >= 0; depth--)
    {
        if (depth == lastIndex)
        {
            ComputeOutputLayerDeltas(network.Layers[depth], ctx);
        }
        else
        {
            ComputeHiddenLayerDeltas(network.Layers[depth], network.Layers[depth + 1], ctx);
        }
    }
}

/// <summary>
/// Computes the deltas of the last layer from the difference between the expected outputs
/// of the vector and the cached activations, and records outputs and error on the context.
/// </summary>
private static void ComputeOutputLayerDeltas(Layer outputLayer, VectorPropagationContext ctx)
{
    int nodeCount = outputLayer.Nodes.Length;
    double[] errors = new double[nodeCount];
    ctx.Outputs = new double[nodeCount];

    for (int k = 0; k < nodeCount; k++)
    {
        Neuron neuron = outputLayer.Nodes[k];
        double expected = ctx.Vector.Outputs[k];
        double actual = ctx.NodeActivationCache[neuron.GetID()];
        ctx.Outputs[k] = actual;

        double error = expected - actual;
        errors[k] = error;

        double weightedSum = ctx.NodeDotProductsCache[neuron.GetID()];
        double slope = ComputeDerivativeOfActivation(outputLayer, neuron, weightedSum, actual);

        // Delta is the negated error scaled by the slope of the activation function.
        ctx.NodeDeltaCache[neuron.GetID()] = -error * slope;
    }

    // Stored under the name MeanSquaredError, but the value is half the sum of squared errors.
    ctx.MeanSquaredError = 0.5 * errors.Select(e => e * e).Sum();
}

/// <summary>
/// Computes the deltas of a non-final layer by pulling back the already computed deltas of
/// the layer ahead through the weights that connect the two layers.
/// </summary>
private static void ComputeHiddenLayerDeltas(Layer layer, Layer layerAhead, VectorPropagationContext ctx)
{
    int nodeCount = layer.Nodes.Length;
    int aheadCount = layerAhead.Nodes.Length;

    for (int j = 0; j < nodeCount; j++)
    {
        Neuron neuron = layer.Nodes[j];
        double activation = ctx.NodeActivationCache[neuron.GetID()];
        double weightedSum = ctx.NodeDotProductsCache[neuron.GetID()];

        // Weight j of every node ahead is the connection coming from this node.
        double backpropagated = 0.0;
        for (int a = 0; a < aheadCount; a++)
        {
            Neuron downstream = layerAhead.Nodes[a];
            backpropagated += downstream.Weights[j].Value * ctx.NodeDeltaCache[downstream.GetID()];
        }

        double slope = ComputeDerivativeOfActivation(layer, neuron, weightedSum, activation);
        ctx.NodeDeltaCache[neuron.GetID()] = backpropagated * slope;
    }
}