/// <summary>
/// Computes this layer's gradients element by element: writes the bias and
/// weight gradients into <c>Biases.Extra</c> and <c>Weights.Extra</c>, and,
/// when a previous layer is present, adds this layer's contribution to
/// <c>PrevLayer.Values.Extra</c>.
/// </summary>
/// <remarks>
/// Throughout this method <c>Primal</c> holds the current values and
/// <c>Extra</c> holds the corresponding gradients.
/// NOTE(review): <c>NextLayer</c> is dereferenced without a null check, unlike
/// <c>PrevLayer</c> - confirm that callers never invoke this on a layer that
/// has no next layer.
/// NOTE(review): only <c>NextLayer.Values.Extra</c> and <c>Values.Extra</c> are
/// dimension-checked here; the weight, bias and previous-layer matrices are
/// indexed with the same indices and are presumably guaranteed elsewhere to
/// have matching dimensions - verify.
/// </remarks>
public void ComputeGradient()
{
    // Dimension check between the next layer's gradient matrix and ours
    // (presumably throws on mismatch - see Matrix.ValidateMatricesDims).
    Matrix.ValidateMatricesDims(NextLayer.Values.Extra, Values.Extra);

    for (int row = 0; row < Values.Rows; row++)
    {
        for (int col = 0; col < Values.Columns; col++)
        {
            // Current value and current gradient stored for this element.
            double currentValue = Values.Primal[row, col];
            double currentGradient = Values.Extra[row, col];

            // Gradient reported by the activation for the next layer's value
            // at this position, given the current gradient.
            double activationGradient =
                _activation.Gradient(NextLayer.Values.Primal[row, col], currentGradient);

            // The bias gradient is the activation gradient itself; the weight
            // gradient additionally scales it by the current value.
            Biases.Extra[row, col] = activationGradient;
            Weights.Extra[row, col] = currentValue * activationGradient;

            if (PrevLayer == null)
            {
                // Nothing upstream to propagate into.
                continue;
            }

            // Chain the output-side gradient with the local one and ACCUMULATE
            // (+=) rather than overwrite: when one output feeds several
            // consumers (a wire branching out), the gradients coming back from
            // the individual branches simply add up in the final gradient
            // with respect to the circuit output.
            PrevLayer.Values.Extra[row, col] += activationGradient * Weights.Primal[row, col];
        }
    }

#if DEBUG
    // Optional dump of the gradients, compiled only into DEBUG builds.
    if (GeneralSettings.GradientsTracingEnabled)
    {
        Trace.Write("Affine layer: " + GetHashCode() + ". Values grad:");
        Trace.Write(Values.Extra.ToString());
        Trace.Write("Bias grad: " + Biases.Extra);
        Trace.WriteLine("Weights grad: " + Weights.Extra);
    }
#endif
}