/// <summary>
/// Returns the analytic gradient of the loss with respect to layer_1's
/// bias, weights, and input, computed by backpropagation.
/// </summary>
/// <param name="layer_1"> layer under test </param>
/// <param name="layer_2"> set as Input_Layer to test layer_1 only </param>
/// <param name="loss_layer"> loss function applied to the network output </param>
/// <param name="I"> input tensor </param>
/// <param name="T"> target tensor </param>
/// <returns> Tuple of analytic gradients (dB, dW, dI) </returns>
public static Tuple<Tensor, Tensor, Tensor> analytic_grad(Base_Layer layer_1, Base_Layer layer_2, Base_Layer loss_layer, Tensor I, Tensor T)
{
    // Copy the input so the forward pass cannot mutate the caller's tensor
    Tensor I_copy = Utils.copy(I);

    // Forward pass through both layers and the loss, then backpropagate
    loss_layer.loss(layer_2.forward(layer_1.forward(I_copy), true), T);
    Tensor analytic_dI = layer_1.backward(layer_2.backward(loss_layer.backward()));
    Tensor analytic_dB = layer_1.dB;
    Tensor analytic_dW = layer_1.dW;

    return Tuple.Create(analytic_dB, analytic_dW, analytic_dI);
}
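// numeric_grad below approximates the same three gradients by central
// differences: for each scalar parameter p,
//
//     dL/dp ≈ (L(p + h) - L(p - h)) / (2h)
//
// which has O(h^2) truncation error, so it converges on the analytic
// gradient as h shrinks (until floating-point cancellation takes over).
// Comparing its output against analytic_grad is the standard gradient check.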
/// <summary>
/// Returns the numerical gradient of the loss with respect to
/// test_layer_1's bias, weights, and input, using central differences.
/// </summary>
/// <param name="test_layer_1"> layer under test </param>
/// <param name="test_layer_2"> set as Input_Layer to test layer_1 only </param>
/// <param name="loss_layer"> loss function applied to the network output </param>
/// <param name="I"> input tensor </param>
/// <param name="T"> target tensor </param>
/// <param name="h"> finite difference step size </param>
/// <returns> Tuple of numerical gradients (dB, dW, dI) </returns>
public static Tuple<Tensor, Tensor, Tensor> numeric_grad(Base_Layer test_layer_1, Base_Layer test_layer_2, Base_Layer loss_layer, Tensor I, Tensor T, double h = 0.00001)
{
    Tensor I_copy;
    Tensor B = test_layer_1.B;
    Tensor W = test_layer_1.W;

    Tensor numeric_dB = new Tensor(B.dimensions, B.dim_1, B.dim_2, B.dim_3, B.dim_4);
    Tensor numeric_dW = new Tensor(W.dimensions, W.dim_1, W.dim_2, W.dim_3, W.dim_4);
    Tensor numeric_dI = new Tensor(I.dimensions, I.dim_1, I.dim_2, I.dim_3, I.dim_4);

    // Perturb each bias element up and down by h and take the centered difference
    for (int i = 0; i < B.values.Length; i++)
    {
        I_copy = Utils.copy(I);
        test_layer_1.B.values[i] += h;
        Tensor L_up = loss_layer.loss(test_layer_2.forward(test_layer_1.forward(I_copy), true), T);

        I_copy = Utils.copy(I);
        test_layer_1.B.values[i] -= 2 * h;
        Tensor L_down = loss_layer.loss(test_layer_2.forward(test_layer_1.forward(I_copy), true), T);

        // Restore the original bias value before moving to the next element
        test_layer_1.B.values[i] += h;
        numeric_dB.values[i] = Utils.sum(Utils.subtract(L_up, L_down)) / (2 * h);
    }

    // Same centered difference over each weight element
    for (int i = 0; i < W.values.Length; i++)
    {
        I_copy = Utils.copy(I);
        test_layer_1.W.values[i] += h;
        Tensor L_up = loss_layer.loss(test_layer_2.forward(test_layer_1.forward(I_copy), true), T);

        I_copy = Utils.copy(I);
        test_layer_1.W.values[i] -= 2 * h;
        Tensor L_down = loss_layer.loss(test_layer_2.forward(test_layer_1.forward(I_copy), true), T);

        test_layer_1.W.values[i] += h;
        numeric_dW.values[i] = Utils.sum(Utils.subtract(L_up, L_down)) / (2 * h);
    }

    // Perturb each input element; separate copies keep the caller's tensor unchanged
    for (int i = 0; i < I.values.Length; i++)
    {
        Tensor I_up = Utils.copy(I);
        Tensor I_down = Utils.copy(I);
        I_up.values[i] += h;
        I_down.values[i] -= h;

        Tensor L_up = loss_layer.loss(test_layer_2.forward(test_layer_1.forward(I_up), true), T);
        Tensor L_down = loss_layer.loss(test_layer_2.forward(test_layer_1.forward(I_down), true), T);

        numeric_dI.values[i] = Utils.sum(Utils.subtract(L_up, L_down)) / (2 * h);
    }

    return Tuple.Create(numeric_dB, numeric_dW, numeric_dI);
}
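/// <summary>
/// A minimal sketch of how the two methods above can be compared. The
/// elementwise relative error |a - n| / max(|a|, |n|) is a common gradient
/// checking convention; this helper is not part of the original API, and it
/// assumes Tensor.values is a double[] of equal length for both arguments.
/// </summary>
public static double max_rel_error(Tensor analytic, Tensor numeric)
{
    double max_error = 0;
    for (int i = 0; i < analytic.values.Length; i++)
    {
        double a = analytic.values[i];
        double n = numeric.values[i];
        double scale = Math.Max(Math.Abs(a), Math.Abs(n));

        // Skip elements where both gradients are exactly zero (0/0)
        if (scale > 0)
        {
            max_error = Math.Max(max_error, Math.Abs(a - n) / scale);
        }
    }
    return max_error;
}

// Hypothetical usage (the layer and loss construction is assumed, not taken
// from this file; only Input_Layer is referenced by the docs above):
//
//     var analytic = analytic_grad(layer, input_layer, loss, I, T);
//     var numeric = numeric_grad(layer, input_layer, loss, I, T);
//     Console.WriteLine($"dB error: {max_rel_error(analytic.Item1, numeric.Item1)}");
//     Console.WriteLine($"dW error: {max_rel_error(analytic.Item2, numeric.Item2)}");
//     Console.WriteLine($"dI error: {max_rel_error(analytic.Item3, numeric.Item3)}");
//
// With h = 1e-5, a relative error around 1e-7 or smaller generally indicates
// a correct backward pass; values near 1e-2 or larger usually point to a bug.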