public Tensor4 CalcDelts(ILayer lastLayer, ILayer nextLayer)
{
    this.lastLayer = lastLayer;
    this.nextLayer = nextLayer;
    delts = nextLayer.Backward();
    return delts;
}
/// <summary>
/// Compute the gradients by backpropagation (chain rule).
/// </summary>
/// <param name="x">Input batch.</param>
/// <param name="t">Target labels.</param>
/// <returns>Gradients for each parameter matrix.</returns>
public Matrix[] Gradient(Matrix x, Matrix t)
{
    losses.Add(Loss(x, t));

    // Backward pass: propagate the loss gradient through the layers in reverse order.
    Matrix dout = softmaxWithLoss.Backward(null);
    dout = dropout02.Backward(dout);
    dout = affineLayer02.Backward(dout);
    dout = (layers["ReLU1"] as ReLU).Backward(dout);
    dout = dropout01.Backward(dout);
    dout = affineLayer01.Backward(dout);

    // Collect the gradients accumulated in each affine layer.
    grads[0] = affineLayer01.dw; // cf. CommonFunctions.Gradient(m => Loss(x, t), Params[0])
    grads[1] = affineLayer01.db; // cf. CommonFunctions.Gradient(m => Loss(x, t), Params[1])
    grads[2] = affineLayer02.dw; // cf. CommonFunctions.Gradient(m => Loss(x, t), Params[2])
    grads[3] = affineLayer02.db; // cf. CommonFunctions.Gradient(m => Loss(x, t), Params[3])
    return grads;
}
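For context, a minimal training step built on this method could look like the sketch below. The `network` and `learningRate` names are assumptions for illustration, and the update assumes this `Matrix` type supports element-wise operator arithmetic; only `Gradient` and `Params` come from the snippet above.

// Hypothetical training step (sketch): one SGD update per parameter matrix.
var grads = network.Gradient(xBatch, tBatch);
for (int i = 0; i < grads.Length; i++)
{
    // Plain SGD; learningRate is an assumed hyperparameter.
    network.Params[i] -= learningRate * grads[i];
}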
public static void CheckLayer(ILayer layer, int fanInWidth, int fanInHeight, int fanInDepth,
    int batchSize, float epsilon, Random random)
{
    var accuracyCondition = 1e-2;

    layer.Initialize(fanInWidth, fanInHeight, fanInDepth, batchSize,
        Initialization.GlorotUniform, random);

    var fanIn = fanInWidth * fanInHeight * fanInDepth;
    var fanOut = layer.Width * layer.Height * layer.Depth;

    // Forward pass - set input activation in layer
    var input = Matrix<float>.Build.Random(batchSize, fanIn, random.Next());
    layer.Forward(input);

    // Set delta to 1
    var delta = Matrix<float>.Build.Dense(batchSize, fanOut, 1.0f);

    // Backward pass to calculate gradients
    layer.Backward(delta);

    // Get weights and gradients
    var parametersAndGradients = new List<ParametersAndGradients>();
    layer.AddParameresAndGradients(parametersAndGradients);

    foreach (var parameterAndGradient in parametersAndGradients)
    {
        var gradients = parameterAndGradient.Gradients;
        var parameters = parameterAndGradient.Parameters;

        var output1 = Matrix<float>.Build.Dense(batchSize, fanOut);
        var output2 = Matrix<float>.Build.Dense(batchSize, fanOut);

        for (int i = 0; i < parameters.Length; i++)
        {
            output1.Clear();
            output2.Clear();

            // Central difference: perturb one parameter by +/- epsilon
            // and compare (f(w + eps) - f(w - eps)) / (2 * eps) to the analytical gradient.
            var oldValue = parameters[i];

            parameters[i] = oldValue + epsilon;
            layer.Forward(input).CopyTo(output1);

            parameters[i] = oldValue - epsilon;
            layer.Forward(input).CopyTo(output2);

            parameters[i] = oldValue;

            output1.Subtract(output2, output1); // output1 = output1 - output2

            var grad = output1.ToRowMajorArray().Select(f => f / (2.0f * epsilon));
            var gradient = grad.Sum(); // approximated gradient
            var actual = gradients[i];

            Assert.AreEqual(gradient, actual, accuracyCondition);
        }
    }
}
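A minimal usage sketch of this gradient check follows. `GradientCheckTools` and `DenseLayer` are assumed names for the containing class and an `ILayer` implementation; the sizes and epsilon are placeholder values, not from the original source.

// Hypothetical usage (sketch): verify a dense layer's analytical gradients.
var random = new Random(42);
var layer = new DenseLayer(10); // assumed ILayer implementation with 10 units
GradientCheckTools.CheckLayer(layer,
    fanInWidth: 5, fanInHeight: 1, fanInDepth: 1,
    batchSize: 4, epsilon: 1e-4f, random: random);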
public Tensor4 CalcDelts(ILayer lastLayer, ILayer nextLayer)
{
    this.lastLayer = lastLayer;
    this.nextLayer = nextLayer;

    // Chain rule: delta = (delta from the next layer) * (local derivative), element-wise.
    var a = nextLayer.Backward();
    var b = CalcDerivs();

    delts = new Tensor4(b.dhw, 1, 1, a.bs);
    for (int d = 0; d < b.bs; d++)
    {
        for (int i = 0; i < b.dhw; i++)
        {
            delts[d, 0, 0, i] = a[d, 0, 0, i] * b[d, 0, 0, i];
        }
    }
    // Equivalent to: delts = nextLayer.Backward() * CalcDerivs();
    return delts;
}
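`CalcDerivs` is referenced but not shown; it is expected to return the activation's local derivative per element, in the same `Tensor4` shape. A sketch for a sigmoid activation follows, assuming an `outputs` field caching the forward-pass activations; both the field and the sigmoid choice are assumptions, not from the original source.

// Hypothetical sketch: local derivative of a sigmoid activation, f'(z) = y * (1 - y).
// Assumes `outputs` holds the forward-pass activations y with matching Tensor4 indexing.
private Tensor4 CalcDerivs()
{
    var derivs = new Tensor4(outputs.dhw, 1, 1, outputs.bs);
    for (int d = 0; d < outputs.bs; d++)
    {
        for (int i = 0; i < outputs.dhw; i++)
        {
            var y = outputs[d, 0, 0, i];
            derivs[d, 0, 0, i] = y * (1.0f - y);
        }
    }
    return derivs;
}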
public void Train(Vol x, object y)
{
    net.Forward(x, true);
    var cost_loss = net.Backward(y);
    //Debug.Log("loss:" + cost_loss);

    var l2_decay_loss = 0.0f;
    var l1_decay_loss = 0.0f;

    List<ParamsAndGrads> pglist = new List<ParamsAndGrads>();
    net.GetParamsAndGrads(pglist);

    for (var i = 0; i < pglist.Count; i++)
    {
        var pg = pglist[i];
        var p = pg.param;
        var g = pg.grads;

        var l2_decay = this.l2_decay * pg.l2_decay_mul;
        var l1_decay = this.l1_decay * pg.l1_decay_mul;

        var plen = p.Length;
        for (var j = 0; j < plen; j++)
        {
            // Accumulate the regularization losses.
            l2_decay_loss += l2_decay * p[j] * p[j] / 2;
            l1_decay_loss += l1_decay * Math.Abs(p[j]);

            // Raw gradient plus the L1/L2 decay terms.
            var l1grad = l1_decay * (p[j] > 0 ? 1 : -1);
            var l2grad = l2_decay * p[j];
            var gij = l2grad + l1grad + g[j];

            // SGD step using the decayed gradient. (The original updated with g[j]
            // alone, which silently discarded the decay terms computed above.)
            p[j] += -this.learning_rate * gij;

            // Remember to zero the gradient for the next batch.
            g[j] = 0.0f;
        }
    }
}
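A minimal training-loop sketch around this method follows. `trainer`, `trainingSet`, the sample fields, and the epoch count are assumptions for illustration; only the `Train(Vol, object)` signature comes from the snippet above.

// Hypothetical usage (sketch): run the trainer over a dataset for a few epochs.
for (var epoch = 0; epoch < 10; epoch++)
{
    foreach (var sample in trainingSet)   // trainingSet is an assumed collection
    {
        var x = sample.Input;             // Vol holding the input activations
        trainer.Train(x, sample.Label);   // one SGD step with L1/L2 decay
    }
}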