/// <summary>
/// Backpropagates the loss for target <paramref name="y"/> through the network:
/// first through the regression loss layer, then through every layer in reverse order.
/// </summary>
/// <param name="y">Target output (dimension index and value) for the regression loss layer.</param>
/// <returns>The regression loss for this example.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the network's loss layer is not a <see cref="RegressionLayer"/>.
/// </exception>
public float Backward(ClassOutput y)
{
    // BUG FIX: the previous "as" cast was dereferenced without a null check,
    // which raised an opaque NullReferenceException whenever the loss layer
    // was not a RegressionLayer. Fail with a diagnosable exception instead.
    var regressionLayer = this.LossLayer as RegressionLayer;
    if (regressionLayer == null)
    {
        throw new InvalidOperationException("Backward(ClassOutput) requires the network's loss layer to be a RegressionLayer.");
    }

    var loss = regressionLayer.Backward(y);

    // Propagate gradients backwards through every layer (loss layer included).
    for (int i = Layers.Count - 1; i >= 0; i--)
    {
        Layers[i].Backward();
    }

    return loss;
}
/// <summary>
/// Computes the squared-error regression loss for a single target dimension
/// and writes its gradient into the input volume.
/// </summary>
/// <param name="y">Target: <c>dim</c> selects the output component, <c>val</c> is the desired value.</param>
/// <returns>The loss, 0.5 * (prediction - target)^2.</returns>
public double Backward(ClassOutput y)
{
    var input = this.in_Act;

    // Reset the input gradient buffer before writing the new gradient.
    input.DW = new double[input.W.Length];

    var targetDim = y.dim;
    var residual = input.W[targetDim] - y.val;
    input.DW[targetDim] = residual;

    // 0.5 * dy^2 so that d(loss)/d(prediction) = dy, matching the gradient written above.
    return 0.5 * residual * residual;
}
/// <summary>
/// Computes the squared-error regression loss for a single target dimension
/// and writes its gradient into the input volume (single-precision variant).
/// </summary>
/// <param name="y">Target: <c>dim</c> selects the output component, <c>val</c> is the desired value.</param>
/// <returns>The loss, 0.5 * (prediction - target)^2.</returns>
public float Backward(ClassOutput y)
{
    var input = this.input;

    // Reset the input gradient buffer before writing the new gradient.
    input.DW = new float[input.W.Length];

    var targetDim = y.dim;
    var residual = input.W[targetDim] - y.val;
    input.DW[targetDim] = residual;

    // 0.5 * dy^2 so that d(loss)/d(prediction) = dy, matching the gradient written above.
    return 0.5f * residual * residual;
}
/// <summary>
/// Runs one forward/backward pass for a single example and, once
/// <c>batch_size</c> calls have accumulated, applies one parameter update
/// using the configured optimization method (sgd, momentum, nesterov,
/// adagrad, windowgrad, adadelta or adam).
/// </summary>
/// <param name="x">Input volume for this training example.</param>
/// <param name="y">Target output (class index or regression target).</param>
/// <returns>A <see cref="TrainingResult"/> carrying the cost loss plus L1/L2 decay losses.</returns>
public TrainingResult Train(Vol x, ClassOutput y)
{
    // Flag whether the network ends in a regression loss layer.
    this.Regression = Net.LossLayer is RegressionLayer;

    this.Net.Forward(x, true); // forward in training mode
    var cost_loss = this.Net.Backward(y);
    var l2_decay_loss = 0.0;
    var l1_decay_loss = 0.0;

    this.k++;
    if (this.k % this.batch_size == 0)
    {
        var pglist = this.Net.GetParamsAndGrads();

        // Lazily allocate optimizer state ONCE.
        // BUG FIX: the previous code cleared and rebuilt gsum on every batch,
        // which (a) wiped all accumulated momentum / moment estimates between
        // updates and (b) made this "gsum.Count == 0" guard unreachable, so
        // xsum was never allocated and adam/adadelta indexed an empty list.
        if (this.gsum.Count == 0 && (this.method != TrainingMethod.sgd || this.momentum > 0.0))
        {
            for (int i = 0; i < pglist.Count; i++)
            {
                this.gsum.Add(new double[pglist[i].Params.Length]);
                if (this.method == TrainingMethod.adam || this.method == TrainingMethod.adadelta)
                {
                    this.xsum.Add(new double[pglist[i].Params.Length]);
                }
            }
        }

        for (int i = 0; i < pglist.Count; i++)
        {
            var pg = pglist[i];
            var p = pg.Params;
            var g = pg.Grads;

            // Per-parameter-group decay multipliers (e.g. biases typically use 0).
            var l2_decay_mul = pg.l2_decay_mul;
            var l1_decay_mul = pg.l1_decay_mul;
            var l2_decay = this.l2_decay * l2_decay_mul;
            // BUG FIX: l1 decay previously used l2_decay_mul by mistake.
            var l1_decay = this.l1_decay * l1_decay_mul;

            // gsum is only populated for methods that keep history; plain sgd
            // without momentum never touches gsumi, so null is safe there.
            var gsumi = this.gsum.Count > 0 ? this.gsum[i] : null;

            var plen = p.Length;
            for (int j = 0; j < plen; j++)
            {
                l2_decay_loss += l2_decay * p[j] * p[j] / 2; // accumulate weight decay loss
                l1_decay_loss += l1_decay * Math.Abs(p[j]);

                var l1grad = l1_decay * (p[j] > 0 ? 1 : -1);
                var l2grad = l2_decay * p[j];

                // Raw gradient including decay, averaged over the batch.
                var gij = (l2grad + l1grad + g[j]) / this.batch_size;

                if (this.method == TrainingMethod.adam)
                {
                    var xsumi = this.xsum[i];
                    gsumi[j] = gsumi[j] * this.beta1 + (1 - this.beta1) * gij;       // update biased first moment estimate
                    xsumi[j] = xsumi[j] * this.beta2 + (1 - this.beta2) * gij * gij; // update biased second moment estimate
                    // BUG FIX: bias correction divides by (1 - beta^t); the old
                    // code multiplied, which shrinks early updates instead of
                    // compensating for the zero-initialized moments
                    // (Kingma & Ba, "Adam", Algorithm 1).
                    var biasCorr1 = gsumi[j] / (1 - Math.Pow(this.beta1, this.k)); // bias-corrected first moment
                    var biasCorr2 = xsumi[j] / (1 - Math.Pow(this.beta2, this.k)); // bias-corrected second moment
                    var dx = -this.learning_rate * biasCorr1 / (Math.Sqrt(biasCorr2) + this.eps);
                    p[j] += dx;
                }
                else if (this.method == TrainingMethod.adagrad)
                {
                    // adagrad update: accumulate squared gradients over all history.
                    gsumi[j] = gsumi[j] + gij * gij;
                    var dx = -this.learning_rate / Math.Sqrt(gsumi[j] + this.eps) * gij;
                    p[j] += dx;
                }
                else if (this.method == TrainingMethod.windowgrad)
                {
                    // this is adagrad but with a moving window weighted average
                    // so the gradient is not accumulated over the entire history of the run.
                    // it's also referred to as Idea #1 in Zeiler paper on Adadelta.
                    gsumi[j] = this.ro * gsumi[j] + (1 - this.ro) * gij * gij;
                    var dx = -this.learning_rate / Math.Sqrt(gsumi[j] + this.eps) * gij; // eps added for better conditioning
                    p[j] += dx;
                }
                else if (this.method == TrainingMethod.adadelta)
                {
                    var xsumi = this.xsum[i];
                    gsumi[j] = this.ro * gsumi[j] + (1 - this.ro) * gij * gij;
                    var dx = -Math.Sqrt((xsumi[j] + this.eps) / (gsumi[j] + this.eps)) * gij;
                    xsumi[j] = this.ro * xsumi[j] + (1 - this.ro) * dx * dx; // yes, xsum lags behind gsum by 1.
                    p[j] += dx;
                }
                else if (this.method == TrainingMethod.nesterov)
                {
                    var dx = gsumi[j];
                    gsumi[j] = gsumi[j] * this.momentum + this.learning_rate * gij;
                    dx = this.momentum * dx - (1.0 + this.momentum) * gsumi[j];
                    p[j] += dx;
                }
                else // assume SGD
                {
                    if (this.momentum > 0.0)
                    {
                        // momentum update
                        var dx = this.momentum * gsumi[j] - this.learning_rate * gij; // step
                        gsumi[j] = dx;  // back this up for next iteration of momentum
                        p[j] += dx;     // apply corrected gradient
                    }
                    else
                    {
                        // vanilla sgd
                        p[j] += -this.learning_rate * gij;
                    }
                }

                g[j] = 0.0; // zero out gradient so that we can begin accumulating anew
            }
        }
    }

    return new TrainingResult()
    {
        l2_decay_loss = l2_decay_loss,
        l1_decay_loss = l1_decay_loss,
        cost_loss = cost_loss,
        softmax_loss = cost_loss,
        loss = cost_loss + l1_decay_loss + l2_decay_loss
    };
}
/// <summary>
/// Records the reward for the most recent action, stores a new experience
/// tuple (s_t, a_t, r_t, s_{t+1}) once enough history exists, and performs
/// one batch of TD training on randomly sampled experiences.
/// </summary>
/// <param name="reward">Reward observed after the last action.</param>
public void backward(double reward)
{
    this.latest_reward = reward;
    this.average_reward_window.Add(reward);

    // Slide the reward window forward by one step.
    this.reward_window.RemoveAt(0);
    this.reward_window.Add(reward);

    if (!this.learning)
    {
        return;
    }

    // Book-keeping: it is now time t+1.
    this.age += 1;

    // Store the transition as a new experience, provided an appropriate
    // number of state measurements already exist in the windows.
    if (this.forward_passes > this.temporal_window + 1)
    {
        var n = this.window_size;
        var exp = new Experience();
        exp.state0 = this.net_window[n - 2];
        exp.action0 = this.action_window[n - 2];
        exp.reward0 = this.reward_window[n - 2];
        exp.state1 = this.net_window[n - 1];

        if (this.experience.Count < this.experience_size)
        {
            this.experience.Add(exp);
        }
        else
        {
            // Replay memory is finite: overwrite a random slot.
            var slot = Util.Randi(0, this.experience_size);
            this.experience[(int)slot] = exp;
        }
    }

    // Learn from replayed experiences once enough samples exist —
    // this is where the magic happens.
    if (this.experience.Count > this.start_learn_threshold)
    {
        var avcost = 0.0;
        for (var sample = 0; sample < this.tdtrainer.batch_size; sample++)
        {
            var pick = Util.Randi(0, this.experience.Count);
            var chosen = this.experience[(int)pick];

            var input = new Vol(1, 1, this.net_inputs);
            input.W = chosen.state0;

            // TD target: r_t + gamma * max_a Q(s_{t+1}, a)
            var best = this.policy(chosen.state1);
            var target = chosen.reward0 + this.gamma * best.value;

            ClassOutput ystruct = new ClassOutput() { dim = (int)chosen.action0, val = target };
            var result = this.tdtrainer.Train(input, ystruct);
            avcost += result.loss;
        }

        avcost = avcost / this.tdtrainer.batch_size;
        this.average_loss_window.Add(avcost);
    }
}