Example #1
        public float Backward(ClassOutput y)
        {
            // the loss layer computes the loss and seeds the gradient;
            // the layers then backpropagate in reverse order
            var loss = ((RegressionLayer)this.LossLayer).Backward(y);

            for (int i = Layers.Count - 1; i >= 0; i--)
            {
                Layers[i].Backward();
            }

            return loss;
        }
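For context, a minimal usage sketch of this Backward pass. The names net and inputVol, and the exact ClassOutput shape, are assumptions for illustration, not part of the example above:

        // Hypothetical driver: one forward/backward pass over a single sample.
        var target = new ClassOutput { dim = 0, val = 1.0f }; // regress output 0 toward 1.0
        net.Forward(inputVol, true);     // forward pass, caching activations for backprop
        var loss = net.Backward(target); // loss layer seeds the gradient, layers run in reverse
        Console.WriteLine($"regression loss: {loss}");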
Example #2
        public double Backward(ClassOutput y)
        {
            var x = this.in_Act;

            x.DW = new double[x.W.Length]; // zero out the gradient of input Vol
            // L2 loss on the requested dimension: loss = 0.5 * (x_i - y_i)^2,
            // so the gradient w.r.t. the activation is simply dy = x_i - y_i
            var loss = 0.0;
            var i    = y.dim;
            var yi   = y.val;
            var dy   = x.W[i] - yi;

            x.DW[i] = dy;
            loss   += 0.5 * dy * dy;

            return loss;
        }
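The stored gradient follows directly from the loss definition: with loss = 0.5 * (x_i - y_i)^2, the derivative with respect to x_i is exactly the dy written into x.DW[i]. A self-contained finite-difference check of that claim (all names local to this sketch):

        // For L(v) = 0.5 * (v - y)^2 the analytic gradient v - y should match
        // the central-difference estimate (L(v + h) - L(v - h)) / (2h).
        double v = 1.3, y = 0.7, h = 1e-6;
        Func<double, double> L = t => 0.5 * (t - y) * (t - y);
        double analytic = v - y;                           // the dy above
        double numeric  = (L(v + h) - L(v - h)) / (2 * h); // finite difference
        Console.WriteLine($"{analytic} ~ {numeric}");      // both come out ≈ 0.6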
Example #3
        public float Backward(ClassOutput y)
        {
            var x = this.input;

            x.DW = new float[x.W.Length]; // zero out the gradient of input Vol
            // single-precision variant of the same regression backward pass:
            // loss = 0.5 * (x_i - y_i)^2, gradient dy = x_i - y_i
            var loss = 0.0f;
            var i    = y.dim;
            var yi   = y.val;
            var dy   = x.W[i] - yi;

            x.DW[i] = dy;
            loss   += 0.5f * dy * dy;

            return loss;
        }
Example #4
        public TrainingResult Train(Vol x, ClassOutput y)
        {
            // a regression loss layer means real-valued targets rather than class labels
            this.Regression = this.Net.LossLayer is RegressionLayer;

            this.Net.Forward(x, true); // forward pass, caching activations for backprop

            var cost_loss = this.Net.Backward(y);

            var l2_decay_loss = 0.0;
            var l1_decay_loss = 0.0;

            this.k++;

            if (this.k % this.batch_size == 0)
            {
                var pglist = this.Net.GetParamsAndGrads();

                // lazily initialize the accumulators on the first batch only;
                // re-zeroing them every batch would erase all momentum/Adam state
                if (this.gsum.Count == 0 && (this.method != TrainingMethod.sgd || this.momentum > 0.0))
                {
                    for (int i = 0; i < pglist.Count; i++)
                    {
                        this.gsum.Add(new double[pglist[i].Params.Length]);
                        if (this.method == TrainingMethod.adam || this.method == TrainingMethod.adadelta)
                        {
                            this.xsum.Add(new double[pglist[i].Params.Length]);
                        }
                    }
                }

                for (int i = 0; i < pglist.Count; i++)
                {
                    var pg = pglist[i];
                    var p  = pg.Params;
                    var g  = pg.Grads;

                    var l2_decay_mul = pg.l2_decay_mul;
                    var l1_decay_mul = pg.l1_decay_mul;
                    var l2_decay     = this.l2_decay * l2_decay_mul;
                    var l1_decay     = this.l1_decay * l1_decay_mul;

                    var plen = p.Length;
                    for (int j = 0; j < plen; j++)
                    {
                        l2_decay_loss += l2_decay * p[j] * p[j] / 2;
                        l1_decay_loss += l1_decay * Math.Abs(p[j]);
                        var l1grad = l1_decay * (p[j] > 0 ? 1 : -1);
                        var l2grad = l2_decay * (p[j]);
                        var gij    = (l2grad + l1grad + g[j]) / this.batch_size;

                        // accumulators stay empty for plain SGD without momentum
                        var gsumi = this.gsum.Count > 0 ? this.gsum[i] : null;

                        if (this.method == TrainingMethod.adam)
                        {
                            var xsumi = this.xsum[i];
                            gsumi[j] = gsumi[j] * this.beta1 + (1 - this.beta1) * gij;
                            xsumi[j] = xsumi[j] * this.beta2 + (1 - this.beta2) * gij * gij; // update biased second moment estimate
                            var biasCorr1 = gsumi[j] / (1 - Math.Pow(this.beta1, this.k));   // bias-corrected first moment estimate
                            var biasCorr2 = xsumi[j] / (1 - Math.Pow(this.beta2, this.k));   // bias-corrected second moment estimate
                            var dx        = -this.learning_rate * biasCorr1 / (Math.Sqrt(biasCorr2) + this.eps);
                            p[j] += dx;
                        }
                        else if (this.method == TrainingMethod.adagrad)
                        {
                            // adagrad update
                            gsumi[j] = gsumi[j] + gij * gij;
                            var dx = -this.learning_rate / Math.Sqrt(gsumi[j] + this.eps) * gij;
                            p[j] += dx;
                        }
                        else if (this.method == TrainingMethod.windowgrad)
                        {
                            // this is adagrad but with a moving window weighted average
                            // so the gradient is not accumulated over the entire history of the run.
                            // it's also referred to as Idea #1 in Zeiler paper on Adadelta. Seems reasonable to me!
                            gsumi[j] = this.ro * gsumi[j] + (1 - this.ro) * gij * gij;
                            var dx = -this.learning_rate / Math.Sqrt(gsumi[j] + this.eps) * gij; // eps added for better conditioning
                            p[j] += dx;
                        }
                        else if (this.method == TrainingMethod.adadelta)
                        {
                            var xsumi = this.xsum[i];
                            gsumi[j] = this.ro * gsumi[j] + (1 - this.ro) * gij * gij;
                            var dx = -Math.Sqrt((xsumi[j] + this.eps) / (gsumi[j] + this.eps)) * gij;
                            xsumi[j] = this.ro * xsumi[j] + (1 - this.ro) * dx * dx; // yes, xsum lags behind gsum by 1.
                            p[j]    += dx;
                        }
                        else if (this.method == TrainingMethod.nesterov)
                        {
                            var dx = gsumi[j];
                            gsumi[j] = gsumi[j] * this.momentum + this.learning_rate * gij;
                            dx       = this.momentum * dx - (1.0 + this.momentum) * gsumi[j];
                            p[j]    += dx;
                        }
                        else
                        {
                            // assume SGD
                            if (this.momentum > 0.0)
                            {
                                // momentum update
                                var dx = this.momentum * gsumi[j] - this.learning_rate * gij; // step
                                gsumi[j] = dx;                                                // back this up for next iteration of momentum
                                p[j]    += dx;                                                // apply corrected gradient
                            }
                            else
                            {
                                // vanilla sgd
                                p[j] += -this.learning_rate * gij;
                            }
                        }
                        g[j] = 0.0; // zero out gradient so that we can begin accumulating anew
                    }
                }
            }
            return new TrainingResult
            {
                l2_decay_loss = l2_decay_loss,
                l1_decay_loss = l1_decay_loss,
                cost_loss     = cost_loss,
                softmax_loss  = cost_loss,
                loss          = cost_loss + l1_decay_loss + l2_decay_loss
            };
        }
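To make the adam branch concrete, here is the same update applied to a single scalar parameter, with the commonly used default hyperparameters (the values are illustrative, not necessarily what this trainer is configured with):

        double p = 0.5, g = 0.2;             // one parameter and its raw gradient
        double m = 0.0, v = 0.0;             // first/second moment accumulators (gsum/xsum)
        double beta1 = 0.9, beta2 = 0.999, eps = 1e-8, lr = 0.01;
        int t = 1;                           // step counter (this.k in Train)

        m = beta1 * m + (1 - beta1) * g;     // biased first moment estimate
        v = beta2 * v + (1 - beta2) * g * g; // biased second moment estimate
        double mHat = m / (1 - Math.Pow(beta1, t)); // bias correction, as in the fix above
        double vHat = v / (1 - Math.Pow(beta2, t));
        p += -lr * mHat / (Math.Sqrt(vHat) + eps);  // parameter moves against the gradient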
Example #5
        public void backward(double reward)
        {
            this.latest_reward = reward;
            this.average_reward_window.Add(reward);

            // maintain a fixed-length sliding window of recent rewards
            this.reward_window.RemoveAt(0);
            this.reward_window.Add(reward);

            if (!this.learning)
            {
                return;
            }

            // various book-keeping
            this.age += 1;

            // it is time t+1 and we have to store (s_t, a_t, r_t, s_{t+1}) as new experience
            // (given that an appropriate number of state measurements already exist, of course)
            if (this.forward_passes > this.temporal_window + 1)
            {
                var e = new Experience();
                var n = this.window_size;
                e.state0  = this.net_window[n - 2];
                e.action0 = this.action_window[n - 2];
                e.reward0 = this.reward_window[n - 2];
                e.state1  = this.net_window[n - 1];
                if (this.experience.Count < this.experience_size)
                {
                    this.experience.Add(e);
                }
                else
                {
                    // replace. finite memory!
                    var ri = Util.Randi(0, this.experience_size);
                    this.experience[(int)ri] = e;
                }
            }

            // learn based on experience, once we have some samples to go on
            // this is where the magic happens...
            if (this.experience.Count > this.start_learn_threshold)
            {
                var avcost = 0.0;
                for (var k = 0; k < this.tdtrainer.batch_size; k++)
                {
                    var re = Util.Randi(0, this.experience.Count);
                    var e  = this.experience[(int)re];
                    var x  = new Vol(1, 1, this.net_inputs);
                    x.W = e.state0;
                    // TD target: r = r_t + gamma * max_a Q(s_{t+1}, a)
                    var maxact = this.policy(e.state1);
                    var r      = e.reward0 + this.gamma * maxact.value;
                    var ystruct = new ClassOutput { dim = (int)e.action0, val = r };
                    var loss = this.tdtrainer.Train(x, ystruct);
                    avcost += loss.loss;
                }
                avcost = avcost / this.tdtrainer.batch_size;
                this.average_loss_window.Add(avcost);
            }
        }
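The key line in the learning loop is the temporal-difference target r = reward0 + gamma * max_a Q(state1, a), with the value network's own estimate standing in for the future return. A minimal sketch of that computation, where qValues is a hypothetical stand-in for a forward pass of the value network (requires System.Linq):

        double gamma = 0.9, reward0 = 1.0;
        double[] qValues = { 0.1, 0.7, 0.3 };            // assumed Q(state1, a) per action
        double target = reward0 + gamma * qValues.Max(); // 1.0 + 0.9 * 0.7 = 1.63
        // `target` plays the role of ystruct.val fed to tdtrainer.Train above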