Example #1: SGD with momentum
        public void Optimize(TensorOld target, TensorOld gradient)
        {
            // First update for this tensor: no velocity cached yet, so
            // take a plain gradient step and store LearningRate * g as
            // the initial velocity.
            if (!last.ContainsKey(gradient))
            {
                last[gradient] = gradient.GetSameShape();
                TensorOld.Apply(gradient, last[gradient], g => LearningRate * g);
                target.Minus(last[gradient]);
                return;
            }

            var prev = last[gradient];

            // Classical momentum: v = LearningRate * g + Moment * v,
            // then target -= v.
            TensorOld.Apply(prev, gradient, prev, (p, g) => g * LearningRate + p * Moment);
            target.Minus(prev);
        }
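
For context, here is a minimal, self-contained sketch of the same classical-momentum update on plain double[] arrays, for readers without the TensorOld type. The MomentumSgd and Step names are hypothetical illustrations, not part of the library above; like the example, it keys the velocity buffer by the gradient object's reference identity.

        using System;
        using System.Collections.Generic;

        class MomentumSgd
        {
            private readonly double lr;      // learning rate
            private readonly double moment;  // momentum coefficient, e.g. 0.9
            private readonly Dictionary<double[], double[]> velocity =
                new Dictionary<double[], double[]>();

            public MomentumSgd(double lr, double moment)
            {
                this.lr = lr;
                this.moment = moment;
            }

            public void Step(double[] target, double[] gradient)
            {
                // Lazily create a zero-initialized velocity buffer per tensor.
                if (!velocity.TryGetValue(gradient, out var v))
                {
                    v = new double[gradient.Length];
                    velocity[gradient] = v;
                }

                for (int i = 0; i < target.Length; i++)
                {
                    // v = moment * v + lr * g; then target -= v.
                    v[i] = moment * v[i] + lr * gradient[i];
                    target[i] -= v[i];
                }
            }
        }

Because the velocity starts at zero, the very first call reduces to plain SGD, which is why the TensorOld version above can special-case the first step without changing the math.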
Example #2: Adam
        public void Optimize(TensorOld target, TensorOld gradient)
        {
            // Lazily allocate per-tensor Adam state (moment buffers M, V
            // and a scratch tensor T), keyed by the gradient tensor.
            if (!dict.ContainsKey(gradient))
            {
                dict[gradient] = new AdamCache(gradient.Shape);
            }

            var c = dict[gradient];

            // Exponential moving averages of the gradient (M) and of its
            // elementwise square (V).
            TensorOld.Apply(c.M, gradient, c.M, (m, g) => Beta1 * m + (1 - Beta1) * g);
            TensorOld.Apply(c.V, gradient, c.V, (v, g) => Beta2 * v + (1 - Beta2) * g * g);
            // Update step Alpha * m / (sqrt(v) + E); note this variant
            // omits Adam's bias correction for the early steps.
            TensorOld.Apply(c.M, c.V, c.T, (m, v) => Alpha * m / (Math.Sqrt(v) + E));
            target.Minus(c.T);
        }
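
The example above skips Adam's bias-correction terms, which compensate for M and V starting at zero. Below is a self-contained sketch of the full update on plain arrays, with bias correction included; AdamState, Adam, and Step are hypothetical names for illustration, not part of the library above.

        using System;

        // Per-parameter Adam state: moment buffers plus a timestep counter.
        class AdamState
        {
            public double[] M;  // first moment (moving average of gradients)
            public double[] V;  // second moment (moving average of squared gradients)
            public int T;       // steps taken so far, used for bias correction

            public AdamState(int n)
            {
                M = new double[n];
                V = new double[n];
            }
        }

        class Adam
        {
            private readonly double alpha, beta1, beta2, eps;

            public Adam(double alpha = 0.001, double beta1 = 0.9,
                        double beta2 = 0.999, double eps = 1e-8)
            {
                this.alpha = alpha;
                this.beta1 = beta1;
                this.beta2 = beta2;
                this.eps = eps;
            }

            public void Step(AdamState s, double[] target, double[] gradient)
            {
                s.T++;
                for (int i = 0; i < target.Length; i++)
                {
                    s.M[i] = beta1 * s.M[i] + (1 - beta1) * gradient[i];
                    s.V[i] = beta2 * s.V[i] + (1 - beta2) * gradient[i] * gradient[i];

                    // Bias-corrected estimates; without these, the first few
                    // steps are biased toward zero because M and V start at zero.
                    double mHat = s.M[i] / (1 - Math.Pow(beta1, s.T));
                    double vHat = s.V[i] / (1 - Math.Pow(beta2, s.T));

                    target[i] -= alpha * mHat / (Math.Sqrt(vHat) + eps);
                }
            }
        }

Once T grows, the correction factors approach 1 and this sketch converges to the same per-element update the TensorOld example computes.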