Example #1
0
        /// <summary>
        /// Backpropagates <paramref name="delta"/> through the argmax operation:
        /// the incoming gradient is routed back only to the input positions that
        /// were selected by the argmax along <c>Axis</c>; all other entries get zero.
        /// </summary>
        /// <param name="delta">Gradient flowing in from downstream nodes.</param>
        /// <param name="bp">Backpropagation context that accumulates gradients per node.</param>
        public override void Backward(Tensor <Type> delta, Backpropagation bp)
        {
            // Lazily build the argmax-index tensor on the first backward pass
            // and cache it for reuse on subsequent passes.
            if (argmax is null)
            {
                argmax = Op.Argmax(x, Axis, keepDims: true);
            }

            // Scatter delta back into the shape of x at the argmax positions.
            var gradX = UnArgmax <Type> .Create(delta, argmax, Axis, x.Shape);
            bp.PushGradientTo(x, gradX);
        }
Example #2
0
        /// <summary>
        /// Builds a symbolic model trained with REINFORCE to predict a category
        /// <c>c</c> from two integer inputs <c>a</c> and <c>b</c> (each embedded
        /// via the shared lookup table <c>L</c>). The reward is 1 when the sampled
        /// category equals <c>c_gold</c>, 0 otherwise.
        /// NOTE(review): the class name suggests c_gold encodes a*b (presumably
        /// modulo N, since the output has N categories) — confirm against NextSample().
        /// </summary>
        /// <param name="N">Vocabulary size for a, b and the output category (default 7).</param>
        public ReinforceMul(int N = 7)
        {
            this.N = N;
            // Layer widths: embedding rows are dim0-wide, hidden layer is dim1 = N*N,
            // output layer has dim2 = N categories.
            int dim0 = N, dim1 = N * N, dim2 = N;

            // Symbolic scalar inputs: the two operands and the gold answer.
            var a      = Op.Scalar <int>("a");
            var b      = Op.Scalar <int>("b");
            var c_gold = Op.Scalar <int>("c_gold");
            // Shared embedding matrix (N x dim0), uniformly initialized in [-1, 1).
            var L      = Op.Shared(NN.Random.Uniform(-1f, 1f, N, dim0), "L");

            // Three linear projections: each operand's embedding plus their
            // elementwise product (gives the model a multiplicative interaction term).
            var xa  = Blocks.Linear("Wa", L[a], dim1);
            var xb  = Blocks.Linear("Wb", L[b], dim1);
            var xab = Blocks.Linear("Wab", L[a] * L[b], dim1);

            // Hidden tanh layer followed by an output projection and softmax
            // over the N candidate categories.
            var x = Blocks.Linear("Wc", Op.Tanh(xa + xb + xab), dim2);
            var y = Op.Softmax(x);

            // Sample a category from y; "baseline" is the name of the learned
            // reward baseline used for variance reduction.
            var c = Operators.Scalars.ReinforceCategorical.Create(y, "baseline");

            c.Name = nameof(c);
            // Binary reward: 1.0f when the sampled category matches the gold label.
            var eq = Op.Eq(c_gold, c);

            c.Reward      = eq.As <float>();
            c.Reward.Name = "reward";
            this.baseline = c.Baseline;

            // REINFORCE loss: negative of the sampled node (gradients flow through
            // the Reinforce operator), plus L2 regularization on every shared
            // weight tensor reachable from the loss.
            var loss    = -c.As <float>();
            var weights = loss.FindAll <Tensor <float> .Shared>();

            foreach (var W in weights)
            {
                loss += 0.001f * Op.Norm2(W);
            }
            loss.Name = nameof(loss);

            var grad = Op.Grad(loss); // reward isn't differentiable but Reinforce will still backpropagate gradients

            // Plain SGD updates: W <- W - lr * dLoss/dW for each weight.
            var lr      = Op.Scalar <float>("lr");
            var updates = new OrderedDictionary();

            foreach (var W in grad.Keys)
            {
                updates[W] = W - lr * grad[W];
            }

            //var dB = (Scalar<float>)loss.Backpropagation.ScalarDerivatives[c.Baseline];
            //updates[c.Baseline] = c.Baseline - lr * dB;
            // Baseline tracks the reward with an exponential moving average
            // (decay 0.9) instead of a gradient step on the baseline itself.
            updates[c.Baseline] = c.Baseline * 0.9f + 0.1f * c.Reward;

            // Compile the training step: feeds (a, b, c_gold, lr), returns the
            // reward for this sample, and applies all parameter updates.
            var train_ = Op.Function(new IVar[] { a, b, c_gold, lr }, c.Reward, updates);

            Train = lr_ => {
                var sample = NextSample();
                return((int)train_(sample.Item1, sample.Item2, sample.Item3, lr_));
            };

            // Inference path: greedy prediction — argmax over the (pre-softmax)
            // output scores; no sampling, no parameter updates.
            Mul = Op.Function(input: (a, b), output: Op.Argmax(x));
        }