/// <summary>
/// Backward pass for the argmax operator: scatters the incoming gradient
/// back onto the positions of <c>x</c> that won the argmax reduction.
/// </summary>
/// <param name="delta">Gradient flowing in from downstream consumers.</param>
/// <param name="bp">Backpropagation context collecting per-input gradients.</param>
public override void Backward(Tensor<Type> delta, Backpropagation bp)
{
    // Lazily compute and cache the argmax indices along Axis; keepDims keeps
    // the reduced axis so the index tensor stays aligned with x's shape.
    // (?? is used rather than == null in case the symbolic type overloads ==.)
    argmax = argmax ?? Op.Argmax(x, Axis, keepDims: true);

    // UnArgmax routes delta to the argmax locations of x (the inverse of the
    // forward reduction); every other position receives zero gradient.
    var gradientForX = UnArgmax<Type>.Create(delta, argmax, Axis, x.Shape);
    bp.PushGradientTo(x, gradientForX);
}
/// <summary>
/// Builds the symbolic graph for learning the N-by-N multiplication-style
/// table (a, b) -> c, where the answer c is sampled from a softmax via a
/// REINFORCE categorical operator with a learned moving-average baseline.
/// </summary>
/// <param name="N">Table size; inputs a, b and target c_gold are integer
/// scalars, presumably in [0, N) — TODO confirm against NextSample().</param>
public ReinforceMul(int N = 7)
{
    this.N = N;
    // dim0: embedding width, dim1: hidden layer width, dim2: output classes.
    int dim0 = N, dim1 = N * N, dim2 = N;

    // Symbolic integer inputs: the two operands and the gold answer.
    var a = Op.Scalar<int>("a");
    var b = Op.Scalar<int>("b");
    var c_gold = Op.Scalar<int>("c_gold");

    // Shared embedding table L: one row per symbol, uniform init in [-1, 1).
    var L = Op.Shared(NN.Random.Uniform(-1f, 1f, N, dim0), "L");

    // Hidden layer mixes both embeddings plus their elementwise product,
    // giving the model a direct multiplicative interaction term.
    var xa = Blocks.Linear("Wa", L[a], dim1);
    var xb = Blocks.Linear("Wb", L[b], dim1);
    var xab = Blocks.Linear("Wab", L[a] * L[b], dim1);
    var x = Blocks.Linear("Wc", Op.Tanh(xa + xb + xab), dim2);
    var y = Op.Softmax(x);

    // Sample the answer c from the softmax distribution. The REINFORCE
    // operator owns a baseline shared variable (registered as "baseline").
    var c = Operators.Scalars.ReinforceCategorical.Create(y, "baseline");
    c.Name = nameof(c);

    // Reward is 1 when the sampled c equals the gold answer, else 0.
    var eq = Op.Eq(c_gold, c);
    c.Reward = eq.As<float>();
    c.Reward.Name = "reward";
    this.baseline = c.Baseline;

    // Scalar loss used to drive backprop through the sampled choice; the
    // forward value (-c) is presumably not the real objective — the
    // ReinforceCategorical operator substitutes a reward-based gradient
    // during backprop. NOTE(review): confirm against the operator's impl.
    var loss = -c.As<float>();
    // L2-regularize every shared float tensor reachable from the loss.
    var weights = loss.FindAll<Tensor<float>.Shared>();
    foreach (var W in weights)
    {
        loss += 0.001f * Op.Norm2(W);
    }
    loss.Name = nameof(loss);

    var grad = Op.Grad(loss); // reward isn't differentiable but Reinforce will still backpropagate gradients

    // Plain SGD updates for all weights that received a gradient.
    var lr = Op.Scalar<float>("lr");
    var updates = new OrderedDictionary();
    foreach (var W in grad.Keys)
    {
        updates[W] = W - lr * grad[W];
    }
    // Gradient-based baseline update kept for reference:
    //var dB = (Scalar<float>)loss.Backpropagation.ScalarDerivatives[c.Baseline];
    //updates[c.Baseline] = c.Baseline - lr * dB;
    // Instead, the baseline tracks an exponential moving average of the
    // reward (decay 0.9), the standard variance-reduction choice.
    updates[c.Baseline] = c.Baseline * 0.9f + 0.1f * c.Reward;

    // Compiled training step: feeds (a, b, c_gold, lr), applies updates,
    // and returns the reward for this sample.
    var train_ = Op.Function(new IVar[] { a, b, c_gold, lr }, c.Reward, updates);
    Train = lr_ =>
    {
        var sample = NextSample();
        return ((int)train_(sample.Item1, sample.Item2, sample.Item3, lr_));
    };
    // Inference: deterministic answer = argmax over the output logits.
    Mul = Op.Function(input: (a, b), output: Op.Argmax(x));
}