/// <summary>
/// Pushes gradients to <c>Distribution</c> and <c>Baseline</c> for this ReinforceCategorical node.
/// </summary>
/// <remarks>
/// <c>DistributionGradient</c> is computed only while it is still null; once set, the stored
/// value is pushed as-is on every subsequent call.
/// </remarks>
/// <param name="delta">Incoming gradient; converted with <c>As&lt;float&gt;()</c> before it is used.</param>
/// <param name="bp">Backpropagation context that receives the gradients.</param>
/// <exception cref="InvalidOperationException">
/// <c>DistributionGradient</c> has not been computed yet and <c>Reward</c> is null.
/// </exception>
public override void Backward(Scalar<int> delta, Backpropagation bp)
{
    if (DistributionGradient == null)
    {
        if (Reward == null)
        {
            // A missing Reward is an invalid object state, not an argument error, so a specific
            // exception type is thrown instead of the bare System.Exception.
            throw new InvalidOperationException("No Reward was provided for ReinforceCategorical. Can't Backward");
        }

        // delta * (Reward - Baseline) / Distribution.Item[this], multiplied by a one-hot of
        // Distribution.Shape built from index { this } with value 1f.
        DistributionGradient = delta.As<float>() * (Reward - Baseline) / Distribution.Item[this]
            * Op.OneHot<float>(Distribution.Shape, new[] { this }, 1f);
    }

    bp.PushGradientTo(target: Distribution, delta: DistributionGradient);

    // NOTE(review): Reward is only null-checked when DistributionGradient has to be computed;
    // confirm that Reward is always set when DistributionGradient is supplied in advance.
    bp.PushGradientTo(target: Baseline, delta: (Baseline - Reward));
}
/// <summary>
/// Checks <c>As(UnitSystem.SI)</c> on a quantity of value 1 in <c>Scalar.BaseUnit</c>:
/// when <c>SupportsSIUnitSystem</c> is false the call must throw <see cref="ArgumentException"/>,
/// otherwise the result, cast to <c>double</c>, must equal 1.
/// </summary>
public void As_SIUnitSystem_ThrowsArgumentExceptionIfNotSupported()
{
    var quantity = new Scalar(value: 1, unit: Scalar.BaseUnit);
    Func<object> convertToSI = () => quantity.As(UnitSystem.SI);

    // Unsupported case first: the conversion itself is expected to fail.
    if (!SupportsSIUnitSystem)
    {
        Assert.Throws<ArgumentException>(convertToSI);
        return;
    }

    var converted = (double)convertToSI();
    Assert.Equal(1, converted);
}
/// <summary>
/// Forwards the incoming gradient to <c>x</c> after converting it with <c>As&lt;U&gt;()</c>.
/// </summary>
/// <param name="delta">Incoming gradient, typed as <c>Scalar&lt;T&gt;</c>.</param>
/// <param name="bp">Backpropagation context that receives the converted gradient.</param>
public override void Backward(Scalar<T> delta, Backpropagation bp)
{
    var converted = delta.As<U>();
    bp.PushGradientTo(x, converted);
}