// Gradient check for AttentionReduce: runs one forward/backward pass through an
// executor and compares the back-propagated gradients of both inputs against
// finite-difference estimates.
public static void TestAttentionReduce()
{
    // Sizes of the three axes of the states array; the softmax array uses the first two.
    var n = 3;
    var b = 4;
    var d = 5;

    // Random forward inputs.
    var statesData = new double[n, b, d];
    UniformRandomArray(statesData);
    var softmaxData = new double[n, b];
    UniformRandomArray(softmaxData);

    // Symbolic inputs; the leading dimension is declared as -1 in the partial shape.
    var softmaxVar = Variable<double>(PartialShape.Create(-1, b));
    var statesVar = Variable<double>(PartialShape.Create(-1, b, d));
    var op = new AttentionReduce<double>(softmaxVar, statesVar);

    var exe = new Executor(Context.GpuContext(0), op.Output) { AssignAllGradient = true };
    exe.Initalize();

    // Random upstream gradient for the (b, d) output.
    var outputGradData = new double[b, d];
    UniformRandomArray(outputGradData);

    // Forward pass.
    exe.AssignTensor(softmaxVar, softmaxData.AsTensor());
    exe.AssignTensor(statesVar, statesData.AsTensor());
    exe.Forward();

    // Backward pass, seeded with the random output gradient.
    exe.AssignGradient(op.Output, outputGradData.AsTensor(), replace: true);
    exe.Backward();

    var softmaxGrad = exe.GetGradient(op.Softmax);
    var statesGrad = exe.GetGradient(op.States);

    // Compare back-propagated gradients with finite differences.
    var bump = 1e-6;
    var tolerance = 1e-7;

    var softmaxGradFd = GradientChecker.FiniteDifferenceGradient(exe, softmaxVar, bump: bump);
    AreClose(softmaxGradFd.ToArray2D(), softmaxGrad.ToArray2D(), tolerance);

    var statesGradFd = GradientChecker.FiniteDifferenceGradient(exe, statesVar, bump: bump);
    AreClose(statesGradFd.ToArray3D(), statesGrad.ToArray3D(), tolerance);
}
// Forward pass: scales the states tensor by the softmax tensor (reshaped with a
// trailing unit axis) and sums the product over the first axis, assigning the
// (dim1, dim2)-shaped result to Output.
public override void Forward(Executor executor)
{
    var statesTensor = executor.GetTensor(States);
    var softmaxTensor = executor.GetTensor(Softmax);

    // The states tensor is indexed along three axes.
    var dim0 = statesTensor.Shape[0];
    var dim1 = statesTensor.Shape[1];
    var dim2 = statesTensor.Shape[2];

    // Give softmax a trailing axis of size 1 before multiplying with the states.
    var weights = softmaxTensor.Reshape(dim0, dim1, 1);
    var weighted = weights * statesTensor;

    // Reduce sum currently only works up to 2D tensors, so the product is
    // flattened to (dim0, dim1 * dim2), summed over axis 0, and the result is
    // reshaped back to (dim1, dim2).
    var flattened = weighted.Reshape(dim0, dim1 * dim2);
    var summed = ReduceSum(flattened, 0).Reshape(dim1, dim2);

    executor.AssignTensor(Output, summed);
}
// Runs the LoopDemo graph forward and backward on random data, prints the output,
// and checks the gradients of input, states and weight against finite differences.
public static void LoopStyle()
{
    const int steps = 4;
    const int n = 5;

    // Graph: three symbolic inputs feeding a LoopDemo operator.
    var inputVar = Variable<double>();
    var statesVar = Variable<double>();
    var weightVar = Variable<double>();
    var outputVar = new LoopDemo(inputVar, statesVar, weightVar).Output;

    // Executor on GPU context 0, keeping gradients for all variables.
    var exe = new Executor(Context.GpuContext(0), outputVar) { AssignAllGradient = true };
    exe.Initalize();

    // Fake forward data drawn from a fixed-seed generator
    // (fill order: input, states, weight).
    var rng = new Random(42);
    var input = new double[n, n];
    var states = new double[steps, n, n];
    var weight = new double[n, n];
    UniformRandomArray(input, rng);
    UniformRandomArray(states, rng);
    UniformRandomArray(weight, rng);

    exe.AssignTensor(inputVar, input.AsTensor());
    exe.AssignTensor(statesVar, states.AsTensor());
    exe.AssignTensor(weightVar, weight.AsTensor());

    // Forward pass; show the result.
    exe.Forward();
    exe.GetTensor(outputVar).Print();

    // Fake upstream gradient, then backward pass.
    var dOutput = new double[n, n];
    UniformRandomArray(dOutput, rng);
    exe.AssignGradient(outputVar, dOutput.AsTensor(), replace: true);
    exe.Backward();

    // Verify gradients against finite differences.
    var bump = 1e-7;
    var tightTolerance = 1e-7;
    var looseTolerance = 1e-3; // the weight gradient is compared with a looser tolerance

    var inputGrad = exe.GetGradient(inputVar);
    var inputGradFd = GradientChecker.FiniteDifferenceGradient(exe, inputVar, bump: bump);
    AreClose(inputGradFd.ToArray2D(), inputGrad.ToArray2D(), tightTolerance);

    var statesGrad = exe.GetGradient(statesVar);
    var statesGradFd = GradientChecker.FiniteDifferenceGradient(exe, statesVar, bump: bump);
    AreClose(statesGradFd.ToArray3D(), statesGrad.ToArray3D(), tightTolerance);

    var weightGrad = exe.GetGradient(weightVar);
    var weightGradFd = GradientChecker.FiniteDifferenceGradient(exe, weightVar, bump: bump);
    AreClose(weightGradFd.ToArray2D(), weightGrad.ToArray2D(), looseTolerance);
}
// Builds the unrolled graph (one state variable per step), runs it forward and
// backward on random data, prints the output, and checks the gradients of the
// input, every state and the weight against finite differences.
public static void UnrollingStyle()
{
    const int steps = 4;
    const int n = 5;

    // Unrolled graph: one symbolic state per step.
    var inputVar = Variable<double>();
    var stateVars = Enumerable.Range(0, steps).Select(step => Variable<double>()).ToArray();
    var weightVar = Variable<double>();
    var outputVar = CreateUnrollingGraph(inputVar, stateVars, weightVar);

    // Executor on GPU context 0, keeping gradients for all variables.
    var exe = new Executor(Context.GpuContext(0), outputVar) { AssignAllGradient = true };
    exe.Initalize();

    // Fake forward data drawn from a fixed-seed generator
    // (fill order: input, each state in step order, weight).
    var rng = new Random(42);

    var input = new double[n, n];
    UniformRandomArray(input, rng);

    var states = new double[steps][,];
    for (var i = 0; i < steps; ++i)
    {
        states[i] = new double[n, n];
        UniformRandomArray(states[i], rng);
    }

    var weight = new double[n, n];
    UniformRandomArray(weight, rng);

    exe.AssignTensor(inputVar, input.AsTensor());
    for (var i = 0; i < steps; ++i)
    {
        exe.AssignTensor(stateVars[i], states[i].AsTensor());
    }
    exe.AssignTensor(weightVar, weight.AsTensor());

    // Forward pass; show the result.
    exe.Forward();
    exe.GetTensor(outputVar).Print();

    // Fake upstream gradient, then backward pass.
    var dOutput = new double[n, n];
    UniformRandomArray(dOutput, rng);
    exe.AssignGradient(outputVar, dOutput.AsTensor(), replace: true);
    exe.Backward();

    // Verify gradients against finite differences.
    var bump = 1e-7;
    var tightTolerance = 1e-7;
    var looseTolerance = 1e-3; // the weight gradient is compared with a looser tolerance

    var inputGrad = exe.GetGradient(inputVar);
    var inputGradFd = GradientChecker.FiniteDifferenceGradient(exe, inputVar, bump: bump);
    AreClose(inputGradFd.ToArray2D(), inputGrad.ToArray2D(), tightTolerance);

    foreach (var stateVar in stateVars)
    {
        var stateGrad = exe.GetGradient(stateVar);
        var stateGradFd = GradientChecker.FiniteDifferenceGradient(exe, stateVar, bump: bump);
        AreClose(stateGradFd.ToArray2D(), stateGrad.ToArray2D(), tightTolerance);
    }

    var weightGrad = exe.GetGradient(weightVar);
    var weightGradFd = GradientChecker.FiniteDifferenceGradient(exe, weightVar, bump: bump);
    AreClose(weightGradFd.ToArray2D(), weightGrad.ToArray2D(), looseTolerance);
}
// Gradient check for Attention: runs one forward/backward pass on random data and
// compares the back-propagated gradients of Wh, Wd, the encoder hidden states and
// the decoder hidden states against finite-difference estimates.
public static void TestAttention()
{
    // Problem sizes (smaller values such as 4 / 5 / 4 / 3 are handy when debugging).
    var batch = 10;
    var encoderHiddenSize = 20;
    var decoderHiddenSize = 25;
    var attentionDim = 30;

    // Encoder states are (encoderSeqLength, batch, encoderHiddenSize); the leading
    // dimension is declared as -1 in the partial shape.
    var encoderVar = Variable<double>(PartialShape.Create(-1, batch, encoderHiddenSize));
    var decoderVar = Variable<double>(PartialShape.Create(batch, decoderHiddenSize));
    var attention = new Attention<double>(encoderVar, decoderVar, attentionDim);

    var exe = new Executor(Context.GpuContext(0), attention.Output) { AssignAllGradient = true };
    exe.Initalize();

    // The encoder sequence length is only fixed here, at run time.
    var encoderSeqLength = 3;

    var encoderData = new double[encoderSeqLength, batch, encoderHiddenSize];
    UniformRandomArray(encoderData);
    var decoderData = new double[batch, decoderHiddenSize];
    UniformRandomArray(decoderData);

    // Forward pass.
    exe.AssignTensor(encoderVar, encoderData.AsTensor());
    exe.AssignTensor(decoderVar, decoderData.AsTensor());
    exe.Forward();
    var outputTensor = exe.GetTensor(attention.Output);

    // Backward pass, seeded with a random (batch, encoderHiddenSize) output gradient.
    var outputGradData = new double[batch, encoderHiddenSize];
    UniformRandomArray(outputGradData);
    exe.AssignGradient(attention.Output, outputGradData.AsTensor(), replace: true);
    exe.Backward();

    // Back-propagated gradients.
    var gradWh = exe.GetGradient(attention.Wh);
    var gradWd = exe.GetGradient(attention.Wd);
    var gradEncoder = exe.GetGradient(attention.EncoderHiddenStates);
    var gradDecoder = exe.GetGradient(attention.DecoderHiddenStates);

    // Compare each one with its finite-difference estimate.
    var bump = 1e-7;
    var tolerance = 1e-7;

    var gradWhFd = GradientChecker.FiniteDifferenceGradient(exe, attention.Wh, bump: bump);
    AreClose(gradWh.ToArray2D(), gradWhFd.ToArray2D(), tolerance);

    var gradWdFd = GradientChecker.FiniteDifferenceGradient(exe, attention.Wd, bump: bump);
    AreClose(gradWd.ToArray2D(), gradWdFd.ToArray2D(), tolerance);

    var gradEncoderFd = GradientChecker.FiniteDifferenceGradient(exe, attention.EncoderHiddenStates, bump: bump);
    AreClose(gradEncoder.ToArray3D(), gradEncoderFd.ToArray3D(), tolerance);

    var gradDecoderFd = GradientChecker.FiniteDifferenceGradient(exe, attention.DecoderHiddenStates, bump: bump);
    AreClose(gradDecoder.ToArray2D(), gradDecoderFd.ToArray2D(), tolerance);
}