/// <summary>
/// Gradient check for <c>Dot</c> executed on the <c>gpu</c> context.
/// The forward result is compared with the host-side <c>Dot</c> of the same
/// inputs, and the back-propagated gradients of both operands are compared
/// with finite-difference estimates from <c>GradientChecker</c>.
/// </summary>
public static void Gradient_Dot_GPU()
{
    // Fixed seed (same value the sibling tests use) so that a failing run can
    // be reproduced exactly instead of depending on a time-based seed.
    var rng = new Random(42);

    var m = 10;
    var k = 5;
    var n = 3;

    // Symbolic graph: z = Dot(x, y).
    var x = Variable<double>();
    var y = Variable<double>();
    var z = Dot(x, y);

    var ctx = gpu;
    var exe = new Executor(ctx, z) { AssignAllGradient = true };

    // Host inputs: hx is m x k, hy is k x n; hz is the host reference product.
    var hx = new double[m, k];
    var hy = new double[k, n];
    UniformRandomArray(hx, rng);
    UniformRandomArray(hy, rng);
    var hz = Dot(hx, hy);

    // Forward pass and comparison against the host reference.
    exe.AssignTensor(x, hx.AsTensor());
    exe.AssignTensor(y, hy.AsTensor());
    exe.Forward();
    var tz = exe.GetTensor(z);
    AreClose(hz, tz.ToArray2D(), 1e-10);

    // Seed the output gradient with random values and back-propagate.
    var hdz = new double[m, n];
    UniformRandomArray(hdz, rng);
    exe.AssignGradient(z, hdz.AsTensor(), replace: true);
    exe.Backward();
    var tdx = exe.GetGradient(x);
    var tdy = exe.GetGradient(y);
    tdx.Print();
    tdy.Print();

    // Finite-difference estimates of the same gradients.
    var bump = 1e-6;
    var hdx = GradientChecker.FiniteDifferenceGradient(exe, x, bump: bump);
    var hdy = GradientChecker.FiniteDifferenceGradient(exe, y, bump: bump);
    hdx.Print();
    hdy.Print();

    AreClose(tdx.ToArray(), hdx.ToArray(), 1e-6);
    AreClose(tdy.ToArray(), hdy.ToArray(), 1e-6);
}
/// <summary>
/// Gradient check for <c>AttentionReduce</c> on <c>Context.GpuContext(0)</c>.
/// Back-propagated gradients with respect to the softmax input and the states
/// input are compared with finite-difference estimates.
/// </summary>
public static void TestAttentionReduce()
{
    var seqLength = 3;
    var batchSize = 4;
    var hiddenSize = 5;

    // Random host data for both inputs.
    var hostStates = new double[seqLength, batchSize, hiddenSize];
    UniformRandomArray(hostStates);

    var hostSoftmax = new double[seqLength, batchSize];
    UniformRandomArray(hostSoftmax);

    // Graph inputs are declared with -1 as their leading dimension.
    var softmaxVar = Variable<double>(PartialShape.Create(-1, batchSize));
    var statesVar = Variable<double>(PartialShape.Create(-1, batchSize, hiddenSize));
    var attentionReduce = new AttentionReduce<double>(softmaxVar, statesVar);

    var context = Context.GpuContext(0);
    var executor = new Executor(context, attentionReduce.Output) { AssignAllGradient = true };
    executor.Initalize();

    // Random gradient fed in at the output.
    var hostOutputGradient = new double[batchSize, hiddenSize];
    UniformRandomArray(hostOutputGradient);

    executor.AssignTensor(softmaxVar, hostSoftmax.AsTensor());
    executor.AssignTensor(statesVar, hostStates.AsTensor());
    executor.Forward();

    executor.AssignGradient(attentionReduce.Output, hostOutputGradient.AsTensor(), replace: true);
    executor.Backward();

    var softmaxGradient = executor.GetGradient(attentionReduce.Softmax);
    var statesGradient = executor.GetGradient(attentionReduce.States);

    var step = 1e-6;

    var softmaxGradientFd = GradientChecker.FiniteDifferenceGradient(executor, softmaxVar, bump: step);
    AreClose(softmaxGradientFd.ToArray2D(), softmaxGradient.ToArray2D(), 1e-7);

    var statesGradientFd = GradientChecker.FiniteDifferenceGradient(executor, statesVar, bump: step);
    AreClose(statesGradientFd.ToArray3D(), statesGradient.ToArray3D(), 1e-7);
}
/// <summary>
/// Gradient check for <c>WeightedSumReduce</c> on the <c>gpu</c> context.
/// The forward output is compared with a host-side reference
/// (sum over rows of <c>w[j, i] * x[j, i]</c> for each column <c>i</c>), and
/// the back-propagated gradients are compared with finite differences.
/// </summary>
public static void Gradient_WeightedSumReduce_01_GPU()
{
    var random = new Random(42);

    var inputVar = Variable<double>();
    var weightVar = Variable<double>();
    var reduceOp = new WeightedSumReduce<double>(weightVar, inputVar);
    var outputVar = reduceOp.Output;

    var context = gpu;
    var executor = new Executor(context, outputVar) { AssignAllGradient = true };

    var rows = 5;
    var cols = 3;
    var hostInput = new double[rows, cols];
    var hostWeight = new double[rows, cols];
    UniformRandomArray(hostInput, random);
    UniformRandomArray(hostWeight, random);

    // Host reference: accumulate the element-wise products row by row, so each
    // column is still summed in ascending row order.
    var expected = new double[cols];
    for (var row = 0; row < rows; ++row)
    {
        for (var col = 0; col < cols; ++col)
        {
            expected[col] += hostWeight[row, col] * hostInput[row, col];
        }
    }

    executor.AssignTensor(inputVar, hostInput.AsTensor());
    executor.AssignTensor(weightVar, hostWeight.AsTensor());
    executor.Forward();

    var outputTensor = executor.GetTensor(outputVar);
    outputTensor.Print();
    AreClose(expected, outputTensor.ToArray(), 1e-10);

    // Random gradient at the output, then back-propagate.
    var hostOutputGradient = new double[cols];
    UniformRandomArray(hostOutputGradient, random);
    executor.AssignGradient(outputVar, hostOutputGradient.AsTensor(), replace: true);
    executor.Backward();

    var inputGradient = executor.GetGradient(inputVar);
    var weightGradient = executor.GetGradient(weightVar);
    inputGradient.Print();
    weightGradient.Print();

    var step = 1e-8;
    var inputGradientFd = GradientChecker.FiniteDifferenceGradient(executor, inputVar, bump: step);
    var weightGradientFd = GradientChecker.FiniteDifferenceGradient(executor, weightVar, bump: step);
    inputGradientFd.Print();
    weightGradientFd.Print();

    AreClose(inputGradientFd.ToArray2D(), inputGradient.ToArray2D(), 1e-7);
    AreClose(weightGradientFd.ToArray2D(), weightGradient.ToArray2D(), 1e-7);
}
/// <summary>
/// Compiles <c>Foo</c> into a pair of forward/backward delegates, runs them on
/// random host data, and then verifies the returned gradients against
/// finite-difference estimates obtained from a separately built
/// <c>Executor</c> over the same graph.
/// </summary>
public static void Test()
{
    // Compile once for a single context; the result is a tuple holding the
    // forward delegate (Item1) and the backward delegate (Item2).
    var context = Context.GpuContext(0);
    var compiled = Compile<double, double, double, double>(context, Foo);
    var runForward = compiled.Item1;
    var runBackward = compiled.Item2;

    // Host-side inputs.
    var m = 100;
    var k = 90;
    var n = 80;
    var hostX = new double[m, k];
    var hostW = new double[k, n];
    var hostB = new double[n];

    // Fill the inputs with random values.
    var random = new Random(42);
    AleaTKUtil.Common.UniformRandomArray(hostX, random);
    AleaTKUtil.Common.UniformRandomArray(hostW, random);
    AleaTKUtil.Common.UniformRandomArray(hostB, random);

    // Forward evaluation through the compiled delegate.
    var output = runForward(hostX.AsTensor(), hostW.AsTensor(), hostB.AsTensor());

    // A made-up output gradient to push through the backward delegate.
    var hostDy = new double[m, n];
    AleaTKUtil.Common.UniformRandomArray(hostDy, random);

    // The backward delegate returns the three input gradients as a tuple.
    var grads = runBackward(hostDy.AsTensor());
    var gradX = grads.Item1;
    var gradW = grads.Item2;
    var gradB = grads.Item3;

    // Everything below only validates those gradients via finite differences.
    var symX = Variable<double>();
    var symW = Variable<double>();
    var symB = Variable<double>();
    var symY = Foo(symX, symW, symB);

    var executor = new Executor(context, symY);
    executor.AssignTensor(symX, hostX.AsTensor());
    executor.AssignTensor(symW, hostW.AsTensor());
    executor.AssignTensor(symB, hostB.AsTensor());
    executor.AssignGradient(symY, hostDy.AsTensor(), replace: true);

    var step = 1e-7;

    var gradXFd = GradientChecker.FiniteDifferenceGradient(executor, symX, bump: step);
    AleaTKUtil.Common.AreClose(gradXFd.ToArray2D(), gradX.ToArray2D(), 1e-6);

    var gradWFd = GradientChecker.FiniteDifferenceGradient(executor, symW, bump: step);
    AleaTKUtil.Common.AreClose(gradWFd.ToArray2D(), gradW.ToArray2D(), 1e-6);

    // The bias gradient is compared with a looser tolerance (1e-5).
    var gradBFd = GradientChecker.FiniteDifferenceGradient(executor, symB, bump: step);
    AleaTKUtil.Common.AreClose(gradBFd.ToArray(), gradB.ToArray(), 1e-5);
}
/// <summary>
/// Builds a <c>LoopDemo</c> graph over an input, a stacked states tensor and a
/// weight, runs forward and backward on <c>Context.GpuContext(0)</c>, and
/// checks all three gradients against finite-difference estimates.
/// </summary>
public static void LoopStyle()
{
    var xVar = Variable<double>();
    var sVar = Variable<double>();
    var wVar = Variable<double>();
    var loopOp = new LoopDemo(xVar, sVar, wVar);
    var yVar = loopOp.Output;

    // Executor setup.
    var context = Context.GpuContext(0);
    var executor = new Executor(context, yVar) { AssignAllGradient = true };
    executor.Initalize();

    // Random forward data: the states array stacks `steps` matrices of size n x n.
    const int steps = 4;
    const int n = 5;
    var hostX = new double[n, n];
    var hostS = new double[steps, n, n];
    var hostW = new double[n, n];
    var random = new Random(42);
    UniformRandomArray(hostX, random);
    UniformRandomArray(hostS, random);
    UniformRandomArray(hostW, random);
    executor.AssignTensor(xVar, hostX.AsTensor());
    executor.AssignTensor(sVar, hostS.AsTensor());
    executor.AssignTensor(wVar, hostW.AsTensor());

    // Forward pass.
    executor.Forward();
    var yTensor = executor.GetTensor(yVar);
    yTensor.Print();

    // Random gradient at the output.
    var hostDy = new double[n, n];
    UniformRandomArray(hostDy, random);
    executor.AssignGradient(yVar, hostDy.AsTensor(), replace: true);

    // Backward pass.
    executor.Backward();

    // Compare each back-propagated gradient with its finite-difference estimate.
    var step = 1e-7;

    var gradX = executor.GetGradient(xVar);
    var gradXFd = GradientChecker.FiniteDifferenceGradient(executor, xVar, bump: step);
    AreClose(gradXFd.ToArray2D(), gradX.ToArray2D(), 1e-7);

    var gradS = executor.GetGradient(sVar);
    var gradSFd = GradientChecker.FiniteDifferenceGradient(executor, sVar, bump: step);
    AreClose(gradSFd.ToArray3D(), gradS.ToArray3D(), 1e-7);

    // The weight gradient is compared with a looser tolerance (1e-3).
    var gradW = executor.GetGradient(wVar);
    var gradWFd = GradientChecker.FiniteDifferenceGradient(executor, wVar, bump: step);
    AreClose(gradWFd.ToArray2D(), gradW.ToArray2D(), 1e-3);
}
/// <summary>
/// Builds the graph returned by <c>CreateUnrollingGraph</c> with one state
/// variable per step, runs forward and backward on
/// <c>Context.GpuContext(0)</c>, and checks the input, per-step state and
/// weight gradients against finite-difference estimates.
/// </summary>
public static void UnrollingStyle()
{
    // Graph construction: one separate state variable for every step.
    const int steps = 4;
    var xVar = Variable<double>();
    var sVars = Enumerable.Range(0, steps).Select(unused => Variable<double>()).ToArray();
    var wVar = Variable<double>();
    var yVar = CreateUnrollingGraph(xVar, sVars, wVar);

    // Executor setup.
    var context = Context.GpuContext(0);
    var executor = new Executor(context, yVar) { AssignAllGradient = true };
    executor.Initalize();

    // Random forward data, drawn in the order: input, states, weight.
    const int n = 5;
    var hostX = new double[n, n];
    var hostS = Enumerable.Range(0, steps).Select(unused => new double[n, n]).ToArray();
    var hostW = new double[n, n];
    var random = new Random(42);
    UniformRandomArray(hostX, random);
    for (var s = 0; s < hostS.Length; ++s)
    {
        UniformRandomArray(hostS[s], random);
    }
    UniformRandomArray(hostW, random);

    executor.AssignTensor(xVar, hostX.AsTensor());
    for (var s = 0; s < steps; ++s)
    {
        executor.AssignTensor(sVars[s], hostS[s].AsTensor());
    }
    executor.AssignTensor(wVar, hostW.AsTensor());

    // Forward pass.
    executor.Forward();
    var yTensor = executor.GetTensor(yVar);
    yTensor.Print();

    // Random gradient at the output.
    var hostDy = new double[n, n];
    UniformRandomArray(hostDy, random);
    executor.AssignGradient(yVar, hostDy.AsTensor(), replace: true);

    // Backward pass.
    executor.Backward();

    // Compare each back-propagated gradient with its finite-difference estimate.
    var step = 1e-7;

    var gradX = executor.GetGradient(xVar);
    var gradXFd = GradientChecker.FiniteDifferenceGradient(executor, xVar, bump: step);
    AreClose(gradXFd.ToArray2D(), gradX.ToArray2D(), 1e-7);

    for (var s = 0; s < steps; ++s)
    {
        var sVar = sVars[s];
        var gradS = executor.GetGradient(sVar);
        var gradSFd = GradientChecker.FiniteDifferenceGradient(executor, sVar, bump: step);
        AreClose(gradSFd.ToArray2D(), gradS.ToArray2D(), 1e-7);
    }

    // The weight gradient is compared with a looser tolerance (1e-3).
    var gradW = executor.GetGradient(wVar);
    var gradWFd = GradientChecker.FiniteDifferenceGradient(executor, wVar, bump: step);
    AreClose(gradWFd.ToArray2D(), gradW.ToArray2D(), 1e-3);
}
/// <summary>
/// Gradient check for <c>Attention</c> on <c>Context.GpuContext(0)</c>.
/// After a forward and backward pass on random data, the gradients of
/// <c>Wh</c>, <c>Wd</c>, the encoder hidden states and the decoder hidden
/// states are each compared with finite-difference estimates.
/// </summary>
public static void TestAttention()
{
    var batch = 10;
    var encoderHiddenSize = 20;
    var decoderHiddenSize = 25;
    var attentionDim = 30;

    // Encoder states are laid out as (encoderSeqLength, batch, encoderHiddenSize);
    // the sequence dimension is declared as -1 in the graph.
    var encoderVar = Variable<double>(PartialShape.Create(-1, batch, encoderHiddenSize));
    var decoderVar = Variable<double>(PartialShape.Create(batch, decoderHiddenSize));
    var attention = new Attention<double>(encoderVar, decoderVar, attentionDim);

    var context = Context.GpuContext(0);
    var executor = new Executor(context, attention.Output) { AssignAllGradient = true };
    executor.Initalize();

    // The encoder sequence length is only chosen here, at run time.
    var encoderSeqLength = 3;
    var hostEncoder = new double[encoderSeqLength, batch, encoderHiddenSize];
    UniformRandomArray(hostEncoder);
    var hostDecoder = new double[batch, decoderHiddenSize];
    UniformRandomArray(hostDecoder);

    executor.AssignTensor(encoderVar, hostEncoder.AsTensor());
    executor.AssignTensor(decoderVar, hostDecoder.AsTensor());
    executor.Forward();

    // Fetched as in the original flow; the value itself is not inspected.
    var outputTensor = executor.GetTensor(attention.Output);

    // Random gradient at the output, then back-propagate.
    var hostOutputGradient = new double[batch, encoderHiddenSize];
    UniformRandomArray(hostOutputGradient);
    executor.AssignGradient(attention.Output, hostOutputGradient.AsTensor(), replace: true);
    executor.Backward();

    var gradWh = executor.GetGradient(attention.Wh);
    var gradWd = executor.GetGradient(attention.Wd);
    var gradEncoder = executor.GetGradient(attention.EncoderHiddenStates);
    var gradDecoder = executor.GetGradient(attention.DecoderHiddenStates);

    var step = 1e-7;

    var gradWhFd = GradientChecker.FiniteDifferenceGradient(executor, attention.Wh, bump: step);
    AreClose(gradWh.ToArray2D(), gradWhFd.ToArray2D(), 1e-7);

    var gradWdFd = GradientChecker.FiniteDifferenceGradient(executor, attention.Wd, bump: step);
    AreClose(gradWd.ToArray2D(), gradWdFd.ToArray2D(), 1e-7);

    var gradEncoderFd = GradientChecker.FiniteDifferenceGradient(executor, attention.EncoderHiddenStates, bump: step);
    AreClose(gradEncoder.ToArray3D(), gradEncoderFd.ToArray3D(), 1e-7);

    var gradDecoderFd = GradientChecker.FiniteDifferenceGradient(executor, attention.DecoderHiddenStates, bump: step);
    AreClose(gradDecoder.ToArray2D(), gradDecoderFd.ToArray2D(), 1e-7);
}