Beispiel #1
0
        /// <summary>
        /// Verifies <c>Dot(x, y)</c> on the <c>gpu</c> context: the forward output is compared with
        /// <c>Dot</c> applied to the host arrays, and the back-propagated gradients of both
        /// operands are compared with finite-difference estimates.
        /// </summary>
        /// <remarks>
        /// The random generator uses a fixed seed so that every run sees the same inputs and a
        /// failing run can be reproduced.
        /// </remarks>
        public static void Gradient_Dot_GPU()
        {
            // Fixed seed: an unseeded Random would feed different data into every run.
            var rng = new Random(42);
            var m   = 10;
            var k   = 5;
            var n   = 3;
            var x   = Variable<double>();
            var y   = Variable<double>();
            var z   = Dot(x, y);

            var ctx = gpu;
            var exe = new Executor(ctx, z)
            {
                AssignAllGradient = true
            };

            // Host operands: hx is m-by-k, hy is k-by-n.
            var hx = new double[m, k];
            var hy = new double[k, n];

            UniformRandomArray(hx, rng);
            UniformRandomArray(hy, rng);

            // Reference result computed from the host arrays.
            var hz = Dot(hx, hy);

            exe.AssignTensor(x, hx.AsTensor());
            exe.AssignTensor(y, hy.AsTensor());
            exe.Forward();
            var tz = exe.GetTensor(z);

            AreClose(hz, tz.ToArray2D(), 1e-10);

            // Random m-by-n gradient assigned to the output z before the backward pass.
            var hdz = new double[m, n];

            UniformRandomArray(hdz, rng);
            exe.AssignGradient(z, hdz.AsTensor(), replace: true);
            exe.Backward();
            var tdx = exe.GetGradient(x);
            var tdy = exe.GetGradient(y);

            tdx.Print();
            tdy.Print();

            // Finite-difference estimates for the same two gradients.
            var bump = 1e-6;
            var hdx  = GradientChecker.FiniteDifferenceGradient(exe, x, bump: bump);
            var hdy  = GradientChecker.FiniteDifferenceGradient(exe, y, bump: bump);

            hdx.Print();
            hdy.Print();

            AreClose(tdx.ToArray(), hdx.ToArray(), 1e-6);
            AreClose(tdy.ToArray(), hdy.ToArray(), 1e-6);
        }
Beispiel #2
0
        /// <summary>
        /// Runs an <c>AttentionReduce</c> graph forward and backward on GPU context 0 and compares
        /// the back-propagated gradients of its two inputs with finite-difference estimates.
        /// </summary>
        public static void TestAttentionReduce()
        {
            var outerSize  = 3;
            var middleSize = 4;
            var innerSize  = 5;

            // Host input data: a 3-D states array and a 2-D softmax array.
            var statesHost = new double[outerSize, middleSize, innerSize];

            UniformRandomArray(statesHost);

            var softmaxHost = new double[outerSize, middleSize];

            UniformRandomArray(softmaxHost);

            // Symbolic inputs; the leading dimension is passed as -1 in the partial shape.
            var softmax = Variable<double>(PartialShape.Create(-1, middleSize));
            var states  = Variable<double>(PartialShape.Create(-1, middleSize, innerSize));
            var reduce  = new AttentionReduce<double>(softmax, states);

            var context  = Context.GpuContext(0);
            var executor = new Executor(context, reduce.Output);

            executor.AssignAllGradient = true;
            executor.Initalize();

            // Random gradient that will be assigned to the output.
            var outputGradHost = new double[middleSize, innerSize];

            UniformRandomArray(outputGradHost);

            executor.AssignTensor(softmax, softmaxHost.AsTensor());
            executor.AssignTensor(states, statesHost.AsTensor());
            executor.Forward();
            executor.AssignGradient(reduce.Output, outputGradHost.AsTensor(), replace: true);
            executor.Backward();

            var softmaxGrad = executor.GetGradient(reduce.Softmax);
            var statesGrad  = executor.GetGradient(reduce.States);

            var epsilon = 1e-6;

            // Softmax input: finite-difference estimate versus back-propagated gradient.
            var softmaxGradFd   = GradientChecker.FiniteDifferenceGradient(executor, softmax, bump: epsilon);
            var softmaxExpected = softmaxGradFd.ToArray2D();
            var softmaxActual   = softmaxGrad.ToArray2D();

            AreClose(softmaxExpected, softmaxActual, 1e-7);

            // States input: finite-difference estimate versus back-propagated gradient.
            var statesGradFd   = GradientChecker.FiniteDifferenceGradient(executor, states, bump: epsilon);
            var statesExpected = statesGradFd.ToArray3D();
            var statesActual   = statesGrad.ToArray3D();

            AreClose(statesExpected, statesActual, 1e-7);
        }
Beispiel #3
0
        /// <summary>
        /// Checks <c>WeightedSumReduce</c> on the <c>gpu</c> context: the forward output is compared
        /// with a host-computed column-wise sum of <c>w * x</c>, and the gradients of both inputs
        /// are compared with finite-difference estimates.
        /// </summary>
        public static void Gradient_WeightedSumReduce_01_GPU()
        {
            var random  = new Random(42);
            var x       = Variable<double>();
            var w       = Variable<double>();
            var reducer = new WeightedSumReduce<double>(w, x);
            var y       = reducer.Output;

            var context  = gpu;
            var executor = new Executor(context, y);

            executor.AssignAllGradient = true;

            var rows     = 5;
            var cols     = 3;
            var hostX    = new double[rows, cols];
            var hostW    = new double[rows, cols];
            var expected = new double[cols];

            UniformRandomArray(hostX, random);
            UniformRandomArray(hostW, random);

            // Host reference: for each column, sum w * x over all rows.
            for (var col = 0; col < cols; col++)
            {
                var sum = 0.0;

                for (var row = 0; row < rows; row++)
                {
                    sum += hostW[row, col] * hostX[row, col];
                }

                expected[col] = sum;
            }

            executor.AssignTensor(x, hostX.AsTensor());
            executor.AssignTensor(w, hostW.AsTensor());
            executor.Forward();

            var output = executor.GetTensor(y);

            output.Print();
            AreClose(expected, output.ToArray(), 1e-10);

            // Random gradient assigned to the output before the backward pass.
            var hostOutputGrad = new double[cols];

            UniformRandomArray(hostOutputGrad, random);
            executor.AssignGradient(y, hostOutputGrad.AsTensor(), replace: true);
            executor.Backward();

            var gradX = executor.GetGradient(x);
            var gradW = executor.GetGradient(w);

            gradX.Print();
            gradW.Print();

            var epsilon = 1e-8;
            var gradXFd = GradientChecker.FiniteDifferenceGradient(executor, x, bump: epsilon);
            var gradWFd = GradientChecker.FiniteDifferenceGradient(executor, w, bump: epsilon);

            gradXFd.Print();
            gradWFd.Print();

            AreClose(gradXFd.ToArray2D(), gradX.ToArray2D(), 1e-7);
            AreClose(gradWFd.ToArray2D(), gradW.ToArray2D(), 1e-7);
        }
Beispiel #4
0
        /// <summary>
        /// Compiles the <c>Foo</c> graph into forward/backward delegates on GPU context 0, runs
        /// them on random host data, and compares the returned gradients with finite-difference
        /// estimates obtained through a separate <c>Executor</c> built on the same graph.
        /// </summary>
        public static void Test()
        {
            // Compile once; the returned tuple carries the forward delegate (Item1) and the
            // backward delegate (Item2).
            var context  = Context.GpuContext(0);
            var compiled = Compile<double, double, double, double>(context, Foo);
            var forward  = compiled.Item1;
            var backward = compiled.Item2;

            // Host arrays: x is rows-by-inner, w is inner-by-cols, b has cols entries.
            var rows  = 100;
            var inner = 90;
            var cols  = 80;
            var x     = new double[rows, inner];
            var w     = new double[inner, cols];
            var b     = new double[cols];

            // Fill the inputs from a seeded generator.
            var random = new Random(42);

            AleaTKUtil.Common.UniformRandomArray(x, random);
            AleaTKUtil.Common.UniformRandomArray(w, random);
            AleaTKUtil.Common.UniformRandomArray(b, random);

            // Forward computation; the output itself is not inspected here.
            var y = forward(x.AsTensor(), w.AsTensor(), b.AsTensor());

            // Random rows-by-cols gradient to feed into the backward delegate.
            var dy = new double[rows, cols];

            AleaTKUtil.Common.UniformRandomArray(dy, random);

            // The backward delegate returns the three gradients as a tuple.
            var grads = backward(dy.AsTensor());
            var dx    = grads.Item1;
            var dw    = grads.Item2;
            var db    = grads.Item3;

            // Everything below only verifies those gradients against finite differences.
            var xVar = Variable<double>();
            var wVar = Variable<double>();
            var bVar = Variable<double>();
            var yVar = Foo(xVar, wVar, bVar);

            var executor = new Executor(context, yVar);

            executor.AssignTensor(xVar, x.AsTensor());
            executor.AssignTensor(wVar, w.AsTensor());
            executor.AssignTensor(bVar, b.AsTensor());
            executor.AssignGradient(yVar, dy.AsTensor(), replace: true);

            var epsilon = 1e-7;

            var dxFd = GradientChecker.FiniteDifferenceGradient(executor, xVar, bump: epsilon);

            AleaTKUtil.Common.AreClose(dxFd.ToArray2D(), dx.ToArray2D(), 1e-6);

            var dwFd = GradientChecker.FiniteDifferenceGradient(executor, wVar, bump: epsilon);

            AleaTKUtil.Common.AreClose(dwFd.ToArray2D(), dw.ToArray2D(), 1e-6);

            var dbFd = GradientChecker.FiniteDifferenceGradient(executor, bVar, bump: epsilon);

            AleaTKUtil.Common.AreClose(dbFd.ToArray(), db.ToArray(), 1e-5);
        }
Beispiel #5
0
        /// <summary>
        /// Builds a <c>LoopDemo</c> graph, runs it forward and backward on GPU context 0 with
        /// random data, and compares the gradients of the input, states and weight variables with
        /// finite-difference estimates.
        /// </summary>
        public static void LoopStyle()
        {
            var inputVar  = Variable<double>();
            var statesVar = Variable<double>();
            var weightVar = Variable<double>();
            var demo      = new LoopDemo(inputVar, statesVar, weightVar);
            var outputVar = demo.Output;

            // Executor on the first GPU, with gradients requested for all variables.
            var context  = Context.GpuContext(0);
            var executor = new Executor(context, outputVar);

            executor.AssignAllGradient = true;
            executor.Initalize();

            // Host-side forward data, filled from a seeded generator.
            const int steps = 4;
            const int n     = 5;

            var hostInput  = new double[n, n];
            var hostStates = new double[steps, n, n];
            var hostWeight = new double[n, n];
            var random     = new Random(42);

            UniformRandomArray(hostInput, random);
            UniformRandomArray(hostStates, random);
            UniformRandomArray(hostWeight, random);

            executor.AssignTensor(inputVar, hostInput.AsTensor());
            executor.AssignTensor(statesVar, hostStates.AsTensor());
            executor.AssignTensor(weightVar, hostWeight.AsTensor());

            // Forward pass; print the result.
            executor.Forward();

            var output = executor.GetTensor(outputVar);

            output.Print();

            // Random gradient assigned to the output, then the backward pass.
            var hostOutputGrad = new double[n, n];

            UniformRandomArray(hostOutputGrad, random);
            executor.AssignGradient(outputVar, hostOutputGrad.AsTensor(), replace: true);
            executor.Backward();

            // Compare each back-propagated gradient with its finite-difference estimate.
            var epsilon = 1e-7;

            var inputGrad     = executor.GetGradient(inputVar);
            var inputGradFd   = GradientChecker.FiniteDifferenceGradient(executor, inputVar, bump: epsilon);
            var inputExpected = inputGradFd.ToArray2D();
            var inputActual   = inputGrad.ToArray2D();

            AreClose(inputExpected, inputActual, 1e-7);

            var statesGrad     = executor.GetGradient(statesVar);
            var statesGradFd   = GradientChecker.FiniteDifferenceGradient(executor, statesVar, bump: epsilon);
            var statesExpected = statesGradFd.ToArray3D();
            var statesActual   = statesGrad.ToArray3D();

            AreClose(statesExpected, statesActual, 1e-7);

            // The weight gradient is checked with a looser tolerance (1e-3) than the others.
            var weightGrad     = executor.GetGradient(weightVar);
            var weightGradFd   = GradientChecker.FiniteDifferenceGradient(executor, weightVar, bump: epsilon);
            var weightExpected = weightGradFd.ToArray2D();
            var weightActual   = weightGrad.ToArray2D();

            AreClose(weightExpected, weightActual, 1e-3);
        }
Beispiel #6
0
        /// <summary>
        /// Builds an unrolled graph via <c>CreateUnrollingGraph</c> with one state variable per
        /// step, runs it forward and backward on GPU context 0 with random data, and compares the
        /// gradients of the input, every state and the weight with finite-difference estimates.
        /// </summary>
        public static void UnrollingStyle()
        {
            // Graph: one input, `steps` separate state variables, one weight.
            const int steps = 4;

            var inputVar  = Variable<double>();
            var stateVars = Enumerable.Range(0, steps).Select(_ => Variable<double>()).ToArray();
            var weightVar = Variable<double>();
            var outputVar = CreateUnrollingGraph(inputVar, stateVars, weightVar);

            // Executor on the first GPU, with gradients requested for all variables.
            var context  = Context.GpuContext(0);
            var executor = new Executor(context, outputVar);

            executor.AssignAllGradient = true;
            executor.Initalize();

            // Host-side forward data: one n-by-n array per state.
            const int n = 5;

            var hostInput  = new double[n, n];
            var hostStates = new double[steps][,];
            var hostWeight = new double[n, n];

            for (var step = 0; step < steps; step++)
            {
                hostStates[step] = new double[n, n];
            }

            // Fill order (input, states in step order, weight) fixes the seeded random sequence.
            var random = new Random(42);

            UniformRandomArray(hostInput, random);
            for (var step = 0; step < steps; step++)
            {
                UniformRandomArray(hostStates[step], random);
            }
            UniformRandomArray(hostWeight, random);

            executor.AssignTensor(inputVar, hostInput.AsTensor());
            for (var step = 0; step < steps; step++)
            {
                executor.AssignTensor(stateVars[step], hostStates[step].AsTensor());
            }
            executor.AssignTensor(weightVar, hostWeight.AsTensor());

            // Forward pass; print the result.
            executor.Forward();

            var output = executor.GetTensor(outputVar);

            output.Print();

            // Random gradient assigned to the output, then the backward pass.
            var hostOutputGrad = new double[n, n];

            UniformRandomArray(hostOutputGrad, random);
            executor.AssignGradient(outputVar, hostOutputGrad.AsTensor(), replace: true);
            executor.Backward();

            // Compare each back-propagated gradient with its finite-difference estimate.
            var epsilon = 1e-7;

            var inputGrad   = executor.GetGradient(inputVar);
            var inputGradFd = GradientChecker.FiniteDifferenceGradient(executor, inputVar, bump: epsilon);

            AreClose(inputGradFd.ToArray2D(), inputGrad.ToArray2D(), 1e-7);

            foreach (var stateVar in stateVars)
            {
                var stateGrad   = executor.GetGradient(stateVar);
                var stateGradFd = GradientChecker.FiniteDifferenceGradient(executor, stateVar, bump: epsilon);

                AreClose(stateGradFd.ToArray2D(), stateGrad.ToArray2D(), 1e-7);
            }

            // The weight gradient is checked with a looser tolerance (1e-3) than the others.
            var weightGrad   = executor.GetGradient(weightVar);
            var weightGradFd = GradientChecker.FiniteDifferenceGradient(executor, weightVar, bump: epsilon);

            AreClose(weightGradFd.ToArray2D(), weightGrad.ToArray2D(), 1e-3);
        }
Beispiel #7
0
        /// <summary>
        /// Runs an <c>Attention</c> graph forward and backward on GPU context 0 with random data
        /// and compares the gradients of <c>Wh</c>, <c>Wd</c>, the encoder hidden states and the
        /// decoder hidden states with finite-difference estimates.
        /// </summary>
        public static void TestAttention()
        {
            var batch             = 10;
            var encoderHiddenSize = 20;
            var decoderHiddenSize = 25;
            var attentionDim      = 30;

            // Encoder states are declared as (encoderSeqLength, batch, encoderHiddenSize) with the
            // sequence length left open (-1); decoder states as (batch, decoderHiddenSize).
            var encoderHiddenStates = Variable<double>(PartialShape.Create(-1, batch, encoderHiddenSize));
            var decoderHiddenStates = Variable<double>(PartialShape.Create(batch, decoderHiddenSize));
            var attention           = new Attention<double>(encoderHiddenStates, decoderHiddenStates, attentionDim);

            var context  = Context.GpuContext(0);
            var executor = new Executor(context, attention.Output);

            executor.AssignAllGradient = true;
            executor.Initalize();

            // The encoder sequence length is only chosen here, when the data is created.
            var encoderSeqLength = 3;
            var encoderData      = new double[encoderSeqLength, batch, encoderHiddenSize];

            UniformRandomArray(encoderData);

            var decoderData = new double[batch, decoderHiddenSize];

            UniformRandomArray(decoderData);

            executor.AssignTensor(encoderHiddenStates, encoderData.AsTensor());
            executor.AssignTensor(decoderHiddenStates, decoderData.AsTensor());
            executor.Forward();

            // The output tensor is fetched but not inspected further.
            var output = executor.GetTensor(attention.Output);

            // Random (batch, encoderHiddenSize) gradient assigned to the output.
            var outputGradData = new double[batch, encoderHiddenSize];

            UniformRandomArray(outputGradData);
            executor.AssignGradient(attention.Output, outputGradData.AsTensor(), replace: true);
            executor.Backward();

            // Fetch all back-propagated gradients before any finite-difference run.
            var gradWh      = executor.GetGradient(attention.Wh);
            var gradWd      = executor.GetGradient(attention.Wd);
            var gradEncoder = executor.GetGradient(attention.EncoderHiddenStates);
            var gradDecoder = executor.GetGradient(attention.DecoderHiddenStates);

            var epsilon = 1e-7;

            var gradWhFd = GradientChecker.FiniteDifferenceGradient(executor, attention.Wh, bump: epsilon);

            AreClose(gradWh.ToArray2D(), gradWhFd.ToArray2D(), 1e-7);

            var gradWdFd = GradientChecker.FiniteDifferenceGradient(executor, attention.Wd, bump: epsilon);

            AreClose(gradWd.ToArray2D(), gradWdFd.ToArray2D(), 1e-7);

            var gradEncoderFd = GradientChecker.FiniteDifferenceGradient(executor, attention.EncoderHiddenStates, bump: epsilon);

            AreClose(gradEncoder.ToArray3D(), gradEncoderFd.ToArray3D(), 1e-7);

            var gradDecoderFd = GradientChecker.FiniteDifferenceGradient(executor, attention.DecoderHiddenStates, bump: epsilon);

            AreClose(gradDecoder.ToArray2D(), gradDecoderFd.ToArray2D(), 1e-7);
        }