Ejemplo n.º 1
0
        /// <summary>
        /// Gradient check for <c>AttentionReduce</c>: runs one forward and one backward pass on
        /// GPU context 0 with random inputs, then compares the back-propagated gradients of the
        /// softmax and states inputs against finite-difference gradients via <c>AreClose</c>.
        /// </summary>
        public static void TestAttentionReduce()
        {
            // Concrete extents of the test arrays. The symbolic shapes below use -1 for the
            // first axis (presumably meaning "resolved at runtime" -- confirm against PartialShape).
            int dim0 = 3;
            int dim1 = 4;
            int dim2 = 5;

            // Tolerance handed to AreClose for both comparisons.
            const double tolerance = 1e-7;

            var statesValues = new double[dim0, dim1, dim2];
            UniformRandomArray(statesValues);

            var softmaxValues = new double[dim0, dim1];
            UniformRandomArray(softmaxValues);

            // Symbolic graph: two inputs feeding a single AttentionReduce node.
            var softmax = Variable<double>(PartialShape.Create(-1, dim1));
            var states = Variable<double>(PartialShape.Create(-1, dim1, dim2));
            var reduce = new AttentionReduce<double>(softmax, states);

            var exe = new Executor(Context.GpuContext(0), reduce.Output);
            exe.AssignAllGradient = true;
            exe.Initalize();

            // Random gradient to seed the backward pass at the output.
            var outputGradValues = new double[dim1, dim2];
            UniformRandomArray(outputGradValues);

            exe.AssignTensor(softmax, softmaxValues.AsTensor());
            exe.AssignTensor(states, statesValues.AsTensor());
            exe.Forward();
            exe.AssignGradient(reduce.Output, outputGradValues.AsTensor(), replace: true);
            exe.Backward();

            // Back-propagated gradients, fetched before any finite-difference run.
            var softmaxGrad = exe.GetGradient(reduce.Softmax);
            var statesGrad = exe.GetGradient(reduce.States);

            var step = 1e-6;

            // Softmax input: finite difference vs. back-propagation.
            var softmaxGradFd = GradientChecker.FiniteDifferenceGradient(exe, softmax, bump: step);
            var softmaxFdArray = softmaxGradFd.ToArray2D();
            var softmaxBpArray = softmaxGrad.ToArray2D();
            AreClose(softmaxFdArray, softmaxBpArray, tolerance);

            // States input: finite difference vs. back-propagation.
            var statesGradFd = GradientChecker.FiniteDifferenceGradient(exe, states, bump: step);
            var statesFdArray = statesGradFd.ToArray3D();
            var statesBpArray = statesGrad.ToArray3D();
            AreClose(statesFdArray, statesBpArray, tolerance);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds a <c>LoopDemo</c> graph over an input, a states tensor and a weight, runs it
        /// forward and backward on GPU context 0 with seeded random data, prints the output, and
        /// compares the back-propagated gradient of each of the three variables against a
        /// finite-difference gradient via <c>AreClose</c>.
        /// </summary>
        public static void LoopStyle()
        {
            const int steps = 4;
            const int n = 5;

            // Graph: three untyped-shape variables wired into the loop operator.
            var inputVar = Variable<double>();
            var statesVar = Variable<double>();
            var weightVar = Variable<double>();
            var outputVar = new LoopDemo(inputVar, statesVar, weightVar).Output;

            // Executor on the first GPU, keeping gradients for every variable.
            var exe = new Executor(Context.GpuContext(0), outputVar);
            exe.AssignAllGradient = true;
            exe.Initalize();

            // Forward data. The fill order (input, states, weight) determines which values the
            // seeded generator produces for each array, so it must not be rearranged.
            var rng = new Random(42);

            var input = new double[n, n];
            UniformRandomArray(input, rng);

            var states = new double[steps, n, n];
            UniformRandomArray(states, rng);

            var weight = new double[n, n];
            UniformRandomArray(weight, rng);

            exe.AssignTensor(inputVar, input.AsTensor());
            exe.AssignTensor(statesVar, states.AsTensor());
            exe.AssignTensor(weightVar, weight.AsTensor());

            // Forward pass, then dump the result.
            exe.Forward();
            exe.GetTensor(outputVar).Print();

            // Backward pass seeded with a random output gradient (drawn from the same generator).
            var outputGrad = new double[n, n];
            UniformRandomArray(outputGrad, rng);
            exe.AssignGradient(outputVar, outputGrad.AsTensor(), replace: true);
            exe.Backward();

            // Gradient verification.
            const double step = 1e-7;
            const double tightTolerance = 1e-7;
            const double weightTolerance = 1e-3; // the weight check uses a looser tolerance than the others

            var inputGrad = exe.GetGradient(inputVar);
            var inputGradFd = GradientChecker.FiniteDifferenceGradient(exe, inputVar, bump: step);
            AreClose(inputGradFd.ToArray2D(), inputGrad.ToArray2D(), tightTolerance);

            var statesGrad = exe.GetGradient(statesVar);
            var statesGradFd = GradientChecker.FiniteDifferenceGradient(exe, statesVar, bump: step);
            AreClose(statesGradFd.ToArray3D(), statesGrad.ToArray3D(), tightTolerance);

            var weightGrad = exe.GetGradient(weightVar);
            var weightGradFd = GradientChecker.FiniteDifferenceGradient(exe, weightVar, bump: step);
            AreClose(weightGradFd.ToArray2D(), weightGrad.ToArray2D(), weightTolerance);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds an unrolled graph through <c>CreateUnrollingGraph</c> with one variable per
        /// step, runs it forward and backward on GPU context 0 with seeded random data, prints
        /// the output, and compares the back-propagated gradient of the input, of every state
        /// variable and of the weight against finite-difference gradients via <c>AreClose</c>.
        /// </summary>
        public static void UnrollingStyle()
        {
            const int steps = 4;
            const int n = 5;

            // Graph: one input, one state variable per step, one weight.
            var inputVar = Variable<double>();
            var stateVars = Enumerable.Range(0, steps)
                                      .Select(_ => Variable<double>())
                                      .ToArray();
            var weightVar = Variable<double>();
            var outputVar = CreateUnrollingGraph(inputVar, stateVars, weightVar);

            // Executor on the first GPU, keeping gradients for every variable.
            var exe = new Executor(Context.GpuContext(0), outputVar);
            exe.AssignAllGradient = true;
            exe.Initalize();

            // Forward data. The fill order (input, each state in turn, weight) determines which
            // values the seeded generator produces for each array, so it must not be rearranged.
            var rng = new Random(42);

            var input = new double[n, n];
            UniformRandomArray(input, rng);

            var states = new double[steps][,];
            for (var k = 0; k < steps; ++k)
            {
                states[k] = new double[n, n];
                UniformRandomArray(states[k], rng);
            }

            var weight = new double[n, n];
            UniformRandomArray(weight, rng);

            exe.AssignTensor(inputVar, input.AsTensor());
            for (var k = 0; k < steps; ++k)
            {
                exe.AssignTensor(stateVars[k], states[k].AsTensor());
            }
            exe.AssignTensor(weightVar, weight.AsTensor());

            // Forward pass, then dump the result.
            exe.Forward();
            exe.GetTensor(outputVar).Print();

            // Backward pass seeded with a random output gradient (drawn from the same generator).
            var outputGrad = new double[n, n];
            UniformRandomArray(outputGrad, rng);
            exe.AssignGradient(outputVar, outputGrad.AsTensor(), replace: true);
            exe.Backward();

            // Gradient verification.
            const double step = 1e-7;
            const double tightTolerance = 1e-7;
            const double weightTolerance = 1e-3; // the weight check uses a looser tolerance than the others

            var inputGrad = exe.GetGradient(inputVar);
            var inputGradFd = GradientChecker.FiniteDifferenceGradient(exe, inputVar, bump: step);
            AreClose(inputGradFd.ToArray2D(), inputGrad.ToArray2D(), tightTolerance);

            // Each per-step state variable is checked individually, in step order.
            foreach (var stateVar in stateVars)
            {
                var stateGrad = exe.GetGradient(stateVar);
                var stateGradFd = GradientChecker.FiniteDifferenceGradient(exe, stateVar, bump: step);
                AreClose(stateGradFd.ToArray2D(), stateGrad.ToArray2D(), tightTolerance);
            }

            var weightGrad = exe.GetGradient(weightVar);
            var weightGradFd = GradientChecker.FiniteDifferenceGradient(exe, weightVar, bump: step);
            AreClose(weightGradFd.ToArray2D(), weightGrad.ToArray2D(), weightTolerance);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Gradient check for <c>Attention</c>: runs one forward and one backward pass on GPU
        /// context 0 with random encoder/decoder hidden states, then compares the back-propagated
        /// gradients of <c>Wh</c>, <c>Wd</c>, the encoder hidden states and the decoder hidden
        /// states against finite-difference gradients via <c>AreClose</c>.
        /// </summary>
        public static void TestAttention()
        {
            var batch = 10;
            var encoderHiddenSize = 20;
            var decoderHiddenSize = 25;
            var attentionDim = 30;

            // Tolerance handed to AreClose for every comparison.
            const double tolerance = 1e-7;

            // Encoder states are laid out as (encoderSeqLength, batch, encoderHiddenSize); the
            // sequence length is left open (-1) in the symbolic shape.
            var encoderHiddenStates = Variable<double>(PartialShape.Create(-1, batch, encoderHiddenSize));
            var decoderHiddenStates = Variable<double>(PartialShape.Create(batch, decoderHiddenSize));
            var attention = new Attention<double>(encoderHiddenStates, decoderHiddenStates, attentionDim);

            var exe = new Executor(Context.GpuContext(0), attention.Output);
            exe.AssignAllGradient = true;
            exe.Initalize();

            // The encoder sequence length is only fixed here, by the data supplied at runtime.
            var encoderSeqLength = 3;

            var encoderValues = new double[encoderSeqLength, batch, encoderHiddenSize];
            UniformRandomArray(encoderValues);

            var decoderValues = new double[batch, decoderHiddenSize];
            UniformRandomArray(decoderValues);

            exe.AssignTensor(encoderHiddenStates, encoderValues.AsTensor());
            exe.AssignTensor(decoderHiddenStates, decoderValues.AsTensor());
            exe.Forward();

            // The output tensor is fetched but not inspected; the call is kept so the sequence
            // of executor operations is unchanged.
            exe.GetTensor(attention.Output);

            // Random gradient to seed the backward pass at the output.
            var outputGradValues = new double[batch, encoderHiddenSize];
            UniformRandomArray(outputGradValues);
            exe.AssignGradient(attention.Output, outputGradValues.AsTensor(), replace: true);
            exe.Backward();

            // Back-propagated gradients, all fetched before any finite-difference run.
            var gradWh = exe.GetGradient(attention.Wh);
            var gradWd = exe.GetGradient(attention.Wd);
            var gradEncoder = exe.GetGradient(attention.EncoderHiddenStates);
            var gradDecoder = exe.GetGradient(attention.DecoderHiddenStates);

            var step = 1e-7;

            // Wh: back-propagation vs. finite difference.
            var gradWhFd = GradientChecker.FiniteDifferenceGradient(exe, attention.Wh, bump: step);
            AreClose(gradWh.ToArray2D(), gradWhFd.ToArray2D(), tolerance);

            // Wd: back-propagation vs. finite difference.
            var gradWdFd = GradientChecker.FiniteDifferenceGradient(exe, attention.Wd, bump: step);
            AreClose(gradWd.ToArray2D(), gradWdFd.ToArray2D(), tolerance);

            // Encoder hidden states: back-propagation vs. finite difference.
            var gradEncoderFd = GradientChecker.FiniteDifferenceGradient(exe, attention.EncoderHiddenStates, bump: step);
            AreClose(gradEncoder.ToArray3D(), gradEncoderFd.ToArray3D(), tolerance);

            // Decoder hidden states: back-propagation vs. finite difference.
            var gradDecoderFd = GradientChecker.FiniteDifferenceGradient(exe, attention.DecoderHiddenStates, bump: step);
            AreClose(gradDecoder.ToArray2D(), gradDecoderFd.ToArray2D(), tolerance);
        }