コード例 #1
0
        /// <summary>
        /// Monte-Carlo estimate of pi on the given device context. Draws
        /// <paramref name="batchs"/> batches of <paramref name="batchSize"/> uniform
        /// points in the unit square and asserts the estimate is within
        /// <paramref name="error"/> of <see cref="Math.PI"/>.
        /// </summary>
        public static void EstimatePi(Context ctx, int batchs, ulong batchSize, double error)
        {
            // Fixed RNG seed keeps runs reproducible.
            const ulong seed = 0UL;

            // Device buffers: one batch of 2D sample points plus a scalar accumulator for pi.
            var points = ctx.Device.Allocate <double2>(Shape.Create((long)batchSize));
            var pi     = ctx.Device.Allocate <double>(Shape.Scalar);

            // Symbolic transform: a point inside the unit circle contributes 4.0
            // (factor 4 because only the positive quadrant is sampled), else 0.0.
            var pis = Map(points, point => (point.x * point.x + point.y * point.y) < 1.0 ? 4.0 : 0.0);

            for (var batch = 0; batch < batchs; ++batch)
            {
                Console.WriteLine($"Batch {batch}");
                // Advance the RNG stream so each batch draws fresh numbers, then fold
                // this batch's mean into the running estimate.
                var offset = (ulong)batch * batchSize;
                ctx.Assign(points, RandomUniform <double2>(seed: seed, offset: offset));
                ctx.Assign(pi, batch == 0 ? ReduceMean(pis) : (pi + ReduceMean(pis)) / 2.0);
            }

            Console.WriteLine($"Pi = {pi.ToScalar()}");
            Assert.That(pi.ToScalar(), Is.EqualTo(Math.PI).Within(error));
        }
コード例 #2
0
        /// <summary>
        /// Gradient check for the attention-reduce operator: backprop gradients of
        /// both inputs are compared against finite-difference approximations.
        /// </summary>
        public static void TestAttentionReduce()
        {
            // Problem dimensions: sequence length, batch size, feature dimension.
            var length = 3;
            var batch  = 4;
            var dim    = 5;

            // Host-side random inputs for the attention states and softmax weights.
            var statesData = new double[length, batch, dim];

            UniformRandomArray(statesData);
            var softmaxData = new double[length, batch];

            UniformRandomArray(softmaxData);

            // Symbolic inputs; the leading (sequence) dimension is left open (-1).
            var softmax = Variable <double>(PartialShape.Create(-1, batch));
            var states  = Variable <double>(PartialShape.Create(-1, batch, dim));
            var reduce  = new AttentionReduce <double>(softmax, states);

            var gpu = Context.GpuContext(0);
            var exe = new Executor(gpu, reduce.Output)
            {
                AssignAllGradient = true
            };

            exe.Initalize();

            // Upstream gradient fed into the backward pass.
            var dOutputData = new double[batch, dim];

            UniformRandomArray(dOutputData);

            exe.AssignTensor(softmax, softmaxData.AsTensor());
            exe.AssignTensor(states, statesData.AsTensor());
            exe.Forward();
            exe.AssignGradient(reduce.Output, dOutputData.AsTensor(), replace: true);
            exe.Backward();

            var dSoftmax = exe.GetGradient(reduce.Softmax);
            var dStates  = exe.GetGradient(reduce.States);

            // Compare analytic (backprop) gradients against finite differences.
            var bump = 1e-6;

            var dSoftmaxFd = GradientChecker.FiniteDifferenceGradient(exe, softmax, bump: bump);

            AreClose(dSoftmaxFd.ToArray2D(), dSoftmax.ToArray2D(), 1e-7);

            var dStatesFd = GradientChecker.FiniteDifferenceGradient(exe, states, bump: bump);

            AreClose(dStatesFd.ToArray3D(), dStates.ToArray3D(), 1e-7);
        }
コード例 #3
0
ファイル: PTB.cs プロジェクト: vishalbelsare/AleaTK
        /// <summary>
        /// Trains the PTB word-level language model, reports train/valid/test
        /// perplexity, and (when run from a console) enters an interactive loop
        /// that predicts the next word for user-entered text.
        /// </summary>
        /// <param name="isConsole">Enables the interactive prompt loop at the end.</param>
        /// <param name="cfgType">Selects the Small/Medium/Large hyper-parameter preset.</param>
        /// <param name="usingCuDnn">Chooses the cuDNN RNN over the direct LSTM implementation.</param>
        public static void Run(bool isConsole, ConfigType cfgType, bool usingCuDnn)
        {
            Console.WriteLine($"UsingCUDNN({usingCuDnn}), Config: {cfgType}");

            var ptb = new Data(DataPath);
            var ctx = Context.GpuContext(0);

            // One config per phase: training, validation, test, and interactive demo.
            // Test/interactive use batch size 1; test additionally steps one word at a time.
            Config cfg, cfgValid, cfgTest, cfgInteractive;

            switch (cfgType)
            {
            case ConfigType.Small:
                cfg            = Config.Small(batchSize: 20);
                cfgValid       = Config.Small(batchSize: 20);
                cfgTest        = Config.Small(batchSize: 1, numSteps: 1);
                cfgInteractive = Config.Small(batchSize: 1, numSteps: 10);
                break;

            case ConfigType.Medium:
                cfg            = Config.Medium(batchSize: 20);
                cfgValid       = Config.Medium(batchSize: 20);
                cfgTest        = Config.Medium(batchSize: 1, numSteps: 1);
                cfgInteractive = Config.Medium(batchSize: 1, numSteps: 10);
                break;

            case ConfigType.Large:
                cfg            = Config.Large(batchSize: 20);
                cfgValid       = Config.Large(batchSize: 20);
                cfgTest        = Config.Large(batchSize: 1, numSteps: 1);
                cfgInteractive = Config.Large(batchSize: 1, numSteps: 10);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(cfgType), cfgType, null);
            }

            // Sanity check: every config's vocabulary size must match the dataset.
            Assert.AreEqual(ptb.WordToIdDict.Count, cfg.VocabSize);
            Assert.AreEqual(ptb.WordToIdDict.Count, cfgValid.VocabSize);
            Assert.AreEqual(ptb.WordToIdDict.Count, cfgTest.VocabSize);
            Assert.AreEqual(ptb.WordToIdDict.Count, cfgInteractive.VocabSize);

            // Separate model instances share weights via CopyWeightsFrom below;
            // only the training model is built with isTraining: true.
            var model            = new Model(ctx, cfg, isTraining: true, usingCuDnn: usingCuDnn);
            var modelValid       = new Model(ctx, cfgValid, isTraining: false, usingCuDnn: usingCuDnn);
            var modelTest        = new Model(ctx, cfgTest, isTraining: false, usingCuDnn: usingCuDnn);
            var modelInteractive = new Model(ctx, cfgInteractive, isTraining: false, usingCuDnn: usingCuDnn);

            for (var i = 0; i < cfg.MaxMaxEpoch; ++i)
            {
                // Learning rate stays constant for the first MaxEpoch epochs,
                // then decays geometrically by LrDecay per epoch.
                var lrDecay      = Math.Pow(cfg.LrDecay, Math.Max(i - cfg.MaxEpoch, 0.0));
                var learningRate = cfg.LearningRate * lrDecay;

                Console.WriteLine($"Epoch: {i + 1} Learning rate: {learningRate:F3}");
                var trainPerplexity = model.RunEpoch(ptb.TrainData, learningRate: learningRate, verbose: true);
                Console.WriteLine($"Epoch: {i + 1} Train Perplexity: {trainPerplexity:F3}");

                if (!Profiling)
                {
                    modelValid.CopyWeightsFrom(model);
                    var validPerplexity = modelValid.RunEpoch(ptb.ValidData);
                    Console.WriteLine($"Epoch: {i + 1} Valid Perplexity: {validPerplexity:F3}");
                }
            }

            if (!Profiling)
            {
                modelTest.CopyWeightsFrom(model);
                Console.WriteLine("Testing with test data, this is slow, since batch size is set to small...");
                var testPerplexity = modelTest.RunEpoch(ptb.TestData, verbose: true);
                Console.WriteLine($"Test Perplexity: {testPerplexity:F3}");
            }

            if (!Profiling && isConsole)
            {
                // inputs holds one column (batch size 1) of NumSteps word ids.
                var inputs = new int[cfgInteractive.NumSteps, 1];
                modelInteractive.CopyWeightsFrom(model);

                // since the entropy and softmax are merged, so we have to allocate the target (label) tensor
                modelInteractive.Optimizer.AssignTensor(modelInteractive.Targets, inputs.AsTensor());

                while (true)
                {
                    Console.WriteLine();
                    Console.WriteLine($"Enter some words (less than {cfgInteractive.NumSteps} words)");
                    var readLine = Console.ReadLine();
                    if (readLine == null)
                    {
                        break;
                    }
                    var line  = readLine.Trim(' ', '\t', '\r', '\n');
                    var words = line.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                    if (words.Length <= 0 || words.Length > cfgInteractive.NumSteps)
                    {
                        continue;
                    }

                    // Pad the tail of the input window with the unknown-word token.
                    for (var i = 0; i < cfgInteractive.NumSteps; ++i)
                    {
                        if (i < words.Length)
                        {
                            inputs[i, 0] = ptb.WordToId(words[i]);
                        }
                        else
                        {
                            inputs[i, 0] = ptb.WordToId("<unk>");
                        }
                    }

                    Console.WriteLine("Your inputs are:");
                    for (var i = 0; i < cfgInteractive.NumSteps; ++i)
                    {
                        Console.Write($"{ptb.IdToWord(inputs[i, 0])} ");
                    }
                    Console.WriteLine();

                    // Fresh hidden state per query, then a single forward pass.
                    modelInteractive.ResetStates();
                    modelInteractive.Optimizer.AssignTensor(modelInteractive.Inputs, inputs.AsTensor());
                    modelInteractive.Optimizer.Forward();

                    // Read log-probabilities at the position of the last real word and
                    // rank the vocabulary by probability.
                    var logPred   = modelInteractive.Optimizer.GetTensor(modelInteractive.Loss.LogPred).ToArray2D();
                    var pred      = new List <IndexAndProb>();
                    var totalProb = 0.0;
                    for (var i = 0; i < cfgInteractive.VocabSize; ++i)
                    {
                        var p = new IndexAndProb {
                            Index = i, Prob = Math.Exp(logPred[words.Length - 1, i])
                        };
                        pred.Add(p);
                        totalProb += p.Prob;
                    }
                    // Total probability should be ~1.0; printed as a sanity check.
                    Console.WriteLine($"Total probability: {totalProb:F4}");
                    pred.Sort();
                    Console.WriteLine("Candidates are:");
                    pred.Take(10).Iter((x, o) => { Console.WriteLine($" {x.Prob:P2} --> {ptb.IdToWord(x.Index)}"); });
                }
            }
        }
コード例 #4
0
ファイル: PTB.cs プロジェクト: vishalbelsare/AleaTK
            /// <summary>
            /// Builds the PTB language-model graph: embedding -> (dropout) -> LSTM
            /// stack (cuDNN-accelerated or direct) -> fully connected -> sparse
            /// softmax cross-entropy, wires a gradient-descent optimizer with global
            /// norm clipping, and runs one warm-up forward/backward pass to trigger
            /// JIT compilation before timing starts.
            /// </summary>
            /// <param name="ctx">Device context the model executes on.</param>
            /// <param name="cfg">Hyper-parameter configuration (also stored in <c>Config</c>).</param>
            /// <param name="isTraining">Enables dropout and the warm-up backward pass.</param>
            /// <param name="usingCuDnn">Selects the cuDNN RNN instead of the direct LSTM stack.</param>
            public Model(Context ctx, Config cfg, bool isTraining = true, bool usingCuDnn = true)
            {
                Config     = cfg;
                IsTraining = isTraining;
                UsingCuDnn = usingCuDnn;

                // Inputs and targets are (NumSteps, BatchSize) word-id matrices.
                Inputs  = Variable <int>(PartialShape.Create(cfg.NumSteps, cfg.BatchSize));
                Targets = Variable <int>(PartialShape.Create(cfg.NumSteps, cfg.BatchSize));

                // embedding
                Embedding = new Embedding <float>(Inputs, cfg.VocabSize, cfg.HiddenSize, initScale: cfg.InitScale);

                // add dropout (training only, and only when KeepProb < 1)
                EmbeddedOutput = Embedding.Output;
                if (isTraining && cfg.KeepProb < 1.0)
                {
                    var dropout = new Dropout <float>(EmbeddedOutput, dropoutProb: 1.0 - cfg.KeepProb);
                    EmbeddedOutput = dropout.Output;
                }

                // rnn layer, dropout for intermediate lstm layers and for output
                if (usingCuDnn)
                {
                    // NOTE: was `1.0 - Config.KeepProb`; use cfg like everywhere else
                    // (identical value, Config was assigned from cfg above).
                    RnnAccelerated = new Rnn <float>(new LstmRnnType(forgetBiasInit: 0.0), EmbeddedOutput, cfg.NumLayers, cfg.HiddenSize, isTraining: isTraining, dropout: isTraining && cfg.KeepProb < 1.0 ? 1.0 - cfg.KeepProb : 0.0);
                    RnnOutput      = RnnAccelerated.Y;
                    if (isTraining && cfg.KeepProb < 1.0)
                    {
                        var dropout = new Dropout <float>(RnnOutput, dropoutProb: 1.0 - cfg.KeepProb);
                        RnnOutput = dropout.Output;
                    }
                }
                else
                {
                    // Direct implementation: chain NumLayers LSTMs, each followed by
                    // dropout during training.
                    RnnDirect = new Lstm <float> [cfg.NumLayers];
                    for (var i = 0; i < cfg.NumLayers; ++i)
                    {
                        var lstm = new Lstm <float>(i == 0 ? EmbeddedOutput : RnnOutput, cfg.HiddenSize, forgetBiasInit: 0.0);
                        RnnDirect[i] = lstm;
                        RnnOutput    = lstm.Y;
                        if (isTraining && cfg.KeepProb < 1.0)
                        {
                            var dropout = new Dropout <float>(RnnOutput, dropoutProb: 1.0 - cfg.KeepProb);
                            RnnOutput = dropout.Output;
                        }
                    }
                }

                // Flatten (steps, batch, hidden) to (steps*batch, hidden) for the projection.
                FC = new FullyConnected <float>(RnnOutput.Reshape(RnnOutput.Shape[0] * RnnOutput.Shape[1], RnnOutput.Shape[2]), cfg.VocabSize);

                Loss = new SoftmaxCrossEntropySparse <float>(FC.Output, Targets.Reshape(Targets.Shape[0] * Targets.Shape[1]));

                Optimizer = new GradientDescentOptimizer(ctx, Loss.Loss, cfg.LearningRate, new GlobalNormGradientClipper(cfg.MaxGradNorm));

                // warmup to force JIT compilation to get timings without JIT overhead
                Optimizer.Initalize();
                ResetStates();
                Optimizer.AssignTensor(Inputs, Fill(Shape.Create(Inputs.Shape.AsArray), 0));
                Optimizer.AssignTensor(Targets, Fill(Shape.Create(Targets.Shape.AsArray), 0));
                Optimizer.Forward();
                if (isTraining)
                {
                    Optimizer.Backward();
                }

                // now reset states so warm-up does not leak into real training
                Optimizer.Initalize();
                ResetStates();
            }
コード例 #5
0
ファイル: PTB.cs プロジェクト: vishalbelsare/AleaTK
        /// <summary>
        /// Validates the direct LSTM implementation against precomputed reference
        /// results stored in a MATLAB file (lstm_small.mat): forward outputs
        /// (Y, CY, HY) and backward gradients (dX, dW, dc0, dh0) must each match
        /// within 1e-6.
        /// </summary>
        public static void TestLstmAgainstReferenceResults()
        {
            var mfr = new MatFileReader(@"lstm_small.mat");

            // Problem dimensions come from the reference file itself.
            var inputSize  = mfr.GetInt("InputSize");
            var seqLength  = mfr.GetInt("SeqLength");
            var hiddenSize = mfr.GetInt("HiddenSize");
            var batchSize  = mfr.GetInt("BatchSize");

            var x    = Variable <float>(PartialShape.Create(seqLength, batchSize, inputSize));
            var lstm = new Lstm <float>(x, hiddenSize);

            var ctx = Context.GpuContext(0);
            var exe = new Executor(ctx, lstm.Y);

            exe.Initalize();

            // Initial cell and hidden states (reference data is double; cast to float).
            var h0 = mfr.GetDoubleArray("h0").Select(n => (float)n).ToArray();
            var c0 = mfr.GetDoubleArray("c0").Select(n => (float)n).ToArray();

            exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(batchSize, hiddenSize)));
            exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(batchSize, hiddenSize)));

            var input = mfr.GetDoubleArray("X").Select(n => (float)n).ToArray();

            exe.AssignTensor(x, input.AsTensor(Shape.Create(seqLength, batchSize, inputSize)));

            // Weight blob layout: one bias row + inputSize input rows + hiddenSize
            // recurrent rows, by 4*hiddenSize gate columns (see Shape below).
            var w = mfr.GetDoubleArray("W").Select(n => (float)n).ToArray();

            w.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)).Print();
            exe.AssignTensor(lstm.W, w.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)));

            exe.Forward();

            // Full output sequence H vs. our Y.
            var H = mfr.GetDoubleArray("H").Select(n => (float)n).ToArray();

            H.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();

            var myH = exe.GetTensor(lstm.Y).ToArray();

            myH.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();

            AreClose(H, myH, 1e-6);

            // Final cell state cn vs. our CY.
            var CN = mfr.GetDoubleArray("cn").Select(n => (float)n).ToArray();

            CN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();

            var myCN = exe.GetTensor(lstm.CY).ToArray();

            myCN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();

            AreClose(CN, myCN, 1e-6);

            // Final hidden state hn vs. our HY.
            var HN = mfr.GetDoubleArray("hn").Select(n => (float)n).ToArray();

            HN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();

            var myHN = exe.GetTensor(lstm.HY).ToArray();

            myHN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();

            AreClose(HN, myHN, 1e-6);

            // Seed the backward pass with the reference upstream gradient dH.
            var dH = mfr.GetDoubleArray("dH").Select(n => (float)n).ToArray();

            exe.AssignGradient(lstm.Y, dH.AsTensor(Shape.Create(seqLength, batchSize, hiddenSize)), replace: true);

            exe.Backward();

            // Input gradient.
            var dX = mfr.GetDoubleArray("dX").Select(n => (float)n).ToArray();

            dX.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();

            var dXmy = exe.GetGradient(lstm.X).ToArray();

            dXmy.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
            AreClose(dX, dXmy, 1e-6);

            // Weight gradient.
            var dW = mfr.GetDoubleArray("dW").Select(n => (float)n).ToArray();

            dW.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)).Print();

            var dWmy = exe.GetGradient(lstm.W).ToArray();

            dWmy.AsTensor(Shape.Create(lstm.W.Shape.AsArray)).Print();
            AreClose(dW, dWmy, 1e-6);

            // Initial cell state gradient.
            var dc0 = mfr.GetDoubleArray("dc0").Select(n => (float)n).ToArray();

            dc0.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();

            var dc0my = exe.GetGradient(lstm.CX).ToArray();

            dc0my.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
            AreClose(dc0, dc0my, 1e-6);

            // Initial hidden state gradient.
            var dh0 = mfr.GetDoubleArray("dh0").Select(n => (float)n).ToArray();

            dh0.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();

            var dh0my = exe.GetGradient(lstm.HX).ToArray();

            dh0my.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
            AreClose(dh0, dh0my, 1e-6);

            // Make sure all queued GPU work has finished before the test returns.
            ctx.ToGpuContext().Stream.Synchronize();
        }
コード例 #6
0
ファイル: PTB.cs プロジェクト: vishalbelsare/AleaTK
        /// <summary>
        /// Cross-checks the cuDNN-backed LSTM (Rnn&lt;float&gt;) against the direct
        /// Lstm&lt;float&gt; implementation. Both are given identical inputs, states
        /// and constant per-gate weights; forward outputs and all backward
        /// gradients must agree within 1e-5.
        /// NOTE(review): the two implementations use different gate orders —
        /// cuDNN packs matrices as I,F,A,O while the direct LSTM lays gate
        /// columns out as I,F,O,A (see the column ranges used in the second
        /// section) — which is why the dw copy below swaps the A and O slots.
        /// </summary>
        public static void TestLstmAgainstCuDnnVersion()
        {
            var ctx        = Context.GpuContext(0);
            var inputSize  = 5;
            var seqLength  = 3;
            var batchSize  = 2;
            var hiddenSize = 4;
            var error      = 1e-5;

            // Shared random input in [-1, 1], generated once on the CPU so both
            // implementations see exactly the same data.
            var data = Context.CpuContext.Eval((2.0f.AsScalar() *
                                                RandomUniform <float>(Shape.Create(seqLength, batchSize, inputSize)) -
                                                1.0f.AsScalar())).ToArray3D();
            //data.AsTensor(Shape.Create(seqLength*batchSize, inputSize)).Print();

            // Shared initial states and upstream gradient.
            var h0 = Context.CpuContext.Eval(RandomNormal <float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
            var c0 = Context.CpuContext.Eval(RandomNormal <float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
            var dy = Context.CpuContext.Eval((2.0f.AsScalar() *
                                              RandomUniform <float>(Shape.Create(seqLength, batchSize, hiddenSize)) -
                                              1.0f.AsScalar())).ToArray3D();
            //dy.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();

            // Constant weight values per gate: W* input weights, U* recurrent
            // weights, B* biases, for the input/forget/output/activation gates.
            var wi = 0.5f;
            var wf = 0.4f;
            var wo = 0.3f;
            var wa = 0.2f;
            var ui = 0.5f;
            var uf = 0.4f;
            var uo = 0.3f;
            var ua = 0.1f;
            var bi = 0.5f;
            var bf = 0.4f;
            var bo = 0.3f;
            var ba = 0.2f;

            // Results: suffix 1 = cuDNN, suffix 2 = direct implementation.
            float[,,] y1, y2, dx1, dx2;
            float[,] cy1, cy2, hy1, hy2;
            float[,] dcx1, dcx2, dhx1, dhx2;
            float[,] dw1, dw2;

            {
                // calc with cuDNN
                var x    = Variable <float>(PartialShape.Create(seqLength, batchSize, inputSize));
                var lstm = new Rnn <float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
                var exe  = new Executor(ctx, lstm.Y);
                exe.Initalize();

                // set input
                exe.AssignTensor(lstm.X, data.AsTensor());

                // set states (cuDNN states carry a leading layer dimension of 1)
                exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
                exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

                // set weigths
                // cuDNN matrices order: IFAO
                // The flat cuDNN weight blob is viewed as stacked row blocks:
                // 4 input-weight blocks, 4 recurrent blocks, then 8 bias rows
                // (cuDNN keeps two bias sets; only the first 4 are written here —
                // TODO confirm the second set stays zero after Initalize).
                var w      = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
                var offset = 0;
                // Wi
                ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wi));
                offset += inputSize;
                // Wf
                ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wf));
                offset += inputSize;
                // Wa
                ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wa));
                offset += inputSize;
                // Wo
                ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wo));
                offset += inputSize;
                // Ui
                ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ui));
                offset += hiddenSize;
                // Uf
                ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uf));
                offset += hiddenSize;
                // Ua
                ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ua));
                offset += hiddenSize;
                // Uo
                ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uo));
                offset += hiddenSize;
                // Bi
                ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bi));
                offset++;
                // Bf
                ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bf));
                offset++;
                // Ba
                ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), ba));
                offset++;
                // Bo
                ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bo));

                exe.Forward();

                y1  = exe.GetTensor(lstm.Y).ToArray3D();
                cy1 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
                hy1 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

                exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);

                exe.Backward();

                dx1  = exe.GetGradient(lstm.X).ToArray3D();
                dcx1 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
                dhx1 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();

                // we make dw follow the shape as (1 + inputSize + hiddenSize, 4*hiddenSize), need to transpose because cuDNN uses Fortran storge order
                var dwCUDNN = exe.GetGradient(lstm.W).ToArray().AsTensor();
                dw1 = new float[1 + inputSize + hiddenSize, 4 * hiddenSize];
                var dw1Tensor = Reference <float>(dw1);
                var cpu       = Context.CpuContext;
                offset = 0;

                // cuDNN order: IFAO, need to transpose because cuDNN uses Fortran storge order
                // Each cuDNN slice is copied into the direct layout: row 0 = bias,
                // rows [1, inputSize] = input weights, rest = recurrent weights;
                // A and O land in swapped column blocks (IFAO -> IFOA).

                // Wi -> columns [0, h)
                cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
                offset += inputSize * hiddenSize;
                // Wf -> columns [h, 2h)
                cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
                offset += inputSize * hiddenSize;
                // Wa -> columns [3h, 4h)
                cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
                offset += inputSize * hiddenSize;
                // Wo -> columns [2h, 3h)
                cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
                offset += inputSize * hiddenSize;
                // Ui
                cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
                offset += hiddenSize * hiddenSize;
                // Uf
                cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
                offset += hiddenSize * hiddenSize;
                // Ua
                cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
                offset += hiddenSize * hiddenSize;
                // Uo
                cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
                offset += hiddenSize * hiddenSize;
                // Bi
                cpu.Assign(dw1Tensor.Slice(0, Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
                offset += hiddenSize;
                // Bf
                cpu.Assign(dw1Tensor.Slice(0, Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
                offset += hiddenSize;
                // Ba
                cpu.Assign(dw1Tensor.Slice(0, Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
                offset += hiddenSize;
                // Bo
                cpu.Assign(dw1Tensor.Slice(0, Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
            }

            {
                // calc with direct LSTM implementation
                var x    = Variable <float>(PartialShape.Create(seqLength, batchSize, inputSize));
                var lstm = new Lstm <float>(x, hiddenSize, forgetBiasInit: 0.0);
                var exe  = new Executor(ctx, lstm.Y);
                exe.Initalize();

                // set input
                exe.AssignTensor(lstm.X, data.AsTensor());

                // set states (direct LSTM states are plain (batch, hidden) matrices)
                exe.AssignTensor(lstm.CX, c0.AsTensor());
                exe.AssignTensor(lstm.HX, h0.AsTensor());

                // set weights; direct layout: row 0 = bias, rows [1, inputSize] =
                // input weights, remaining rows = recurrent weights; gate column
                // blocks ordered I, F, O, A.
                var w = exe.GetTensor(lstm.W);
                // Wi
                ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(0, hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wi));
                // Wf
                ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wf));
                // Wo
                ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wo));
                // Wa
                ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wa));
                // Ui
                ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(0, hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ui));
                // Uf
                ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uf));
                // Uo
                ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uo));
                // Ua
                ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ua));
                // Bi
                ctx.Assign(w.Slice(0, Range(0, hiddenSize)), Fill(Shape.Create(1, hiddenSize), bi));
                // Bf
                ctx.Assign(w.Slice(0, Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), bf));
                // Bo
                ctx.Assign(w.Slice(0, Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), bo));
                // Ba
                ctx.Assign(w.Slice(0, Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), ba));

                exe.Forward();

                y2  = exe.GetTensor(lstm.Y).ToArray3D();
                cy2 = exe.GetTensor(lstm.CY).ToArray2D();
                hy2 = exe.GetTensor(lstm.HY).ToArray2D();

                exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);

                exe.Backward();

                dx2  = exe.GetGradient(lstm.X).ToArray3D();
                dcx2 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
                dhx2 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
                dw2  = exe.GetGradient(lstm.W).ToArray2D();
            }

            // Both implementations must agree on outputs, states, and gradients.
            AreClose(y1, y2, error);
            AreClose(cy1, cy2, error);
            AreClose(hy1, hy2, error);
            AreClose(dx1, dx2, error);
            AreClose(dcx1, dcx2, error);
            AreClose(dhx1, dhx2, error);
            AreClose(dw1, dw2, error);
        }
コード例 #7
0
ファイル: Rnn.cs プロジェクト: vishalbelsare/AleaTK
        /// <summary>
        /// Checks that the statically-shaped Rnn&lt;float&gt; and the dynamically-shaped
        /// RnnDynamic&lt;float&gt; (both cuDNN-backed single-layer LSTMs) produce
        /// identical forward results and gradients when given the same inputs,
        /// states and weights.
        /// </summary>
        public static void RnnAgainstRnnDynamic()
        {
            var ctx        = Context.GpuContext(0);
            var inputSize  = 5;
            var seqLength  = 3;
            var batchSize  = 2;
            var hiddenSize = 4;
            var error      = 1e-5;

            // Shared random input, states, and upstream gradient generated once
            // on the CPU so both executors see identical data.
            var data = Context.CpuContext.Eval(RandomUniform <float>(-1, 1, Shape.Create(seqLength, batchSize, inputSize))).ToArray3D();

            data.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();

            var h0 = Context.CpuContext.Eval(RandomNormal <float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
            var c0 = Context.CpuContext.Eval(RandomNormal <float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
            var dy = Context.CpuContext.Eval(RandomUniform <float>(-1, 1, Shape.Create(seqLength, batchSize, hiddenSize))).ToArray3D();

            // Results: suffix 1 = static Rnn, suffix 2 = RnnDynamic.
            float[,,] y1, y2, dx1, dx2;
            float[,] cy1, cy2, hy1, hy2;
            float[,] dcx1, dcx2, dhx1, dhx2;
            float[] dw1, dw2;

            {
                // Static-shape cuDNN RNN: sequence length and batch size are fixed
                // in the variable's shape.
                var x    = Variable <float>(PartialShape.Create(seqLength, batchSize, inputSize));
                var lstm = new Rnn <float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
                var exe  = new Executor(ctx, lstm.Y);
                exe.Initalize();

                // set input
                exe.AssignTensor(lstm.X, data.AsTensor());

                // set states
                exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
                exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

                // set weigths, cuDNN matrices order: IFAO
                var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
                SetWeights(ctx, w, inputSize, hiddenSize);

                exe.Forward();

                y1  = exe.GetTensor(lstm.Y).ToArray3D();
                cy1 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
                hy1 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

                exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);

                exe.Backward();

                dx1  = exe.GetGradient(lstm.X).ToArray3D();
                dcx1 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
                dhx1 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
                dw1  = exe.GetGradient(lstm.W).ToArray(); // cuDNN weight is 1D linear blob
            }

            {
                // Dynamic-shape variant: sequence length and batch size are left
                // open (-1) and resolved from the assigned tensor at run time.
                var x    = Variable <float>(PartialShape.Create(-1, -1, inputSize));
                var lstm = new RnnDynamic <float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
                var exe  = new Executor(ctx, lstm.Y);
                exe.Initalize();

                // set input
                exe.AssignTensor(lstm.X, data.AsTensor());

                // set states
                exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
                exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

                // set weigths, cuDNN matrices order: IFAO
                var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
                SetWeights(ctx, w, inputSize, hiddenSize);

                exe.Forward();

                y2  = exe.GetTensor(lstm.Y).ToArray3D();
                cy2 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
                hy2 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

                exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);

                exe.Backward();

                dx2  = exe.GetGradient(lstm.X).ToArray3D();
                dcx2 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
                dhx2 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
                dw2  = exe.GetGradient(lstm.W).ToArray();
            }

            // Both variants must agree on outputs, states, and gradients.
            AreClose(y1, y2, error);
            AreClose(cy1, cy2, error);
            AreClose(hy1, hy2, error);
            AreClose(dx1, dx2, error);
            AreClose(dcx1, dcx2, error);
            AreClose(dhx1, dhx2, error);
            AreClose(dw1, dw2, error);
        }
コード例 #8
0
 /// <summary>
 /// Runs the Monte Carlo pi estimation on GPU 0: 100 batches of 1e7 points each,
 /// accepting an absolute error of 1e-3 against Math.PI.
 /// </summary>
 public static void PiEstimationGpu()
 {
     var gpu = Context.GpuContext(0);
     EstimatePi(gpu, batchs: 100, batchSize: 10000000, error: 1e-3);
 }
コード例 #9
0
        /// <summary>
        /// Gradient check for the loop-style recurrence: builds a LoopDemo graph,
        /// runs forward/backward on GPU 0, and compares every analytic gradient
        /// against a finite-difference estimate.
        /// </summary>
        public static void LoopStyle()
        {
            // Wire up the graph: LoopDemo consumes an input, a stack of states and a weight.
            var vInput  = Variable <double>();
            var vStates = Variable <double>();
            var vWeight = Variable <double>();
            var demo    = new LoopDemo(vInput, vStates, vWeight);
            var vOutput = demo.Output;

            // Executor on GPU 0; request gradients for every variable, not just trainables.
            var gpu      = Context.GpuContext(0);
            var executor = new Executor(gpu, vOutput)
            {
                AssignAllGradient = true
            };

            executor.Initalize();

            // Deterministic fake data for the forward pass (fixed seed keeps the test stable).
            const int steps = 4;
            const int n     = 5;

            var random = new Random(42);

            var input  = new double[n, n];
            var states = new double[steps, n, n];
            var weight = new double[n, n];

            UniformRandomArray(input, random);
            UniformRandomArray(states, random);
            UniformRandomArray(weight, random);

            executor.AssignTensor(vInput, input.AsTensor());
            executor.AssignTensor(vStates, states.AsTensor());
            executor.AssignTensor(vWeight, weight.AsTensor());

            // Forward pass; print the result for manual inspection.
            executor.Forward();
            executor.GetTensor(vOutput).Print();

            // Seed the backward pass with a random output gradient.
            var outputGrad = new double[n, n];

            UniformRandomArray(outputGrad, random);
            executor.AssignGradient(vOutput, outputGrad.AsTensor(), replace: true);

            executor.Backward();

            // Compare analytic gradients with central finite differences.
            const double bump = 1e-7;

            var dInput   = executor.GetGradient(vInput);
            var dInputFd = GradientChecker.FiniteDifferenceGradient(executor, vInput, bump: bump);

            AreClose(dInputFd.ToArray2D(), dInput.ToArray2D(), 1e-7);

            var dStates   = executor.GetGradient(vStates);
            var dStatesFd = GradientChecker.FiniteDifferenceGradient(executor, vStates, bump: bump);

            AreClose(dStatesFd.ToArray3D(), dStates.ToArray3D(), 1e-7);

            var dWeight   = executor.GetGradient(vWeight);
            var dWeightFd = GradientChecker.FiniteDifferenceGradient(executor, vWeight, bump: bump);

            // The weight gradient is checked with a looser tolerance (1e-3 vs 1e-7).
            AreClose(dWeightFd.ToArray2D(), dWeight.ToArray2D(), 1e-3);
        }
コード例 #10
0
        /// <summary>
        /// Gradient check for the unrolled recurrence: one state variable per time step,
        /// forward/backward on GPU 0, each analytic gradient compared against a
        /// finite-difference estimate.
        /// </summary>
        public static void UnrollingStyle()
        {
            // Build the unrolled graph with a separate state variable for each step.
            const int steps = 4;

            var vInput  = Variable <double>();
            var vStates = Enumerable.Range(0, steps).Select(_ => Variable <double>()).ToArray();
            var vWeight = Variable <double>();
            var vOutput = CreateUnrollingGraph(vInput, vStates, vWeight);

            // Executor on GPU 0; request gradients for every variable, not just trainables.
            var gpu      = Context.GpuContext(0);
            var executor = new Executor(gpu, vOutput)
            {
                AssignAllGradient = true
            };

            executor.Initalize();

            // Deterministic fake data for the forward pass (fixed seed keeps the test stable).
            const int n = 5;

            var random = new Random(42);

            var input  = new double[n, n];
            var states = Enumerable.Range(0, steps).Select(_ => new double[n, n]).ToArray();
            var weight = new double[n, n];

            UniformRandomArray(input, random);
            foreach (var state in states)
            {
                UniformRandomArray(state, random);
            }
            UniformRandomArray(weight, random);

            executor.AssignTensor(vInput, input.AsTensor());
            for (var step = 0; step < steps; ++step)
            {
                executor.AssignTensor(vStates[step], states[step].AsTensor());
            }
            executor.AssignTensor(vWeight, weight.AsTensor());

            // Forward pass; print the result for manual inspection.
            executor.Forward();
            executor.GetTensor(vOutput).Print();

            // Seed the backward pass with a random output gradient.
            var outputGrad = new double[n, n];

            UniformRandomArray(outputGrad, random);
            executor.AssignGradient(vOutput, outputGrad.AsTensor(), replace: true);

            executor.Backward();

            // Compare analytic gradients with central finite differences.
            const double bump = 1e-7;

            var dInput   = executor.GetGradient(vInput);
            var dInputFd = GradientChecker.FiniteDifferenceGradient(executor, vInput, bump: bump);

            AreClose(dInputFd.ToArray2D(), dInput.ToArray2D(), 1e-7);

            // Each per-step state variable gets its own check.
            foreach (var vState in vStates)
            {
                var dState   = executor.GetGradient(vState);
                var dStateFd = GradientChecker.FiniteDifferenceGradient(executor, vState, bump: bump);
                AreClose(dStateFd.ToArray2D(), dState.ToArray2D(), 1e-7);
            }

            var dWeight   = executor.GetGradient(vWeight);
            var dWeightFd = GradientChecker.FiniteDifferenceGradient(executor, vWeight, bump: bump);

            // The weight gradient is checked with a looser tolerance (1e-3 vs 1e-7).
            AreClose(dWeightFd.ToArray2D(), dWeight.ToArray2D(), 1e-3);
        }
コード例 #11
0
        /// <summary>
        /// Gradient check for the Attention layer: runs forward/backward on GPU 0
        /// and verifies the gradients of both weight matrices and both hidden-state
        /// inputs against finite-difference estimates.
        /// </summary>
        public static void TestAttention()
        {
            // Problem sizes for the check.
            const int batch             = 10;
            const int encoderHiddenSize = 20;
            const int decoderHiddenSize = 25;
            const int attentionDim      = 30;

            // Encoder states are (encoderSeqLength, batch, encoderHiddenSize); the
            // sequence length is left open (-1) so it can be chosen at run time.
            var vEncoder  = Variable <double>(PartialShape.Create(-1, batch, encoderHiddenSize));
            var vDecoder  = Variable <double>(PartialShape.Create(batch, decoderHiddenSize));
            var attention = new Attention <double>(vEncoder, vDecoder, attentionDim);

            // Executor on GPU 0; request gradients for every variable, not just trainables.
            var gpu      = Context.GpuContext(0);
            var executor = new Executor(gpu, attention.Output)
            {
                AssignAllGradient = true
            };

            executor.Initalize();

            // Concrete encoder sequence length, fixed only at run time.
            const int encoderSeqLength = 3;

            var encoderData = new double[encoderSeqLength, batch, encoderHiddenSize];

            UniformRandomArray(encoderData);

            var decoderData = new double[batch, decoderHiddenSize];

            UniformRandomArray(decoderData);

            executor.AssignTensor(vEncoder, encoderData.AsTensor());
            executor.AssignTensor(vDecoder, decoderData.AsTensor());
            executor.Forward();

            // Evaluate the forward output (result itself is not asserted here).
            executor.GetTensor(attention.Output);

            // Seed the backward pass with a random output gradient.
            var outputGrad = new double[batch, encoderHiddenSize];

            UniformRandomArray(outputGrad);
            executor.AssignGradient(attention.Output, outputGrad.AsTensor(), replace: true);
            executor.Backward();

            // Fetch all analytic gradients.
            var dWh = executor.GetGradient(attention.Wh);
            var dWd = executor.GetGradient(attention.Wd);
            var dH  = executor.GetGradient(attention.EncoderHiddenStates);
            var dD  = executor.GetGradient(attention.DecoderHiddenStates);

            // Compare against central finite differences.
            const double bump = 1e-7;

            var dWhFd = GradientChecker.FiniteDifferenceGradient(executor, attention.Wh, bump: bump);

            AreClose(dWh.ToArray2D(), dWhFd.ToArray2D(), 1e-7);

            var dWdFd = GradientChecker.FiniteDifferenceGradient(executor, attention.Wd, bump: bump);

            AreClose(dWd.ToArray2D(), dWdFd.ToArray2D(), 1e-7);

            var dHFd = GradientChecker.FiniteDifferenceGradient(executor, attention.EncoderHiddenStates, bump: bump);

            AreClose(dH.ToArray3D(), dHFd.ToArray3D(), 1e-7);

            var dDFd = GradientChecker.FiniteDifferenceGradient(executor, attention.DecoderHiddenStates, bump: bump);

            AreClose(dD.ToArray2D(), dDFd.ToArray2D(), 1e-7);
        }