/// <summary>
/// Validates the AttentionReduce op: backprop gradients for both the softmax
/// weights and the states must match finite-difference estimates.
/// </summary>
public static void TestAttentionReduce()
{
    // Host data: states are (seq, batch, dim), softmax weights are (seq, batch).
    const int seq = 3;
    const int batch = 4;
    const int dim = 5;
    var statesHost = new double[seq, batch, dim];
    UniformRandomArray(statesHost);
    var softmaxHost = new double[seq, batch];
    UniformRandomArray(softmaxHost);

    // Sequence length stays symbolic (-1) and is resolved when tensors are assigned.
    var softmaxVar = Variable<double>(PartialShape.Create(-1, batch));
    var statesVar = Variable<double>(PartialShape.Create(-1, batch, dim));
    var reduce = new AttentionReduce<double>(softmaxVar, statesVar);

    var ctx = Context.GpuContext(0);
    var exe = new Executor(ctx, reduce.Output) { AssignAllGradient = true };
    exe.Initalize();

    // Random upstream gradient of shape (batch, dim).
    var dOutputHost = new double[batch, dim];
    UniformRandomArray(dOutputHost);

    exe.AssignTensor(softmaxVar, softmaxHost.AsTensor());
    exe.AssignTensor(statesVar, statesHost.AsTensor());
    exe.Forward();

    exe.AssignGradient(reduce.Output, dOutputHost.AsTensor(), replace: true);
    exe.Backward();

    var dSoftmax = exe.GetGradient(reduce.Softmax);
    var dStates = exe.GetGradient(reduce.States);

    // Compare backprop gradients against central finite differences.
    const double bump = 1e-6;
    var dSoftmaxFd = GradientChecker.FiniteDifferenceGradient(exe, softmaxVar, bump: bump);
    AreClose(dSoftmaxFd.ToArray2D(), dSoftmax.ToArray2D(), 1e-7);
    var dStatesFd = GradientChecker.FiniteDifferenceGradient(exe, statesVar, bump: bump);
    AreClose(dStatesFd.ToArray3D(), dStates.ToArray3D(), 1e-7);
}
/// <summary>
/// Trains a PTB language model for the chosen configuration size, validates after
/// each epoch, evaluates on the test set, and (when run from a console and not
/// profiling) enters an interactive next-word-prediction loop.
/// </summary>
/// <param name="isConsole">When true (and not profiling), run the interactive prediction loop after training.</param>
/// <param name="cfgType">Which model size to build (Small/Medium/Large).</param>
/// <param name="usingCuDnn">Whether the models use the cuDNN-backed RNN implementation.</param>
public static void Run(bool isConsole, ConfigType cfgType, bool usingCuDnn)
{
    Console.WriteLine($"UsingCUDNN({usingCuDnn}), Config: {cfgType}");
    var ptb = new Data(DataPath);
    var ctx = Context.GpuContext(0);

    // Four configs of the same size family: training and validation use batch 20;
    // testing uses batch 1 with 1 step; interactive uses batch 1 with 10 steps.
    Config cfg, cfgValid, cfgTest, cfgInteractive;
    switch (cfgType)
    {
        case ConfigType.Small:
            cfg = Config.Small(batchSize: 20);
            cfgValid = Config.Small(batchSize: 20);
            cfgTest = Config.Small(batchSize: 1, numSteps: 1);
            cfgInteractive = Config.Small(batchSize: 1, numSteps: 10);
            break;
        case ConfigType.Medium:
            cfg = Config.Medium(batchSize: 20);
            cfgValid = Config.Medium(batchSize: 20);
            cfgTest = Config.Medium(batchSize: 1, numSteps: 1);
            cfgInteractive = Config.Medium(batchSize: 1, numSteps: 10);
            break;
        case ConfigType.Large:
            cfg = Config.Large(batchSize: 20);
            cfgValid = Config.Large(batchSize: 20);
            cfgTest = Config.Large(batchSize: 1, numSteps: 1);
            cfgInteractive = Config.Large(batchSize: 1, numSteps: 10);
            break;
        default:
            throw new ArgumentOutOfRangeException(nameof(cfgType), cfgType, null);
    }

    // The dataset's vocabulary must match what every config expects.
    Assert.AreEqual(ptb.WordToIdDict.Count, cfg.VocabSize);
    Assert.AreEqual(ptb.WordToIdDict.Count, cfgValid.VocabSize);
    Assert.AreEqual(ptb.WordToIdDict.Count, cfgTest.VocabSize);
    Assert.AreEqual(ptb.WordToIdDict.Count, cfgInteractive.VocabSize);

    var model = new Model(ctx, cfg, isTraining: true, usingCuDnn: usingCuDnn);
    var modelValid = new Model(ctx, cfgValid, isTraining: false, usingCuDnn: usingCuDnn);
    var modelTest = new Model(ctx, cfgTest, isTraining: false, usingCuDnn: usingCuDnn);
    var modelInteractive = new Model(ctx, cfgInteractive, isTraining: false, usingCuDnn: usingCuDnn);

    for (var i = 0; i < cfg.MaxMaxEpoch; ++i)
    {
        // Constant learning rate for the first MaxEpoch epochs, then geometric decay.
        var lrDecay = Math.Pow(cfg.LrDecay, Math.Max(i - cfg.MaxEpoch, 0.0));
        var learningRate = cfg.LearningRate * lrDecay;
        Console.WriteLine($"Epoch: {i + 1} Learning rate: {learningRate:F3}");
        var trainPerplexity = model.RunEpoch(ptb.TrainData, learningRate: learningRate, verbose: true);
        Console.WriteLine($"Epoch: {i + 1} Train Perplexity: {trainPerplexity:F3}");

        if (!Profiling)
        {
            // The validation model evaluates with the freshly-trained weights.
            modelValid.CopyWeightsFrom(model);
            var validPerplexity = modelValid.RunEpoch(ptb.ValidData);
            Console.WriteLine($"Epoch: {i + 1} Valid Perplexity: {validPerplexity:F3}");
        }
    }

    if (!Profiling)
    {
        modelTest.CopyWeightsFrom(model);
        Console.WriteLine("Testing with test data, this is slow, since batch size is set to small...");
        var testPerplexity = modelTest.RunEpoch(ptb.TestData, verbose: true);
        Console.WriteLine($"Test Perplexity: {testPerplexity:F3}");
    }

    if (!Profiling && isConsole)
    {
        var inputs = new int[cfgInteractive.NumSteps, 1];
        modelInteractive.CopyWeightsFrom(model);

        // since the entropy and softmax are merged, so we have to allocate the target (label) tensor
        modelInteractive.Optimizer.AssignTensor(modelInteractive.Targets, inputs.AsTensor());

        while (true)
        {
            Console.WriteLine();
            Console.WriteLine($"Enter some words (less than {cfgInteractive.NumSteps} words)");
            var readLine = Console.ReadLine();
            // EOF (e.g. redirected input exhausted) ends the interactive loop.
            if (readLine == null) { break; }
            var line = readLine.Trim(' ', '\t', '\r', '\n');
            var words = line.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            if (words.Length <= 0 || words.Length > cfgInteractive.NumSteps) { continue; }

            // Map words to ids, padding the remaining steps with the <unk> token.
            for (var i = 0; i < cfgInteractive.NumSteps; ++i)
            {
                if (i < words.Length) { inputs[i, 0] = ptb.WordToId(words[i]); }
                else { inputs[i, 0] = ptb.WordToId("<unk>"); }
            }

            Console.WriteLine("Your inputs are:");
            for (var i = 0; i < cfgInteractive.NumSteps; ++i)
            {
                Console.Write($"{ptb.IdToWord(inputs[i, 0])} ");
            }
            Console.WriteLine();

            // Fresh hidden state per query, then one forward pass to get log-probabilities.
            modelInteractive.ResetStates();
            modelInteractive.Optimizer.AssignTensor(modelInteractive.Inputs, inputs.AsTensor());
            modelInteractive.Optimizer.Forward();
            var logPred = modelInteractive.Optimizer.GetTensor(modelInteractive.Loss.LogPred).ToArray2D();

            // Collect the prediction distribution at the last entered word's step.
            var pred = new List<IndexAndProb>();
            var totalProb = 0.0;
            for (var i = 0; i < cfgInteractive.VocabSize; ++i)
            {
                var p = new IndexAndProb { Index = i, Prob = Math.Exp(logPred[words.Length - 1, i]) };
                pred.Add(p);
                totalProb += p.Prob;
            }

            // Sanity check: exponentiated log-probs should sum to ~1.
            Console.WriteLine($"Total probability: {totalProb:F4}");
            pred.Sort();
            Console.WriteLine("Candidates are:");
            pred.Take(10).Iter((x, o) => { Console.WriteLine($" {x.Prob:P2} --> {ptb.IdToWord(x.Index)}"); });
        }
    }
}
/// <summary>
/// Checks the direct LSTM implementation (forward and backward) against
/// reference results precomputed and stored in lstm_small.mat. Dimensions
/// are read from the file; all comparisons use a 1e-6 tolerance.
/// </summary>
public static void TestLstmAgainstReferenceResults()
{
    var mfr = new MatFileReader(@"lstm_small.mat");

    var inputSize = mfr.GetInt("InputSize");
    var seqLength = mfr.GetInt("SeqLength");
    var hiddenSize = mfr.GetInt("HiddenSize");
    var batchSize = mfr.GetInt("BatchSize");

    var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
    var lstm = new Lstm<float>(x, hiddenSize);
    var ctx = Context.GpuContext(0);
    var exe = new Executor(ctx, lstm.Y);
    exe.Initalize();

    // Initial hidden/cell states (stored as doubles in the .mat file, narrowed to float).
    var h0 = mfr.GetDoubleArray("h0").Select(n => (float)n).ToArray();
    var c0 = mfr.GetDoubleArray("c0").Select(n => (float)n).ToArray();
    exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(batchSize, hiddenSize)));
    exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(batchSize, hiddenSize)));

    var input = mfr.GetDoubleArray("X").Select(n => (float)n).ToArray();
    exe.AssignTensor(x, input.AsTensor(Shape.Create(seqLength, batchSize, inputSize)));

    // Weights: (1 + inputSize + hiddenSize) rows by (4 * hiddenSize) columns.
    var w = mfr.GetDoubleArray("W").Select(n => (float)n).ToArray();
    w.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)).Print();
    exe.AssignTensor(lstm.W, w.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)));

    exe.Forward();

    // Forward outputs: full output sequence H, final cell state cn, final hidden state hn.
    var H = mfr.GetDoubleArray("H").Select(n => (float)n).ToArray();
    H.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();
    var myH = exe.GetTensor(lstm.Y).ToArray();
    myH.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();
    AreClose(H, myH, 1e-6);

    var CN = mfr.GetDoubleArray("cn").Select(n => (float)n).ToArray();
    CN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var myCN = exe.GetTensor(lstm.CY).ToArray();
    myCN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(CN, myCN, 1e-6);

    var HN = mfr.GetDoubleArray("hn").Select(n => (float)n).ToArray();
    HN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var myHN = exe.GetTensor(lstm.HY).ToArray();
    myHN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(HN, myHN, 1e-6);

    // Backward pass: seed the output gradient with the reference dH.
    var dH = mfr.GetDoubleArray("dH").Select(n => (float)n).ToArray();
    exe.AssignGradient(lstm.Y, dH.AsTensor(Shape.Create(seqLength, batchSize, hiddenSize)), replace: true);
    exe.Backward();

    // Compare gradients w.r.t. input, weights and both initial states.
    var dX = mfr.GetDoubleArray("dX").Select(n => (float)n).ToArray();
    dX.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
    var dXmy = exe.GetGradient(lstm.X).ToArray();
    dXmy.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
    AreClose(dX, dXmy, 1e-6);

    var dW = mfr.GetDoubleArray("dW").Select(n => (float)n).ToArray();
    dW.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)).Print();
    var dWmy = exe.GetGradient(lstm.W).ToArray();
    dWmy.AsTensor(Shape.Create(lstm.W.Shape.AsArray)).Print();
    AreClose(dW, dWmy, 1e-6);

    var dc0 = mfr.GetDoubleArray("dc0").Select(n => (float)n).ToArray();
    dc0.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var dc0my = exe.GetGradient(lstm.CX).ToArray();
    dc0my.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(dc0, dc0my, 1e-6);

    var dh0 = mfr.GetDoubleArray("dh0").Select(n => (float)n).ToArray();
    dh0.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var dh0my = exe.GetGradient(lstm.HX).ToArray();
    dh0my.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(dh0, dh0my, 1e-6);

    // Ensure all queued device work has completed before the test returns.
    ctx.ToGpuContext().Stream.Synchronize();
}
/// <summary>
/// Cross-checks the direct LSTM implementation against the cuDNN-backed Rnn:
/// both are configured with identical inputs, initial states and constant-filled
/// weights, then their outputs, final states and every gradient are compared.
/// Note the gate-order mismatch handled below: cuDNN stores matrices as I,F,A,O
/// while the direct Lstm lays out its weight columns as I,F,O,A.
/// </summary>
public static void TestLstmAgainstCuDnnVersion()
{
    var ctx = Context.GpuContext(0);
    var inputSize = 5;
    var seqLength = 3;
    var batchSize = 2;
    var hiddenSize = 4;
    var error = 1e-5;

    // Shared random data: input and output gradient uniform in [-1, 1), states normal.
    var data = Context.CpuContext.Eval((2.0f.AsScalar() * RandomUniform<float>(Shape.Create(seqLength, batchSize, inputSize)) - 1.0f.AsScalar())).ToArray3D();
    //data.AsTensor(Shape.Create(seqLength*batchSize, inputSize)).Print();
    var h0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var c0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var dy = Context.CpuContext.Eval((2.0f.AsScalar() * RandomUniform<float>(Shape.Create(seqLength, batchSize, hiddenSize)) - 1.0f.AsScalar())).ToArray3D();
    //dy.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();

    // Constant fill values per weight matrix / bias vector so both
    // implementations receive exactly the same parameters.
    var wi = 0.5f; var wf = 0.4f; var wo = 0.3f; var wa = 0.2f;
    var ui = 0.5f; var uf = 0.4f; var uo = 0.3f; var ua = 0.1f;
    var bi = 0.5f; var bf = 0.4f; var bo = 0.3f; var ba = 0.2f;

    float[,,] y1, y2, dx1, dx2;
    float[,] cy1, cy2, hy1, hy2;
    float[,] dcx1, dcx2, dhx1, dhx2;
    float[,] dw1, dw2;

    {
        // calc with cuDNN
        var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
        var lstm = new Rnn<float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states (cuDNN state tensors carry a leading dimension of 1 here)
        exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
        exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

        // set weigths
        // cuDNN matrices order: IFAO
        // View the weight blob as (4 W rows-blocks + 4 U row-blocks + 8 bias rows) x hiddenSize.
        var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
        var offset = 0;
        // Wi
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wi));
        offset += inputSize;
        // Wf
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wf));
        offset += inputSize;
        // Wa
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wa));
        offset += inputSize;
        // Wo
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wo));
        offset += inputSize;
        // Ui
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ui));
        offset += hiddenSize;
        // Uf
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uf));
        offset += hiddenSize;
        // Ua
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ua));
        offset += hiddenSize;
        // Uo
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uo));
        offset += hiddenSize;
        // Bi
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bi));
        offset++;
        // Bf
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bf));
        offset++;
        // Ba
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), ba));
        offset++;
        // Bo
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bo));

        exe.Forward();

        y1 = exe.GetTensor(lstm.Y).ToArray3D();
        cy1 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
        hy1 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);
        exe.Backward();

        dx1 = exe.GetGradient(lstm.X).ToArray3D();
        dcx1 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx1 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();

        // we make dw follow the shape as (1 + inputSize + hiddenSize, 4*hiddenSize),
        // need to transpose because cuDNN uses Fortran storge order
        var dwCUDNN = exe.GetGradient(lstm.W).ToArray().AsTensor();
        dw1 = new float[1 + inputSize + hiddenSize, 4 * hiddenSize];
        var dw1Tensor = Reference<float>(dw1);
        var cpu = Context.CpuContext;
        offset = 0;

        // cuDNN order: IFAO, need to transpose because cuDNN uses Fortran storge order.
        // Target column blocks follow the direct Lstm layout (I, F, O, A), which is why
        // A maps to columns [3h, 4h) and O to columns [2h, 3h).
        // Wi
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Wf
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Wa
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Wo
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Ui
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Uf
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Ua
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Uo
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Bi
        cpu.Assign(dw1Tensor.Slice(0, Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
        offset += hiddenSize;
        // Bf
        cpu.Assign(dw1Tensor.Slice(0, Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
        offset += hiddenSize;
        // Ba
        cpu.Assign(dw1Tensor.Slice(0, Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
        offset += hiddenSize;
        // Bo
        cpu.Assign(dw1Tensor.Slice(0, Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
    }

    {
        // calc with direct LSTM implementation
        var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
        var lstm = new Lstm<float>(x, hiddenSize, forgetBiasInit: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor());
        exe.AssignTensor(lstm.HX, h0.AsTensor());

        // set weights: row 0 holds the biases, rows [1, inputSize] the input weights W,
        // the remaining rows the recurrent weights U; columns are in I, F, O, A order.
        var w = exe.GetTensor(lstm.W);
        // Wi
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(0, hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wi));
        // Wf
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wf));
        // Wo
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wo));
        // Wa
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wa));
        // Ui
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(0, hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ui));
        // Uf
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uf));
        // Uo
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uo));
        // Ua
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ua));
        // Bi
        ctx.Assign(w.Slice(0, Range(0, hiddenSize)), Fill(Shape.Create(1, hiddenSize), bi));
        // Bf
        ctx.Assign(w.Slice(0, Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), bf));
        // Bo
        ctx.Assign(w.Slice(0, Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), bo));
        // Ba
        ctx.Assign(w.Slice(0, Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), ba));

        exe.Forward();

        y2 = exe.GetTensor(lstm.Y).ToArray3D();
        cy2 = exe.GetTensor(lstm.CY).ToArray2D();
        hy2 = exe.GetTensor(lstm.HY).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);
        exe.Backward();

        dx2 = exe.GetGradient(lstm.X).ToArray3D();
        dcx2 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx2 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
        dw2 = exe.GetGradient(lstm.W).ToArray2D();
    }

    // Both implementations must agree on outputs, final states and every gradient.
    AreClose(y1, y2, error);
    AreClose(cy1, cy2, error);
    AreClose(hy1, hy2, error);
    AreClose(dx1, dx2, error);
    AreClose(dcx1, dcx2, error);
    AreClose(dhx1, dhx2, error);
    AreClose(dw1, dw2, error);
}
/// <summary>
/// Verifies that Rnn (shape fixed at graph construction) and RnnDynamic
/// (sequence length and batch size left symbolic) produce identical outputs
/// and gradients for the same LSTM configuration and weights.
/// </summary>
public static void RnnAgainstRnnDynamic()
{
    var ctx = Context.GpuContext(0);
    var inputSize = 5;
    var seqLength = 3;
    var batchSize = 2;
    var hiddenSize = 4;
    var error = 1e-5;

    // Shared random input, initial states and upstream output gradient.
    var data = Context.CpuContext.Eval(RandomUniform<float>(-1, 1, Shape.Create(seqLength, batchSize, inputSize))).ToArray3D();
    data.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
    var h0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var c0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var dy = Context.CpuContext.Eval(RandomUniform<float>(-1, 1, Shape.Create(seqLength, batchSize, hiddenSize))).ToArray3D();

    float[,,] y1, y2, dx1, dx2;
    float[,] cy1, cy2, hy1, hy2;
    float[,] dcx1, dcx2, dhx1, dhx2;
    float[] dw1, dw2;

    {
        // Fixed-shape Rnn: full input shape known up front.
        var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
        var lstm = new Rnn<float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
        exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

        // set weigths, cuDNN matrices order: IFAO
        var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
        SetWeights(ctx, w, inputSize, hiddenSize);

        exe.Forward();
        y1 = exe.GetTensor(lstm.Y).ToArray3D();
        cy1 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
        hy1 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);
        exe.Backward();
        dx1 = exe.GetGradient(lstm.X).ToArray3D();
        dcx1 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx1 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
        dw1 = exe.GetGradient(lstm.W).ToArray(); // cuDNN weight is 1D linear blob
    }

    {
        // RnnDynamic: sequence length and batch size stay symbolic (-1) until runtime.
        var x = Variable<float>(PartialShape.Create(-1, -1, inputSize));
        var lstm = new RnnDynamic<float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
        exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

        // set weigths, cuDNN matrices order: IFAO
        var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
        SetWeights(ctx, w, inputSize, hiddenSize);

        exe.Forward();
        y2 = exe.GetTensor(lstm.Y).ToArray3D();
        cy2 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
        hy2 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);
        exe.Backward();
        dx2 = exe.GetGradient(lstm.X).ToArray3D();
        dcx2 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx2 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
        dw2 = exe.GetGradient(lstm.W).ToArray();
    }

    // Fixed-shape and dynamic-shape paths must agree within tolerance.
    AreClose(y1, y2, error);
    AreClose(cy1, cy2, error);
    AreClose(hy1, hy2, error);
    AreClose(dx1, dx2, error);
    AreClose(dcx1, dcx2, error);
    AreClose(dhx1, dhx2, error);
    AreClose(dw1, dw2, error);
}
/// <summary>
/// Runs the Monte Carlo pi estimation on GPU 0.
/// NOTE(review): the arguments presumably mean (iterations, points per
/// iteration, allowed error) — confirm against EstimatePi's signature.
/// </summary>
public static void PiEstimationGpu() { EstimatePi(Context.GpuContext(0), 100, 10000000, 1e-3); }
/// <summary>
/// Builds the loop-style demo graph, runs forward/backward on GPU, and checks
/// every backprop gradient against a finite-difference estimate.
/// </summary>
public static void LoopStyle()
{
    // Symbolic inputs for the looped computation.
    var inputVar = Variable<double>();
    var statesVar = Variable<double>();
    var weightVar = Variable<double>();
    var demo = new LoopDemo(inputVar, statesVar, weightVar);
    var outputVar = demo.Output;

    // GPU executor that keeps gradients for every assigned variable.
    var ctx = Context.GpuContext(0);
    var exe = new Executor(ctx, outputVar) { AssignAllGradient = true };
    exe.Initalize();

    // Deterministic host data: a fixed seed keeps the test reproducible.
    const int steps = 4;
    const int n = 5;
    var inputHost = new double[n, n];
    var statesHost = new double[steps, n, n];
    var weightHost = new double[n, n];
    var rng = new Random(42);
    UniformRandomArray(inputHost, rng);
    UniformRandomArray(statesHost, rng);
    UniformRandomArray(weightHost, rng);

    exe.AssignTensor(inputVar, inputHost.AsTensor());
    exe.AssignTensor(statesVar, statesHost.AsTensor());
    exe.AssignTensor(weightVar, weightHost.AsTensor());

    // Forward pass; print the result for inspection.
    exe.Forward();
    exe.GetTensor(outputVar).Print();

    // Seed the backward pass with a random upstream gradient.
    var dOutputHost = new double[n, n];
    UniformRandomArray(dOutputHost, rng);
    exe.AssignGradient(outputVar, dOutputHost.AsTensor(), replace: true);
    exe.Backward();

    // Compare each backprop gradient against finite differences.
    const double bump = 1e-7;

    var dInput = exe.GetGradient(inputVar);
    var dInputFd = GradientChecker.FiniteDifferenceGradient(exe, inputVar, bump: bump);
    AreClose(dInputFd.ToArray2D(), dInput.ToArray2D(), 1e-7);

    var dStates = exe.GetGradient(statesVar);
    var dStatesFd = GradientChecker.FiniteDifferenceGradient(exe, statesVar, bump: bump);
    AreClose(dStatesFd.ToArray3D(), dStates.ToArray3D(), 1e-7);

    // The weight gradient is checked with a looser tolerance than the others.
    var dWeight = exe.GetGradient(weightVar);
    var dWeightFd = GradientChecker.FiniteDifferenceGradient(exe, weightVar, bump: bump);
    AreClose(dWeightFd.ToArray2D(), dWeight.ToArray2D(), 1e-3);
}
/// <summary>
/// Builds the unrolled demo graph (one state variable per step), runs
/// forward/backward on GPU, and checks every backprop gradient against a
/// finite-difference estimate.
/// </summary>
public static void UnrollingStyle()
{
    // Unrolled graph: one state variable per time step.
    const int steps = 4;
    var inputVar = Variable<double>();
    var stateVars = Enumerable.Range(0, steps).Select(_ => Variable<double>()).ToArray();
    var weightVar = Variable<double>();
    var outputVar = CreateUnrollingGraph(inputVar, stateVars, weightVar);

    // GPU executor that keeps gradients for every assigned variable.
    var ctx = Context.GpuContext(0);
    var exe = new Executor(ctx, outputVar) { AssignAllGradient = true };
    exe.Initalize();

    // Deterministic host data: a fixed seed keeps the test reproducible.
    const int n = 5;
    var inputHost = new double[n, n];
    var stateHosts = Enumerable.Range(0, steps).Select(_ => new double[n, n]).ToArray();
    var weightHost = new double[n, n];
    var rng = new Random(42);
    UniformRandomArray(inputHost, rng);
    foreach (var stateHost in stateHosts)
    {
        UniformRandomArray(stateHost, rng);
    }
    UniformRandomArray(weightHost, rng);

    exe.AssignTensor(inputVar, inputHost.AsTensor());
    for (var step = 0; step < steps; ++step)
    {
        exe.AssignTensor(stateVars[step], stateHosts[step].AsTensor());
    }
    exe.AssignTensor(weightVar, weightHost.AsTensor());

    // Forward pass; print the result for inspection.
    exe.Forward();
    exe.GetTensor(outputVar).Print();

    // Seed the backward pass with a random upstream gradient.
    var dOutputHost = new double[n, n];
    UniformRandomArray(dOutputHost, rng);
    exe.AssignGradient(outputVar, dOutputHost.AsTensor(), replace: true);
    exe.Backward();

    // Compare each backprop gradient against finite differences.
    const double bump = 1e-7;

    var dInput = exe.GetGradient(inputVar);
    var dInputFd = GradientChecker.FiniteDifferenceGradient(exe, inputVar, bump: bump);
    AreClose(dInputFd.ToArray2D(), dInput.ToArray2D(), 1e-7);

    foreach (var stateVar in stateVars)
    {
        var dState = exe.GetGradient(stateVar);
        var dStateFd = GradientChecker.FiniteDifferenceGradient(exe, stateVar, bump: bump);
        AreClose(dStateFd.ToArray2D(), dState.ToArray2D(), 1e-7);
    }

    // The weight gradient is checked with a looser tolerance than the others.
    var dWeight = exe.GetGradient(weightVar);
    var dWeightFd = GradientChecker.FiniteDifferenceGradient(exe, weightVar, bump: bump);
    AreClose(dWeightFd.ToArray2D(), dWeight.ToArray2D(), 1e-3);
}
/// <summary>
/// Validates the Attention op: gradients w.r.t. both weight matrices and both
/// hidden-state inputs must match finite-difference estimates.
/// </summary>
public static void TestAttention()
{
    // Problem sizes; the encoder sequence length is left symbolic in the graph.
    const int batch = 10;
    const int encoderHiddenSize = 20;
    const int decoderHiddenSize = 25;
    const int attentionDim = 30;

    // Encoder states: (encoderSeqLength, batch, encoderHiddenSize);
    // decoder state: (batch, decoderHiddenSize).
    var encoderHiddenStates = Variable<double>(PartialShape.Create(-1, batch, encoderHiddenSize));
    var decoderHiddenStates = Variable<double>(PartialShape.Create(batch, decoderHiddenSize));
    var attention = new Attention<double>(encoderHiddenStates, decoderHiddenStates, attentionDim);

    var ctx = Context.GpuContext(0);
    var exe = new Executor(ctx, attention.Output) { AssignAllGradient = true };
    exe.Initalize();

    // The actual encoder sequence length is only fixed at runtime.
    const int encoderSeqLength = 3;
    var encoderHost = new double[encoderSeqLength, batch, encoderHiddenSize];
    UniformRandomArray(encoderHost);
    var decoderHost = new double[batch, decoderHiddenSize];
    UniformRandomArray(decoderHost);

    exe.AssignTensor(encoderHiddenStates, encoderHost.AsTensor());
    exe.AssignTensor(decoderHiddenStates, decoderHost.AsTensor());
    exe.Forward();
    exe.GetTensor(attention.Output);

    // Seed the backward pass with a random upstream gradient.
    var dOutputHost = new double[batch, encoderHiddenSize];
    UniformRandomArray(dOutputHost);
    exe.AssignGradient(attention.Output, dOutputHost.AsTensor(), replace: true);
    exe.Backward();

    var dWh = exe.GetGradient(attention.Wh);
    var dWd = exe.GetGradient(attention.Wd);
    var dEncoder = exe.GetGradient(attention.EncoderHiddenStates);
    var dDecoder = exe.GetGradient(attention.DecoderHiddenStates);

    // Compare each backprop gradient against finite differences.
    const double bump = 1e-7;

    var dWhFd = GradientChecker.FiniteDifferenceGradient(exe, attention.Wh, bump: bump);
    AreClose(dWh.ToArray2D(), dWhFd.ToArray2D(), 1e-7);

    var dWdFd = GradientChecker.FiniteDifferenceGradient(exe, attention.Wd, bump: bump);
    AreClose(dWd.ToArray2D(), dWdFd.ToArray2D(), 1e-7);

    var dEncoderFd = GradientChecker.FiniteDifferenceGradient(exe, attention.EncoderHiddenStates, bump: bump);
    AreClose(dEncoder.ToArray3D(), dEncoderFd.ToArray3D(), 1e-7);

    var dDecoderFd = GradientChecker.FiniteDifferenceGradient(exe, attention.DecoderHiddenStates, bump: bump);
    AreClose(dDecoder.ToArray2D(), dDecoderFd.ToArray2D(), 1e-7);
}