public override void Backward(Executor executor)
{
    var a = executor.GetTensor(A);
    var b = executor.GetTensor(B);
    var dC = executor.GetGradient(C);
    executor.AssignGradient(A, Dot(dC, b.T));
    executor.AssignGradient(B, Dot(a.T, dC));
}
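// A quick sanity check on the expressions above (not part of the operator): with
// C = Dot(A, B), A of shape (m, k) and B of shape (k, n), the chain rule gives
// dA = Dot(dC, B.T) of shape (m, k) and dB = Dot(A.T, dC) of shape (k, n), so each
// gradient matches the shape of the tensor it belongs to.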
public override void Backward(Executor executor)
{
    var data = executor.GetTensor(Data);
    var weights = executor.GetTensor(Weights);
    var dOutput = executor.GetGradient(Output);
    executor.AssignGradient(Data, Dot(dOutput, weights.T).Reshape(data.Shape.AsArray));
    executor.AssignGradient(Weights, Dot(data.Reshape(data.Shape[0], -1).T, dOutput));
    executor.AssignGradient(Bias, ReduceSum(dOutput, 0));
}
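// Note on the expressions above: the input is flattened to (batch, features) for the
// weight gradient, and the input gradient is reshaped back to the original input shape;
// the bias gradient is the column-wise sum of dOutput, since the bias is broadcast over
// the batch dimension in the forward pass.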
public override void Backward(Executor executor)
{
    var ctx = executor.Context;
    var indices = executor.GetTensor(Indices);
    var gradout = executor.GetGradient(Output);

    // Fast path for performance: a custom GPU kernel, used when both tensors are fully packed.
    if (ctx.Type == ContextType.Gpu && gradout.Layout.IsInnerChangeMostFullyPacked && indices.Layout.IsInnerChangeMostFullyPacked)
    {
        var embedDim = EmbedDim;
        var batchSize = (int)indices.Shape.Length;
        var threadSize = 256;

        // first set the whole weight gradient to 0
        executor.AssignGradient(Weights, Fill(executor.GetTensor(Weights).Shape, ScalarOps.Conv<T>(0.0)));
        var dW = executor.GetGradient(Weights);

        // Then update it with a single-block kernel: the batch size is usually small,
        // while the embedding dimension is large, so one block that iterates over the
        // batch and strides over the embedding dimension is sufficient.
        var stream = ctx.ToGpuContext().Stream;
        var iPtr = indices.Buffer.Ptr;

        // The kernel runs in a single block, so no cross-block synchronization is needed;
        // it could be optimized further.
        if (typeof(T) == typeof(float))
        {
            var dOPtr = gradout.Buffer.Ptr.Reinterpret<float>();
            var dWPtr = dW.Buffer.Ptr.Reinterpret<float>();
            var lp = new LaunchParam(1, threadSize);
            stream.Launch(() =>
            {
                for (var i = 0; i < batchSize; ++i)
                {
                    var row = iPtr[i];
                    for (var k = threadIdx.x; k < embedDim; k += blockDim.x)
                    {
                        dWPtr[row * embedDim + k] += dOPtr[i * embedDim + k];
                    }
                }
            }, lp);
            return;
        }

        throw new NotImplementedException();
    }
    else
    {
        executor.AssignGradient(Weights, TakeGrad(indices, gradout, EmbedSize));
    }
}
public override void Backward(Executor executor)
{
    var dOutput = executor.GetGradient(Output);
    var mask = executor.GetTensor(Mask);
    executor.AssignGradient(Input, Dropout(dOutput, mask, Threshold, Scale));
}
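// The backward pass reuses the mask and scale produced in the forward pass, so units
// that were dropped presumably receive zero gradient, and surviving units are rescaled
// consistently with the forward output.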
public override void Backward(Executor executor)
{
    var p = executor.GetTensor(Pred);
    var y = executor.GetTensor(Label);
    executor.AssignGradient(Input, p - y);
}
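// Derivation (assuming Pred holds softmax probabilities, Label is a one-hot encoding,
// and Input is the pre-softmax logits): for L = -sum_k y_k * log(p_k) with p = softmax(z),
// dL/dz = p - y, which is exactly the expression assigned above.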
public override void Backward(Executor executor)
{
    var pred = executor.GetTensor(Pred);
    var label = executor.GetTensor(Label);
    executor.AssignGradient(Pred, 2.0.AsScalar<T>() * (pred - label));
}
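// Derivation (assuming an unnormalized squared-error loss): for L = sum((pred - label)^2),
// dL/dpred = 2 * (pred - label), which explains the scalar factor of 2 above.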
public override void Backward(Executor executor)
{
    var output = executor.GetTensor(Output);
    var dOutput = executor.GetGradient(Output);
    executor.AssignGradient(Input, BackwardExpr(output) * dOutput);
}
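// This is the generic chain-rule pattern for elementwise activations whose derivative can
// be written in terms of the forward output: dInput = f'(x) * dOutput, with f'(x) expressed
// through output = f(x). For example, a sigmoid would use BackwardExpr(y) = y * (1 - y),
// and a tanh would use 1 - y * y.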
public static void Gradient_WeightedSumReduce_02_GPU()
{
    var rng = new Random(42);
    var x = Variable<double>();
    var w = Variable<double>();
    var wsr = new WeightedSumReduce<double>(w.Reshape(-1, 1), x);
    var y = wsr.Output;

    var ctx = gpu;
    var exe = new Executor(ctx, y) { AssignAllGradient = true };

    var n = 5;
    var d = 3;
    var hx = new double[n, d];
    var hw = new double[n];
    UniformRandomArray(hx, rng);
    UniformRandomArray(hw, rng);

    var hy = new double[d];
    for (var i = 0; i < d; ++i)
    {
        var acc = 0.0;
        for (var j = 0; j < n; ++j)
        {
            acc += hw[j] * hx[j, i];
        }
        hy[i] = acc;
    }

    exe.AssignTensor(x, hx.AsTensor());
    exe.AssignTensor(w, hw.AsTensor());
    exe.Forward();

    var ty = exe.GetTensor(y);
    ty.Print();
    AreClose(hy, ty.ToArray(), 1e-10);

    var hdy = new double[d];
    UniformRandomArray(hdy, rng);
    exe.AssignGradient(y, hdy.AsTensor(), replace: true);
    exe.Backward();

    //var tdx = exe.GetGradient(x);
    //var tdw = exe.GetGradient(w);
    //tdx.Print();
    //tdw.Print();
    //var bump = 1e-8;
    //var hdx = GradientChecker.FiniteDifferenceGradient(exe, x, bump: bump);
    ////var hdw = GradientChecker.FiniteDifferenceGradient(exe, w, bump: bump);
    //hdx.Print();
    ////hdw.Print();
    //AreClose(hdx.ToArray2D(), tdx.ToArray2D(), 1e-7);
    ////AreClose(hdw.ToArray2D(), tdw.ToArray2D(), 1e-7);
}
public override void Backward(Executor executor)
{
    var a = executor.GetTensor(A);
    var b = executor.GetTensor(B);
    var dC = executor.GetGradient(C);
    var dA = a.Shape.Rank < dC.Shape.Rank
        ? ReduceSum(dC, Enumerable.Range(0, dC.Shape.Rank - a.Shape.Rank).ToArray())
        : dC;
    var dB = b.Shape.Rank < dC.Shape.Rank
        ? ReduceSum(dC, Enumerable.Range(0, dC.Shape.Rank - b.Shape.Rank).ToArray())
        : dC;
    executor.AssignGradient(A, dA);
    executor.AssignGradient(B, dB);
}
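// Note on the expressions above: an operand that was broadcast over leading dimensions in
// the forward pass receives the incoming gradient summed over those dimensions. For example,
// adding a bias of shape (n) to a matrix of shape (m, n) gives dB = ReduceSum(dC, 0) of
// shape (n). As written, this only handles rank differences (leading broadcast axes), not
// size-1 axes inside equal-rank shapes.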
public static void Gradient_Dot_GPU()
{
    var rng = new Random();
    var m = 10;
    var k = 5;
    var n = 3;
    var x = Variable<double>();
    var y = Variable<double>();
    var z = Dot(x, y);

    var ctx = gpu;
    var exe = new Executor(ctx, z) { AssignAllGradient = true };

    var hx = new double[m, k];
    var hy = new double[k, n];
    UniformRandomArray(hx, rng);
    UniformRandomArray(hy, rng);
    var hz = Dot(hx, hy);
    //hx.AsTensor().Print();
    //hy.AsTensor().Print();

    exe.AssignTensor(x, hx.AsTensor());
    exe.AssignTensor(y, hy.AsTensor());
    exe.Forward();

    var tz = exe.GetTensor(z);
    //tz.Print();
    AreClose(hz, tz.ToArray2D(), 1e-10);

    var hdz = new double[m, n];
    UniformRandomArray(hdz, rng);
    //hdz.AsTensor().Print();
    exe.AssignGradient(z, hdz.AsTensor(), replace: true);
    exe.Backward();

    var tdx = exe.GetGradient(x);
    var tdy = exe.GetGradient(y);
    tdx.Print();
    tdy.Print();

    var bump = 1e-6;
    var hdx = GradientChecker.FiniteDifferenceGradient(exe, x, bump: bump);
    var hdy = GradientChecker.FiniteDifferenceGradient(exe, y, bump: bump);
    hdx.Print();
    hdy.Print();
    AreClose(tdx.ToArray(), hdx.ToArray(), 1e-6);
    AreClose(tdy.ToArray(), hdy.ToArray(), 1e-6);
}
public void Backward(Executor executor)
{
    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;
    var rnnDesc = RnnDesc;
    var filterDesc = WDesc;
    Util.EnsureTrue(IsTraining);

    dnn.RNNBackwardData(
        rnnDesc, 1,
        YDesc, Output.Buffer.Ptr,
        YDesc, DOutput.Buffer.Ptr,
        StateDesc, DHY.Buffer.Ptr,
        StateDesc, DCY.Buffer.Ptr,
        filterDesc, executor.GetTensor(W).Buffer.Ptr,
        StateDesc, HX.Buffer.Ptr,
        StateDesc, CX.Buffer.Ptr,
        XDesc, DInput.Buffer.Ptr,
        StateDesc, DHX.Buffer.Ptr,
        StateDesc, DCX.Buffer.Ptr,
        Workspace.Buffer.Ptr, (IntPtr)Workspace.Shape.Length,
        ReserveSpace.Buffer.Ptr, (IntPtr)ReserveSpace.Shape.Length);

    // Zero the weight gradient on the first aggregation of this pass, because cuDNN's
    // RNNBackwardWeights accumulates into the existing buffer.
    if (executor.IncreaseGradientAggregationCounter(W) == 0)
    {
        executor.AssignGradient(W, ScalarOps.Conv<T>(0.0).AsScalar(), replace: true);
    }

    dnn.RNNBackwardWeights(
        rnnDesc, 1,
        XDesc, Input.Buffer.Ptr,
        StateDesc, HX.Buffer.Ptr,
        YDesc, Output.Buffer.Ptr,
        Workspace.Buffer.Ptr, (IntPtr)Workspace.Shape.Length,
        WDesc, executor.GetGradient(W).Buffer.Ptr,
        ReserveSpace.Buffer.Ptr, (IntPtr)ReserveSpace.Shape.Length);
}
public static void Gradient_Add_VectorMatrix_GPU()
{
    var rng = new Random();
    var x = Variable<double>();
    var y = Variable<double>();
    var z = x + y;

    var ctx = gpu;
    var exe = new Executor(ctx, z) { AssignAllGradient = true };

    var l = 10;
    var hx = rng.NextDouble();
    var hy = new double[l];
    var hz = new double[l];
    UniformRandomArray(hy, rng);
    for (var i = 0; i < l; ++i)
    {
        hz[i] = hx + hy[i];
    }
    //hx.AsTensor().Print();
    //hy.AsTensor().Print();

    exe.AssignTensor(x, (new[] { hx }).AsTensor());
    exe.AssignTensor(y, hy.AsTensor());
    exe.Forward();

    var tz = exe.GetTensor(z);
    //tz.Print();
    AreClose(hz, tz.ToArray(), 1e-10);

    var hdz = new double[l];
    UniformRandomArray(hdz, rng);
    //hdz.AsTensor().Print();
    exe.AssignGradient(z, hdz.AsTensor());
    exe.Backward();

    var tdx = exe.GetGradient(x);
    var tdy = exe.GetGradient(y);
    tdx.Print();
    tdy.Print();

    //var bump = 1e-6;
    //var hdx = GradientChecker.FiniteDifferenceGradient(exe, x, bump: bump);
    //var hdy = GradientChecker.FiniteDifferenceGradient(exe, y, bump: bump);
    //hdx.Print();
    //hdy.Print();
    //AreClose(tdx.ToArray(), hdx.ToArray(), 1e-6);
}
// This is just a workaround for now; it could be moved into the framework later.
public static void AssignOrSetGradient<T>(Executor executor, Variable<T> var, Tensor<T> gradient)
{
    if (gradient.Device == executor.Context.Device)
    {
        executor.SetGradient(var, gradient);
    }
    else
    {
        executor.AssignGradient(var, gradient, replace: true);
    }
}
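// A minimal usage sketch (exe, y, hostGradient and deviceGradient are hypothetical names):
// a gradient that already lives on the executor's device is attached directly via
// SetGradient, anything else is copied over via AssignGradient with replace: true.
//
//   AssignOrSetGradient(exe, y, hostGradient.AsTensor());  // host tensor: copied to the device
//   AssignOrSetGradient(exe, y, deviceGradient);           // device tensor: attached directly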
public override void Backward(Executor executor)
{
    var vectors = executor.GetTensor(Vectors);
    var weights = executor.GetTensor(Weights);
    var dOutput = executor.GetGradient(Output);
    Console.WriteLine((vectors * dOutput).Shape);
    executor.AssignGradient(Vectors, weights * dOutput);
    // TODO: the gradient with respect to the weights (the softmax output) is not implemented yet.
    throw new Exception("TODO");
    //executor.AssignGradient(softmax, vectors*dOutput);
}
public override void Backward(Executor executor)
{
    var p = executor.GetTensor(LogPred);
    var y = executor.GetTensor(Label);
    Util.EnsureTrue(p.Shape.Rank == 2);
    var n = (int)p.Shape[0];
    var classes = (int)p.Shape[1];

    executor.AssignGradient(Input, Exp(p));
    var g = executor.GetGradient(Input);

    var ctx = executor.Context;
    if (ctx.Type == ContextType.Gpu)
    {
        var stream = ctx.ToGpuContext().Stream;

        if (typeof(T) == typeof(float))
        {
            var gptr = g.Buffer.Ptr.Reinterpret<float>();
            var idxptr = y.Buffer.Ptr;
            DeviceFor.For(stream, 0, n, i =>
            {
                var idx = idxptr[i];
                gptr[i * classes + idx] -= 1.0f;
            });
            return;
        }
        else if (typeof(T) == typeof(double))
        {
            var gptr = g.Buffer.Ptr.Reinterpret<double>();
            var idxptr = y.Buffer.Ptr;
            DeviceFor.For(stream, 0, n, i =>
            {
                var idx = idxptr[i];
                gptr[i * classes + idx] -= 1.0;
            });
            return;
        }
        else
        {
            throw new NotImplementedException();
        }
    }

    throw new NotImplementedException();
}
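// Derivation: with log-probabilities p = log softmax(z) and integer labels y, the per-row
// loss L_i = -p[i, y_i] has gradient dL/dz[i, k] = softmax(z)[i, k] - [k == y_i]. The code
// therefore assigns Exp(p) (the softmax) and then subtracts 1 at each row's label column
// inside the device kernel.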
public static void RnnAgainstRnnDynamic()
{
    var ctx = Context.GpuContext(0);

    var inputSize = 5;
    var seqLength = 3;
    var batchSize = 2;
    var hiddenSize = 4;
    var error = 1e-5;

    var data = Context.CpuContext.Eval(RandomUniform<float>(-1, 1, Shape.Create(seqLength, batchSize, inputSize))).ToArray3D();
    data.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();

    var h0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var c0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var dy = Context.CpuContext.Eval(RandomUniform<float>(-1, 1, Shape.Create(seqLength, batchSize, hiddenSize))).ToArray3D();

    float[,,] y1, y2, dx1, dx2;
    float[,] cy1, cy2, hy1, hy2;
    float[,] dcx1, dcx2, dhx1, dhx2;
    float[] dw1, dw2;

    {
        var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
        var lstm = new Rnn<float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
        exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

        // set weights, cuDNN matrices order: IFAO
        var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
        SetWeights(ctx, w, inputSize, hiddenSize);

        exe.Forward();

        y1 = exe.GetTensor(lstm.Y).ToArray3D();
        cy1 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
        hy1 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);
        exe.Backward();

        dx1 = exe.GetGradient(lstm.X).ToArray3D();
        dcx1 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx1 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
        dw1 = exe.GetGradient(lstm.W).ToArray(); // the cuDNN weight is a 1D linear blob
    }

    {
        var x = Variable<float>(PartialShape.Create(-1, -1, inputSize));
        var lstm = new RnnDynamic<float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
        exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

        // set weights, cuDNN matrices order: IFAO
        var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
        SetWeights(ctx, w, inputSize, hiddenSize);

        exe.Forward();

        y2 = exe.GetTensor(lstm.Y).ToArray3D();
        cy2 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
        hy2 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);
        exe.Backward();

        dx2 = exe.GetGradient(lstm.X).ToArray3D();
        dcx2 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx2 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
        dw2 = exe.GetGradient(lstm.W).ToArray();
    }

    AreClose(y1, y2, error);
    AreClose(cy1, cy2, error);
    AreClose(hy1, hy2, error);
    AreClose(dx1, dx2, error);
    AreClose(dcx1, dcx2, error);
    AreClose(dhx1, dhx2, error);
    AreClose(dw1, dw2, error);
}
public static void TestLstmAgainstReferenceResults()
{
    var mfr = new MatFileReader(@"lstm_small.mat");

    var inputSize = mfr.GetInt("InputSize");
    var seqLength = mfr.GetInt("SeqLength");
    var hiddenSize = mfr.GetInt("HiddenSize");
    var batchSize = mfr.GetInt("BatchSize");

    var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
    var lstm = new Lstm<float>(x, hiddenSize);

    var ctx = Context.GpuContext(0);
    var exe = new Executor(ctx, lstm.Y);
    exe.Initalize();

    var h0 = mfr.GetDoubleArray("h0").Select(n => (float)n).ToArray();
    var c0 = mfr.GetDoubleArray("c0").Select(n => (float)n).ToArray();
    exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(batchSize, hiddenSize)));
    exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(batchSize, hiddenSize)));

    var input = mfr.GetDoubleArray("X").Select(n => (float)n).ToArray();
    exe.AssignTensor(x, input.AsTensor(Shape.Create(seqLength, batchSize, inputSize)));

    var w = mfr.GetDoubleArray("W").Select(n => (float)n).ToArray();
    w.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)).Print();
    exe.AssignTensor(lstm.W, w.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)));

    exe.Forward();

    var H = mfr.GetDoubleArray("H").Select(n => (float)n).ToArray();
    H.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();
    var myH = exe.GetTensor(lstm.Y).ToArray();
    myH.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();
    AreClose(H, myH, 1e-6);

    var CN = mfr.GetDoubleArray("cn").Select(n => (float)n).ToArray();
    CN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var myCN = exe.GetTensor(lstm.CY).ToArray();
    myCN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(CN, myCN, 1e-6);

    var HN = mfr.GetDoubleArray("hn").Select(n => (float)n).ToArray();
    HN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var myHN = exe.GetTensor(lstm.HY).ToArray();
    myHN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(HN, myHN, 1e-6);

    var dH = mfr.GetDoubleArray("dH").Select(n => (float)n).ToArray();
    exe.AssignGradient(lstm.Y, dH.AsTensor(Shape.Create(seqLength, batchSize, hiddenSize)), replace: true);
    exe.Backward();

    var dX = mfr.GetDoubleArray("dX").Select(n => (float)n).ToArray();
    dX.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
    var dXmy = exe.GetGradient(lstm.X).ToArray();
    dXmy.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
    AreClose(dX, dXmy, 1e-6);

    var dW = mfr.GetDoubleArray("dW").Select(n => (float)n).ToArray();
    dW.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)).Print();
    var dWmy = exe.GetGradient(lstm.W).ToArray();
    dWmy.AsTensor(Shape.Create(lstm.W.Shape.AsArray)).Print();
    AreClose(dW, dWmy, 1e-6);

    var dc0 = mfr.GetDoubleArray("dc0").Select(n => (float)n).ToArray();
    dc0.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var dc0my = exe.GetGradient(lstm.CX).ToArray();
    dc0my.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(dc0, dc0my, 1e-6);

    var dh0 = mfr.GetDoubleArray("dh0").Select(n => (float)n).ToArray();
    dh0.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var dh0my = exe.GetGradient(lstm.HX).ToArray();
    dh0my.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(dh0, dh0my, 1e-6);

    ctx.ToGpuContext().Stream.Synchronize();
}
public static void TestLstmAgainstCuDnnVersion()
{
    var ctx = Context.GpuContext(0);

    var inputSize = 5;
    var seqLength = 3;
    var batchSize = 2;
    var hiddenSize = 4;
    var error = 1e-5;

    var data = Context.CpuContext.Eval((2.0f.AsScalar() * RandomUniform<float>(Shape.Create(seqLength, batchSize, inputSize)) - 1.0f.AsScalar())).ToArray3D();
    //data.AsTensor(Shape.Create(seqLength*batchSize, inputSize)).Print();

    var h0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var c0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var dy = Context.CpuContext.Eval((2.0f.AsScalar() * RandomUniform<float>(Shape.Create(seqLength, batchSize, hiddenSize)) - 1.0f.AsScalar())).ToArray3D();
    //dy.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();

    var wi = 0.5f;
    var wf = 0.4f;
    var wo = 0.3f;
    var wa = 0.2f;
    var ui = 0.5f;
    var uf = 0.4f;
    var uo = 0.3f;
    var ua = 0.1f;
    var bi = 0.5f;
    var bf = 0.4f;
    var bo = 0.3f;
    var ba = 0.2f;

    float[,,] y1, y2, dx1, dx2;
    float[,] cy1, cy2, hy1, hy2;
    float[,] dcx1, dcx2, dhx1, dhx2;
    float[,] dw1, dw2;

    {
        // calc with cuDNN
        var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
        var lstm = new Rnn<float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
        exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

        // set weights, cuDNN matrices order: IFAO
        var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
        var offset = 0;
        // Wi
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wi));
        offset += inputSize;
        // Wf
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wf));
        offset += inputSize;
        // Wa
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wa));
        offset += inputSize;
        // Wo
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wo));
        offset += inputSize;
        // Ui
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ui));
        offset += hiddenSize;
        // Uf
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uf));
        offset += hiddenSize;
        // Ua
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ua));
        offset += hiddenSize;
        // Uo
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uo));
        offset += hiddenSize;
        // Bi
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bi));
        offset++;
        // Bf
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bf));
        offset++;
        // Ba
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), ba));
        offset++;
        // Bo
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bo));

        exe.Forward();

        y1 = exe.GetTensor(lstm.Y).ToArray3D();
        cy1 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
        hy1 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);
        exe.Backward();

        dx1 = exe.GetGradient(lstm.X).ToArray3D();
        dcx1 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx1 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();

        // we make dw follow the shape (1 + inputSize + hiddenSize, 4*hiddenSize); the slices
        // need to be transposed because cuDNN uses Fortran storage order
        var dwCUDNN = exe.GetGradient(lstm.W).ToArray().AsTensor();
        dw1 = new float[1 + inputSize + hiddenSize, 4 * hiddenSize];
        var dw1Tensor = Reference<float>(dw1);
        var cpu = Context.CpuContext;
        offset = 0;

        // cuDNN order: IFAO, transposed because cuDNN uses Fortran storage order
        // Wi
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Wf
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Wa
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Wo
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Ui
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Uf
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Ua
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Uo
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Bi
        cpu.Assign(dw1Tensor.Slice(0, Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
        offset += hiddenSize;
        // Bf
        cpu.Assign(dw1Tensor.Slice(0, Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
        offset += hiddenSize;
        // Ba
        cpu.Assign(dw1Tensor.Slice(0, Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
        offset += hiddenSize;
        // Bo
        cpu.Assign(dw1Tensor.Slice(0, Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
    }

    {
        // calc with the direct LSTM implementation
        var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
        var lstm = new Lstm<float>(x, hiddenSize, forgetBiasInit: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor());
        exe.AssignTensor(lstm.HX, h0.AsTensor());

        // set weights
        var w = exe.GetTensor(lstm.W);
        // Wi
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(0, hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wi));
        // Wf
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wf));
        // Wo
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wo));
        // Wa
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wa));
        // Ui
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(0, hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ui));
        // Uf
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uf));
        // Uo
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uo));
        // Ua
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ua));
        // Bi
        ctx.Assign(w.Slice(0, Range(0, hiddenSize)), Fill(Shape.Create(1, hiddenSize), bi));
        // Bf
        ctx.Assign(w.Slice(0, Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), bf));
        // Bo
        ctx.Assign(w.Slice(0, Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), bo));
        // Ba
        ctx.Assign(w.Slice(0, Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), ba));

        exe.Forward();

        y2 = exe.GetTensor(lstm.Y).ToArray3D();
        cy2 = exe.GetTensor(lstm.CY).ToArray2D();
        hy2 = exe.GetTensor(lstm.HY).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);
        exe.Backward();

        dx2 = exe.GetGradient(lstm.X).ToArray3D();
        dcx2 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx2 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
        dw2 = exe.GetGradient(lstm.W).ToArray2D();
    }

    AreClose(y1, y2, error);
    AreClose(cy1, cy2, error);
    AreClose(hy1, hy2, error);
    AreClose(dx1, dx2, error);
    AreClose(dcx1, dcx2, error);
    AreClose(dhx1, dhx2, error);
    AreClose(dw1, dw2, error);
}
public static void Test()
{
    // compile the graph on one context, then get the forward and backward computation
    // delegates from the returned tuple.
    var ctx = Context.GpuContext(0);
    var funcs = Compile<double, double, double, double>(ctx, Foo);
    var forward = funcs.Item1;
    var backward = funcs.Item2;

    // create host arrays
    var m = 100;
    var k = 90;
    var n = 80;
    var x = new double[m, k];
    var w = new double[k, n];
    var b = new double[n];

    // randomly fill the host arrays
    var rng = new Random(42);
    AleaTKUtil.Common.UniformRandomArray(x, rng);
    AleaTKUtil.Common.UniformRandomArray(w, rng);
    AleaTKUtil.Common.UniformRandomArray(b, rng);

    // compute the output
    var y = forward(x.AsTensor(), w.AsTensor(), b.AsTensor());
    //y.Print();

    // fake some output gradient
    var dy = new double[m, n];
    AleaTKUtil.Common.UniformRandomArray(dy, rng);

    // calc the gradients; they are returned in a tuple
    var gradients = backward(dy.AsTensor());
    var dx = gradients.Item1;
    var dw = gradients.Item2;
    var db = gradients.Item3;

    // the following code just verifies the gradients with finite differences.
    var varX = Variable<double>();
    var varW = Variable<double>();
    var varB = Variable<double>();
    var varY = Foo(varX, varW, varB);
    var exe = new Executor(ctx, varY);
    exe.AssignTensor(varX, x.AsTensor());
    exe.AssignTensor(varW, w.AsTensor());
    exe.AssignTensor(varB, b.AsTensor());
    exe.AssignGradient(varY, dy.AsTensor(), replace: true);

    var bump = 1e-7;

    var dx_fd = GradientChecker.FiniteDifferenceGradient(exe, varX, bump: bump);
    //dx.Print();
    //dx_fd.Print();
    AleaTKUtil.Common.AreClose(dx_fd.ToArray2D(), dx.ToArray2D(), 1e-6);

    var dw_fd = GradientChecker.FiniteDifferenceGradient(exe, varW, bump: bump);
    //dw.Print();
    //dw_fd.Print();
    AleaTKUtil.Common.AreClose(dw_fd.ToArray2D(), dw.ToArray2D(), 1e-6);

    var db_fd = GradientChecker.FiniteDifferenceGradient(exe, varB, bump: bump);
    //db.Print();
    //db_fd.Print();
    AleaTKUtil.Common.AreClose(db_fd.ToArray(), db.ToArray(), 1e-5);
}
public void ZeroTerminalGradient(Executor executor)
{
    executor.AssignGradient(HY, Fill(Shape.Create(HY.Shape.AsArray), ScalarOps.Conv<T>(0.0)), replace: true);
    executor.AssignGradient(CY, Fill(Shape.Create(CY.Shape.AsArray), ScalarOps.Conv<T>(0.0)), replace: true);
}
public override void Backward(Executor executor)
{
    Util.EnsureTrue(IsTraining);

    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;

    if (executor.IncreaseGradientAggregationCounter(X) != 0)
    {
        throw new InvalidOperationException();
    }

    if (executor.IncreaseGradientAggregationCounter(HX) != 0)
    {
        throw new InvalidOperationException();
    }

    if (executor.IncreaseGradientAggregationCounter(CX) != 0)
    {
        throw new InvalidOperationException();
    }

    dnn.RNNBackwardData(
        executor.RnnDescDict[RnnDesc],
        SeqLength,
        YDesc, executor.GetTensor(Y).Buffer.Ptr,
        YDesc, executor.GetGradient(Y).Buffer.Ptr,
        StateDesc, new deviceptr<T>(), // executor.GetGradient(HY).Buffer.Ptr
        StateDesc, new deviceptr<T>(), // executor.GetGradient(CY).Buffer.Ptr
        executor.FilterDescDict[WDesc], executor.GetTensor(W).Buffer.Ptr,
        StateDesc, executor.GetTensor(HX).Buffer.Ptr,
        StateDesc, executor.GetTensor(CX).Buffer.Ptr,
        XDesc, executor.GetGradient(X).Buffer.Ptr,
        StateDesc, executor.GetGradient(HX).Buffer.Ptr,
        StateDesc, executor.GetGradient(CX).Buffer.Ptr,
        executor.GetTensor(Workspace).Buffer.Ptr, (IntPtr)executor.GetTensor(Workspace).Shape.Length,
        executor.GetTensor(ReserveSpace).Buffer.Ptr, (IntPtr)executor.GetTensor(ReserveSpace).Shape.Length);

    if (executor.IncreaseGradientAggregationCounter(W) == 0)
    {
        executor.AssignGradient(W, ScalarOps.Conv<T>(0.0).AsScalar(), replace: true);
    }

    dnn.RNNBackwardWeights(
        executor.RnnDescDict[RnnDesc],
        SeqLength,
        XDesc, executor.GetTensor(X).Buffer.Ptr,
        StateDesc, executor.GetTensor(HX).Buffer.Ptr,
        YDesc, executor.GetTensor(Y).Buffer.Ptr,
        executor.GetTensor(Workspace).Buffer.Ptr, (IntPtr)executor.GetTensor(Workspace).Shape.Length,
        executor.FilterDescDict[WDesc], executor.GetGradient(W).Buffer.Ptr,
        executor.GetTensor(ReserveSpace).Buffer.Ptr, (IntPtr)executor.GetTensor(ReserveSpace).Shape.Length);
}
public void AssignTerminalGradient(Executor executor, Tensor<T> dhy, Tensor<T> dcy)
{
    executor.AssignGradient(HY, dhy, replace: true);
    executor.AssignGradient(CY, dcy, replace: true);
}
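// A minimal usage sketch (the rnn, exe and segment names are hypothetical): when running
// truncated backpropagation through time over consecutive segments, the terminal state
// gradients either start at zero (no later segment feeds back into this one) or carry over
// the state gradients produced by the following segment.
//
//   if (isLastSegment)
//   {
//       rnn.ZeroTerminalGradient(exe);
//   }
//   else
//   {
//       rnn.AssignTerminalGradient(exe, dhyFromNextSegment, dcyFromNextSegment);
//   }
//   exe.Backward();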