/// <summary>
/// Computes the gradient of <paramref name="output"/> with respect to <paramref name="input"/>
/// by finite differences, for verifying analytically computed gradients.
/// </summary>
/// <param name="executor">Executor whose graph is evaluated; its state is restored before returning.</param>
/// <param name="input">Variable to bump.</param>
/// <param name="bump">Finite-difference step size. (Was the float literal 1e-5f, which is not exactly
/// 1e-5 when widened to double; use the double literal for the intended value.)</param>
/// <param name="output">Output variable to differentiate; defaults to the executor's output.</param>
/// <returns>Tensor of the same shape as the input holding the finite-difference gradient.</returns>
public static Tensor<double> FiniteDifferenceGradient(Executor executor, Variable<double> input, double bump = 1e-5, Variable<double> output = null)
{
    if (output == null)
    {
        output = (Variable<double>)executor.Output;
    }

    // Back up the current input tensor so executor state can be restored afterwards.
    var ctx = executor.Context;
    var inputTensor = executor.GetTensor(input);
    var inputShape = inputTensor.Shape;
    var inputTensorBackup = ctx.Device.Allocate<double>(inputShape);
    ctx.Assign(inputTensorBackup, inputTensor);

    // Evaluator: write a candidate input, run forward, return the flattened output.
    Func<double[], double[]> evaluator = inputBlob =>
    {
        executor.AssignTensor(input, inputBlob.AsTensor(inputShape));
        executor.Forward();
        var outputTensor = executor.GetTensor(output);
        return outputTensor.ToArray();
    };

    var inputArray = inputTensor.ToArray();
    var outputGradientArray = executor.GetGradient(output).ToArray();
    var inputGradientArray = AleaTKUtil.GradientChecker.FiniteDifferenceGradient(inputArray, outputGradientArray, evaluator, bump);
    var inputGradientTensor = inputGradientArray.AsTensor(inputShape);

    // Restore the original input and recompute so the executor is left unchanged.
    executor.AssignTensor(input, inputTensorBackup);
    executor.Forward();

    return inputGradientTensor;
}
/// <summary>
/// Dropout forward pass: draws a fresh uniform random mask and applies
/// <c>Dropout(input, mask, Threshold, Scale)</c> to produce the output.
/// (Removed the unused local <c>ctx</c> that was never read.)
/// </summary>
public override void Forward(Executor executor)
{
    var input = executor.GetTensor(Input);
    // TODO: make sure the offset is correct in one training.
    executor.AssignTensor(Mask, RandomUniform<uint>(input.Shape));
    var mask = executor.GetTensor(Mask);
    executor.AssignTensor(Output, Dropout(input, mask, Threshold, Scale));
}
/// <summary>Sum-of-squared-errors loss: Loss = sum((Pred - Label)^2).</summary>
public override void Forward(Executor executor)
{
    var prediction = executor.GetTensor(Pred);
    var target = executor.GetTensor(Label);
    var residual = prediction - target;
    executor.AssignTensor(Loss, ReduceSum(residual * residual));
}
/// <summary>
/// Fully-connected forward pass: flattens every trailing dimension of the input into a
/// feature axis, then computes Output = flattened · Weights + Bias.
/// </summary>
public override void Forward(Executor executor)
{
    var data = executor.GetTensor(Data);
    var weights = executor.GetTensor(Weights);
    var bias = executor.GetTensor(Bias);

    // Keep the leading (batch) dimension, collapse the rest.
    var flattened = data.Reshape(data.Shape[0], -1);
    executor.AssignTensor(Output, Dot(flattened, weights) + bias);
}
/// <summary>
/// Attention forward pass: scores each encoder step against the decoder state
/// (tanh additive attention), softmaxes over the sequence axis, then reduces the
/// encoder hidden states with the softmax weights into AttentionState.
/// Fix: the kernel launch grid used truncating division (batchSize / 32), which
/// yields zero blocks whenever a dimension is smaller than 32; use ceiling division.
/// </summary>
public override void Forward(Executor executor)
{
    var wh = executor.GetTensor(Wh);
    var wd = executor.GetTensor(Wd);
    var v = executor.GetTensor(V);
    var h = executor.GetTensor(EncoderHiddenStates).Reshape(SeqLength * Batch, -1);
    var d = executor.GetTensor(DecoderHiddenState);

    var whh = Dot(h, wh); // [n*b, EncoderHiddenSize] * [EncoderHiddenSize, AttentionDim] = [n*b, AttentionDim]
    var wdd = Dot(d, wd); // [b, DecoderHiddenSize] * [DecoderHiddenSize, AttentionDim] = [b, AttentionDim]
    var whd = Tanh(whh + wdd); // broadcasting to [n*b, AttentionDim]
    var u = Dot(whd, v); // [n*b, AttentionDim] * [AttentionDim] = [n*b]
    var expu = Exp(u.Reshape(SeqLength, Batch));
    var softmax = expu / ReduceSum(expu, true, 0); // [n, b]
    executor.AssignTensor(Softmax, softmax);

    var ctx = executor.Context;
    if (ctx.Type == ContextType.Gpu && typeof(T) == typeof(float))
    {
        var stream = ctx.ToGpuContext().Stream;
        var hPtr = h.Buffer.Ptr.Reinterpret<float>();
        var softmaxPtr = executor.GetTensor(Softmax).Buffer.Ptr.Reinterpret<float>();
        var attentionState = executor.GetTensor(AttentionState).Buffer.Ptr.Reinterpret<float>();
        var batchSize = Batch;
        var seqLength = SeqLength;
        var encoderHiddenSize = EncoderHiddenSize;
        // Ceiling division so partial 32x32 tiles are still covered; the bounds check
        // inside the kernel discards the overhang threads.
        var lp = new LaunchParam(
            new dim3((batchSize + 31) / 32, (encoderHiddenSize + 31) / 32, 1),
            new dim3(32, 32));
        stream.Launch(() =>
        {
            var batch = blockIdx.x * blockDim.x + threadIdx.x;
            var hidden = blockIdx.y * blockDim.y + threadIdx.y;
            // Use the captured local (consistent with batchSize/seqLength) instead of
            // reading the property through 'this' inside the kernel.
            if (batch < batchSize && hidden < encoderHiddenSize)
            {
                var sum = 0.0f;
                for (var i = 0; i < seqLength; ++i)
                {
                    var alpha = softmaxPtr[i * batchSize + batch];
                    // NOTE(review): this stride assumes element (i, batch, hidden) lives at
                    // i*seqLength*batchSize + batch*batchSize + hidden; for a packed
                    // [n, b, encoderHiddenSize] layout the offset would normally be
                    // (i*batchSize + batch)*encoderHiddenSize + hidden — confirm against
                    // the actual layout of h before relying on this kernel.
                    sum += alpha * hPtr[i * seqLength * batchSize + batch * batchSize + hidden];
                }
                attentionState[batch * encoderHiddenSize + hidden] = sum;
            }
        }, lp);
    }
    else
    {
        throw new NotImplementedException();
    }
}
// Workaround helper (could move into the framework later): when the tensor already lives
// on the executor's device we can share it directly via SetTensor; otherwise AssignTensor
// copies it across devices.
public static void AssignOrSetTensor<T>(Executor executor, Variable<T> var, Tensor<T> tensor)
{
    var sameDevice = tensor.Device == executor.Context.Device;
    if (sameDevice)
    {
        executor.SetTensor(var, tensor);
    }
    else
    {
        executor.AssignTensor(var, tensor);
    }
}
// Softmax cross-entropy forward pass using cuDNN's LOG softmax, then gathering the
// log-probability at each sample's label index to form the negative log-likelihood loss.
public override void Forward(Executor executor)
{
    var z = executor.GetTensor(Input);  // logits, shape [n, classes]
    var y = executor.GetTensor(Label);  // per-row class index (used to index a row below)
    Util.EnsureTrue(z.Shape.Rank == 2);
    Util.EnsureTrue(Dnn.IsAvailable, "TODO: make non-cuDnn implementation.");
    var n = (int)z.Shape[0];
    var classes = (int)z.Shape[1];

    using (var xDesc = executor.TensorDescRepo.Acquire())
    using (var yDesc = executor.TensorDescRepo.Acquire())
    {
        var dnn = executor.Context.ToGpuContext().Dnn;
        // Describe [n, classes] as a fully packed NCHW [n, classes, 1, 1] tensor.
        xDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { n, classes, 1, 1 }, new[] { classes, 1, 1, 1 });
        yDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { n, classes, 1, 1 }, new[] { classes, 1, 1, 1 });
        var xPtr = executor.GetTensor(Input).Buffer.Ptr;
        var yPtr = executor.GetTensor(LogPred, Shape.Create(n, classes)).Buffer.Ptr;
        var alpha = ScalarOps.Conv<T>(1.0);
        var beta = ScalarOps.Conv<T>(0.0);
        // LOG algorithm writes log-softmax directly into LogPred.
        const SoftmaxAlgorithm algorithm = SoftmaxAlgorithm.LOG;
        const SoftmaxMode mode = SoftmaxMode.INSTANCE;
        dnn.SoftmaxForward(algorithm, mode, alpha, xDesc.Value, xPtr, beta, yDesc.Value, yPtr);
    }

    // TODO: make it expression
    var logPred = executor.GetTensor(LogPred);
    var temp = executor.GetTensor(Temp, Shape.Create(n));
    var ctx = executor.Context;
    if (ctx.Type == ContextType.Gpu && logPred.Layout.IsInnerChangeMostFullyPacked)
    {
        var stream = ctx.ToGpuContext().Stream;
        var tempPtr = temp.Buffer.Ptr;
        var logPredPtr = logPred.Buffer.Ptr;
        var idxPtr = y.Buffer.Ptr;
        // Gather log p(label_i) for every row i, then Loss = -sum_i log p(label_i).
        DeviceFor.For(stream, 0, n, i =>
        {
            var idx = idxPtr[i];
            tempPtr[i] = logPredPtr[i * classes + idx];
        });
        executor.AssignTensor(Loss, -ReduceSum(temp));
        return;
    }
    throw new NotImplementedException();
}
/// <summary>
/// Numerically stable softmax + cross-entropy forward pass using the log-sum-exp trick:
/// m = rowwise max of the logits, N = log-softmax = z - m - log(sum(exp(z - m))),
/// Loss = -mean(sum(y * N)), Pred = exp(N).
/// (Removed the superseded "old solution" that was left as commented-out code.)
/// </summary>
public override void Forward(Executor executor)
{
    var z = executor.GetTensor(Input);  // logits
    var y = executor.GetTensor(Label);  // target distribution, multiplied elementwise with log-probs

    // Subtract the per-row max before exponentiating so Exp cannot overflow.
    executor.AssignTensor(M, ReduceMax(z.Reshape(-1, z.Shape[z.Shape.Rank - 1]), true, 1));
    var m = executor.GetTensor(M);
    executor.AssignTensor(N, z - m - Log(ReduceSum(Exp(z - m), true, 1)));
    var n = executor.GetTensor(N);
    executor.AssignTensor(Loss, -ReduceMean(ReduceSum(y * n, 1)));
    executor.AssignTensor(Pred, Exp(n));
}
// Checks the custom Lstm<float> forward and backward passes against reference values
// precomputed and stored in lstm_small.mat (H/cn/hn for forward, dX/dW/dc0/dh0 for backward).
public static void TestLstmAgainstReferenceResults()
{
    var mfr = new MatFileReader(@"lstm_small.mat");

    var inputSize = mfr.GetInt("InputSize");
    var seqLength = mfr.GetInt("SeqLength");
    var hiddenSize = mfr.GetInt("HiddenSize");
    var batchSize = mfr.GetInt("BatchSize");

    var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
    var lstm = new Lstm<float>(x, hiddenSize);
    var ctx = Context.GpuContext(0);
    var exe = new Executor(ctx, lstm.Y);
    exe.Initalize();

    // initial cell and hidden states from the reference file
    var h0 = mfr.GetDoubleArray("h0").Select(n => (float)n).ToArray();
    var c0 = mfr.GetDoubleArray("c0").Select(n => (float)n).ToArray();
    exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(batchSize, hiddenSize)));
    exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(batchSize, hiddenSize)));

    var input = mfr.GetDoubleArray("X").Select(n => (float)n).ToArray();
    exe.AssignTensor(x, input.AsTensor(Shape.Create(seqLength, batchSize, inputSize)));

    var w = mfr.GetDoubleArray("W").Select(n => (float)n).ToArray();
    w.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)).Print();
    exe.AssignTensor(lstm.W, w.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)));

    exe.Forward();

    // forward outputs: full sequence Y, final cell CY, final hidden HY
    var H = mfr.GetDoubleArray("H").Select(n => (float)n).ToArray();
    H.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();
    var myH = exe.GetTensor(lstm.Y).ToArray();
    myH.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();
    AreClose(H, myH, 1e-6);

    var CN = mfr.GetDoubleArray("cn").Select(n => (float)n).ToArray();
    CN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var myCN = exe.GetTensor(lstm.CY).ToArray();
    myCN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(CN, myCN, 1e-6);

    var HN = mfr.GetDoubleArray("hn").Select(n => (float)n).ToArray();
    HN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var myHN = exe.GetTensor(lstm.HY).ToArray();
    myHN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(HN, myHN, 1e-6);

    // backward: seed the output gradient, then compare all input gradients
    var dH = mfr.GetDoubleArray("dH").Select(n => (float)n).ToArray();
    exe.AssignGradient(lstm.Y, dH.AsTensor(Shape.Create(seqLength, batchSize, hiddenSize)), replace: true);

    exe.Backward();

    var dX = mfr.GetDoubleArray("dX").Select(n => (float)n).ToArray();
    dX.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
    var dXmy = exe.GetGradient(lstm.X).ToArray();
    dXmy.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
    AreClose(dX, dXmy, 1e-6);

    var dW = mfr.GetDoubleArray("dW").Select(n => (float)n).ToArray();
    dW.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)).Print();
    var dWmy = exe.GetGradient(lstm.W).ToArray();
    dWmy.AsTensor(Shape.Create(lstm.W.Shape.AsArray)).Print();
    AreClose(dW, dWmy, 1e-6);

    var dc0 = mfr.GetDoubleArray("dc0").Select(n => (float)n).ToArray();
    dc0.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var dc0my = exe.GetGradient(lstm.CX).ToArray();
    dc0my.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(dc0, dc0my, 1e-6);

    var dh0 = mfr.GetDoubleArray("dh0").Select(n => (float)n).ToArray();
    dh0.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var dh0my = exe.GetGradient(lstm.HX).ToArray();
    dh0my.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(dh0, dh0my, 1e-6);

    // make sure all asynchronous GPU work has finished before the test returns
    ctx.ToGpuContext().Stream.Synchronize();
}
// Compares the direct Lstm<float> implementation against the cuDNN-backed Rnn<float> on
// identical inputs, initial states, and constant per-gate weights; forward outputs and
// all gradients must agree within `error`. Note the two implementations use different
// weight layouts: cuDNN orders gate blocks I,F,A,O in a flat Fortran-order blob, while
// the direct LSTM uses rows [bias; W; U] with column blocks I,F,O,A.
public static void TestLstmAgainstCuDnnVersion()
{
    var ctx = Context.GpuContext(0);
    var inputSize = 5;
    var seqLength = 3;
    var batchSize = 2;
    var hiddenSize = 4;
    var error = 1e-5;

    // shared random input, initial states and output gradient (generated on the CPU)
    var data = Context.CpuContext.Eval((2.0f.AsScalar() * RandomUniform<float>(Shape.Create(seqLength, batchSize, inputSize)) - 1.0f.AsScalar())).ToArray3D();
    //data.AsTensor(Shape.Create(seqLength*batchSize, inputSize)).Print();
    var h0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var c0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var dy = Context.CpuContext.Eval((2.0f.AsScalar() * RandomUniform<float>(Shape.Create(seqLength, batchSize, hiddenSize)) - 1.0f.AsScalar())).ToArray3D();
    //dy.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();

    // constant per-gate weights: W* input weights, U* recurrent weights, B* biases
    var wi = 0.5f; var wf = 0.4f; var wo = 0.3f; var wa = 0.2f;
    var ui = 0.5f; var uf = 0.4f; var uo = 0.3f; var ua = 0.1f;
    var bi = 0.5f; var bf = 0.4f; var bo = 0.3f; var ba = 0.2f;

    float[,,] y1, y2, dx1, dx2;
    float[,] cy1, cy2, hy1, hy2;
    float[,] dcx1, dcx2, dhx1, dhx2;
    float[,] dw1, dw2;

    {
        // calc with cuDNN
        var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
        var lstm = new Rnn<float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states (cuDNN states carry a leading layer dimension of 1)
        exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
        exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

        // set weights
        // cuDNN matrices order: IFAO
        var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
        var offset = 0;
        // Wi
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wi));
        offset += inputSize;
        // Wf
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wf));
        offset += inputSize;
        // Wa
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wa));
        offset += inputSize;
        // Wo
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wo));
        offset += inputSize;
        // Ui
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ui));
        offset += hiddenSize;
        // Uf
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uf));
        offset += hiddenSize;
        // Ua
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ua));
        offset += hiddenSize;
        // Uo
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uo));
        offset += hiddenSize;
        // Bi
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bi));
        offset++;
        // Bf
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bf));
        offset++;
        // Ba
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), ba));
        offset++;
        // Bo
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bo));

        exe.Forward();

        y1 = exe.GetTensor(lstm.Y).ToArray3D();
        cy1 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
        hy1 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);

        exe.Backward();

        dx1 = exe.GetGradient(lstm.X).ToArray3D();
        dcx1 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx1 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();

        // we make dw follow the shape as (1 + inputSize + hiddenSize, 4*hiddenSize),
        // need to transpose because cuDNN uses Fortran storage order
        var dwCUDNN = exe.GetGradient(lstm.W).ToArray().AsTensor();
        dw1 = new float[1 + inputSize + hiddenSize, 4 * hiddenSize];
        var dw1Tensor = Reference<float>(dw1);
        var cpu = Context.CpuContext;
        offset = 0;

        // cuDNN order: IFAO, need to transpose because cuDNN uses Fortran storage order
        // Wi
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Wf
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Wa (cuDNN's A block maps to the direct LSTM's 4th column block)
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Wo (cuDNN's O block maps to the direct LSTM's 3rd column block)
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Ui
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Uf
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Ua
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Uo
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Bi
        cpu.Assign(dw1Tensor.Slice(0, Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
        offset += hiddenSize;
        // Bf
        cpu.Assign(dw1Tensor.Slice(0, Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
        offset += hiddenSize;
        // Ba
        cpu.Assign(dw1Tensor.Slice(0, Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
        offset += hiddenSize;
        // Bo
        cpu.Assign(dw1Tensor.Slice(0, Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
    }

    {
        // calc with direct LSTM implementation
        var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
        var lstm = new Lstm<float>(x, hiddenSize, forgetBiasInit: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor());
        exe.AssignTensor(lstm.HX, h0.AsTensor());

        // set weights (row 0 = biases, rows 1..inputSize = W, remaining rows = U;
        // column blocks ordered I, F, O, A)
        var w = exe.GetTensor(lstm.W);
        // Wi
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(0, hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wi));
        // Wf
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wf));
        // Wo
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wo));
        // Wa
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wa));
        // Ui
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(0, hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ui));
        // Uf
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uf));
        // Uo
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uo));
        // Ua
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ua));
        // Bi
        ctx.Assign(w.Slice(0, Range(0, hiddenSize)), Fill(Shape.Create(1, hiddenSize), bi));
        // Bf
        ctx.Assign(w.Slice(0, Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), bf));
        // Bo
        ctx.Assign(w.Slice(0, Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), bo));
        // Ba
        ctx.Assign(w.Slice(0, Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), ba));

        exe.Forward();

        y2 = exe.GetTensor(lstm.Y).ToArray3D();
        cy2 = exe.GetTensor(lstm.CY).ToArray2D();
        hy2 = exe.GetTensor(lstm.HY).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);

        exe.Backward();

        dx2 = exe.GetGradient(lstm.X).ToArray3D();
        dcx2 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx2 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
        dw2 = exe.GetGradient(lstm.W).ToArray2D();
    }

    // both implementations must agree on outputs and every gradient
    AreClose(y1, y2, error);
    AreClose(cy1, cy2, error);
    AreClose(hy1, hy2, error);
    AreClose(dx1, dx2, error);
    AreClose(dcx1, dcx2, error);
    AreClose(dhx1, dhx2, error);
    AreClose(dw1, dw2, error);
}
// End-to-end demo/test: compiles Foo into forward/backward delegates, runs them on random
// host data, then checks every returned gradient against finite differences.
public static void Test()
{
    // compile the graph on one context, then get the forward and backward computation delegate from the
    // returned tuple.
    var ctx = Context.GpuContext(0);
    var funcs = Compile<double, double, double, double>(ctx, Foo);
    var forward = funcs.Item1;
    var backward = funcs.Item2;

    // create host arrays
    var m = 100;
    var k = 90;
    var n = 80;
    var x = new double[m, k];
    var w = new double[k, n];
    var b = new double[n];

    // randomly set the host arrays (fixed seed keeps the test deterministic)
    var rng = new Random(42);
    AleaTKUtil.Common.UniformRandomArray(x, rng);
    AleaTKUtil.Common.UniformRandomArray(w, rng);
    AleaTKUtil.Common.UniformRandomArray(b, rng);

    // you can calc the output
    var y = forward(x.AsTensor(), w.AsTensor(), b.AsTensor());
    //y.Print();

    // fake some gradient
    var dy = new double[m, n];
    AleaTKUtil.Common.UniformRandomArray(dy, rng);

    // calc the gradients, they are in a tuple
    var gradients = backward(dy.AsTensor());
    var dx = gradients.Item1;
    var dw = gradients.Item2;
    var db = gradients.Item3;

    // the following code is just to verify the gradients with finite difference.
    // the executor is seeded with the same inputs and the same output gradient.
    var varX = Variable<double>();
    var varW = Variable<double>();
    var varB = Variable<double>();
    var varY = Foo(varX, varW, varB);
    var exe = new Executor(ctx, varY);
    exe.AssignTensor(varX, x.AsTensor());
    exe.AssignTensor(varW, w.AsTensor());
    exe.AssignTensor(varB, b.AsTensor());
    exe.AssignGradient(varY, dy.AsTensor(), replace: true);

    var bump = 1e-7;

    var dx_fd = GradientChecker.FiniteDifferenceGradient(exe, varX, bump: bump);
    //dx.Print();
    //dx_fd.Print();
    AleaTKUtil.Common.AreClose(dx_fd.ToArray2D(), dx.ToArray2D(), 1e-6);

    var dw_fd = GradientChecker.FiniteDifferenceGradient(exe, varW, bump: bump);
    //dw.Print();
    //dw_fd.Print();
    AleaTKUtil.Common.AreClose(dw_fd.ToArray2D(), dw.ToArray2D(), 1e-6);

    var db_fd = GradientChecker.FiniteDifferenceGradient(exe, varB, bump: bump);
    //db.Print();
    //db_fd.Print();
    AleaTKUtil.Common.AreClose(db_fd.ToArray(), db.ToArray(), 1e-5);
}
// Initializes the cuDNN-backed RNN: dropout descriptor, RNN descriptor, weight filter
// descriptor, workspace/reserve buffers, and random initialization of every linear
// layer's matrix plus type-specific bias init. Finally zeros the initial states.
public override void Initialize(Executor executor)
{
    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;

    // dropout
    var dropoutDesc = executor.DropoutDescDict[DropoutDesc];
    IntPtr dropoutStatesSize;
    dnn.DropoutGetStatesSize(out dropoutStatesSize);
    var dropoutStates = executor.GetTensor(DropoutStates, Shape.Create(dropoutStatesSize.ToInt64()));
    dropoutDesc.Set(dnn, (float)Dropout, dropoutStates.Buffer.Ptr, dropoutStatesSize, DropoutSeed);

    // rnn descriptor
    var rnnDesc = executor.RnnDescDict[RnnDesc];
    var mode = Type.Mode;
    rnnDesc.Set(HiddenSize, NumLayers, dropoutDesc, RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL, mode, Dnn.DataTypeOf<T>());

    // weight: ask cuDNN for the total parameter size, then describe it as one flat filter
    var wDesc = executor.FilterDescDict[WDesc];
    IntPtr weightsSize;
    dnn.GetRNNParamsSize(rnnDesc, XDesc[0], out weightsSize, Dnn.DataTypeOf<T>());
    Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf<T>() == 0);
    var shapeW = Shape.Create(weightsSize.ToInt64() / Alea.Gpu.SizeOf<T>());
    wDesc.SetND(Dnn.DataTypeOf<T>(), TensorFormat.CUDNN_TENSOR_NCHW, new [] { (int)shapeW[0], 1, 1 });

    // workspace and reserved space
    IntPtr workSize;
    dnn.GetRNNWorkspaceSize(rnnDesc, SeqLength, XDesc, out workSize);
    executor.GetTensor(Workspace, Shape.Create(workSize.ToInt64()));

    if (IsTraining)
    {
        IntPtr reserveSize;
        dnn.GetRNNTrainingReserveSize(rnnDesc, SeqLength, XDesc, out reserveSize);
        executor.GetTensor(ReserveSpace, Shape.Create(reserveSize.ToInt64()));
    }

    // since we are using cuDNN, we'd better make sure these variables are allocated
    executor.GetTensor(W, shapeW);
    if (IsTraining) { executor.GetGradient(W, shapeW); }
    executor.GetTensor(Y, Shape.Create(Y.Shape.AsArray));
    executor.GetTensor(HX, Shape.Create(HX.Shape.AsArray));
    executor.GetTensor(CX, Shape.Create(CX.Shape.AsArray));
    executor.GetTensor(HY, Shape.Create(HY.Shape.AsArray));
    executor.GetTensor(CY, Shape.Create(CY.Shape.AsArray));
    if (IsTraining)
    {
        executor.GetGradient(X, Shape.Create(X.Shape.AsArray));
        executor.GetGradient(Y, Shape.Create(Y.Shape.AsArray));
        executor.GetGradient(HX, Shape.Create(HX.Shape.AsArray));
        executor.GetGradient(CX, Shape.Create(CX.Shape.AsArray));
    }

    // init weights
    var numLinearLayers = Type.NumLinLayers;
    using (var filterDesc = new FilterDescriptor())
    {
        var w = executor.GetTensor(W);
        var filterDimA = new int[3];
        for (var layer = 0; layer < NumLayers; ++layer)
        {
            for (var linLayerId = 0; linLayerId < numLinearLayers; ++linLayerId)
            {
                int nbDims;
                DataType dataType;
                TensorFormat format;

                // matrix: wrap the raw device pointer cuDNN hands back in a tensor view
                // over the weight blob, then fill with scaled random normal values
                deviceptr<T> linLayerMat;
                dnn.GetRNNLinLayerMatrixParams(rnnDesc, layer, XDesc[0], wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerMat);
                filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                var length = filterDimA.Aggregate(ScalarOps.Mul);
                var linLayerMatBuffer = new Buffer<T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerMat);
                var linLayerMatTensor = new Tensor<T>(linLayerMatBuffer);
                context.Assign(linLayerMatTensor, RandomNormal<T>(Shape.Create(length)) / (Math.Sqrt(HiddenSize + InputSize).AsScalar<T>()));

                // bias: same trick, but init is delegated to the RNN type (per-gate)
                deviceptr<T> linLayerBias;
                dnn.GetRNNLinLayerBiasParams(rnnDesc, layer, XDesc[0], wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerBias);
                filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                length = filterDimA.Aggregate(ScalarOps.Mul);
                var linLayerBiasBuffer = new Buffer<T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerBias);
                var linLayerBiasTensor = new Tensor<T>(linLayerBiasBuffer);
                Type.InitBias(context, layer, linLayerId, linLayerBiasTensor);
            }
        }
    }

    base.Initialize(executor);

    // zero the initial hidden and cell states
    const double value = 0.0;
    executor.AssignTensor(HX, Fill(Shape.Create(HX.Shape.AsArray), ScalarOps.Conv<T>(value)));
    executor.AssignTensor(CX, Fill(Shape.Create(CX.Shape.AsArray), ScalarOps.Conv<T>(value)));
}
// Verifies that RnnDynamic<float> (sequence length and batch size resolved at run time)
// produces the same forward outputs and gradients as Rnn<float> (fully static shapes)
// for a small single-layer LSTM with shared constant weights.
public static void RnnAgainstRnnDynamic()
{
    var ctx = Context.GpuContext(0);
    var inputSize = 5;
    var seqLength = 3;
    var batchSize = 2;
    var hiddenSize = 4;
    var error = 1e-5;

    // shared random input, initial states and output gradient
    var data = Context.CpuContext.Eval(RandomUniform<float>(-1, 1, Shape.Create(seqLength, batchSize, inputSize))).ToArray3D();
    data.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
    var h0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var c0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var dy = Context.CpuContext.Eval(RandomUniform<float>(-1, 1, Shape.Create(seqLength, batchSize, hiddenSize))).ToArray3D();

    float[,,] y1, y2, dx1, dx2;
    float[,] cy1, cy2, hy1, hy2;
    float[,] dcx1, dcx2, dhx1, dhx2;
    float[] dw1, dw2;

    {
        // reference: Rnn with fully known shapes
        var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
        var lstm = new Rnn<float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
        exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

        // set weights, cuDNN matrices order: IFAO
        var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
        SetWeights(ctx, w, inputSize, hiddenSize);

        exe.Forward();

        y1 = exe.GetTensor(lstm.Y).ToArray3D();
        cy1 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
        hy1 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);

        exe.Backward();

        dx1 = exe.GetGradient(lstm.X).ToArray3D();
        dcx1 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx1 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
        dw1 = exe.GetGradient(lstm.W).ToArray(); // cuDNN weight is 1D linear blob
    }

    {
        // under test: RnnDynamic with seqLength and batchSize left unknown (-1)
        var x = Variable<float>(PartialShape.Create(-1, -1, inputSize));
        var lstm = new RnnDynamic<float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
        exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

        // set weights, cuDNN matrices order: IFAO
        var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
        SetWeights(ctx, w, inputSize, hiddenSize);

        exe.Forward();

        y2 = exe.GetTensor(lstm.Y).ToArray3D();
        cy2 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
        hy2 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);

        exe.Backward();

        dx2 = exe.GetGradient(lstm.X).ToArray3D();
        dcx2 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx2 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
        dw2 = exe.GetGradient(lstm.W).ToArray();
    }

    // both variants must agree on outputs and all gradients
    AreClose(y1, y2, error);
    AreClose(cy1, cy2, error);
    AreClose(hy1, hy2, error);
    AreClose(dx1, dx2, error);
    AreClose(dcx1, dcx2, error);
    AreClose(dhx1, dhx2, error);
    AreClose(dw1, dw2, error);
}
/// <summary>Seeds the RNN's initial hidden (HX) and cell (CX) states from the given tensors.</summary>
public void AssignInitialStates(Executor executor, Tensor<T> hx, Tensor<T> cx)
{
    // The two assignments target distinct variables, so their order does not matter.
    executor.AssignTensor(Rnn.CX, cx);
    executor.AssignTensor(Rnn.HX, hx);
}
/// <summary>Resets the RNN's initial hidden and cell states to all zeros.</summary>
public void ZeroInitialStates(Executor executor)
{
    var zero = ScalarOps.Conv<T>(0.0);
    executor.AssignTensor(Rnn.HX, Fill(Shape.Create(Rnn.HX.Shape.AsArray), zero));
    executor.AssignTensor(Rnn.CX, Fill(Shape.Create(Rnn.CX.Shape.AsArray), zero));
}
/// <summary>Forward pass: applies this unit's activation expression to its input.</summary>
public override void Forward(Executor executor)
{
    executor.AssignTensor(Output, ForwardExpr(executor.GetTensor(Input)));
}