public static Tuple<Func<Tensor<T1>, Tensor<T2>, Tensor<T3>, Tensor<TR>>, Func<Tensor<TR>, Tuple<Tensor<T1>, Tensor<T2>, Tensor<T3>>>>
    Compile<T1, T2, T3, TR>(Context ctx, Func<Variable<T1>, Variable<T2>, Variable<T3>, Variable<TR>> function)
{
    var var1 = Variable<T1>();
    var var2 = Variable<T2>();
    var var3 = Variable<T3>();
    var varR = function(var1, var2, var3);
    var executor = new Executor(ctx, varR) { AssignAllGradient = true };

    // forward closure: assign the three input tensors, run the graph, return the output
    Func<Tensor<T1>, Tensor<T2>, Tensor<T3>, Tensor<TR>> forward = (tensor1, tensor2, tensor3) =>
    {
        AssignOrSetTensor(executor, var1, tensor1);
        AssignOrSetTensor(executor, var2, tensor2);
        AssignOrSetTensor(executor, var3, tensor3);
        executor.Forward();
        return executor.GetTensor(varR);
    };

    // backward closure: assign the output gradient, run backprop, return the three input gradients
    Func<Tensor<TR>, Tuple<Tensor<T1>, Tensor<T2>, Tensor<T3>>> backward = gradientR =>
    {
        AssignOrSetGradient(executor, varR, gradientR);
        executor.Backward();
        var gradient1 = executor.GetGradient(var1);
        var gradient2 = executor.GetGradient(var2);
        var gradient3 = executor.GetGradient(var3);
        return Tuple.Create(gradient1, gradient2, gradient3);
    };

    return Tuple.Create(forward, backward);
}
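// A hedged usage sketch for Compile above: build forward/backward closures for a
// three-input expression. The expression, tensor names, and this method itself are
// illustrative assumptions; the `gpu` context field is the same one used by the
// gradient tests below, and `+` on Variable<T> is the operator those tests rely on.
public static void CompileUsageSketch()
{
    var compiled = Compile<double, double, double, double>(gpu, (a, b, c) => a + b + c);
    var forward = compiled.Item1;  // (Tensor a, Tensor b, Tensor c) -> Tensor result
    var backward = compiled.Item2; // Tensor outputGradient -> (dA, dB, dC)

    // e.g. with host arrays ha, hb, hc of matching shapes (hypothetical):
    // var result = forward(ha.AsTensor(), hb.AsTensor(), hc.AsTensor());
    // var grads = backward(hdr.AsTensor()); // grads.Item1 is d(result)/d(a), etc.
}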
public static void Gradient_WeightedSumReduce_02_GPU()
{
    var rng = new Random(42);
    var x = Variable<double>();
    var w = Variable<double>();
    var wsr = new WeightedSumReduce<double>(w.Reshape(-1, 1), x);
    var y = wsr.Output;
    var ctx = gpu;
    var exe = new Executor(ctx, y) { AssignAllGradient = true };

    var n = 5;
    var d = 3;
    var hx = new double[n, d];
    var hw = new double[n];
    UniformRandomArray(hx, rng);
    UniformRandomArray(hw, rng);

    // reference result on the host: hy[i] = sum_j hw[j] * hx[j, i]
    var hy = new double[d];
    for (var i = 0; i < d; ++i)
    {
        var acc = 0.0;
        for (var j = 0; j < n; ++j)
        {
            acc += hw[j] * hx[j, i];
        }
        hy[i] = acc;
    }

    exe.AssignTensor(x, hx.AsTensor());
    exe.AssignTensor(w, hw.AsTensor());
    exe.Forward();

    var ty = exe.GetTensor(y);
    ty.Print();
    AreClose(hy, ty.ToArray(), 1e-10);

    var hdy = new double[d];
    UniformRandomArray(hdy, rng);
    exe.AssignGradient(y, hdy.AsTensor(), replace: true);
    exe.Backward();

    //var tdx = exe.GetGradient(x);
    //var tdw = exe.GetGradient(w);
    //tdx.Print();
    //tdw.Print();

    //var bump = 1e-8;
    //var hdx = GradientChecker.FiniteDifferenceGradient(exe, x, bump: bump);
    ////var hdw = GradientChecker.FiniteDifferenceGradient(exe, w, bump: bump);
    //hdx.Print();
    ////hdw.Print();

    //AreClose(hdx.ToArray2D(), tdx.ToArray2D(), 1e-7);
    ////AreClose(hdw.ToArray2D(), tdw.ToArray2D(), 1e-7);
}
public static void Gradient_Dot_GPU()
{
    var rng = new Random();
    var m = 10;
    var k = 5;
    var n = 3;
    var x = Variable<double>();
    var y = Variable<double>();
    var z = Dot(x, y);
    var ctx = gpu;
    var exe = new Executor(ctx, z) { AssignAllGradient = true };

    var hx = new double[m, k];
    var hy = new double[k, n];
    UniformRandomArray(hx, rng);
    UniformRandomArray(hy, rng);

    // reference result on the host
    var hz = Dot(hx, hy);

    //hx.AsTensor().Print();
    //hy.AsTensor().Print();

    exe.AssignTensor(x, hx.AsTensor());
    exe.AssignTensor(y, hy.AsTensor());
    exe.Forward();

    var tz = exe.GetTensor(z);
    //tz.Print();
    AreClose(hz, tz.ToArray2D(), 1e-10);

    var hdz = new double[m, n];
    UniformRandomArray(hdz, rng);
    //hdz.AsTensor().Print();
    exe.AssignGradient(z, hdz.AsTensor(), replace: true);
    exe.Backward();

    var tdx = exe.GetGradient(x);
    var tdy = exe.GetGradient(y);
    tdx.Print();
    tdy.Print();

    // check the backward pass against a finite-difference approximation
    var bump = 1e-6;
    var hdx = GradientChecker.FiniteDifferenceGradient(exe, x, bump: bump);
    var hdy = GradientChecker.FiniteDifferenceGradient(exe, y, bump: bump);
    hdx.Print();
    hdy.Print();
    AreClose(tdx.ToArray(), hdx.ToArray(), 1e-6);
    AreClose(tdy.ToArray(), hdy.ToArray(), 1e-6);
}
public static void Gradient_Add_VectorMatrix_GPU()
{
    var rng = new Random();
    var x = Variable<double>();
    var y = Variable<double>();
    var z = x + y;
    var ctx = gpu;
    var exe = new Executor(ctx, z) { AssignAllGradient = true };

    var l = 10;
    var hx = rng.NextDouble();
    var hy = new double[l];
    var hz = new double[l];
    UniformRandomArray(hy, rng);

    // reference result: scalar hx broadcast against vector hy
    for (var i = 0; i < l; ++i)
    {
        hz[i] = hx + hy[i];
    }

    //hy.AsTensor().Print();

    exe.AssignTensor(x, (new[] { hx }).AsTensor());
    exe.AssignTensor(y, hy.AsTensor());
    exe.Forward();

    var tz = exe.GetTensor(z);
    //tz.Print();
    AreClose(hz, tz.ToArray(), 1e-10);

    var hdz = new double[l];
    UniformRandomArray(hdz, rng);
    //hdz.AsTensor().Print();
    exe.AssignGradient(z, hdz.AsTensor());
    exe.Backward();

    var tdx = exe.GetGradient(x);
    var tdy = exe.GetGradient(y);
    tdx.Print();
    tdy.Print();

    //var bump = 1e-6;
    //var hdx = GradientChecker.FiniteDifferenceGradient(exe, x, bump: bump);
    //var hdy = GradientChecker.FiniteDifferenceGradient(exe, y, bump: bump);
    //hdx.Print();
    //hdy.Print();
    //AreClose(tdx.ToArray(), hdx.ToArray(), 1e-6);
}
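// The gradient tests above validate backprop against GradientChecker.FiniteDifferenceGradient.
// Below is a minimal stand-alone sketch of the underlying idea on plain host arrays:
// perturb one input element at a time and take the central difference
// (f(x + h) - f(x - h)) / (2h). This helper is illustrative only, not the library's
// implementation, and its name and signature are assumptions.
private static double[] CentralFiniteDifference(Func<double[], double> f, double[] x, double bump = 1e-6)
{
    var grad = new double[x.Length];
    for (var i = 0; i < x.Length; ++i)
    {
        var saved = x[i];
        x[i] = saved + bump;
        var fPlus = f(x);
        x[i] = saved - bump;
        var fMinus = f(x);
        x[i] = saved; // restore the original value before moving on
        grad[i] = (fPlus - fMinus) / (2.0 * bump);
    }
    return grad;
}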
public static void TestLstmAgainstReferenceResults()
{
    var mfr = new MatFileReader(@"lstm_small.mat");

    var inputSize = mfr.GetInt("InputSize");
    var seqLength = mfr.GetInt("SeqLength");
    var hiddenSize = mfr.GetInt("HiddenSize");
    var batchSize = mfr.GetInt("BatchSize");

    var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
    var lstm = new Lstm<float>(x, hiddenSize);

    var ctx = Context.GpuContext(0);
    var exe = new Executor(ctx, lstm.Y);
    exe.Initalize();

    // set initial states
    var h0 = mfr.GetDoubleArray("h0").Select(n => (float)n).ToArray();
    var c0 = mfr.GetDoubleArray("c0").Select(n => (float)n).ToArray();
    exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(batchSize, hiddenSize)));
    exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(batchSize, hiddenSize)));

    // set input
    var input = mfr.GetDoubleArray("X").Select(n => (float)n).ToArray();
    exe.AssignTensor(x, input.AsTensor(Shape.Create(seqLength, batchSize, inputSize)));

    // set weights
    var w = mfr.GetDoubleArray("W").Select(n => (float)n).ToArray();
    w.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)).Print();
    exe.AssignTensor(lstm.W, w.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)));

    exe.Forward();

    // compare the forward outputs against the reference results
    var H = mfr.GetDoubleArray("H").Select(n => (float)n).ToArray();
    H.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();
    var myH = exe.GetTensor(lstm.Y).ToArray();
    myH.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();
    AreClose(H, myH, 1e-6);

    var CN = mfr.GetDoubleArray("cn").Select(n => (float)n).ToArray();
    CN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var myCN = exe.GetTensor(lstm.CY).ToArray();
    myCN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(CN, myCN, 1e-6);

    var HN = mfr.GetDoubleArray("hn").Select(n => (float)n).ToArray();
    HN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var myHN = exe.GetTensor(lstm.HY).ToArray();
    myHN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(HN, myHN, 1e-6);

    // backward pass with the reference output gradient
    var dH = mfr.GetDoubleArray("dH").Select(n => (float)n).ToArray();
    exe.AssignGradient(lstm.Y, dH.AsTensor(Shape.Create(seqLength, batchSize, hiddenSize)), replace: true);
    exe.Backward();

    // compare the gradients against the reference results
    var dX = mfr.GetDoubleArray("dX").Select(n => (float)n).ToArray();
    dX.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
    var dXmy = exe.GetGradient(lstm.X).ToArray();
    dXmy.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
    AreClose(dX, dXmy, 1e-6);

    var dW = mfr.GetDoubleArray("dW").Select(n => (float)n).ToArray();
    dW.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)).Print();
    var dWmy = exe.GetGradient(lstm.W).ToArray();
    dWmy.AsTensor(Shape.Create(lstm.W.Shape.AsArray)).Print();
    AreClose(dW, dWmy, 1e-6);

    var dc0 = mfr.GetDoubleArray("dc0").Select(n => (float)n).ToArray();
    dc0.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var dc0my = exe.GetGradient(lstm.CX).ToArray();
    dc0my.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(dc0, dc0my, 1e-6);

    var dh0 = mfr.GetDoubleArray("dh0").Select(n => (float)n).ToArray();
    dh0.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    var dh0my = exe.GetGradient(lstm.HX).ToArray();
    dh0my.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
    AreClose(dh0, dh0my, 1e-6);

    ctx.ToGpuContext().Stream.Synchronize();
}
public static void TestLstmAgainstCuDnnVersion()
{
    var ctx = Context.GpuContext(0);
    var inputSize = 5;
    var seqLength = 3;
    var batchSize = 2;
    var hiddenSize = 4;
    var error = 1e-5;

    var data = Context.CpuContext.Eval((2.0f.AsScalar() * RandomUniform<float>(Shape.Create(seqLength, batchSize, inputSize)) - 1.0f.AsScalar())).ToArray3D();
    //data.AsTensor(Shape.Create(seqLength*batchSize, inputSize)).Print();
    var h0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var c0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var dy = Context.CpuContext.Eval((2.0f.AsScalar() * RandomUniform<float>(Shape.Create(seqLength, batchSize, hiddenSize)) - 1.0f.AsScalar())).ToArray3D();
    //dy.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();

    var wi = 0.5f; var wf = 0.4f; var wo = 0.3f; var wa = 0.2f;
    var ui = 0.5f; var uf = 0.4f; var uo = 0.3f; var ua = 0.1f;
    var bi = 0.5f; var bf = 0.4f; var bo = 0.3f; var ba = 0.2f;

    float[,,] y1, y2, dx1, dx2;
    float[,] cy1, cy2, hy1, hy2;
    float[,] dcx1, dcx2, dhx1, dhx2;
    float[,] dw1, dw2;

    {
        // calc with cuDNN
        var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
        var lstm = new Rnn<float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
        exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

        // set weights; cuDNN matrix order: IFAO
        var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
        var offset = 0;
        // Wi
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wi));
        offset += inputSize;
        // Wf
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wf));
        offset += inputSize;
        // Wa
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wa));
        offset += inputSize;
        // Wo
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wo));
        offset += inputSize;
        // Ui
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ui));
        offset += hiddenSize;
        // Uf
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uf));
        offset += hiddenSize;
        // Ua
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ua));
        offset += hiddenSize;
        // Uo
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uo));
        offset += hiddenSize;
        // Bi
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bi));
        offset++;
        // Bf
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bf));
        offset++;
        // Ba
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), ba));
        offset++;
        // Bo
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bo));

        exe.Forward();

        y1 = exe.GetTensor(lstm.Y).ToArray3D();
        cy1 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
        hy1 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);
        exe.Backward();

        dx1 = exe.GetGradient(lstm.X).ToArray3D();
        dcx1 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx1 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();

        // We make dw follow the shape (1 + inputSize + hiddenSize, 4*hiddenSize); each
        // slice needs to be transposed because cuDNN uses Fortran storage order.
        // cuDNN order: IFAO.
        var dwCUDNN = exe.GetGradient(lstm.W).ToArray().AsTensor();
        dw1 = new float[1 + inputSize + hiddenSize, 4 * hiddenSize];
        var dw1Tensor = Reference<float>(dw1);
        var cpu = Context.CpuContext;
        offset = 0;
        // Wi
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Wf
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Wa
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Wo
        cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
        offset += inputSize * hiddenSize;
        // Ui
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Uf
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Ua
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Uo
        cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
        offset += hiddenSize * hiddenSize;
        // Bi
        cpu.Assign(dw1Tensor.Slice(0, Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
        offset += hiddenSize;
        // Bf
        cpu.Assign(dw1Tensor.Slice(0, Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
        offset += hiddenSize;
        // Ba
        cpu.Assign(dw1Tensor.Slice(0, Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
        offset += hiddenSize;
        // Bo
        cpu.Assign(dw1Tensor.Slice(0, Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
    }

    {
        // calc with direct LSTM implementation
        var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
        var lstm = new Lstm<float>(x, hiddenSize, forgetBiasInit: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor());
        exe.AssignTensor(lstm.HX, h0.AsTensor());

        // set weights
        var w = exe.GetTensor(lstm.W);
        // Wi
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(0, hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wi));
        // Wf
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wf));
        // Wo
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wo));
        // Wa
        ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wa));
        // Ui
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(0, hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ui));
        // Uf
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uf));
        // Uo
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uo));
        // Ua
        ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ua));
        // Bi
        ctx.Assign(w.Slice(0, Range(0, hiddenSize)), Fill(Shape.Create(1, hiddenSize), bi));
        // Bf
        ctx.Assign(w.Slice(0, Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), bf));
        // Bo
        ctx.Assign(w.Slice(0, Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), bo));
        // Ba
        ctx.Assign(w.Slice(0, Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), ba));

        exe.Forward();

        y2 = exe.GetTensor(lstm.Y).ToArray3D();
        cy2 = exe.GetTensor(lstm.CY).ToArray2D();
        hy2 = exe.GetTensor(lstm.HY).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);
        exe.Backward();

        dx2 = exe.GetGradient(lstm.X).ToArray3D();
        dcx2 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx2 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
        dw2 = exe.GetGradient(lstm.W).ToArray2D();
    }

    AreClose(y1, y2, error);
    AreClose(cy1, cy2, error);
    AreClose(hy1, hy2, error);
    AreClose(dx1, dx2, error);
    AreClose(dcx1, dcx2, error);
    AreClose(dhx1, dhx2, error);
    AreClose(dw1, dw2, error);
}
public static void RnnAgainstRnnDynamic()
{
    var ctx = Context.GpuContext(0);
    var inputSize = 5;
    var seqLength = 3;
    var batchSize = 2;
    var hiddenSize = 4;
    var error = 1e-5;

    var data = Context.CpuContext.Eval(RandomUniform<float>(-1, 1, Shape.Create(seqLength, batchSize, inputSize))).ToArray3D();
    data.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
    var h0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var c0 = Context.CpuContext.Eval(RandomNormal<float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
    var dy = Context.CpuContext.Eval(RandomUniform<float>(-1, 1, Shape.Create(seqLength, batchSize, hiddenSize))).ToArray3D();

    float[,,] y1, y2, dx1, dx2;
    float[,] cy1, cy2, hy1, hy2;
    float[,] dcx1, dcx2, dhx1, dhx2;
    float[] dw1, dw2;

    {
        // Rnn<T> with a fully known static shape
        var x = Variable<float>(PartialShape.Create(seqLength, batchSize, inputSize));
        var lstm = new Rnn<float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
        exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

        // set weights; cuDNN matrix order: IFAO
        var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
        SetWeights(ctx, w, inputSize, hiddenSize);

        exe.Forward();

        y1 = exe.GetTensor(lstm.Y).ToArray3D();
        cy1 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
        hy1 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);
        exe.Backward();

        dx1 = exe.GetGradient(lstm.X).ToArray3D();
        dcx1 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx1 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
        dw1 = exe.GetGradient(lstm.W).ToArray(); // cuDNN weight is a 1D linear blob
    }

    {
        // RnnDynamic<T> with dynamic sequence and batch dimensions
        var x = Variable<float>(PartialShape.Create(-1, -1, inputSize));
        var lstm = new RnnDynamic<float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
        var exe = new Executor(ctx, lstm.Y);
        exe.Initalize();

        // set input
        exe.AssignTensor(lstm.X, data.AsTensor());

        // set states
        exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
        exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

        // set weights; cuDNN matrix order: IFAO
        var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
        SetWeights(ctx, w, inputSize, hiddenSize);

        exe.Forward();

        y2 = exe.GetTensor(lstm.Y).ToArray3D();
        cy2 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
        hy2 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

        exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);
        exe.Backward();

        dx2 = exe.GetGradient(lstm.X).ToArray3D();
        dcx2 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
        dhx2 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
        dw2 = exe.GetGradient(lstm.W).ToArray();
    }

    AreClose(y1, y2, error);
    AreClose(cy1, cy2, error);
    AreClose(hy1, hy2, error);
    AreClose(dx1, dx2, error);
    AreClose(dcx1, dcx2, error);
    AreClose(dhx1, dhx2, error);
    AreClose(dw1, dw2, error);
}
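// SetWeights is called in RnnAgainstRnnDynamic but not defined in this section. A
// minimal sketch is given below, assuming it fills the reshaped cuDNN weight blob
// (matrix order IFAO) with the same constants used inline in TestLstmAgainstCuDnnVersion;
// treat the exact signature and the constant values as assumptions.
private static void SetWeights(Context ctx, Tensor<float> w, int inputSize, int hiddenSize)
{
    var wi = 0.5f; var wf = 0.4f; var wo = 0.3f; var wa = 0.2f;
    var ui = 0.5f; var uf = 0.4f; var uo = 0.3f; var ua = 0.1f;
    var bi = 0.5f; var bf = 0.4f; var bo = 0.3f; var ba = 0.2f;
    var offset = 0;

    // input-to-hidden matrices, cuDNN order IFAO
    foreach (var v in new[] { wi, wf, wa, wo })
    {
        ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), v));
        offset += inputSize;
    }

    // hidden-to-hidden matrices, cuDNN order IFAO
    foreach (var v in new[] { ui, uf, ua, uo })
    {
        ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), v));
        offset += hiddenSize;
    }

    // bias rows, cuDNN order IFAO
    foreach (var v in new[] { bi, bf, ba, bo })
    {
        ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), v));
        offset++;
    }
}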
private static void Mlp()
{
    var symX = Symbol.Variable("X");
    var symLabel = Symbol.Variable("label");

    const int nLayers = 2;
    var layerSizes = new List<int>(new[] { 512, 10 });
    var weights = new Symbol[nLayers];
    var biases = new Symbol[nLayers];
    var outputs = new Symbol[nLayers];

    // build the fully connected layers with leaky ReLU activations
    for (var i = 0; i < nLayers; i++)
    {
        var istr = i.ToString();
        weights[i] = Symbol.Variable($"w{istr}");
        biases[i] = Symbol.Variable($"b{istr}");
        var fc = Operators.FullyConnected($"fc{istr}", i == 0 ? symX : outputs[i - 1], weights[i], biases[i], layerSizes[i]);
        outputs[i] = Operators.LeakyReLU($"act{istr}", fc);
    }
    var sym_out = Operators.SoftmaxOutput("softmax", outputs[nLayers - 1], symLabel);

    var ctx_dev = new Context(DeviceType.CPU, 0);

    var array_x = new NDArray(new Shape(128, 28), ctx_dev, false);
    var array_y = new NDArray(new Shape(128), ctx_dev, false);

    var aptr_x = new mx_float[128 * 28];
    var aptr_y = new mx_float[128];

    // we make the data by hand, in 10 classes, with some pattern
    for (var i = 0; i < 128; i++)
    {
        for (var j = 0; j < 28; j++)
        {
            aptr_x[i * 28 + j] = i % 10 * 1.0f;
        }
        aptr_y[i] = i % 10;
    }
    array_x.SyncCopyFromCPU(aptr_x, 128 * 28);
    array_x.WaitToRead();
    array_y.SyncCopyFromCPU(aptr_y, 128);
    array_y.WaitToRead();

    // init the parameters
    var array_w_1 = new NDArray(new Shape(512, 28), ctx_dev, false);
    var array_b_1 = new NDArray(new Shape(512), ctx_dev, false);
    var array_w_2 = new NDArray(new Shape(10, 512), ctx_dev, false);
    var array_b_2 = new NDArray(new Shape(10), ctx_dev, false);

    // the parameters should be initialized from some distribution so the network
    // learns fast, but here we just set a constant value by hand
    array_w_1.Set(0.5f);
    array_b_1.Set(0.0f);
    array_w_2.Set(0.5f);
    array_b_2.Set(0.0f);

    // the grads
    var array_w_1_g = new NDArray(new Shape(512, 28), ctx_dev, false);
    var array_b_1_g = new NDArray(new Shape(512), ctx_dev, false);
    var array_w_2_g = new NDArray(new Shape(10, 512), ctx_dev, false);
    var array_b_2_g = new NDArray(new Shape(10), ctx_dev, false);

    // bind the symbolic network with the NDArrays
    // all the input args
    var inArgs = new List<NDArray>();
    inArgs.Add(array_x);
    inArgs.Add(array_w_1);
    inArgs.Add(array_b_1);
    inArgs.Add(array_w_2);
    inArgs.Add(array_b_2);
    inArgs.Add(array_y);

    // all the grads
    var argGradStore = new List<NDArray>();
    argGradStore.Add(new NDArray()); // we don't need the grad of the input
    argGradStore.Add(array_w_1_g);
    argGradStore.Add(array_b_1_g);
    argGradStore.Add(array_w_2_g);
    argGradStore.Add(array_b_2_g);
    argGradStore.Add(new NDArray()); // neither do we need the grad of the loss

    // how to handle the grads
    var gradReqType = new List<OpReqType>();
    gradReqType.Add(OpReqType.NullOp);
    gradReqType.Add(OpReqType.WriteTo);
    gradReqType.Add(OpReqType.WriteTo);
    gradReqType.Add(OpReqType.WriteTo);
    gradReqType.Add(OpReqType.WriteTo);
    gradReqType.Add(OpReqType.NullOp);

    var auxStates = new List<NDArray>();

    Logging.LG("make the Executor");
    using (var exe = new Executor(sym_out, ctx_dev, inArgs, argGradStore, gradReqType, auxStates))
    {
        Logging.LG("Training");
        const int maxIters = 20000;
        const float learningRate = 0.0001f;
        for (var iter = 0; iter < maxIters; ++iter)
        {
            exe.Forward(true);

            if (iter % 100 == 0)
            {
                Logging.LG($"epoch {iter}");
                var @out = exe.Outputs;
                var cptr = new float[128 * 10];
                @out[0].SyncCopyToCPU(cptr);
                NDArray.WaitAll();
                OutputAccuracy(cptr, aptr_y);
            }

            // update the parameters with plain SGD: w -= learningRate * grad
            exe.Backward();
            for (var i = 1; i < 5; ++i)
            {
                using (var tmp = argGradStore[i] * learningRate)
                    inArgs[i].Subtract(tmp);
            }
            NDArray.WaitAll();
        }
    }
}
public void Run()
{
    Symbol lenet = CreateLenet();
    //Symbol lenet = CreateFrom(@"C:\Works\Projects\80_Project_Python\mxnet\ocr\model\mnist-symbol.json");

    /*setup basic configs*/
    int valFold = 1;
    int W = 28;
    int H = 28;
    uint batchSize = 256;
    int maxEpoch = 20;
    float learning_rate = 0.05f;
    float weight_decay = 0.0001f;

    MnistDataSet ds = new MnistDataSet(@"C:\素材\data\train-images.idx3-ubyte",
        @"C:\素材\data\train-labels.idx1-ubyte");
    //ds.Print();

    List<float> listData = ds.Data;
    List<float> listLabel = ds.Label;
    int dataCount = ds.Count;

    using (FloatListHolder hData = listData.GetHolder())
    using (FloatListHolder hLabel = listLabel.GetHolder())
    {
        // store in main memory, and copy to device memory while training;
        // it's also fine to just store everything in device memory
        NDArray data_array = new NDArray(new Shape((uint)dataCount, 1, (uint)W, (uint)H), ctx_cpu, false);
        NDArray label_array = new NDArray(new Shape((uint)dataCount), ctx_cpu, false);

        data_array.SyncCopyFromCPU(hData.Handle, (ulong)(dataCount * W * H));
        label_array.SyncCopyFromCPU(hLabel.Handle, (ulong)dataCount);
        data_array.WaitToRead();
        label_array.WaitToRead();

        // split the data into a training and a validation fold
        uint train_num = (uint)(dataCount * (1 - valFold / 10.0));
        train_data = data_array.Slice(0, train_num);
        train_label = label_array.Slice(0, train_num);
        val_data = data_array.Slice(train_num, (uint)dataCount);
        val_label = label_array.Slice(train_num, (uint)dataCount);
        Console.WriteLine("Data loaded ok!");

        /*init some of the args*/
        args_map["data"] = data_array.Slice(0, (uint)batchSize).Clone(ctx_dev);
        args_map["data_label"] = label_array.Slice(0, (uint)batchSize).Clone(ctx_dev);
        NDArray.WaitAll();
        Console.WriteLine("Data sliced ok!");

        lenet.InferArgsMap(ctx_dev, args_map, args_map, new XavierInitializer(2));

        Optimizer opt = OptimizerRegistry.Find("sgd");
        opt.SetParam("momentum", 0.9).SetParam("rescale_grad", 1.0 / batchSize);

        for (int ITER = 0; ITER < maxEpoch; ++ITER)
        {
            Stopwatch sw = new Stopwatch();
            sw.Start();

            uint start_index = 0;
            while (start_index < train_num)
            {
                // clamp the final batch so it stays within the training fold
                if (start_index + batchSize > train_num)
                {
                    start_index = train_num - batchSize;
                }
                args_map["data"] = train_data.Slice(start_index, start_index + batchSize).Clone(ctx_dev);
                args_map["data_label"] = train_label.Slice(start_index, start_index + batchSize).Clone(ctx_dev);
                start_index += batchSize;
                NDArray.WaitAll();

                Executor exe = lenet.SimpleBind(ctx_dev, args_map, new XavierInitializer(2));
                exe.Forward(true);
                exe.Backward();
                exe.UpdateAll(opt, learning_rate, weight_decay);
                exe.Dispose();
            }

            sw.Stop();
            Console.WriteLine("Epoch[" + ITER + "] validation accuracy = " + ValAccuracy(batchSize, lenet) +
                ", time cost " + sw.Elapsed.TotalSeconds.ToString("0.00") + "s");
        }
    }

    NDArray.Save("lenet.params", args_map);
}