[Test]
public void CompareMultiLayerPerceptron()
{
    var model = MultiLayerPerceptronModel();
    var ctx = Context.GpuContext(0);

    // require at least 4 GB of GPU memory
    var memMb = ctx.ToGpuContext().Gpu.Device.TotalMemory / 1024.0 / 1024.0;
    if (memMb < 4096.0)
    {
        Assert.Inconclusive("Need more GPU memory.");
    }

    var opt = new GradientDescentOptimizer(ctx, model.Loss.Loss, 0.00008);
    // initialize the parameters for the optimizer
    opt.Initalize();

    // load MNIST data
    var mnist = new MNIST();
    var batcher = new Batcher(ctx, mnist.TrainImages, mnist.TrainLabels);

    // warm-up pass to trigger JIT compilation before timing
    var timer = Stopwatch.StartNew();
    for (var i = 0; i < 1; ++i)
    {
        batcher.Next(5000, opt, model.Images, model.Labels);
        opt.Forward();
        opt.Backward();
        opt.Optimize();
    }
    timer.Stop();
    Console.WriteLine(timer.Elapsed);

    // timed training iterations
    timer.Restart();
    for (var i = 0; i < 5; ++i)
    {
        batcher.Next(10000, opt, model.Images, model.Labels);
        opt.Forward();
        opt.Backward();
        opt.Optimize();
    }
    ctx.ToGpuContext().Stream.Synchronize();
    timer.Stop();
    Console.WriteLine(timer.Elapsed);

    // timed evaluation on the test set
    timer.Restart();
    PrintResult(opt, model, mnist.TestImages, mnist.TestLabels);
    timer.Stop();
    Console.WriteLine(timer.Elapsed);

    CleanMem_();
}
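// The MultiLayerPerceptronModel() factory is not shown in this section. Below is a
// minimal sketch of what it could look like, assuming FullyConnected, ActivationReLU
// and SoftmaxCrossEntropy layer types and a simple Model holder exposing the Images,
// Labels and Loss members used above; layer sizes are placeholders, not the actual
// configuration:
private static Model MultiLayerPerceptronModel()
{
    // MNIST images flattened to 784 features; -1 leaves the batch size open
    var images = Variable<float>(PartialShape.Create(-1, 784));
    var labels = Variable<float>(PartialShape.Create(-1, 10));

    // two hidden layers with ReLU activations, then a softmax classifier
    var fc1 = new FullyConnected<float>(images, 128);
    var act1 = new ActivationReLU<float>(fc1.Output);
    var fc2 = new FullyConnected<float>(act1.Output, 64);
    var act2 = new ActivationReLU<float>(fc2.Output);
    var fc3 = new FullyConnected<float>(act2.Output, 10);
    var loss = new SoftmaxCrossEntropy<float>(fc3.Output, labels);

    return new Model { Images = images, Labels = labels, Loss = loss };
}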
[Test]
public void CompareConvolutionalNeuralNetwork()
{
    var model = ConvolutionalNeuralNetworkModel();
    var ctx = Context.GpuContext(0);

    // require at least 4 GB of GPU memory
    var memMb = ctx.ToGpuContext().Gpu.Device.TotalMemory / 1024.0 / 1024.0;
    if (memMb < 4096.0)
    {
        Assert.Inconclusive("Need more GPU memory.");
    }

    var opt = new GradientDescentOptimizer(ctx, model.Loss.Loss, 0.000008);
    opt.Initalize();

    var mnist = new MNIST();
    var batcher = new Batcher(ctx, mnist.TrainImages, mnist.TrainLabels);

    // warm-up passes to trigger JIT compilation before timing
    var timer = Stopwatch.StartNew();
    for (var i = 0; i < 2; ++i)
    {
        batcher.Next(2500, opt, model.Images, model.Labels);
        opt.Forward();
        opt.Backward();
        opt.Optimize();
    }
    timer.Stop();
    Console.WriteLine(timer.Elapsed);

    // timed training iterations
    timer.Restart();
    for (var i = 0; i < 20; ++i)
    {
        batcher.Next(2500, opt, model.Images, model.Labels);
        opt.Forward();
        opt.Backward();
        opt.Optimize();
    }
    ctx.ToGpuContext().Stream.Synchronize();
    timer.Stop();
    Console.WriteLine(timer.Elapsed);

    // timed evaluation on the test set
    timer.Restart();
    PrintResult(opt, model, mnist.TestImages, mnist.TestLabels);
    timer.Stop();
    Console.WriteLine(timer.Elapsed);

    CleanMem_();
}
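// ConvolutionalNeuralNetworkModel() is likewise defined elsewhere. A rough sketch
// follows, assuming Convolution2D / Pooling2D operators with the constructor shapes
// used below and an unpadded 5x5 convolution (28 -> 24) followed by a 2x2 max-pool
// (24 -> 12); all of these are unverified assumptions, and the real factory may differ:
private static Model ConvolutionalNeuralNetworkModel()
{
    var images = Variable<float>(PartialShape.Create(-1, 784));
    var labels = Variable<float>(PartialShape.Create(-1, 10));

    // reshape flat 784-vectors into 1x28x28 feature maps, then one conv + pool stage
    var conv = new Convolution2D<float>(images.Reshape(-1, 1, 28, 28), 5, 5, 20);
    var act = new ActivationReLU<float>(conv.Output);
    var pool = new Pooling2D<float>(act.Output, PoolingMode.MAX, 2, 2, 2, 2);

    // flatten 20 maps of 12x12 into 2880 features for the softmax classifier
    var fc = new FullyConnected<float>(pool.Output.Reshape(-1, 20 * 12 * 12), 10);
    var loss = new SoftmaxCrossEntropy<float>(fc.Output, labels);

    return new Model { Images = images, Labels = labels, Loss = loss };
}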
[Test]
public void MultinomialRegression()
{
    CleanMem_();
    const long batchSize = 1000L;
    const long epochs = 3;

    var model = MultinomialRegressionModel();
    var ctx = Context.GpuContext(0);
    var opt = new GradientDescentOptimizer(ctx, model.Loss.Loss, 0.0005);
    opt.Initalize();

    var mnist = new MNIST();
    var batcher = new Batcher(ctx, mnist.TrainImages, mnist.TrainLabels);

    for (var e = 1; e <= epochs; ++e)
    {
        var i = 0;
        while (batcher.Next(batchSize, opt, model.Images, model.Labels))
        {
            i++;
            opt.Forward();
            opt.Backward();
            opt.Optimize();
            if ((i % 10 == 0) || ((i == 1) && (e == 1)))
            {
                PrintStatus(e, i, opt, model, mnist.ValidationImages, mnist.ValidationLabels);
            }
        }
    }

    PrintResult(opt, model, mnist.TestImages, mnist.TestLabels);
    CleanMem_();
}
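// A minimal sketch of the MultinomialRegressionModel() factory used above: plain
// softmax regression, i.e. a single linear layer feeding a softmax cross-entropy loss.
// The 784/10 dimensions follow MNIST; the Model holder and layer types are the same
// assumptions as in the MLP sketch above:
private static Model MultinomialRegressionModel()
{
    var images = Variable<float>(PartialShape.Create(-1, 784));
    var labels = Variable<float>(PartialShape.Create(-1, 10));
    var fc = new FullyConnected<float>(images, 10);
    var loss = new SoftmaxCrossEntropy<float>(fc.Output, labels);
    return new Model { Images = images, Labels = labels, Loss = loss };
}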
// Runs the core training loop of the network: forward pass, backpropagation, and
// parameter updates.
[Test]
public void LinearNeuron(Datas data)
{
    CleanMem_();
    const long BatchSize = 1000L;
    const long Epoch = 5;

    var model = LinearModel();
    var ctx = Context.GpuContext(0);

    // require at least 4 GB of GPU memory
    var memMB = ctx.ToGpuContext().Gpu.Device.TotalMemory / 1024.0 / 1024.0;
    if (memMB < 4096.0)
    {
        Assert.Inconclusive("Need more GPU memory.");
    }

    var opt = new GradientDescentOptimizer(ctx, model.Loss.Loss, 0.00005);
    opt.Initalize();

    var batcher = new Batcher(ctx, data.TrainText, data.TrainStory);

    for (var e = 1; e <= Epoch; e++)
    {
        var i = 0;
        while (batcher.Next(BatchSize, opt, model.Text, model.Story))
        {
            i++;
            opt.Forward();
            opt.Backward();
            opt.Optimize();
            if ((i % 10 == 0) || (i == 1 && e == 1))
            {
                PrintStatus(e, i, opt, model, data.TrainText, data.TrainStory);
            }
        }
    }

    //PrintResult(opt, model, data.TestText, data.TestStory);
    // TODO: persist the trained weights and biases somewhere; perhaps just have the
    // model generate a story right away?
    CleanMem_();
}
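// CleanMem_() is a fixture helper not shown in this section. A plausible sketch under
// the assumption that it merely forces a full garbage collection so device buffers held
// by finalizable tensors are released between tests; the real implementation may differ:
private static void CleanMem_()
{
    GC.Collect();
    GC.WaitForPendingFinalizers();
    GC.Collect(); // collect anything resurrected by finalizers
}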
public static void SimpleLogisticRegression()
{
    // smaller configuration, useful for quick checks:
    //const int N = 8;
    //const int D = 5;
    //const int P = 3;
    //const double learn = 0.001;
    const int N = 100;            // number of samples
    const int D = 784;            // input dimension
    const int P = 10;             // number of classes
    const double learn = 0.00005; // learning rate

    var input = Variable<double>();
    var label = Variable<double>();
    var weights = Parameter(0.01 * RandomUniform<double>(Shape.Create(D, P)));
    var pred = Dot(input, weights);
    var loss = L2Loss(pred, label);

    var ctx = Context.GpuContext(0);
    var opt = new GradientDescentOptimizer(ctx, loss, learn);

    // generate synthetic data: label = input . A + 0.1 * B
    var inputData = new double[N, D];
    var matA = new double[D, P];
    var matB = new double[N, P];
    NormalRandomArray(inputData);
    NormalRandomArray(matA);
    NormalRandomArray(matB);
    var labelData = Dot(inputData, matA).Add(matB.Mul(0.1));

    opt.AssignTensor(input, inputData.AsTensor());
    opt.AssignTensor(label, labelData.AsTensor());
    opt.Initalize();

    for (var i = 0; i < 800; ++i)
    {
        opt.Forward();
        opt.Backward();
        opt.Optimize();
        if (i % 20 == 0)
        {
            Console.WriteLine($"loss = {opt.GetTensor(loss).ToScalar()}");
        }
    }
}
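// NormalRandomArray is a helper defined elsewhere. A self-contained sketch, assuming
// it simply fills a 2-D array with standard-normal samples (here via the Box-Muller
// transform; the actual implementation and seeding may differ):
private static void NormalRandomArray(double[,] array)
{
    var rng = new Random(42);
    for (var i = 0; i < array.GetLength(0); ++i)
    {
        for (var j = 0; j < array.GetLength(1); ++j)
        {
            // Box-Muller: turn two uniform samples into one standard-normal sample
            var u1 = 1.0 - rng.NextDouble();
            var u2 = rng.NextDouble();
            array[i, j] = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Cos(2.0 * Math.PI * u2);
        }
    }
}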
public Model(Context ctx, Config cfg, bool isTraining = true, bool usingCuDnn = true)
{
    Config = cfg;
    IsTraining = isTraining;
    UsingCuDnn = usingCuDnn;

    Inputs = Variable<int>(PartialShape.Create(cfg.NumSteps, cfg.BatchSize));
    Targets = Variable<int>(PartialShape.Create(cfg.NumSteps, cfg.BatchSize));

    // embedding
    Embedding = new Embedding<float>(Inputs, cfg.VocabSize, cfg.HiddenSize, initScale: cfg.InitScale);

    // add dropout
    EmbeddedOutput = Embedding.Output;
    if (isTraining && cfg.KeepProb < 1.0)
    {
        var dropout = new Dropout<float>(EmbeddedOutput, dropoutProb: 1.0 - cfg.KeepProb);
        EmbeddedOutput = dropout.Output;
    }

    // rnn layer, with dropout for intermediate lstm layers and for the output
    if (usingCuDnn)
    {
        RnnAccelerated = new Rnn<float>(new LstmRnnType(forgetBiasInit: 0.0), EmbeddedOutput, cfg.NumLayers, cfg.HiddenSize,
            isTraining: isTraining, dropout: isTraining && cfg.KeepProb < 1.0 ? 1.0 - Config.KeepProb : 0.0);
        RnnOutput = RnnAccelerated.Y;
        if (isTraining && cfg.KeepProb < 1.0)
        {
            var dropout = new Dropout<float>(RnnOutput, dropoutProb: 1.0 - cfg.KeepProb);
            RnnOutput = dropout.Output;
        }
    }
    else
    {
        RnnDirect = new Lstm<float>[cfg.NumLayers];
        for (var i = 0; i < cfg.NumLayers; ++i)
        {
            var lstm = new Lstm<float>(i == 0 ? EmbeddedOutput : RnnOutput, cfg.HiddenSize, forgetBiasInit: 0.0);
            RnnDirect[i] = lstm;
            RnnOutput = lstm.Y;
            if (isTraining && cfg.KeepProb < 1.0)
            {
                var dropout = new Dropout<float>(RnnOutput, dropoutProb: 1.0 - cfg.KeepProb);
                RnnOutput = dropout.Output;
            }
        }
    }

    // flatten (steps, batch, hidden) to (steps*batch, hidden) for the softmax projection
    FC = new FullyConnected<float>(RnnOutput.Reshape(RnnOutput.Shape[0] * RnnOutput.Shape[1], RnnOutput.Shape[2]), cfg.VocabSize);
    Loss = new SoftmaxCrossEntropySparse<float>(FC.Output, Targets.Reshape(Targets.Shape[0] * Targets.Shape[1]));

    Optimizer = new GradientDescentOptimizer(ctx, Loss.Loss, cfg.LearningRate, new GlobalNormGradientClipper(cfg.MaxGradNorm));

    // warm up to force JIT compilation, so later timings exclude JIT overhead
    Optimizer.Initalize();
    ResetStates();
    Optimizer.AssignTensor(Inputs, Fill(Shape.Create(Inputs.Shape.AsArray), 0));
    Optimizer.AssignTensor(Targets, Fill(Shape.Create(Targets.Shape.AsArray), 0));
    Optimizer.Forward();
    if (isTraining)
    {
        Optimizer.Backward();
    }

    // now reset parameters and states again before real training
    Optimizer.Initalize();
    ResetStates();
}
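// A hedged usage sketch of how a caller might drive this Model for one mini-batch,
// mirroring the warm-up sequence in the constructor and the Forward/Backward/Optimize
// pattern of the tests above. RunBatch and its int[,] inputs/targets arrays are
// hypothetical; carrying LSTM state across batches (via ResetStates or similar) is
// up to the caller and not shown here:
public void RunBatch(int[,] inputs, int[,] targets)
{
    // upload one (NumSteps, BatchSize) batch of token ids
    Optimizer.AssignTensor(Inputs, inputs.AsTensor());
    Optimizer.AssignTensor(Targets, targets.AsTensor());

    Optimizer.Forward();           // compute the loss
    if (IsTraining)
    {
        Optimizer.Backward();      // backpropagate through the unrolled steps
        Optimizer.Optimize();      // apply the clipped gradient-descent update
    }
}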