private static void Main()
{
    const int imageSize = 28;
    int[] layers = { 128, 64, 10 };
    const int batchSize = 100;
    const int maxEpoch = 10;
    const float learningRate = 0.1f;
    const float weightDecay = 1e-2f;

    var trainIter = new MXDataIter("MNISTIter")
        .SetParam("image", "./mnist_data/train-images-idx3-ubyte")
        .SetParam("label", "./mnist_data/train-labels-idx1-ubyte")
        .SetParam("batch_size", batchSize)
        .SetParam("flat", 1)
        .CreateDataIter();

    var valIter = new MXDataIter("MNISTIter")
        .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte")
        .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte")
        .SetParam("batch_size", batchSize)
        .SetParam("flat", 1)
        .CreateDataIter();

    var net = Mlp(layers);

    Context ctx = Context.Cpu(); // Use CPU for training

    var args = new SortedDictionary<string, NDArray>();
    args["X"] = new NDArray(new Shape(batchSize, imageSize * imageSize), ctx);
    args["label"] = new NDArray(new Shape(batchSize), ctx);

    // Let MXNet infer the shapes of the other parameters, such as weights
    net.InferArgsMap(ctx, args, args);

    // Initialize all parameters with a uniform distribution U(-0.01, 0.01)
    var initializer = new Uniform(0.01f);
    foreach (var arg in args)
    {
        // arg.Key is the parameter name, arg.Value is the parameter value
        initializer.Operator(arg.Key, arg.Value);
    }

    // Create the SGD optimizer
    var opt = OptimizerRegistry.Find("sgd");
    opt.SetParam("rescale_grad", 1.0 / batchSize)
       .SetParam("lr", learningRate)
       .SetParam("wd", weightDecay);

    // Create an executor by binding the parameters to the model
    using (var exec = net.SimpleBind(ctx, args))
    {
        var argNames = net.ListArguments();

        // Start training
        var sw = new Stopwatch();
        for (var iter = 0; iter < maxEpoch; ++iter)
        {
            var samples = 0;
            trainIter.Reset();
            sw.Restart();

            while (trainIter.Next())
            {
                samples += batchSize;
                var dataBatch = trainIter.GetDataBatch();

                // Set data and label
                dataBatch.Data.CopyTo(args["X"]);
                dataBatch.Label.CopyTo(args["label"]);

                // Compute gradients
                exec.Forward(true);
                exec.Backward();

                // Update parameters
                for (var i = 0; i < argNames.Count; ++i)
                {
                    if (argNames[i] == "X" || argNames[i] == "label")
                        continue;

                    opt.Update(i, exec.ArgmentArrays[i], exec.GradientArrays[i]);
                }
            }

            sw.Stop();

            var accuracy = new Accuracy();
            valIter.Reset();
            while (valIter.Next())
            {
                var dataBatch = valIter.GetDataBatch();
                dataBatch.Data.CopyTo(args["X"]);
                dataBatch.Label.CopyTo(args["label"]);

                // A forward pass alone is enough; no gradient is needed when evaluating
                exec.Forward(false);
                accuracy.Update(dataBatch.Label, exec.Outputs[0]);
            }

            var duration = sw.ElapsedMilliseconds / 1000.0;
            Logging.LG($"Epoch: {iter} {samples / duration} samples/sec Accuracy: {accuracy.Get()}");
        }
    }

    MXNet.MXNotifyShutdown();
}
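The Mlp helper that builds the network symbol is not shown above. The following is a minimal sketch of what it could look like, assuming this binding carries over mxnet-cpp's Symbol.Variable and generic Operator("...").SetParam(...).SetInput(...).CreateSymbol(...) pattern. The operator names ("FullyConnected", "Activation", "SoftmaxOutput") come from MXNet's operator registry, but the exact C# signatures here are assumptions, not confirmed API.

// Hypothetical sketch: builds a fully-connected net ending in SoftmaxOutput.
// Variable names "X" and "label" match the shapes bound in Main above.
private static Symbol Mlp(int[] layers)
{
    var x = Symbol.Variable("X");
    var label = Symbol.Variable("label");

    var output = x;
    for (var i = 0; i < layers.Length; ++i)
    {
        var fc = new Operator("FullyConnected")
            .SetParam("num_hidden", layers[i])
            .SetInput("data", output)
            .SetInput("weight", Symbol.Variable($"w{i}"))
            .SetInput("bias", Symbol.Variable($"b{i}"))
            .CreateSymbol($"fc{i}");

        // ReLU between hidden layers; the last layer feeds the softmax directly
        output = i == layers.Length - 1
            ? fc
            : new Operator("Activation")
                  .SetParam("act_type", "relu")
                  .SetInput("data", fc)
                  .CreateSymbol($"relu{i}");
    }

    return new Operator("SoftmaxOutput")
        .SetInput("data", output)
        .SetInput("label", label)
        .CreateSymbol("softmax");
}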
private static void Main()
{
    // Basic config
    const int batchSize = 256;
    const int maxEpoch = 100;
    const float learningRate = 1e-4f;
    const float weightDecay = 1e-4f;

    // Context and net symbol
    var ctx = Context.Gpu();
    var net = AlexnetSymbol(2);

    // argsMap and auxMap are used for saving the parameters
    var argsMap = new Dictionary<string, NDArray>();
    var auxMap = new Dictionary<string, NDArray>();

    // We have to tell MXNet the shape of the data and the label
    argsMap["data"] = new NDArray(new Shape(batchSize, 3, 256, 256), ctx);
    argsMap["label"] = new NDArray(new Shape(batchSize), ctx);

    // With data and label bound, the executor can be generated automatically
    using (var exec = net.SimpleBind(ctx, argsMap))
    {
        var argNames = net.ListArguments();
        var auxiliaryDictionary = exec.AuxiliaryDictionary();
        var argmentDictionary = exec.ArgmentDictionary();

        // To fine-tune from a pre-trained model, load its parameters here instead,
        // e.g. something like: NDArray.Load("./model/alex_params_3", null, argsMap);
        // Otherwise, initialize the parameters with the Xavier initializer
        var xavier = new Xavier(RandType.Gaussian, FactorType.In, 2.34f);
        foreach (var arg in argmentDictionary)
        {
            // Careful: a parameter's name must have specific prefixes or suffixes
            // for the initializer to pick the right scheme
            xavier.Operator(arg.Key, arg.Value);
        }

        // Print every argument's shape to check the net
        foreach (var s in net.ListArguments())
        {
            Logging.LG(s);
            var sb = new StringBuilder();
            var k = argmentDictionary[s].GetShape();
            foreach (var i in k)
            {
                sb.Append($"{i} ");
            }

            Logging.LG(sb.ToString());
        }

        // These binary files should be generated with the im2rec tool,
        // which can be found in mxnet/bin
        var trainIter = new MXDataIter("ImageRecordIter")
            .SetParam("path_imglist", "./data/train.lst")
            .SetParam("path_imgrec", "./data/train.rec")
            .SetParam("data_shape", new Shape(3, 256, 256))
            .SetParam("batch_size", batchSize)
            .SetParam("shuffle", 1)
            .CreateDataIter();

        var valIter = new MXDataIter("ImageRecordIter")
            .SetParam("path_imglist", "./data/val.lst")
            .SetParam("path_imgrec", "./data/val.rec")
            .SetParam("data_shape", new Shape(3, 256, 256))
            .SetParam("batch_size", batchSize)
            .CreateDataIter();

        var opt = OptimizerRegistry.Find("ccsgd");
        opt.SetParam("momentum", 0.9)
           .SetParam("rescale_grad", 1.0 / batchSize)
           .SetParam("clip_gradient", 10)
           .SetParam("lr", learningRate)
           .SetParam("wd", weightDecay);

        var accuracyTrain = new Accuracy();
        var accuracyVal = new Accuracy();
        var loglossVal = new LogLoss();

        for (var iter = 0; iter < maxEpoch; ++iter)
        {
            Logging.LG($"Train Epoch: {iter}");

            // Reset the metric and the data iter every epoch
            accuracyTrain.Reset();
            trainIter.Reset();

            while (trainIter.Next())
            {
                var batch = trainIter.GetDataBatch();
                Logging.LG($"{batch.Index.Length}");

                // Use CopyTo to feed the new data and label to the executor
                batch.Data.CopyTo(argmentDictionary["data"]);
                batch.Label.CopyTo(argmentDictionary["label"]);

                exec.Forward(true);
                exec.Backward();

                for (var i = 0; i < argNames.Count; ++i)
                {
                    if (argNames[i] == "data" || argNames[i] == "label")
                        continue;

                    opt.Update(i, exec.ArgmentArrays[i], exec.GradientArrays[i]);
                }

                NDArray.WaitAll();
                accuracyTrain.Update(batch.Label, exec.Outputs[0]);
            }

            Logging.LG($"ITER: {iter} Train Accuracy: {accuracyTrain.Get()}");

            Logging.LG($"Val Epoch: {iter}");
            accuracyVal.Reset();
            valIter.Reset();
            loglossVal.Reset();

            while (valIter.Next())
            {
                var batch = valIter.GetDataBatch();
                Logging.LG($"{batch.Index.Length}");

                batch.Data.CopyTo(argmentDictionary["data"]);
                batch.Label.CopyTo(argmentDictionary["label"]);

                exec.Forward(false);
                NDArray.WaitAll();

                accuracyVal.Update(batch.Label, exec.Outputs[0]);
                loglossVal.Update(batch.Label, exec.Outputs[0]);
            }

            Logging.LG($"ITER: {iter} Val Accuracy: {accuracyVal.Get()}");
            Logging.LG($"ITER: {iter} Val LogLoss: {loglossVal.Get()}");

            // Save the parameters; copy the dictionary first so that removing
            // "data" and "label" does not mutate the executor's own dictionary
            var savePathParam = $"./model/alex_param_{iter}";
            var saveArgs = new Dictionary<string, NDArray>(argmentDictionary);

            // We do not want to save the data and label
            saveArgs.Remove("data");
            saveArgs.Remove("label");

            // AlexNet has no auxiliary arrays, so there is no need to save auxMap
            Logging.LG($"ITER: {iter} Saving to...{savePathParam}");
            NDArray.Save(savePathParam, saveArgs);
        }

        // Don't forget to release the executor; the using block does that here
    }

    MXNet.MXNotifyShutdown();
}
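AlexnetSymbol(2) builds the full two-class AlexNet graph and is defined elsewhere. For orientation, here is a hedged sketch of the conv-relu-pool stage such a symbol repeats, under the same assumed Operator-chaining API as the Mlp sketch above; the names and signatures are illustrative, not confirmed.

// Hypothetical sketch of one conv -> relu -> pool stage, as repeated inside a
// hand-written AlexnetSymbol; parameter keys follow MXNet's operator docs.
private static Symbol ConvReluPool(string name, Symbol data,
                                   Shape kernel, int numFilter, Shape stride, Shape pad)
{
    var conv = new Operator("Convolution")
        .SetParam("kernel", kernel)
        .SetParam("num_filter", numFilter)
        .SetParam("stride", stride)
        .SetParam("pad", pad)
        .SetInput("data", data)
        .CreateSymbol($"{name}_conv");

    var relu = new Operator("Activation")
        .SetParam("act_type", "relu")
        .SetInput("data", conv)
        .CreateSymbol($"{name}_relu");

    return new Operator("Pooling")
        .SetParam("kernel", new Shape(3, 3))
        .SetParam("stride", new Shape(2, 2))
        .SetParam("pool_type", "max")
        .SetInput("data", relu)
        .CreateSymbol($"{name}_pool");
}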
private static void Main(string[] args)
{
    // Minimum validation accuracy for the run to count as a success
    //var minScore = float.Parse(args[0], NumberStyles.Float, null);
    var minScore = 0.9f;

    const int imageSize = 28;
    var layers = new[] { 128, 64, 10 };
    const int batchSize = 100;
    const int maxEpoch = 10;
    const float learningRate = 0.1f;
    const float weightDecay = 1e-2f;

    var trainIter = new MXDataIter("MNISTIter")
        .SetParam("image", "./mnist_data/train-images-idx3-ubyte")
        .SetParam("label", "./mnist_data/train-labels-idx1-ubyte")
        .SetParam("batch_size", batchSize)
        .SetParam("flat", 1)
        .CreateDataIter();

    var valIter = new MXDataIter("MNISTIter")
        .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte")
        .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte")
        .SetParam("batch_size", batchSize)
        .SetParam("flat", 1)
        .CreateDataIter();

    var net = Mlp(layers);

    var ctx = Context.Cpu(); // Change to Context.Gpu() to train on GPU

    var dictionary = new Dictionary<string, NDArray>();
    dictionary["X"] = new NDArray(new Shape(batchSize, imageSize * imageSize), ctx);
    dictionary["label"] = new NDArray(new Shape(batchSize), ctx);

    // Let MXNet infer the shapes of the other parameters, such as weights
    net.InferArgsMap(ctx, dictionary, dictionary);

    // Initialize all parameters with a uniform distribution U(-0.01, 0.01)
    var initializer = new Uniform(0.01f);
    foreach (var arg in dictionary)
    {
        // arg.Key is the parameter name, arg.Value is the parameter value
        initializer.Operator(arg.Key, arg.Value);
    }

    // Create the SGD optimizer with a factor learning-rate schedule
    var opt = OptimizerRegistry.Find("sgd");
    opt.SetParam("rescale_grad", 1.0 / batchSize)
       .SetParam("lr", learningRate)
       .SetParam("wd", weightDecay);
    var lrSch = new UniquePtr<LRScheduler>(new FactorScheduler(5000, 0.1f));
    opt.SetLearningRateScheduler(lrSch);

    // Create an executor by binding the parameters to the model
    using (var exec = net.SimpleBind(ctx, dictionary))
    {
        var argNames = net.ListArguments();
        float score = 0;

        // Start training
        var sw = new Stopwatch();
        for (var iter = 0; iter < maxEpoch; ++iter)
        {
            var samples = 0;
            trainIter.Reset();
            sw.Restart();

            while (trainIter.Next())
            {
                samples += batchSize;
                var dataBatch = trainIter.GetDataBatch();

                // Data provided by the DataIter live in host memory and must be
                // copied to the training context first
                dataBatch.Data.CopyTo(dictionary["X"]);
                dataBatch.Label.CopyTo(dictionary["label"]);

                // CopyTo runs asynchronously; wait for it to complete
                NDArray.WaitAll();

                // Compute gradients
                exec.Forward(true);
                exec.Backward();

                // Update parameters
                for (var i = 0; i < argNames.Count; ++i)
                {
                    if (argNames[i] == "X" || argNames[i] == "label")
                        continue;

                    var weight = exec.ArgmentArrays[i];
                    var grad = exec.GradientArrays[i];
                    opt.Update(i, weight, grad);
                }
            }

            sw.Stop();

            var acc = new Accuracy();
            valIter.Reset();
            while (valIter.Next())
            {
                var dataBatch = valIter.GetDataBatch();
                dataBatch.Data.CopyTo(dictionary["X"]);
                dataBatch.Label.CopyTo(dictionary["label"]);
                NDArray.WaitAll();

                // A forward pass alone is enough; no gradient is needed when evaluating
                exec.Forward(false);
                acc.Update(dataBatch.Label, exec.Outputs[0]);
            }

            var duration = sw.ElapsedMilliseconds / 1000.0;
            var message = $"Epoch: {iter} {samples / duration} samples/sec Accuracy: {acc.Get()}";
            Logging.LG(message);
            score = acc.Get();
        }

        MXNet.MXNotifyShutdown();

        // Print 0 if the final accuracy reached minScore, 1 otherwise
        var ret = score >= minScore ? 0 : 1;
        Console.WriteLine($"{ret}");
    }
}
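A note on the scheduler: assuming FactorScheduler(step, factor) follows MXNet's usual semantics, it multiplies the learning rate by factor after every step optimizer updates. With FactorScheduler(5000, 0.1f) and a base rate of 0.1, the rate drops to 0.01 after 5,000 updates. MNIST's 60,000 training images at a batch size of 100 give 600 updates per epoch, so over this 10-epoch run (6,000 updates in total) the decay fires exactly once, early in the ninth epoch.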
private static void Main()
{
    // Setup basic configs
    const int W = 28;
    const int H = 28;
    const int batchSize = 128;
    const int maxEpoch = 100;
    const float learningRate = 1e-4f;
    const float weightDecay = 1e-4f;

    var context = Context.Gpu();
    var lenet = LenetSymbol();

    var argsMap = new SortedDictionary<string, NDArray>();
    argsMap["data"] = new NDArray(new Shape(batchSize, 1, W, H), context);
    argsMap["data_label"] = new NDArray(new Shape(batchSize), context);
    lenet.InferArgsMap(context, argsMap, argsMap);

    // Custom initialization for two of the parameters
    argsMap["fc1_w"] = new NDArray(new Shape(500, 4 * 4 * 50), context);
    NDArray.SampleGaussian(0, 1, argsMap["fc1_w"]);
    argsMap["fc2_b"] = new NDArray(new Shape(10), context);
    argsMap["fc2_b"].Set(0);

    var trainIter = new MXDataIter("MNISTIter")
        .SetParam("image", "./mnist_data/train-images-idx3-ubyte")
        .SetParam("label", "./mnist_data/train-labels-idx1-ubyte")
        .SetParam("batch_size", batchSize)
        .SetParam("shuffle", 1)
        .SetParam("flat", 0)
        .CreateDataIter();

    var valIter = new MXDataIter("MNISTIter")
        .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte")
        .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte")
        .CreateDataIter();

    var opt = OptimizerRegistry.Find("ccsgd");
    opt.SetParam("momentum", 0.9)
       .SetParam("rescale_grad", 1.0)
       .SetParam("clip_gradient", 10)
       .SetParam("lr", learningRate)
       .SetParam("wd", weightDecay);

    using (var exec = lenet.SimpleBind(context, argsMap))
    {
        var argNames = lenet.ListArguments();

        // Create metrics
        var trainAccuracy = new Accuracy();
        var valAccuracy = new Accuracy();

        var sw = new Stopwatch();
        for (var iter = 0; iter < maxEpoch; ++iter)
        {
            var samples = 0;
            trainIter.Reset();
            trainAccuracy.Reset();
            sw.Restart();

            while (trainIter.Next())
            {
                samples += batchSize;
                var dataBatch = trainIter.GetDataBatch();

                dataBatch.Data.CopyTo(argsMap["data"]);
                dataBatch.Label.CopyTo(argsMap["data_label"]);
                NDArray.WaitAll();

                // Compute gradients
                exec.Forward(true);
                exec.Backward();

                // Update parameters
                for (var i = 0; i < argNames.Count; ++i)
                {
                    if (argNames[i] == "data" || argNames[i] == "data_label")
                        continue;

                    opt.Update(i, exec.ArgmentArrays[i], exec.GradientArrays[i]);
                }

                // Update metric
                trainAccuracy.Update(dataBatch.Label, exec.Outputs[0]);
            }

            // One epoch of training is finished
            sw.Stop();
            var duration = sw.ElapsedMilliseconds / 1000.0;
            Logging.LG($"Epoch[{iter}] {samples / duration} samples/sec Train-Accuracy={trainAccuracy.Get()}");

            valIter.Reset();
            valAccuracy.Reset();

            while (valIter.Next())
            {
                var dataBatch = valIter.GetDataBatch();
                dataBatch.Data.CopyTo(argsMap["data"]);
                dataBatch.Label.CopyTo(argsMap["data_label"]);
                NDArray.WaitAll();

                // A forward pass alone is enough; no gradient is needed when evaluating
                exec.Forward(false);
                NDArray.WaitAll();

                valAccuracy.Update(dataBatch.Label, exec.Outputs[0]);
            }

            Logging.LG($"Epoch[{iter}] Val-Accuracy={valAccuracy.Get()}");
        }
    }

    MXNet.MXNotifyShutdown();
}
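LenetSymbol is likewise defined elsewhere, but the hand-initialized shapes above (fc1_w of 500 x (4*4*50), fc2_b of 10) pin down the classic LeNet layout on 28x28 inputs: two 5x5 convolutions (20 and 50 filters), each followed by tanh and 2x2 max pooling, then a 500-unit fully connected layer and a 10-way softmax. A sketch under the same assumed Operator API as the earlier examples; signatures are assumptions, not confirmed:

// Hypothetical sketch of LenetSymbol; layer sizes are implied by the
// hand-initialized shapes in Main (28x28 -> 24x24 -> 12x12 -> 8x8 -> 4x4x50).
private static Symbol LenetSymbol()
{
    var data = Symbol.Variable("data");
    var label = Symbol.Variable("data_label");

    // conv1: 20 filters of 5x5, tanh, 2x2 max pool
    var conv1 = new Operator("Convolution")
        .SetParam("kernel", new Shape(5, 5))
        .SetParam("num_filter", 20)
        .SetInput("data", data)
        .CreateSymbol("conv1");
    var tanh1 = new Operator("Activation")
        .SetParam("act_type", "tanh")
        .SetInput("data", conv1)
        .CreateSymbol("tanh1");
    var pool1 = new Operator("Pooling")
        .SetParam("kernel", new Shape(2, 2))
        .SetParam("stride", new Shape(2, 2))
        .SetParam("pool_type", "max")
        .SetInput("data", tanh1)
        .CreateSymbol("pool1");

    // conv2: 50 filters of 5x5, tanh, 2x2 max pool -> 4x4x50 feature map
    var conv2 = new Operator("Convolution")
        .SetParam("kernel", new Shape(5, 5))
        .SetParam("num_filter", 50)
        .SetInput("data", pool1)
        .CreateSymbol("conv2");
    var tanh2 = new Operator("Activation")
        .SetParam("act_type", "tanh")
        .SetInput("data", conv2)
        .CreateSymbol("tanh2");
    var pool2 = new Operator("Pooling")
        .SetParam("kernel", new Shape(2, 2))
        .SetParam("stride", new Shape(2, 2))
        .SetParam("pool_type", "max")
        .SetInput("data", tanh2)
        .CreateSymbol("pool2");

    // fc1 (500 hidden units, hence fc1_w of shape 500 x 4*4*50), then fc2 (10 classes)
    var flatten = new Operator("Flatten")
        .SetInput("data", pool2)
        .CreateSymbol("flatten");
    var fc1 = new Operator("FullyConnected")
        .SetParam("num_hidden", 500)
        .SetInput("data", flatten)
        .CreateSymbol("fc1");
    var tanh3 = new Operator("Activation")
        .SetParam("act_type", "tanh")
        .SetInput("data", fc1)
        .CreateSymbol("tanh3");
    var fc2 = new Operator("FullyConnected")
        .SetParam("num_hidden", 10)
        .SetInput("data", tanh3)
        .CreateSymbol("fc2");

    return new Operator("SoftmaxOutput")
        .SetInput("data", fc2)
        .SetInput("label", label)
        .CreateSymbol("softmax");
}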
private static void Main()
{
    const uint batchSize = 50;
    const uint maxEpoch = 100;
    const float learningRate = 1e-4f;
    const float weightDecay = 1e-4f;

    var googlenet = GoogleNetSymbol(101 + 1); // +1 is BACKGROUND_Google

    var argsMap = new Dictionary<string, NDArray>();
    var auxMap = new Dictionary<string, NDArray>();

    // Change the device type here if you want to use a GPU
    var context = Context.Cpu();

    argsMap["data"] = new NDArray(new Shape(batchSize, 3, 256, 256), context);
    argsMap["data_label"] = new NDArray(new Shape(batchSize), context);
    googlenet.InferArgsMap(context, argsMap, argsMap);

    var trainIter = new MXDataIter("ImageRecordIter")
        .SetParam("path_imglist", "train.lst")
        .SetParam("path_imgrec", "train.rec")
        .SetParam("data_shape", new Shape(3, 256, 256))
        .SetParam("batch_size", batchSize)
        .SetParam("shuffle", 1)
        .CreateDataIter();

    var valIter = new MXDataIter("ImageRecordIter")
        .SetParam("path_imglist", "val.lst")
        .SetParam("path_imgrec", "val.rec")
        .SetParam("data_shape", new Shape(3, 256, 256))
        .SetParam("batch_size", batchSize)
        .CreateDataIter();

    var opt = OptimizerRegistry.Find("ccsgd");
    opt.SetParam("momentum", 0.9)
       .SetParam("rescale_grad", 1.0 / batchSize)
       .SetParam("clip_gradient", 10)
       .SetParam("lr", learningRate)
       .SetParam("wd", weightDecay);

    using (var exec = googlenet.SimpleBind(context, argsMap))
    {
        var argNames = googlenet.ListArguments();

        for (var iter = 0; iter < maxEpoch; ++iter)
        {
            Logging.LG($"Epoch: {iter}");

            trainIter.Reset();
            while (trainIter.Next())
            {
                var dataBatch = trainIter.GetDataBatch();
                dataBatch.Data.CopyTo(argsMap["data"]);
                dataBatch.Label.CopyTo(argsMap["data_label"]);
                NDArray.WaitAll();

                exec.Forward(true);
                exec.Backward();

                for (var i = 0; i < argNames.Count; ++i)
                {
                    if (argNames[i] == "data" || argNames[i] == "data_label")
                        continue;

                    var weight = exec.ArgmentArrays[i];
                    var grad = exec.GradientArrays[i];
                    opt.Update(i, weight, grad);
                }
            }

            var acu = new Accuracy();
            valIter.Reset();
            while (valIter.Next())
            {
                var dataBatch = valIter.GetDataBatch();
                dataBatch.Data.CopyTo(argsMap["data"]);
                dataBatch.Label.CopyTo(argsMap["data_label"]);
                NDArray.WaitAll();

                exec.Forward(false);
                NDArray.WaitAll();
                acu.Update(dataBatch.Label, exec.Outputs[0]);
            }

            Logging.LG($"Accuracy: {acu.Get()}");
        }
    }

    MXNet.MXNotifyShutdown();
}
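GoogleNetSymbol assembles GoogLeNet from repeated Inception modules. A hedged sketch of the conv + relu building block such a module reuses, under the same assumed Operator API as the earlier sketches (the signatures are illustrative, not confirmed):

// Hypothetical sketch of the conv + relu factory a hand-written
// GoogleNetSymbol would call inside every Inception module.
private static Symbol ConvFactory(Symbol data, int numFilter, Shape kernel,
                                  Shape stride, Shape pad, string name)
{
    var conv = new Operator("Convolution")
        .SetParam("kernel", kernel)
        .SetParam("num_filter", numFilter)
        .SetParam("stride", stride)
        .SetParam("pad", pad)
        .SetInput("data", data)
        .CreateSymbol($"conv_{name}");

    return new Operator("Activation")
        .SetParam("act_type", "relu")
        .SetInput("data", conv)
        .CreateSymbol($"relu_{name}");
}

An Inception module then runs four such branches (1x1, 3x3, and 5x5 convolutions plus a pooled 1x1 projection) over the same input and joins their outputs along the channel axis with the Concat operator.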