Example #1
        private static void MNISTTraining()
        {
            uint batchSize = 32;
            var  trainIter = new MXDataIter("MNISTIter")
                             .SetParam("image", "./mnist_data/train-images-idx3-ubyte")
                             .SetParam("label", "./mnist_data/train-labels-idx1-ubyte")
                             .SetParam("batch_size", batchSize)
                             .SetParam("flat", 1)
                             .CreateDataIter();
            var valIter = new MXDataIter("MNISTIter")
                          .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte")
                          .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte")
                          .SetParam("batch_size", batchSize)
                          .SetParam("flat", 1)
                          .CreateDataIter();
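            // flat=1 makes MNISTIter emit each image as a flat 784-vector rather than 1x28x28.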

            var model = new Sequential(new Shape(28 * 28), 10);

            model.AddHidden(new Dense(28 * 28, ActivationType.ReLU, new GlorotUniform()));
            model.AddHidden(new Dropout(0.25f));
            model.AddHidden(new Dense(28 * 28, ActivationType.ReLU, new GlorotUniform()));

            model.Compile(OptimizerType.SGD, LossType.CategorialCrossEntropy, "accuracy");
            model.Fit(trainIter, 10, batchSize, valIter);
        }
Example #2
        public static ValueTuple <DataIter, DataIter> MNIST(string trainImagesPath, string trainLabelPath, string valImagesPath, string valLabelPath, uint batch_size = 32, int flat = 0)
        {
            var trainIter = new MXDataIter("MNISTIter");

            trainIter.SetParam("image", trainImagesPath);
            trainIter.SetParam("label", trainLabelPath);
            trainIter.SetParam("batch_size", batch_size);
            trainIter.SetParam("dtype", "float32");
            trainIter.SetParam("flat", flat.ToString());

            var valIter = new MXDataIter("MNISTIter");

            valIter.SetParam("image", valImagesPath);
            valIter.SetParam("label", valLabelPath);
            valIter.SetParam("batch_size", batch_size);
            valIter.SetParam("dtype", "float32");
            valIter.SetParam("flat", flat.ToString());

            return ValueTuple.Create(trainIter, valIter);
        }
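A minimal usage sketch for this helper, assuming the same ./mnist_data file layout used by the other examples (the paths and batch size here are illustrative):

        var (trainIter, valIter) = MNIST(
            "./mnist_data/train-images-idx3-ubyte",
            "./mnist_data/train-labels-idx1-ubyte",
            "./mnist_data/t10k-images-idx3-ubyte",
            "./mnist_data/t10k-labels-idx1-ubyte",
            batch_size: 32,
            flat: 1); // flat: 1 for flattened 784-vectors (MLPs), 0 for 1x28x28 images (CNNs)

        // Drain one epoch from the training iterator.
        trainIter.Reset();
        while (trainIter.Next())
        {
            var batch = trainIter.GetDataBatch();
            // batch.Data and batch.Label feed an executor, as in the examples below.
        }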
Example #3
        private static void Main()
        {
            const int imageSize = 28;

            int[]       layers       = { 128, 64, 10 };
            const int   batchSize    = 100;
            const int   maxEpoch     = 10;
            const float learningRate = 0.1f;
            const float weightDecay  = 1e-2f;

            var trainIter = new MXDataIter("MNISTIter")
                            .SetParam("image", "./mnist_data/train-images-idx3-ubyte")
                            .SetParam("label", "./mnist_data/train-labels-idx1-ubyte")
                            .SetParam("batch_size", batchSize)
                            .SetParam("flat", 1)
                            .CreateDataIter();
            var valIter = new MXDataIter("MNISTIter")
                          .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte")
                          .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte")
                          .SetParam("batch_size", batchSize)
                          .SetParam("flat", 1)
                          .CreateDataIter();

            var net = Mlp(layers);

            Context ctx = Context.Cpu();  // Use CPU for training

            var args = new SortedDictionary <string, NDArray>();

            args["X"]     = new NDArray(new Shape(batchSize, imageSize * imageSize), ctx);
            args["label"] = new NDArray(new Shape(batchSize), ctx);
            // Let MXNet infer the shapes of other parameters such as weights
            net.InferArgsMap(ctx, args, args);

            // Initialize all parameters with uniform distribution U(-0.01, 0.01)
            var initializer = new Uniform(0.01f);

            foreach (var arg in args)
            {
                // arg.Key is the parameter name, and arg.Value is the value
                initializer.Operator(arg.Key, arg.Value);
            }

            // Create sgd optimizer
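            // rescale_grad = 1/batchSize averages the summed mini-batch gradient before each update.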
            var opt = OptimizerRegistry.Find("sgd");

            opt.SetParam("rescale_grad", 1.0 / batchSize)
            .SetParam("lr", learningRate)
            .SetParam("wd", weightDecay);

            // Create executor by binding parameters to the model
            using (var exec = net.SimpleBind(ctx, args))
            {
                var argNames = net.ListArguments();

                // Start training
                var sw = new Stopwatch();
                for (var iter = 0; iter < maxEpoch; ++iter)
                {
                    var samples = 0;
                    trainIter.Reset();

                    sw.Restart();

                    while (trainIter.Next())
                    {
                        samples += batchSize;
                        var dataBatch = trainIter.GetDataBatch();
                        // Set data and label
                        dataBatch.Data.CopyTo(args["X"]);
                        dataBatch.Label.CopyTo(args["label"]);

                        // Compute gradients
                        exec.Forward(true);
                        exec.Backward();
                        // Update parameters
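                        // The index i identifies the parameter, letting the optimizer keep per-parameter state.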
                        for (var i = 0; i < argNames.Count; ++i)
                        {
                            if (argNames[i] == "X" || argNames[i] == "label")
                            {
                                continue;
                            }

                            opt.Update(i, exec.ArgmentArrays[i], exec.GradientArrays[i]);
                        }
                    }

                    sw.Stop();

                    var accuracy = new Accuracy();
                    valIter.Reset();
                    while (valIter.Next())
                    {
                        var dataBatch = valIter.GetDataBatch();
                        dataBatch.Data.CopyTo(args["X"]);
                        dataBatch.Label.CopyTo(args["label"]);
                        // Forward pass is enough as no gradient is needed when evaluating
                        exec.Forward(false);
                        accuracy.Update(dataBatch.Label, exec.Outputs[0]);
                    }

                    var duration = sw.ElapsedMilliseconds / 1000.0;
                    Logging.LG($"Epoch: {iter} {samples / duration} samples/sec Accuracy: {accuracy.Get()}");
                }
            }

            MXNet.MXNotifyShutdown();
        }
Example #4
        private static void Main()
        {
            /*basic config*/
            const int   batchSize    = 256;
            const int   maxEpoch     = 100;
            const float learningRate = 1e-4f;
            const float weightDecay  = 1e-4f;

            /*context and net symbol*/
            var ctx = Context.Gpu();
            var net = AlexnetSymbol(2);

            /*argsMap and auxMap are used for saving the parameters*/
            var argsMap = new Dictionary <string, NDArray>();
            var auxMap  = new Dictionary <string, NDArray>();

            /*we must tell MXNet the shape of data and label*/
            argsMap["data"]  = new NDArray(new Shape(batchSize, 3, 256, 256), ctx);
            argsMap["label"] = new NDArray(new Shape(batchSize), ctx);

            /*with data and label bound, the executor can be generated automatically*/
            using (var exec = net.SimpleBind(ctx, argsMap))
            {
                var argNames            = net.ListArguments();
                var auxiliaryDictionary = exec.AuxiliaryDictionary();
                var argmentDictionary   = exec.ArgmentDictionary();

                /*if fine-tuning from a pre-trained model, load its parameters here*/
                // NDArray.Load("./model/alex_params_3", null, argsMap);
                /*otherwise, use the Xavier initializer to initialize the parameters*/
                var xavier = new Xavier(RandType.Gaussian, FactorType.In, 2.34f);
                foreach (var arg in argmentDictionary)
                {
                    /*be careful here: the arg's name must have a specific prefix or suffix
                     * for the initializer to recognize it*/
                    xavier.Operator(arg.Key, arg.Value);
                }

                /*print out to check the shape of the net*/
                foreach (var s in net.ListArguments())
                {
                    Logging.LG(s);

                    var sb = new StringBuilder();
                    var k  = argmentDictionary[s].GetShape();
                    foreach (var i in k)
                    {
                        sb.Append($"{i} ");
                    }

                    Logging.LG(sb.ToString());
                }

                /*these binary files should be generated using the im2rec tool, which can
                 * be found in mxnet/bin*/
                var trainIter = new MXDataIter("ImageRecordIter")
                                .SetParam("path_imglist", "./data/train.lst")
                                .SetParam("path_imgrec", "./data/train.rec")
                                .SetParam("data_shape", new Shape(3, 256, 256))
                                .SetParam("batch_size", batchSize)
                                .SetParam("shuffle", 1)
                                .CreateDataIter();
                var valIter = new MXDataIter("ImageRecordIter")
                              .SetParam("path_imglist", "./data/val.lst")
                              .SetParam("path_imgrec", "./data/val.rec")
                              .SetParam("data_shape", new Shape(3, 256, 256))
                              .SetParam("batch_size", batchSize)
                              .CreateDataIter();

                var opt = OptimizerRegistry.Find("ccsgd");
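                // clip_gradient caps each gradient element at +/-10; rescale_grad averages the batch gradient.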
                opt.SetParam("momentum", 0.9)
                .SetParam("rescale_grad", 1.0 / batchSize)
                .SetParam("clip_gradient", 10)
                .SetParam("lr", learningRate)
                .SetParam("wd", weightDecay);

                var accuracyTrain = new Accuracy();
                var accuracyVal   = new Accuracy();
                var loglossVal    = new LogLoss();
                for (var iter = 0; iter < maxEpoch; ++iter)
                {
                    Logging.LG($"Train Epoch: {iter}");
                    /*reset the metric every epoch*/
                    accuracyTrain.Reset();
                    /*reset the data iter every epoch*/
                    trainIter.Reset();
                    while (trainIter.Next())
                    {
                        var batch = trainIter.GetDataBatch();
                        Logging.LG($"{trainIter.GetDataBatch().Index.Length}");
                        /*use CopyTo to feed new data and label to the executor*/
                        batch.Data.CopyTo(argmentDictionary["data"]);
                        batch.Label.CopyTo(argmentDictionary["label"]);
                        exec.Forward(true);
                        exec.Backward();
                        for (var i = 0; i < argNames.Count; ++i)
                        {
                            if (argNames[i] == "data" || argNames[i] == "label")
                            {
                                continue;
                            }
                            opt.Update(i, exec.ArgmentArrays[i], exec.GradientArrays[i]);
                        }

                        NDArray.WaitAll();
                        accuracyTrain.Update(batch.Label, exec.Outputs[0]);
                    }
                    Logging.LG($"ITER: {iter} Train Accuracy: {accuracyTrain.Get()}");

                    Logging.LG($"Val Epoch: {iter}");
                    accuracyVal.Reset();
                    valIter.Reset();
                    loglossVal.Reset();
                    while (valIter.Next())
                    {
                        var batch = valIter.GetDataBatch();
                        Logging.LG($"{valIter.GetDataBatch().Index.Length}");
                        batch.Data.CopyTo(argmentDictionary["data"]);
                        batch.Label.CopyTo(argmentDictionary["label"]);
                        exec.Forward(false);
                        NDArray.WaitAll();
                        accuracyVal.Update(batch.Label, exec.Outputs[0]);
                        loglossVal.Update(batch.Label, exec.Outputs[0]);
                    }
                    Logging.LG($"ITER: {iter} Val Accuracy: {accuracyVal.Get()}");
                    Logging.LG($"ITER: {iter} Val LogLoss: {loglossVal.Get()}");

                    /*save the parameters*/
                    var savePathParam = $"./model/alex_param_{iter}";
                    var saveArgs      = argmentDictionary;
                    /*we do not want to save the data and label*/
                    if (saveArgs.ContainsKey("data"))
                    {
                        saveArgs.Remove("data");
                    }
                    if (saveArgs.ContainsKey("label"))
                    {
                        saveArgs.Remove("label");
                    }

                    /*AlexNet does not have any aux arrays, so there is no need to save
                     * auxMap*/
                    Logging.LG($"ITER: {iter} Saving to...{savePathParam}");
                    NDArray.Save(savePathParam, saveArgs);
                }
                /*the using block releases the executor*/
            }

            MXNet.MXNotifyShutdown();
        }
Example #5
        private static void Main()
        {
            /*setup basic configs*/
            const int   W            = 28;
            const int   H            = 28;
            const int   batchSize    = 128;
            const int   maxEpoch     = 100;
            const float learningRate = 1e-4f;
            const float weightDecay  = 1e-4f;

            var context = Context.Gpu();

            var lenet   = LenetSymbol();
            var argsMap = new SortedDictionary <string, NDArray>();

            argsMap["data"]       = new NDArray(new Shape(batchSize, 1, W, H), contest);
            argsMap["data_label"] = new NDArray(new Shape(batchSize), contest);
            lenet.InferArgsMap(contest, argsMap, argsMap);
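            // InferArgsMap has inferred shapes for all remaining parameters; the two
            // entries below are then deliberately re-initialized by hand.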

            argsMap["fc1_w"] = new NDArray(new Shape(500, 4 * 4 * 50), contest);
            NDArray.SampleGaussian(0, 1, argsMap["fc1_w"]);
            argsMap["fc2_b"] = new NDArray(new Shape(10), contest);
            argsMap["fc2_b"].Set(0);

            var trainIter = new MXDataIter("MNISTIter")
                            .SetParam("image", "./mnist_data/train-images-idx3-ubyte")
                            .SetParam("label", "./mnist_data/train-labels-idx1-ubyte")
                            .SetParam("batch_size", batchSize)
                            .SetParam("shuffle", 1)
                            .SetParam("flat", 0)
                            .CreateDataIter();
            var valIter = new MXDataIter("MNISTIter")
                          .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte")
                          .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte")
                          .SetParam("batch_size", batchSize)
                          .SetParam("flat", 0)
                          .CreateDataIter();

            var opt = OptimizerRegistry.Find("ccsgd");

            opt.SetParam("momentum", 0.9)
            .SetParam("rescale_grad", 1.0)
            .SetParam("clip_gradient", 10)
            .SetParam("lr", learningRate)
            .SetParam("wd", weightDecay);

            using (var exec = lenet.SimpleBind(context, argsMap))
            {
                var argNames = lenet.ListArguments();

                // Create metrics
                var trainAccuracy = new Accuracy();
                var valAccuracy   = new Accuracy();

                var sw = new Stopwatch();
                for (var iter = 0; iter < maxEpoch; ++iter)
                {
                    var samples = 0;
                    trainIter.Reset();
                    trainAccuracy.Reset();

                    sw.Restart();

                    while (trainIter.Next())
                    {
                        samples += batchSize;
                        var dataBatch = trainIter.GetDataBatch();

                        dataBatch.Data.CopyTo(argsMap["data"]);
                        dataBatch.Label.CopyTo(argsMap["data_label"]);
                        NDArray.WaitAll();

                        // Compute gradients
                        exec.Forward(true);
                        exec.Backward();

                        // Update parameters
                        for (var i = 0; i < argNames.Count; ++i)
                        {
                            if (argNames[i] == "data" || argNames[i] == "data_label")
                            {
                                continue;
                            }
                            opt.Update(i, exec.ArgmentArrays[i], exec.GradientArrays[i]);
                        }

                        // Update metric
                        trainAccuracy.Update(dataBatch.Label, exec.Outputs[0]);
                    }

                    // one epoch of training is finished
                    sw.Stop();
                    var duration = sw.ElapsedMilliseconds / 1000.0;
                    Logging.LG($"Epoch[{iter}] {samples / duration} samples/sec Train-Accuracy={trainAccuracy.Get()}");

                    valIter.Reset();
                    valAccuracy.Reset();

                    while (valIter.Next())
                    {
                        var dataBatch = valIter.GetDataBatch();
                        dataBatch.Data.CopyTo(argsMap["data"]);
                        dataBatch.Label.CopyTo(argsMap["data_label"]);
                        NDArray.WaitAll();

                        // Only forward pass is enough as no gradient is needed when evaluating
                        exec.Forward(false);
                        NDArray.WaitAll();
                        valAccuracy.Update(dataBatch.Label, exec.Outputs[0]);
                    }

                    Logging.LG($"Epoch[{iter}] Val-Accuracy={valAccuracy.Get()}");
                }
            }

            MXNet.MXNotifyShutdown();
        }
Example #6
        private static void Main(string[] args)
        {
            //var minScore = float.Parse(args[0], NumberStyles.Float, null);
            var minScore = 0.9f;

            const int   imageSize    = 28;
            var         layers       = new[] { 128, 64, 10 };
            const int   batchSize    = 100;
            const int   maxEpoch     = 10;
            const float learningRate = 0.1f;
            const float weightDecay  = 1e-2f;

            var trainIter = new MXDataIter("MNISTIter")
                            .SetParam("image", "./mnist_data/train-images-idx3-ubyte")
                            .SetParam("label", "./mnist_data/train-labels-idx1-ubyte")
                            .SetParam("batch_size", batchSize)
                            .SetParam("flat", 1)
                            .CreateDataIter();
            var valIter = new MXDataIter("MNISTIter")
                          .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte")
                          .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte")
                          .SetParam("batch_size", batchSize)
                          .SetParam("flat", 1)
                          .CreateDataIter();

            var net = Mlp(layers);

            var ctx = Context.Cpu();  // Use CPU for training

            var dictionary = new Dictionary <string, NDArray>();

            dictionary["X"]     = new NDArray(new Shape(batchSize, imageSize * imageSize), ctx);
            dictionary["label"] = new NDArray(new Shape(batchSize), ctx);
            // Let MXNet infer shapes of other parameters such as weights
            net.InferArgsMap(ctx, dictionary, dictionary);

            // Initialize all parameters with uniform distribution U(-0.01, 0.01)
            var initializer = new Uniform(0.01f);

            foreach (var arg in dictionary)
            {
                // arg.Key is the parameter name, and arg.Value is the value
                initializer.Operator(arg.Key, arg.Value);
            }

            // Create sgd optimizer
            var opt = OptimizerRegistry.Find("sgd");

            opt.SetParam("rescale_grad", 1.0 / batchSize)
            .SetParam("lr", learningRate)
            .SetParam("wd", weightDecay);
            var lrSch = new UniquePtr <LRScheduler>(new FactorScheduler(5000, 0.1f));

            opt.SetLearningRateScheduler(lrSch);

            // Create executor by binding parameters to the model
            using (var exec = net.SimpleBind(ctx, dictionary))
            {
                var argNames = net.ListArguments();

                float score = 0;
                // Start training

                var sw = new Stopwatch();
                for (var iter = 0; iter < maxEpoch; ++iter)
                {
                    var samples = 0;
                    trainIter.Reset();

                    sw.Restart();
                    while (trainIter.Next())
                    {
                        samples += batchSize;
                        var dataBatch = trainIter.GetDataBatch();
                        // Data provided by DataIter are stored in host memory and must be copied into the bound input arrays.
                        dataBatch.Data.CopyTo(dictionary["X"]);
                        dataBatch.Label.CopyTo(dictionary["label"]);
                        // CopyTo runs asynchronously; wait for it to complete.
                        NDArray.WaitAll();

                        // Compute gradients
                        exec.Forward(true);
                        exec.Backward();
                        // Update parameters
                        for (var i = 0; i < argNames.Count; ++i)
                        {
                            if (argNames[i] == "X" || argNames[i] == "label")
                            {
                                continue;
                            }

                            var weight = exec.ArgmentArrays[i];
                            var grad   = exec.GradientArrays[i];
                            opt.Update(i, weight, grad);
                        }
                    }

                    sw.Stop();

                    var acc = new Accuracy();
                    valIter.Reset();
                    while (valIter.Next())
                    {
                        var dataBatch = valIter.GetDataBatch();
                        dataBatch.Data.CopyTo(dictionary["X"]);
                        dataBatch.Label.CopyTo(dictionary["label"]);
                        NDArray.WaitAll();
                        // Only forward pass is enough as no gradient is needed when evaluating
                        exec.Forward(false);
                        acc.Update(dataBatch.Label, exec.Outputs[0]);
                    }

                    var duration = sw.ElapsedMilliseconds / 1000.0;
                    var message  = $"Epoch: {iter} {samples / duration} samples/sec Accuracy: {acc.Get()}";
                    Logging.LG(message);
                    score = acc.Get();
                }

                MXNet.MXNotifyShutdown();
                var ret = score >= minScore ? 0 : 1;
                Console.WriteLine($"{ret}");
            }
        }
Example #7
        private static void Main()
        {
            const uint  batchSize    = 50;
            const uint  maxEpoch     = 100;
            const float learningRate = 1e-4f;
            const float weightDecay  = 1e-4f;

            var googlenet = GoogleNetSymbol(101 + 1); // +1 is BACKGROUND_Google
            var argsMap   = new Dictionary <string, NDArray>();
            var auxMap    = new Dictionary <string, NDArray>();

            // change device type if you want to use GPU
            var context = Context.Cpu();

            argsMap["data"]       = new NDArray(new Shape(batchSize, 3, 256, 256), context);
            argsMap["data_label"] = new NDArray(new Shape(batchSize), context);
            googlenet.InferArgsMap(context, argsMap, argsMap);

            var trainIter = new MXDataIter("ImageRecordIter")
                            .SetParam("path_imglist", "train.lst")
                            .SetParam("path_imgrec", "train.rec")
                            .SetParam("data_shape", new Shape(3, 256, 256))
                            .SetParam("batch_size", batchSize)
                            .SetParam("shuffle", 1)
                            .CreateDataIter();

            var valIter = new MXDataIter("ImageRecordIter")
                          .SetParam("path_imglist", "val.lst")
                          .SetParam("path_imgrec", "val.rec")
                          .SetParam("data_shape", new Shape(3, 256, 256))
                          .SetParam("batch_size", batchSize)
                          .CreateDataIter();

            var opt = OptimizerRegistry.Find("ccsgd");

            opt.SetParam("momentum", 0.9)
            .SetParam("rescale_grad", 1.0 / batchSize)
            .SetParam("clip_gradient", 10)
            .SetParam("lr", learningRate)
            .SetParam("wd", weightDecay);

            using (var exec = googlenet.SimpleBind(context, argsMap))
            {
                var argNames = googlenet.ListArguments();

                for (var iter = 0; iter < maxEpoch; ++iter)
                {
                    Logging.LG($"Epoch: {iter}");

                    trainIter.Reset();
                    while (trainIter.Next())
                    {
                        var dataBatch = trainIter.GetDataBatch();
                        dataBatch.Data.CopyTo(argsMap["data"]);
                        dataBatch.Label.CopyTo(argsMap["data_label"]);
                        NDArray.WaitAll();
                        exec.Forward(true);
                        exec.Backward();
                        for (var i = 0; i < argNames.Count; ++i)
                        {
                            if (argNames[i] == "data" || argNames[i] == "data_label")
                            {
                                continue;
                            }

                            var weight = exec.ArgmentArrays[i];
                            var grad   = exec.GradientArrays[i];
                            opt.Update(i, weight, grad);
                        }
                    }

                    var acu = new Accuracy();
                    valIter.Reset();
                    while (valIter.Next())
                    {
                        var dataBatch = valIter.GetDataBatch();
                        dataBatch.Data.CopyTo(argsMap["data"]);
                        dataBatch.Label.CopyTo(argsMap["data_label"]);
                        NDArray.WaitAll();
                        exec.Forward(false);
                        NDArray.WaitAll();
                        acu.Update(dataBatch.Label, exec.Outputs[0]);
                    }

                    Logging.LG($"Accuracy: {acu.Get()}");
                }
            }

            MXNet.MXNotifyShutdown();
        }