Example #1
        public void TestOptimizer()
        {
            IFunction network = new ModuleList(
                N.Linear(2, 10),
                N.ReLU(),
                N.Linear(10, 1)
                );
            // A plain SGD optimizer would also work: new SGD(network, 0.01);
            Optimizer sgd = new MomentumSGD(network);

            // Alternatively, generate a fixed dataset once: Tensor[] data = this._TestData(1000);
            for (int i = 0; i < 100; i++)
            {
                Tensor[] data = this._TestData(100); // fresh batch of 100 samples each step

                Tensor X = network.Forward(data[0]);
                Tensor loss = F.MSELoss(X, data[1]);
                Console.WriteLine($"{i} step: {loss.Data[0]}"); // log the training loss
                loss.Backward();
                sgd.Update();
            }
        }
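The example assumes a _TestData helper on the test class that returns a pair of tensors: inputs of shape (count, 2) and regression targets of shape (count, 1). That helper is not shown above; the sketch below is a hypothetical reconstruction, and the Tensor(float[] data, int[] shape) constructor it uses is an assumption that may not match the library's actual API.

        // Hypothetical sketch of the _TestData helper assumed by the example.
        // It builds `count` random two-feature inputs and a scalar target y = a + b.
        // NOTE: the Tensor(float[] data, int[] shape) constructor is an assumption.
        private Tensor[] _TestData(int count)
        {
            Random rand = new Random();
            float[] x = new float[count * 2];
            float[] y = new float[count];

            for (int i = 0; i < count; i++)
            {
                float a = (float)rand.NextDouble();
                float b = (float)rand.NextDouble();
                x[i * 2]     = a;
                x[i * 2 + 1] = b;
                y[i]         = a + b; // target the network should learn to regress
            }

            return new[]
            {
                new Tensor(x, new[] { count, 2 }), // inputs
                new Tensor(y, new[] { count, 1 })  // targets
            };
        }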
Example #2
        const Real L2_SCALE = 1e-4f;             //L2 weight-decay scale

        public static void Run()
        {
            //Prepare the MNIST data
            Console.WriteLine("MNIST data loading...");
            MnistData <Real> mnistData = new MnistData <Real>();

            //Get the full test set for evaluation
            TestDataSet <Real> datasetY = mnistData.Eval.GetAllDataSet();

            Console.WriteLine("\nNetwork initializing...");

            int numBatches    = mnistData.Train.Length / BATCH_SIZE; // 600 = 60000 / 100
            int batchPerEpoch = numBatches;

            int[] boundaries = { LR_DROP_EPOCH * batchPerEpoch, (LR_DROP_EPOCH + 20) * batchPerEpoch }; // steps where the learning rate drops

            //Per-layer sparsity overrides (layer3 stays dense)
            Dictionary <string, Real> customSparsities = new Dictionary <string, Real>
            {
                { "layer2", END_SPARSITY * SPARSITY_SCALE },
                { "layer3", END_SPARSITY * 0 }
            };

            MaskedLinear <Real> layer1 = new MaskedLinear <Real>(28 * 28, 300, name: "layer1", gpuEnable: true);
            MaskedLinear <Real> layer2 = new MaskedLinear <Real>(300, 100, name: "layer2", gpuEnable: true);
            MaskedLinear <Real> layer3 = new MaskedLinear <Real>(100, 10, name: "layer3", gpuEnable: true);

            //Compose the network layers in a FunctionStack
            FunctionStack <Real> nn = new FunctionStack <Real>(
                layer1,
                new ReLU <Real>(name: "l1 ReLU"),
                layer2,
                new ReLU <Real>(name: "l2 ReLU"),
                layer3
                );

            SoftmaxCrossEntropy <Real> sce = new SoftmaxCrossEntropy <Real>();

            WeightDecay <Real> weightDecay = new WeightDecay <Real>(L2_SCALE);

            weightDecay.AddParameters(layer1.Weight, layer2.Weight, layer3.Weight);

            MomentumSGD <Real> mSGD = new MomentumSGD <Real>(LEARNING_RATE);

            mSGD.SetUp(nn);

            var opt = new SparseRigLOptimizer(mSGD, MASKUPDATE_BEGIN_STEP, MASKUPDATE_END_STEP, MASKUPDATE_FREQUENCY, DROP_FRACTION, "cosine", "zeros", RIGL_ACC_SCALE);

            NdArray <Real>[] allMasks =
            {
                layer1.Mask,
                layer2.Mask,
                layer3.Mask,
            };

            string[] layerNames =
            {
                layer1.Name,
                layer2.Name,
                layer3.Name,
            };

            NdArray <Real>[] allWeights =
            {
                layer1.Weight,
                layer2.Weight,
                layer3.Weight,
            };

            //Initialize the masks with Erdos-Renyi sparsity
            SparseUtils.MaskInit(allMasks, layerNames, "erdos_renyi", END_SPARSITY, customSparsities);

            Console.WriteLine("[Global sparsity] " + SparseUtils.CalculateSparsity(allMasks));
            var weightSparsity = GetWeightSparsity(allMasks);

            Console.WriteLine("[Sparsity] Layer0, Layer1 : " + weightSparsity[0] + ", " + weightSparsity[1]);

            Console.WriteLine("\nTraining Start...");

            //Start training
            for (int i = 0; i < NUM_EPOCHS * numBatches; i++)
            {
                //Sample a random mini-batch from the training data
                TestDataSet <Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_SIZE);

                //Run one training step on the batch
                NdArray <Real> y    = nn.Forward(datasetX.Data)[0];
                Real           loss = sce.Evaluate(y, datasetX.Label);
                nn.Backward(y);

                weightDecay.Update();
                //Step the piecewise-constant learning-rate schedule
                opt._optimizer.LearningRate = PiecewiseConstant(opt._optimizer.UpdateCount, boundaries, LEARNING_RATE);

                //Conditionally update the sparsity masks (RigL)
                opt.condMaskUpdate(allMasks, allWeights);

                ////Output results every 10 iterations
                //if ((i + 1) % 10 == 0)
                //{
                //    Console.WriteLine("\nbatch count:" + (i + 1) + " (lr:" + opt._optimizer.LearningRate + ")");
                //    Console.WriteLine("loss " + loss);
                //}

                //Evaluate on the test set at the end of each epoch
                if ((i + 1) % numBatches == 0)
                {
                    Console.WriteLine("\nEpoch:" + Math.Floor((i + 1) / (Real)numBatches) + " Iteration:" + (i + 1) + " Testing... ");

                    //Run the test
                    Real accuracy = Trainer.Accuracy(nn, datasetY, new SoftmaxCrossEntropy <Real>(), out loss);

                    Console.WriteLine("loss: " + loss);
                    Console.WriteLine("accuracy: " + accuracy);
                }
            }
        }
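Two helpers are referenced above but not shown: PiecewiseConstant, which drops the learning rate at the given step boundaries, and GetWeightSparsity, which reports the fraction of zeroed entries in each mask. The sketches below are plausible reconstructions, not the example's actual code; the 10x decay factor, the long type of UpdateCount, and direct iteration over NdArray<Real>.Data are all assumptions.

        // Hypothetical sketch: a piecewise-constant schedule that divides the
        // base learning rate by 10 after each boundary step (the factor is a guess).
        static Real PiecewiseConstant(long step, int[] boundaries, Real baseRate)
        {
            Real rate = baseRate;

            foreach (int boundary in boundaries)
            {
                if (step >= boundary) rate *= (Real)0.1;
            }

            return rate;
        }

        // Hypothetical sketch: per-mask sparsity, i.e. the fraction of zero entries,
        // assuming the raw values are reachable through NdArray<Real>.Data.
        static Real[] GetWeightSparsity(NdArray <Real>[] masks)
        {
            Real[] sparsity = new Real[masks.Length];

            for (int i = 0; i < masks.Length; i++)
            {
                int zeros = 0;

                for (int j = 0; j < masks[i].Data.Length; j++)
                {
                    if (masks[i].Data[j] == 0) zeros++;
                }

                sparsity[i] = (Real)zeros / masks[i].Data.Length;
            }

            return sparsity;
        }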