/// <summary>
/// Smoke test for the optimizer: trains a tiny 2-10-1 MLP with momentum SGD
/// on batches from <c>_TestData</c> and prints the MSE loss each step so
/// convergence can be inspected by eye.
/// </summary>
public void TestOptimizer()
{
    // Small regression network: Linear(2,10) -> ReLU -> Linear(10,1).
    IFunction network = new ModuleList(
        N.Linear(2, 10),
        N.ReLU(),
        N.Linear(10, 1)
    );

    // NOTE(review): named to match its actual type; previously a local called
    // "sgd" held a MomentumSGD instance.
    Optimizer optimizer = new MomentumSGD(network);

    for (int i = 0; i < 100; i++)
    {
        // Fresh mini-batch of 100 samples each iteration.
        // data[0] = inputs, data[1] = targets — presumably; confirm against _TestData.
        Tensor[] data = this._TestData(100);

        Tensor prediction = network.Forward(
            data[0]
        );
        Tensor loss = F.MSELoss(prediction, data[1]);

        // Scalar loss for this step.
        Console.WriteLine($"{loss.Data[0]}");

        loss.Backward();
        optimizer.Update();
    }
}
const Real L2_SCALE = 1e-4f; // L2 weight-decay (regularization) scale

/// <summary>
/// Trains a sparse 784-300-100-10 MLP on MNIST using RigL-style dynamic
/// mask updates (SparseRigLOptimizer) on top of momentum SGD with weight
/// decay and a piecewise-constant learning-rate schedule. Prints sparsity
/// statistics up front and test loss/accuracy at the end of every epoch.
/// </summary>
public static void Run()
{
    // Load the MNIST data set.
    Console.WriteLine("MNIST data loading...");
    MnistData<Real> mnistData = new MnistData<Real>();

    // Full evaluation set, used for the per-epoch accuracy test.
    TestDataSet<Real> datasetY = mnistData.Eval.GetAllDataSet();

    Console.WriteLine("\nNetwork initializing...");

    // Mini-batches per epoch (e.g. 600 = 60000 / 100). A second local
    // ("batchPerEpoch") used to duplicate this exact computation.
    int numBatches = mnistData.Train.Length / BATCH_SIZE;

    // Iteration indices at which the learning rate drops.
    int[] boundaries = { LR_DROP_EPOCH * numBatches, (LR_DROP_EPOCH + 20) * numBatches };

    // Per-layer sparsity overrides; the output layer ("layer3") stays dense.
    Dictionary<string, Real> customSparsities = new Dictionary<string, Real>
    {
        { "layer2", END_SPARSITY * SPARSITY_SCALE },
        { "layer3", END_SPARSITY * 0 }
    };

    MaskedLinear<Real> layer1 = new MaskedLinear<Real>(28 * 28, 300, name: "layer1", gpuEnable: true);
    MaskedLinear<Real> layer2 = new MaskedLinear<Real>(300, 100, name: "layer2", gpuEnable: true);
    MaskedLinear<Real> layer3 = new MaskedLinear<Real>(100, 10, name: "layer3", gpuEnable: true);

    // Compose the network as a FunctionStack.
    FunctionStack<Real> nn = new FunctionStack<Real>(
        layer1,
        new ReLU<Real>(name: "l1 ReLU"),
        layer2,
        new ReLU<Real>(name: "l2 ReLU"),
        layer3
    );

    SoftmaxCrossEntropy<Real> sce = new SoftmaxCrossEntropy<Real>();

    // L2 regularization applied directly to the three weight matrices.
    WeightDecay<Real> weightDecay = new WeightDecay<Real>(L2_SCALE);
    weightDecay.AddParameters(layer1.Weight, layer2.Weight, layer3.Weight);

    MomentumSGD<Real> mSGD = new MomentumSGD<Real>(LEARNING_RATE);
    mSGD.SetUp(nn);

    // RigL optimizer wraps the base optimizer and periodically drops/grows
    // mask connections ("cosine" drop schedule, "zeros" init for grown weights).
    var opt = new SparseRigLOptimizer(mSGD, MASKUPDATE_BEGIN_STEP, MASKUPDATE_END_STEP, MASKUPDATE_FREQUENCY, DROP_FRACTION, "cosine", "zeros", RIGL_ACC_SCALE);

    NdArray<Real>[] allMasks = { layer1.Mask, layer2.Mask, layer3.Mask };
    string[] layerNames = { layer1.Name, layer2.Name, layer3.Name };
    NdArray<Real>[] allWeights = { layer1.Weight, layer2.Weight, layer3.Weight };

    // Initialize the sparsity masks (Erdos-Renyi layer-wise distribution).
    SparseUtils.MaskInit(allMasks, layerNames, "erdos_renyi", END_SPARSITY, customSparsities);

    Console.WriteLine("[Global sparsity] " + SparseUtils.CalculateSparsity(allMasks));
    var weightSparsity = GetWeightSparsity(allMasks);
    Console.WriteLine("[Sparsity] Layer0, Layer1 : " + weightSparsity[0] + ", " + weightSparsity[1]);

    Console.WriteLine("\nTraining Start...");

    // Main training loop over NUM_EPOCHS epochs of numBatches iterations.
    for (int i = 0; i < NUM_EPOCHS * numBatches; i++)
    {
        // Draw a random mini-batch from the training set.
        TestDataSet<Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_SIZE);

        // Forward / loss / backward on the batch.
        NdArray<Real> y = nn.Forward(datasetX.Data)[0];
        Real loss = sce.Evaluate(y, datasetX.Label);
        nn.Backward(y);

        weightDecay.Update();

        // Piecewise-constant LR schedule, then a conditional RigL mask update.
        opt._optimizer.LearningRate = PiecewiseConstant(opt._optimizer.UpdateCount, boundaries, LEARNING_RATE);
        opt.condMaskUpdate(allMasks, allWeights);

        // Evaluate on the test set at the end of each epoch.
        // (Equivalent to the original "i % numBatches + 1 == numBatches",
        // which relied on % binding tighter than +.)
        if ((i + 1) % numBatches == 0)
        {
            Console.WriteLine("\nEpoch:" + Math.Floor((i + 1) / (Real)numBatches) + " Iteration:" + (i + 1) + " Testing... ");

            // Run the accuracy test on the full evaluation set.
            Real accuracy = Trainer.Accuracy(nn, datasetY, new SoftmaxCrossEntropy<Real>(), out loss);
            Console.WriteLine("loss: " + loss);
            Console.WriteLine("accuracy: " + accuracy);
        }
    }
}