// Trains a small two-stage CNN ("Test18") on CIFAR data and logs per-batch loss,
// per-batch timing, and periodic accuracy on held-out data.
public static void Run()
{
    Stopwatch stopwatch = new Stopwatch();

    RILogManager.Default?.SendDebug("CIFAR Data Loading...");
    CifarData cifar = new CifarData();

    // Two conv/pool/dropout stages followed by two fully connected layers.
    FunctionStack network = new FunctionStack("Test18",
        new Convolution2D(true, 3, 32, 3, name: "l1 Conv2D", gpuEnable: true),
        new ReLU(name: "l1 ReLU"),
        new MaxPooling(2, name: "l1 MaxPooling", gpuEnable: false),
        new Dropout(0.25, name: "l1 DropOut"),
        new Convolution2D(true, 32, 64, 3, name: "l2 Conv2D", gpuEnable: false),
        new ReLU(name: "l2 ReLU"),
        new MaxPooling(2, 2, name: "l2 MaxPooling", gpuEnable: false),
        new Dropout(0.25, name: "l2 DropOut"),
        new Linear(true, 13 * 13 * 64, 512, name: "l3 Linear", gpuEnable: false),
        new ReLU(name: "l3 ReLU"),
        new Dropout(name: "l3 DropOut"),
        new Linear(true, 512, 10, name: "l4 Linear", gpuEnable: false)
    );

    network.SetOptimizer(new AdaDelta());

    RILogManager.Default?.SendDebug("Training Start...");

    for (int epoch = 1; epoch < 3; epoch++)
    {
        RILogManager.Default?.SendDebug("epoch " + epoch);

        Real lossSum = 0;    // running sum of batch losses this epoch
        long lossCount = 0;  // number of batches contributing to lossSum

        for (int batch = 1; batch < TRAIN_DATA_COUNT + 1; batch++)
        {
            stopwatch.Restart();

            RILogManager.Default?.SendDebug("\nbatch count " + batch + "/" + TRAIN_DATA_COUNT);

            // Random mini-batch from the training data.
            TestData.TestDataSet trainSet = cifar.GetRandomXSet(BATCH_DATA_COUNT);

            Real batchLoss = Trainer.Train(network, trainSet.Data, trainSet.Label, new SoftmaxCrossEntropy());
            lossSum += batchLoss;
            lossCount++;

            RILogManager.Default?.SendDebug("total loss " + lossSum / lossCount);
            RILogManager.Default?.SendDebug("local loss " + batchLoss);

            stopwatch.Stop();
            RILogManager.Default?.SendDebug("time " + stopwatch.Elapsed.TotalMilliseconds);

            // Measure accuracy against held-out data every 20 batches.
            if (batch % 20 == 0)
            {
                RILogManager.Default?.SendDebug("\nTesting...");
                TestDataSet testSet = cifar.GetRandomYSet(TEACH_DATA_COUNT);
                Real accuracy = Trainer.Accuracy(network, testSet.Data, testSet.Label);
                RILogManager.Default?.SendDebug("accuracy " + accuracy);
            }
        }
    }
}
// Trains a minimal MLP (784 -> 1024 -> 10) on MNIST with MomentumSGD and
// reports running loss and test accuracy every 20 batches.
public static void Run()
{
    //Prepare MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();

    Console.WriteLine("Training Start...");

    //Writing the network configuration in FunctionStack
    FunctionStack nn = new FunctionStack(
        new Linear(28 * 28, 1024, name: "l1 Linear"),
        new Sigmoid(name: "l1 Sigmoid"),
        new Linear(1024, 10, name: "l2 Linear")
    );

    //Declare optimizer
    nn.SetOptimizer(new MomentumSGD());

    //Three generations learning
    for (int epoch = 0; epoch < 3; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        //Total error over the whole epoch
        Real totalLoss = 0;
        long totalLossCount = 0;

        //How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            //Get data randomly from training data
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT);

            //Execute batch learning in parallel
            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());

            // BUG FIX: was "totalLoss = sumLoss;", which overwrote the accumulator
            // each batch and made "total loss" report latest-loss / batch-count.
            totalLoss += sumLoss;
            totalLossCount++;

            //Test the accuracy if you move the batch 20 times
            if (i % 20 == 0)
            {
                Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

                //Result output
                Console.WriteLine("total loss " + totalLoss / totalLossCount);
                Console.WriteLine("local loss " + sumLoss);
                Console.WriteLine("\nTesting...");

                //Get data randomly from test data
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT);

                //Run test
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
// Trains a minimal generic MLP (784 -> 1024 -> 10) on MNIST, passing the
// optimizer to Trainer.Train per call, and reports loss/accuracy every 20 batches.
public static void Run()
{
    //Prepare MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData<Real> mnistData = new MnistData<Real>();

    Console.WriteLine("Training Start...");

    //Write the network configuration into a FunctionStack
    FunctionStack<Real> nn = new FunctionStack<Real>(
        new Linear<Real>(28 * 28, 1024, name: "l1 Linear"),
        // NOTE(review): this layer is a ReLU but is labeled "l1 Sigmoid" —
        // name kept unchanged in case anything looks layers up by name; confirm.
        new ReLU<Real>(name: "l1 Sigmoid"),
        new Linear<Real>(1024, 10, name: "l2 Linear")
    );

    //Optimizer is supplied directly to Trainer.Train below
    //nn.SetOptimizer(new MomentumSGD<Real>());

    //Three generations of learning
    for (int epoch = 0; epoch < 3; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        //Aggregate the error over the whole epoch
        Real totalLoss = 0;
        long totalLossCount = 0;

        //How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            //Get data randomly from the training set
            TestDataSet<Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_DATA_COUNT);

            //Execute batch learning in parallel
            Real sumLoss = Trainer.Train(nn, datasetX, new SoftmaxCrossEntropy<Real>(), new MomentumSGD<Real>());

            // BUG FIX: was "totalLoss = sumLoss;", which overwrote the accumulator
            // each batch and made "total loss" report latest-loss / batch-count.
            totalLoss += sumLoss;
            totalLossCount++;

            //Test the accuracy every 20 batches
            if (i % 20 == 0)
            {
                Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

                //Result output
                Console.WriteLine("total loss " + totalLoss / totalLossCount);
                Console.WriteLine("local loss " + sumLoss);
                Console.WriteLine("\nTesting...");

                //Get data randomly from the test set
                TestDataSet<Real> datasetY = mnistData.Eval.GetRandomDataSet(TEST_DATA_COUNT);

                //Run the test
                Real accuracy = Trainer.Accuracy(nn, datasetY);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
// Trains a minimal MLP ("Test4", 784 -> 1024 -> 10) on 28x28 MNIST and logs
// running loss and test accuracy every 20 batches via RILogManager.
public static void Run()
{
    // FIX: the "MNIST Data Loading..." debug message was logged twice.
    RILogManager.Default?.SendDebug("MNIST Data Loading...");
    MnistData mnistData = new MnistData(28);

    RILogManager.Default?.SendDebug("Training Start...");

    FunctionStack nn = new FunctionStack("Test4",
        new Linear(true, 28 * 28, 1024, name: "l1 Linear"),
        new Sigmoid(name: "l1 Sigmoid"),
        new Linear(true, 1024, 10, name: "l2 Linear")
    );

    nn.SetOptimizer(new MomentumSGD());

    for (int epoch = 0; epoch < 3; epoch++)
    {
        RILogManager.Default?.SendDebug("epoch " + (epoch + 1));

        // Running loss over the epoch.
        Real totalLoss = 0;
        long totalLossCount = 0;

        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            //Get data randomly from training data
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);

            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());

            // BUG FIX: was "totalLoss = sumLoss;", which overwrote the accumulator
            // each batch and made "total loss" report latest-loss / batch-count.
            totalLoss += sumLoss;
            totalLossCount++;

            // Test the accuracy every 20 batches.
            if (i % 20 == 0)
            {
                RILogManager.Default?.SendDebug("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
                RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount);
                RILogManager.Default?.SendDebug("local loss " + sumLoss);
                RILogManager.Default?.SendDebug("\nTesting...");

                //Get data randomly from test data
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);

                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                RILogManager.Default?.SendDebug("accuracy " + accuracy);
            }
        }
    }
}
// Trains a CNN with batch normalization on CIFAR-10 or CIFAR-100.
// isCifar100: use the CIFAR-100 dataset; isFineLabel: use its fine (100-class)
// labels rather than the coarse (20-class) ones. Output size follows
// cifarData.ClassCount, so the same stack serves all three label sets.
public static void Run(bool isCifar100 = false, bool isFineLabel = false)
{
    Stopwatch sw = new Stopwatch();

    //Prepare the CIFAR data
    Console.WriteLine("CIFAR Data Loading...");
    CifarData cifarData = new CifarData(isCifar100, isFineLabel);

    //Write the network configuration into a FunctionStack
    FunctionStack nn = new FunctionStack(
        new Convolution2D(3, 32, 3, name: "l1 Conv2D", gpuEnable: true),
        new BatchNormalization(32, name: "l1 BatchNorm"),
        new ReLU(name: "l1 ReLU"),
        new MaxPooling2D(2, name: "l1 MaxPooling", gpuEnable: true),
        new Convolution2D(32, 64, 3, name: "l2 Conv2D", gpuEnable: true),
        // BUG FIX: this layer was named "l1 BatchNorm", duplicating the first
        // batch-norm layer's name; renamed to keep layer names unique.
        new BatchNormalization(64, name: "l2 BatchNorm"),
        new ReLU(name: "l2 ReLU"),
        new MaxPooling2D(2, 2, name: "l2 MaxPooling", gpuEnable: true),
        new Linear(14 * 14 * 64, 512, name: "l3 Linear", gpuEnable: true),
        new ReLU(name: "l3 ReLU"),
        //100 classes for Cifar100, 20 classes for coarse labels, 10 for Cifar10
        new Linear(512, cifarData.ClassCount, name: "l4 Linear", gpuEnable: true)
    );

    //Declare the optimizer
    nn.SetOptimizer(new Adam());

    Console.WriteLine("Training Start...");

    //Learn for a couple of epochs
    for (int epoch = 1; epoch < 3; epoch++)
    {
        Console.WriteLine("epoch " + epoch);

        //Aggregate the error over the whole epoch
        Real totalLoss = 0;
        long totalLossCount = 0;

        //How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            sw.Restart();

            Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

            //Get data randomly from the training set
            TestDataSet datasetX = cifarData.Train.GetRandomDataSet(BATCH_DATA_COUNT);

            //Execute batch learning in parallel
            Real sumLoss = Trainer.Train(nn, datasetX, new SoftmaxCrossEntropy());
            totalLoss += sumLoss;
            totalLossCount++;

            //Result output
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);

            sw.Stop();
            // FIX: added the missing space after "time" in the log message.
            Console.WriteLine("time " + sw.Elapsed.TotalMilliseconds);

            //Test the accuracy every 20 batches
            if (i % 20 == 0)
            {
                Console.WriteLine("\nTesting...");

                //Get data randomly from the test set
                TestDataSet datasetY = cifarData.Eval.GetRandomDataSet(TEACH_DATA_COUNT);

                //Run the test
                Real accuracy = Trainer.Accuracy(nn, datasetY);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
// Trains a VGG-style CNN (3x3 conv blocks with max-pooling, then three FC
// layers with dropout) on CIFAR-10, running on the GPU via OpenCL.
public static void Main(string[] args)
{
    Console.WriteLine("CIFAR Data Loading...");
    CifarData cifar = new CifarData();

    // platformId/deviceIndex depend on the local OpenCL installation;
    // see the OpenCL/GPU setup notes for how to determine them.
    Weaver.Initialize(ComputeDeviceTypes.Gpu, platformId: 1, deviceIndex: 0);

    FunctionStack network = new FunctionStack(
        // input: (3, 32, 32)
        new Convolution2D(3, 64, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new Convolution2D(64, 64, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new Convolution2D(64, 64, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2, gpuEnable: true),

        // (64, 16, 16)
        new Convolution2D(64, 128, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new Convolution2D(128, 128, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new Convolution2D(128, 128, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2, gpuEnable: true),

        // (128, 8, 8)
        new Convolution2D(128, 128, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new Convolution2D(128, 128, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new Convolution2D(128, 128, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2, gpuEnable: true),

        // (128, 4, 4) -> classifier head
        new Linear(128 * 4 * 4, 1024, gpuEnable: true),
        new ReLU(),
        new Dropout(0.5),
        new Linear(1024, 1024, gpuEnable: true),
        new ReLU(),
        new Dropout(0.5),
        new Linear(1024, 10, gpuEnable: true)
    );

    network.SetOptimizer(new Adam());

    Console.WriteLine("Training Start...");

    for (int epoch = 1; epoch < 10; epoch++)
    {
        Console.WriteLine("\nepoch " + epoch);

        Real lossSum = 0;    // running loss over the epoch
        long lossCount = 0;  // batches contributing to lossSum

        for (int batch = 1; batch < TRAIN_DATA_COUNT + 1; batch++)
        {
            // Random mini-batch from the training data.
            TestDataSet trainSet = cifar.GetRandomXSet(BATCH_DATA_COUNT);

            Real batchLoss = Trainer.Train(network, trainSet.Data, trainSet.Label, new SoftmaxCrossEntropy());
            lossSum += batchLoss;
            lossCount++;

            Console.WriteLine("total loss " + lossSum / lossCount);
            Console.WriteLine("local loss " + batchLoss);

            // Measure accuracy on held-out data every 50 batches.
            if (batch % 50 == 0)
            {
                Console.WriteLine("step: " + batch + " Testing...");
                TestDataSet testSet = cifar.GetRandomYSet(TEACH_DATA_COUNT);
                Real accuracy = Trainer.Accuracy(network, testSet.Data, testSet.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
// Trains a deep 15-layer narrow network ("Test19") on MNIST, tracking loss
// statistics and histograms, and saves the model before and after training.
public static void Run()
{
    RILogManager.Default?.SendDebug("MNIST Data Loading...");
    MnistData mnistData = new MnistData(28);

    RILogManager.Default?.SendDebug("Training Start...");

    int neuronCount = 28;
    FunctionStack nn = new FunctionStack("Test19",
        new Linear(true, neuronCount * neuronCount, N, name: "l1 Linear"), // L1
        new BatchNormalization(true, N, name: "l1 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l1 LeakyReLU"),
        new Linear(true, N, N, name: "l2 Linear"), // L2
        new BatchNormalization(true, N, name: "l2 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l2 LeakyReLU"),
        new Linear(true, N, N, name: "l3 Linear"), // L3
        new BatchNormalization(true, N, name: "l3 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l3 LeakyReLU"),
        new Linear(true, N, N, name: "l4 Linear"), // L4
        new BatchNormalization(true, N, name: "l4 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l4 LeakyReLU"),
        new Linear(true, N, N, name: "l5 Linear"), // L5
        new BatchNormalization(true, N, name: "l5 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l5 LeakyReLU"),
        new Linear(true, N, N, name: "l6 Linear"), // L6
        new BatchNormalization(true, N, name: "l6 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l6 LeakyReLU"),
        new Linear(true, N, N, name: "l7 Linear"), // L7
        new BatchNormalization(true, N, name: "l7 BatchNorm"),
        // NOTE(review): label says "ReLU" but the layer is a LeakyReLU; name kept.
        new LeakyReLU(slope: 0.000001, name: "l7 ReLU"),
        new Linear(true, N, N, name: "l8 Linear"), // L8
        new BatchNormalization(true, N, name: "l8 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l8 LeakyReLU"),
        new Linear(true, N, N, name: "l9 Linear"), // L9
        new BatchNormalization(true, N, name: "l9 BatchNorm"),
        new PolynomialApproximantSteep(slope: 0.000001, name: "l9 PolynomialApproximantSteep"),
        new Linear(true, N, N, name: "l10 Linear"), // L10
        new BatchNormalization(true, N, name: "l10 BatchNorm"),
        new PolynomialApproximantSteep(slope: 0.000001, name: "l10 PolynomialApproximantSteep"),
        new Linear(true, N, N, name: "l11 Linear"), // L11
        new BatchNormalization(true, N, name: "l11 BatchNorm"),
        new PolynomialApproximantSteep(slope: 0.000001, name: "l11 PolynomialApproximantSteep"),
        new Linear(true, N, N, name: "l12 Linear"), // L12
        new BatchNormalization(true, N, name: "l12 BatchNorm"),
        new PolynomialApproximantSteep(slope: 0.000001, name: "l12 PolynomialApproximantSteep"),
        new Linear(true, N, N, name: "l13 Linear"), // L13
        new BatchNormalization(true, N, name: "l13 BatchNorm"),
        new PolynomialApproximantSteep(slope: 0.000001, name: "l13 PolynomialApproximantSteep"),
        new Linear(true, N, N, name: "l14 Linear"), // L14
        new BatchNormalization(true, N, name: "l14 BatchNorm"),
        new PolynomialApproximantSteep(slope: 0.000001, name: "l14 PolynomialApproximantSteep"),
        new Linear(true, N, 10, name: "l15 Linear") // L15
    );

    nn.SetOptimizer(new AdaGrad());
    //nn.SetOptimizer(new Adam());

    RunningStatistics stats = new RunningStatistics();
    Histogram lossHistogram = new Histogram();
    Histogram accuracyHistogram = new Histogram();
    Real totalLoss = 0;
    long totalLossCounter = 0;
    Real highestAccuracy = 0;
    // BUG FIX: best-loss trackers were initialized to 0, so a positive loss
    // could never improve on them; start at the maximum instead.
    Real bestLocalLoss = double.MaxValue;
    Real bestTotalLoss = double.MaxValue;

    // BUG FIX: the histogram buckets were re-added on every batch iteration;
    // add each bucket once, up front.
    lossHistogram.AddBucket(new Bucket(-10, 10));
    accuracyHistogram.AddBucket(new Bucket(-10.0, 10));

    // First skeleton save
    ModelIO.Save(nn, nn.Name);

    for (int epoch = 0; epoch < 1; epoch++)
    {
        RILogManager.Default?.SendDebug("epoch " + (epoch + 1));
        RILogManager.Default?.ViewerSendWatch("epoch", (epoch + 1));

        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            RILogManager.Default?.SendInformation("batch count " + i + "/" + TRAIN_DATA_COUNT);

            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);

            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());
            totalLoss += sumLoss;
            totalLossCounter++;
            stats.Push(sumLoss);

            // BUG FIX: "sumLoss != Double.NaN" is always true (NaN compares
            // unequal to everything, including itself); use double.IsNaN.
            if (sumLoss < bestLocalLoss && !double.IsNaN(sumLoss))
            {
                bestLocalLoss = sumLoss;
            }
            if (stats.Mean < bestTotalLoss && !double.IsNaN(stats.Mean))
            {
                bestTotalLoss = stats.Mean;
            }

            // Best-effort: AddData throws if the value falls outside all buckets.
            try { lossHistogram.AddData(sumLoss); }
            catch (Exception) { }

            if (i % 20 == 0)
            {
                RILogManager.Default?.SendDebug("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
                RILogManager.Default?.SendDebug("Total/Mean loss " + stats.Mean);
                RILogManager.Default?.SendDebug("local loss " + sumLoss);
                RILogManager.Default?.SendInformation("batch count " + i + "/" + TRAIN_DATA_COUNT);
                RILogManager.Default?.ViewerSendWatch("batch count", i);
                RILogManager.Default?.ViewerSendWatch("Total/Mean loss", stats.Mean);
                RILogManager.Default?.ViewerSendWatch("local loss", sumLoss);
                RILogManager.Default?.SendDebug("");
                RILogManager.Default?.SendDebug("Testing...");

                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                if (accuracy > highestAccuracy)
                {
                    highestAccuracy = accuracy;
                }

                RILogManager.Default?.SendDebug("Accuracy: " + accuracy);
                RILogManager.Default?.ViewerSendWatch("Accuracy", accuracy);

                // Best-effort: AddData throws if the value falls outside all buckets.
                try { accuracyHistogram.AddData(accuracy); }
                catch (Exception) { }
            }
        }
    }

    RILogManager.Default?.SendDebug("Best Accuracy: " + highestAccuracy);
    RILogManager.Default?.SendDebug("Best Total Loss " + bestTotalLoss);
    RILogManager.Default?.SendDebug("Best Local Loss " + bestLocalLoss);
    RILogManager.Default?.ViewerSendWatch("Best Accuracy:", highestAccuracy);
    RILogManager.Default?.ViewerSendWatch("Best Total Loss", bestTotalLoss);
    RILogManager.Default?.ViewerSendWatch("Best Local Loss", bestLocalLoss);

    // Save all with training data
    ModelIO.Save(nn, nn.Name);
}
// Trains a four-stage MLP on MNIST using conditioned decoupled neural interfaces
// (cDNI): each hidden layer is updated from a synthetic gradient predicted by a
// small auxiliary network (cDNI1..3) instead of waiting for full backprop, and
// each auxiliary network is in turn trained against the true gradient once it
// becomes available. The update ordering below is deliberate — do not reorder.
// NOTE(review): the cDNI inputs are 256 + 10 wide and come from GetTrainData(),
// presumably the layer activation concatenated with the label — confirm in ResultDataSet.
public static void Run()
{
    // Prepare the MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData<Real> mnistData = new MnistData<Real>();

    Console.WriteLine("Training Start...");

    // Write the network configuration into FunctionStacks, one per stage
    FunctionStack<Real> Layer1 = new FunctionStack<Real>(
        new Linear<Real>(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization<Real>(256, name: "l1 Norm"),
        new ReLU<Real>(name: "l1 ReLU")
    );
    FunctionStack<Real> Layer2 = new FunctionStack<Real>(
        new Linear<Real>(256, 256, name: "l2 Linear"),
        new BatchNormalization<Real>(256, name: "l2 Norm"),
        new ReLU<Real>(name: "l2 ReLU")
    );
    FunctionStack<Real> Layer3 = new FunctionStack<Real>(
        new Linear<Real>(256, 256, name: "l3 Linear"),
        new BatchNormalization<Real>(256, name: "l3 Norm"),
        new ReLU<Real>(name: "l3 ReLU")
    );
    FunctionStack<Real> Layer4 = new FunctionStack<Real>(
        new Linear<Real>(256, 10, name: "l4 Linear")
    );

    // A FunctionStack can itself be stacked as a Function (used for testing only)
    FunctionStack<Real> nn = new FunctionStack<Real>
    (
        Layer1,
        Layer2,
        Layer3,
        Layer4
    );

    // Synthetic-gradient predictors; input is 256 activations + 10 label units.
    // Final Linear starts from an all-zero weight matrix (initialW) so the first
    // predicted gradients are zero.
    FunctionStack<Real> cDNI1 = new FunctionStack<Real>(
        new Linear<Real>(256 + 10, 1024, name: "cDNI1 Linear1"),
        new BatchNormalization<Real>(1024, name: "cDNI1 Nrom1"),
        new ReLU<Real>(name: "cDNI1 ReLU1"),
        new Linear<Real>(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );
    FunctionStack<Real> cDNI2 = new FunctionStack<Real>(
        new Linear<Real>(256 + 10, 1024, name: "cDNI2 Linear1"),
        new BatchNormalization<Real>(1024, name: "cDNI2 Nrom1"),
        new ReLU<Real>(name: "cDNI2 ReLU1"),
        new Linear<Real>(1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
    );
    FunctionStack<Real> cDNI3 = new FunctionStack<Real>(
        new Linear<Real>(256 + 10, 1024, name: "cDNI3 Linear1"),
        new BatchNormalization<Real>(1024, name: "cDNI3 Nrom1"),
        new ReLU<Real>(name: "cDNI3 ReLU1"),
        new Linear<Real>(1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
    );

    // Declare one optimizer per stage and per cDNI predictor
    Adam<Real> L1adam = new Adam<Real>(0.00003f);
    Adam<Real> L2adam = new Adam<Real>(0.00003f);
    Adam<Real> L3adam = new Adam<Real>(0.00003f);
    Adam<Real> L4adam = new Adam<Real>(0.00003f);
    L1adam.SetUp(Layer1);
    L2adam.SetUp(Layer2);
    L3adam.SetUp(Layer3);
    L4adam.SetUp(Layer4);

    Adam<Real> cDNI1adam = new Adam<Real>(0.00003f);
    Adam<Real> cDNI2adam = new Adam<Real>(0.00003f);
    Adam<Real> cDNI3adam = new Adam<Real>(0.00003f);
    cDNI1adam.SetUp(cDNI1);
    cDNI2adam.SetUp(cDNI2);
    cDNI3adam.SetUp(cDNI3);

    for (int epoch = 0; epoch < 10; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Aggregate the error for the main network and each cDNI predictor
        Real totalLoss = 0;
        Real cDNI1totalLoss = 0;
        Real cDNI2totalLoss = 0;
        Real cDNI3totalLoss = 0;
        long totalLossCount = 0;
        long cDNI1totalLossCount = 0;
        long cDNI2totalLossCount = 0;
        long cDNI3totalLossCount = 0;

        // How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            // Get data randomly from the training set
            TestDataSet<Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_DATA_COUNT);

            // Run the first stage
            NdArray<Real> layer1ForwardResult = Layer1.Forward(datasetX.Data)[0];
            ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label);

            // Predict the first stage's gradient with cDNI1
            NdArray<Real> cDNI1Result = cDNI1.Forward(layer1ResultDataSet.GetTrainData())[0];

            // Apply the predicted gradient to the first stage
            layer1ForwardResult.Grad = cDNI1Result.Data.ToArray();

            // Update the first stage; cut the graph after Backward
            Layer1.Backward(layer1ForwardResult);
            layer1ForwardResult.ParentFunc = null;
            L1adam.Update();

            // Run the second stage
            NdArray<Real> layer2ForwardResult = Layer2.Forward(layer1ResultDataSet.Result)[0];
            ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label);

            // Predict the second stage's gradient with cDNI2
            NdArray<Real> cDNI2Result = cDNI2.Forward(layer2ResultDataSet.GetTrainData())[0];

            // Apply the predicted gradient to the second stage
            layer2ForwardResult.Grad = cDNI2Result.Data.ToArray();

            // Update the second stage
            Layer2.Backward(layer2ForwardResult);
            layer2ForwardResult.ParentFunc = null;

            // Train cDNI1 against the true gradient that Layer2's backward produced
            Real cDNI1loss = new MeanSquaredError<Real>().Evaluate(cDNI1Result, new NdArray<Real>(layer1ResultDataSet.Result.Grad, cDNI1Result.Shape, cDNI1Result.BatchCount));

            L2adam.Update();

            cDNI1.Backward(cDNI1Result);
            cDNI1adam.Update();

            cDNI1totalLoss += cDNI1loss;
            cDNI1totalLossCount++;

            // Run the third stage
            NdArray<Real> layer3ForwardResult = Layer3.Forward(layer2ResultDataSet.Result)[0];
            ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label);

            // Predict the third stage's gradient with cDNI3
            NdArray<Real> cDNI3Result = cDNI3.Forward(layer3ResultDataSet.GetTrainData())[0];

            // Apply the predicted gradient to the third stage
            layer3ForwardResult.Grad = cDNI3Result.Data.ToArray();

            // Update the third stage
            Layer3.Backward(layer3ForwardResult);
            layer3ForwardResult.ParentFunc = null;

            // Train cDNI2 against the true gradient that Layer3's backward produced
            Real cDNI2loss = new MeanSquaredError<Real>().Evaluate(cDNI2Result, new NdArray<Real>(layer2ResultDataSet.Result.Grad, cDNI2Result.Shape, cDNI2Result.BatchCount));

            L3adam.Update();

            cDNI2.Backward(cDNI2Result);
            cDNI2adam.Update();

            cDNI2totalLoss += cDNI2loss;
            cDNI2totalLossCount++;

            // Run the fourth stage
            NdArray<Real> layer4ForwardResult = Layer4.Forward(layer3ResultDataSet.Result)[0];

            // The final stage gets its true gradient from the task loss
            Real sumLoss = new SoftmaxCrossEntropy<Real>().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label);

            // Update the fourth stage
            Layer4.Backward(layer4ForwardResult);
            layer4ForwardResult.ParentFunc = null;

            totalLoss += sumLoss;
            totalLossCount++;

            // Train cDNI3 against the true gradient that Layer4's backward produced
            Real cDNI3loss = new MeanSquaredError<Real>().Evaluate(cDNI3Result, new NdArray<Real>(layer3ResultDataSet.Result.Grad, cDNI3Result.Shape, cDNI3Result.BatchCount));

            L4adam.Update();

            cDNI3.Backward(cDNI3Result);
            cDNI3adam.Update();

            cDNI3totalLoss += cDNI3loss;
            cDNI3totalLossCount++;

            Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

            // Result output
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);
            Console.WriteLine("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount);
            Console.WriteLine("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount);
            Console.WriteLine("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount);
            Console.WriteLine("\ncDNI1 local loss " + cDNI1loss);
            Console.WriteLine("cDNI2 local loss " + cDNI2loss);
            Console.WriteLine("cDNI3 local loss " + cDNI3loss);

            // Test the accuracy every 20 batches
            if (i % 20 == 0)
            {
                Console.WriteLine("\nTesting...");

                // Get data randomly from the test set
                TestDataSet<Real> datasetY = mnistData.Eval.GetRandomDataSet(TEST_DATA_COUNT);

                // Run the test on the stacked end-to-end network
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
// Trains a four-stage MLP on MNIST using decoupled neural interfaces (DNI):
// each hidden layer is updated from a synthetic gradient predicted by an
// auxiliary network (DNI1..3) from the layer's activations alone, and each
// auxiliary network is trained against the true gradient once the next stage's
// backward pass produces it. The update ordering below is deliberate — do not reorder.
public static void Run()
{
    // Prepare the MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();

    Console.WriteLine("Training Start...");

    // Write the network configuration into FunctionStacks, one per stage
    FunctionStack Layer1 = new FunctionStack(
        new Linear(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );
    FunctionStack Layer2 = new FunctionStack(
        new Linear(256, 256, name: "l2 Linear"),
        new BatchNormalization(256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );
    FunctionStack Layer3 = new FunctionStack(
        new Linear(256, 256, name: "l3 Linear"),
        new BatchNormalization(256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );
    FunctionStack Layer4 = new FunctionStack(
        new Linear(256, 10, name: "l4 Linear")
    );

    // A FunctionStack can itself be stacked as a Function (used for testing only)
    FunctionStack nn = new FunctionStack
    (
        Layer1,
        Layer2,
        Layer3,
        Layer4
    );

    // Synthetic-gradient predictors; the final Linear starts from an all-zero
    // weight matrix (initialW) so the first predicted gradients are zero.
    FunctionStack DNI1 = new FunctionStack(
        new Linear(256, 1024, name: "DNI1 Linear1"),
        new BatchNormalization(1024, name: "DNI1 Nrom1"),
        new ReLU(name: "DNI1 ReLU1"),
        new Linear(1024, 1024, name: "DNI1 Linear2"),
        new BatchNormalization(1024, name: "DNI1 Nrom2"),
        new ReLU(name: "DNI1 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );
    FunctionStack DNI2 = new FunctionStack(
        new Linear(256, 1024, name: "DNI2 Linear1"),
        new BatchNormalization(1024, name: "DNI2 Nrom1"),
        new ReLU(name: "DNI2 ReLU1"),
        new Linear(1024, 1024, name: "DNI2 Linear2"),
        new BatchNormalization(1024, name: "DNI2 Nrom2"),
        new ReLU(name: "DNI2 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3")
    );
    FunctionStack DNI3 = new FunctionStack(
        new Linear(256, 1024, name: "DNI3 Linear1"),
        new BatchNormalization(1024, name: "DNI3 Nrom1"),
        new ReLU(name: "DNI3 ReLU1"),
        new Linear(1024, 1024, name: "DNI3 Linear2"),
        new BatchNormalization(1024, name: "DNI3 Nrom2"),
        new ReLU(name: "DNI3 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3")
    );

    // Declare one optimizer per stage and per DNI predictor
    Layer1.SetOptimizer(new Adam());
    Layer2.SetOptimizer(new Adam());
    Layer3.SetOptimizer(new Adam());
    Layer4.SetOptimizer(new Adam());

    DNI1.SetOptimizer(new Adam());
    DNI2.SetOptimizer(new Adam());
    DNI3.SetOptimizer(new Adam());

    // Learn for 20 epochs
    for (int epoch = 0; epoch < 20; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Aggregate the error for the main network and each DNI predictor
        Real totalLoss = 0;
        Real DNI1totalLoss = 0;
        Real DNI2totalLoss = 0;
        Real DNI3totalLoss = 0;
        long totalLossCount = 0;
        long DNI1totalLossCount = 0;
        long DNI2totalLossCount = 0;
        long DNI3totalLossCount = 0;

        // How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            // Get data randomly from the training set
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT);

            // Run the first stage
            NdArray[] layer1ForwardResult = Layer1.Forward(datasetX.Data);

            // Predict the first stage's gradient with DNI1
            NdArray[] DNI1Result = DNI1.Forward(layer1ForwardResult);

            // Apply the predicted gradient to the first stage
            layer1ForwardResult[0].Grad = DNI1Result[0].Data.ToArray();

            // Update the first stage
            Layer1.Backward(layer1ForwardResult);
            layer1ForwardResult[0].ParentFunc = null; // Backward has run, so cut the computation graph
            Layer1.Update();

            // Run the second stage
            NdArray[] layer2ForwardResult = Layer2.Forward(layer1ForwardResult);

            // Predict the second stage's gradient with DNI2
            NdArray[] DNI2Result = DNI2.Forward(layer2ForwardResult);

            // Apply the predicted gradient to the second stage
            layer2ForwardResult[0].Grad = DNI2Result[0].Data.ToArray();

            // Update the second stage
            Layer2.Backward(layer2ForwardResult);
            layer2ForwardResult[0].ParentFunc = null;

            // Train DNI1 against the true gradient that Layer2's backward produced
            Real DNI1loss = new MeanSquaredError().Evaluate(DNI1Result, new NdArray(layer1ForwardResult[0].Grad, DNI1Result[0].Shape, DNI1Result[0].BatchCount));

            Layer2.Update();

            DNI1.Backward(DNI1Result);
            DNI1.Update();

            DNI1totalLoss += DNI1loss;
            DNI1totalLossCount++;

            // Run the third stage
            NdArray[] layer3ForwardResult = Layer3.Forward(layer2ForwardResult);

            // Predict the third stage's gradient with DNI3
            NdArray[] DNI3Result = DNI3.Forward(layer3ForwardResult);

            // Apply the predicted gradient to the third stage
            layer3ForwardResult[0].Grad = DNI3Result[0].Data.ToArray();

            // Update the third stage
            Layer3.Backward(layer3ForwardResult);
            layer3ForwardResult[0].ParentFunc = null;

            // Train DNI2 against the true gradient that Layer3's backward produced
            Real DNI2loss = new MeanSquaredError().Evaluate(DNI2Result, new NdArray(layer2ForwardResult[0].Grad, DNI2Result[0].Shape, DNI2Result[0].BatchCount));

            Layer3.Update();

            DNI2.Backward(DNI2Result);
            DNI2.Update();

            DNI2totalLoss += DNI2loss;
            DNI2totalLossCount++;

            // Run the fourth stage
            NdArray[] layer4ForwardResult = Layer4.Forward(layer3ForwardResult);

            // The final stage gets its true gradient from the task loss
            Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, datasetX.Label);

            // Update the fourth stage
            Layer4.Backward(layer4ForwardResult);
            layer4ForwardResult[0].ParentFunc = null;

            totalLoss += sumLoss;
            totalLossCount++;

            // Train DNI3 against the true gradient that Layer4's backward produced
            Real DNI3loss = new MeanSquaredError().Evaluate(DNI3Result, new NdArray(layer3ForwardResult[0].Grad, DNI3Result[0].Shape, DNI3Result[0].BatchCount));

            Layer4.Update();

            DNI3.Backward(DNI3Result);
            DNI3.Update();

            DNI3totalLoss += DNI3loss;
            DNI3totalLossCount++;

            Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

            // Result output
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);
            Console.WriteLine("\nDNI1 total loss " + DNI1totalLoss / DNI1totalLossCount);
            Console.WriteLine("DNI2 total loss " + DNI2totalLoss / DNI2totalLossCount);
            Console.WriteLine("DNI3 total loss " + DNI3totalLoss / DNI3totalLossCount);
            Console.WriteLine("\nDNI1 local loss " + DNI1loss);
            Console.WriteLine("DNI2 local loss " + DNI2loss);
            Console.WriteLine("DNI3 local loss " + DNI3loss);

            // Test the accuracy every 20 batches
            if (i % 20 == 0)
            {
                Console.WriteLine("\nTesting...");

                // Get data randomly from the test set
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT);

                // Run the test on the stacked end-to-end network
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
// Trains a LeNet-style CNN (two conv/pool stages, two FC layers with dropout)
// on MNIST with Adam, logging per-batch loss and timing and periodic accuracy.
public static void Run()
{
    Stopwatch sw = new Stopwatch();

    //Prepare MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();

    //Writing the network configuration in FunctionStack
    FunctionStack nn = new FunctionStack(
        new Convolution2D(1, 32, 5, pad: 2, name: "l1 Conv2D", gpuEnable: true),
        new ReLU(name: "l1 ReLU"),
        //new AveragePooling (2, 2, name: "l1 AVGPooling"),
        new MaxPooling(2, 2, name: "l1 MaxPooling", gpuEnable: true),
        new Convolution2D(32, 64, 5, pad: 2, name: "l2 Conv2D", gpuEnable: true),
        new ReLU(name: "l2 ReLU"),
        //new AveragePooling (2, 2, name: "l2 AVGPooling"),
        new MaxPooling(2, 2, name: "l2 MaxPooling", gpuEnable: true),
        new Linear(13 * 13 * 64, 1024, name: "l3 Linear", gpuEnable: true),
        new ReLU(name: "l3 ReLU"),
        new Dropout(name: "l3 DropOut"),
        new Linear(1024, 10, name: "l4 Linear", gpuEnable: true)
    );

    //Declare optimizer
    nn.SetOptimizer(new Adam());

    Console.WriteLine("Training Start...");

    //Three generations learning
    for (int epoch = 1; epoch < 3; epoch++)
    {
        Console.WriteLine("epoch " + epoch);

        //Total error in the whole epoch
        Real totalLoss = 0;
        long totalLossCount = 0;

        //How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            sw.Restart();

            Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

            //Get data randomly from training data
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT);

            //Execute batch learning in parallel
            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());
            totalLoss += sumLoss;
            totalLossCount++;

            //Result output
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);

            sw.Stop();
            // FIX: added the missing space after "time" in the log message.
            Console.WriteLine("time " + sw.Elapsed.TotalMilliseconds);

            //Test the accuracy if you move the batch 20 times
            if (i % 20 == 0)
            {
                Console.WriteLine("\nTesting...");

                //Get data randomly from test data
                TestDataSet datasetY = mnistData.GetRandomYSet(TEACH_DATA_COUNT);

                //Run test
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
// Width of each hidden layer. It also works at 1000 (as in the reference link)
// but is slow on the CPU, so 30 is used here.
const int N = 30;

/// <summary>
/// Trains a 15-layer fully-connected MNIST classifier where every hidden Linear layer
/// is followed by BatchNormalization and ReLU, optimized with AdaGrad for three epochs.
/// BatchNormalization is what makes a network this deep trainable here — see the
/// commented-out configuration below, which does not learn.
/// </summary>
public static void Run() { //Prepare MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();
    Console.WriteLine("Training Start...");
    // Write the network configuration into a FunctionStack:
    // 14 hidden blocks of Linear(N→N) + BatchNorm + ReLU, then a Linear(N→10) output.
    FunctionStack nn = new FunctionStack(
        new Linear(28 * 28, N, name: "l1 Linear"), //L1
        new BatchNormalization(N, name: "l1 BatchNorm"),
        new ReLU(name: "l1 ReLU"),
        new Linear(N, N, name: "l2 Linear"), //L2
        new BatchNormalization(N, name: "l2 BatchNorm"),
        new ReLU(name: "l2 ReLU"),
        new Linear(N, N, name: "l3 Linear"), //L3
        new BatchNormalization(N, name: "l3 BatchNorm"),
        new ReLU(name: "l3 ReLU"),
        new Linear(N, N, name: "l4 Linear"), //L4
        new BatchNormalization(N, name: "l4 BatchNorm"),
        new ReLU(name: "l4 ReLU"),
        new Linear(N, N, name: "l5 Linear"), //L5
        new BatchNormalization(N, name: "l5 BatchNorm"),
        new ReLU(name: "l5 ReLU"),
        new Linear(N, N, name: "l6 Linear"), //L6
        new BatchNormalization(N, name: "l6 BatchNorm"),
        new ReLU(name: "l6 ReLU"),
        new Linear(N, N, name: "l7 Linear"), //L7
        new BatchNormalization(N, name: "l7 BatchNorm"),
        new ReLU(name: "l7 ReLU"),
        new Linear(N, N, name: "l8 Linear"), //L8
        new BatchNormalization(N, name: "l8 BatchNorm"),
        new ReLU(name: "l8 ReLU"),
        new Linear(N, N, name: "l9 Linear"), //L9
        new BatchNormalization(N, name: "l9 BatchNorm"),
        new ReLU(name: "l9 ReLU"),
        new Linear(N, N, name: "l10 Linear"), //L10
        new BatchNormalization(N, name: "l10 BatchNorm"),
        new ReLU(name: "l10 ReLU"),
        new Linear(N, N, name: "l11 Linear"), //L11
        new BatchNormalization(N, name: "l11 BatchNorm"),
        new ReLU(name: "l11 ReLU"),
        new Linear(N, N, name: "l12 Linear"), //L12
        new BatchNormalization(N, name: "l12 BatchNorm"),
        new ReLU(name: "l12 ReLU"),
        new Linear(N, N, name: "l13 Linear"), //L13
        new BatchNormalization(N, name: "l13 BatchNorm"),
        new ReLU(name: "l13 ReLU"),
        new Linear(N, N, name: "l14 Linear"), //L14
        new BatchNormalization(N, name: "l14 BatchNorm"),
        new ReLU(name: "l14 ReLU"),
        new Linear(N, 10, name: "l15 Linear") //L15
    );
    // With this configuration (no BatchNormalization) learning does not progress:
    //FunctionStack nn = new FunctionStack(
    //    new Linear(28 * 28, N),  // L1
    //    new ReLU(),
    //    new Linear(N, N),        // L2
    //    new ReLU(),
    //    ... (layers 3-13 omitted) ...
    //    new Linear(N, N),        // L14
    //    new ReLU(),
    //    new Linear(N, 10)        // L15
    //);
    // Declare optimizer
    nn.SetOptimizer(new AdaGrad());
    // Three epochs of learning
    for (int epoch = 0; epoch < 3; epoch++) {
        Console.WriteLine("epoch " + (epoch + 1));
        // Running totals for the mean loss over the epoch so far
        Real totalLoss = 0;
        long totalLossCounter = 0;
        // How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++) {
            // Get data randomly from training data
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT);
            // Perform learning
            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());
            totalLoss += sumLoss;
            totalLossCounter++;
            // Test the accuracy every 20 batches
            if (i % 20 == 0) {
                // Result output
                Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
                Console.WriteLine("total loss " + totalLoss / totalLossCounter);
                Console.WriteLine("local loss " + sumLoss);
                Console.WriteLine("");
                Console.WriteLine("Testing...");
                // Get data randomly from test data
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT);
                // Run test
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
/// <summary>
/// Trains a 4-layer MNIST classifier with conditioned Decoupled Neural Interfaces (cDNI):
/// each of the first three layers is updated from a gradient predicted by its own small
/// "synthetic gradient" network (cDNI1-3, each conditioned on the label, hence the 256+10
/// input width) instead of waiting for true backpropagation. Only the final layer is
/// trained from the real SoftmaxCrossEntropy gradient. The cDNI networks themselves are
/// trained by regressing (MeanSquaredError) onto the true gradient that becomes available
/// one step later — the interleaving of Forward/Backward/Update below is order-critical.
/// </summary>
public static void Run() {
    //Prepare MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();
    Console.WriteLine("Training Start...");
    //Writing the network configuration in FunctionStack
    FunctionStack Layer1 = new FunctionStack(
        new Linear(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );
    FunctionStack Layer2 = new FunctionStack(
        new Linear(256, 256, name: "l2 Linear"),
        new BatchNormalization(256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );
    FunctionStack Layer3 = new FunctionStack(
        new Linear(256, 256, name: "l3 Linear"),
        new BatchNormalization(256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );
    FunctionStack Layer4 = new FunctionStack(
        new Linear(256, 10, name: "l4 Linear")
    );
    //FunctionStack itself is also stacked as Function; nn is only used for testing below —
    //training updates go through the individual Layer1..Layer4 stacks.
    FunctionStack nn = new FunctionStack ( Layer1, Layer2, Layer3, Layer4 );
    // Synthetic-gradient predictors: input is the layer activation concatenated with the
    // one-hot label (256 + 10); output is a predicted 256-wide gradient.
    FunctionStack cDNI1 = new FunctionStack(
        new Linear(256 + 10, 1024, name: "cDNI1 Linear1"),
        new BatchNormalization(1024, name: "cDNI1 Nrom1"),
        new ReLU(name: "cDNI1 ReLU1"),
        // initialW of all zeros: the cDNI starts by predicting zero gradient.
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );
    FunctionStack cDNI2 = new FunctionStack(
        new Linear(256 + 10, 1024, name: "cDNI2 Linear1"),
        new BatchNormalization(1024, name: "cDNI2 Nrom1"),
        new ReLU(name: "cDNI2 ReLU1"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
    );
    FunctionStack cDNI3 = new FunctionStack(
        new Linear(256 + 10, 1024, name: "cDNI3 Linear1"),
        new BatchNormalization(1024, name: "cDNI3 Nrom1"),
        new ReLU(name: "cDNI3 ReLU1"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
    );
    //Declare optimizer (each stack gets its own Adam instance)
    Layer1.SetOptimizer(new Adam(0.00003f));
    Layer2.SetOptimizer(new Adam(0.00003f));
    Layer3.SetOptimizer(new Adam(0.00003f));
    Layer4.SetOptimizer(new Adam(0.00003f));
    cDNI1.SetOptimizer(new Adam(0.00003f));
    cDNI2.SetOptimizer(new Adam(0.00003f));
    cDNI3.SetOptimizer(new Adam(0.00003f));
    for (int epoch = 0; epoch < 10; epoch++) {
        Console.WriteLine("epoch " + (epoch + 1));
        //Running loss totals for the model and for each cDNI regressor
        Real totalLoss = 0;
        Real cDNI1totalLoss = 0;
        Real cDNI2totalLoss = 0;
        Real cDNI3totalLoss = 0;
        long totalLossCount = 0;
        long cDNI1totalLossCount = 0;
        long cDNI2totalLossCount = 0;
        long cDNI3totalLossCount = 0;
        //How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++) {
            //Get data randomly from training data
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT);
            //Run first tier
            NdArray[] layer1ForwardResult = Layer1.Forward(datasetX.Data);
            ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label);
            //Predict the gradient of the first layer from its activation + label
            NdArray[] cDNI1Result = cDNI1.Forward(layer1ResultDataSet.GetTrainData());
            //Inject the predicted gradient into the layer-1 output
            layer1ForwardResult[0].Grad = cDNI1Result[0].Data.ToArray();
            //Update first layer immediately — no need to wait for true backprop
            Layer1.Backward(layer1ForwardResult);
            layer1ForwardResult[0].ParentFunc = null; // detach so the graph can be released
            Layer1.Update();
            //Run Layer 2
            NdArray[] layer2ForwardResult = Layer2.Forward(layer1ResultDataSet.Result);
            ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label);
            //Predict the gradient of the second layer
            NdArray[] cDNI2Result = cDNI2.Forward(layer2ResultDataSet.GetTrainData());
            //Inject the predicted gradient into the layer-2 output
            layer2ForwardResult[0].Grad = cDNI2Result[0].Data.ToArray();
            //Update 2nd tier. Backward through Layer2 also produces the TRUE gradient at
            //layer 1's output (layer1ResultDataSet.Result[0].Grad), which is the training
            //target for cDNI1 below — this is why the MSE must be evaluated here.
            Layer2.Backward(layer2ForwardResult);
            layer2ForwardResult[0].ParentFunc = null;
            //Train cDNI1: regress its prediction onto the true layer-1 gradient
            Real cDNI1loss = new MeanSquaredError().Evaluate(cDNI1Result, new NdArray(layer1ResultDataSet.Result[0].Grad, cDNI1Result[0].Shape, cDNI1Result[0].BatchCount));
            Layer2.Update();
            cDNI1.Backward(cDNI1Result);
            cDNI1.Update();
            cDNI1totalLoss += cDNI1loss;
            cDNI1totalLossCount++;
            //Run Third Tier (same pattern as layer 2)
            NdArray[] layer3ForwardResult = Layer3.Forward(layer2ResultDataSet.Result);
            ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label);
            //Predict the gradient of the third layer
            NdArray[] cDNI3Result = cDNI3.Forward(layer3ResultDataSet.GetTrainData());
            //Inject the predicted gradient into the layer-3 output
            layer3ForwardResult[0].Grad = cDNI3Result[0].Data.ToArray();
            //Update third layer; Backward also yields the true layer-2 gradient for cDNI2
            Layer3.Backward(layer3ForwardResult);
            layer3ForwardResult[0].ParentFunc = null;
            //Train cDNI2 against the true layer-2 gradient
            Real cDNI2loss = new MeanSquaredError().Evaluate(cDNI2Result, new NdArray(layer2ResultDataSet.Result[0].Grad, cDNI2Result[0].Shape, cDNI2Result[0].BatchCount));
            Layer3.Update();
            cDNI2.Backward(cDNI2Result);
            cDNI2.Update();
            cDNI2totalLoss += cDNI2loss;
            cDNI2totalLossCount++;
            //Run Layer 4 — the only layer trained on the real task loss
            NdArray[] layer4ForwardResult = Layer4.Forward(layer3ResultDataSet.Result);
            Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label);
            //Update fourth layer; Backward yields the true layer-3 gradient for cDNI3
            Layer4.Backward(layer4ForwardResult);
            layer4ForwardResult[0].ParentFunc = null;
            totalLoss += sumLoss;
            totalLossCount++;
            //Train cDNI3 against the true layer-3 gradient
            Real cDNI3loss = new MeanSquaredError().Evaluate(cDNI3Result, new NdArray(layer3ResultDataSet.Result[0].Grad, cDNI3Result[0].Shape, cDNI3Result[0].BatchCount));
            Layer4.Update();
            cDNI3.Backward(cDNI3Result);
            cDNI3.Update();
            cDNI3totalLoss += cDNI3loss;
            cDNI3totalLossCount++;
            Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
            //Result output
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);
            Console.WriteLine("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount);
            Console.WriteLine("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount);
            Console.WriteLine("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount);
            Console.WriteLine("\ncDNI1 local loss " + cDNI1loss);
            Console.WriteLine("cDNI2 local loss " + cDNI2loss);
            Console.WriteLine("cDNI3 local loss " + cDNI3loss);
            //Test the accuracy every 20 batches (uses the composite nn stack)
            if (i % 20 == 0) {
                Console.WriteLine("\nTesting...");
                //Get data randomly from test data
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT);
                //Run test
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
// Width of each hidden layer. It also works at 1000 (as in the reference link)
// but is slow on the CPU, so 30 is used here.
const int N = 30;

/// <summary>
/// Trains the "Test7" 15-layer fully-connected MNIST classifier (Linear + BatchNorm + ReLU
/// per hidden block) with AdaGrad for three epochs, logging via RILogManager, then saves
/// the trained network to "Test7.nn" and logs its description.
/// </summary>
public static void Run() {
    RILogManager.Default?.SendDebug("MNIST Data Loading...");
    MnistData mnistData = new MnistData(28);
    RILogManager.Default?.SendDebug("Training Start...");
    //Writing the network configuration in FunctionStack:
    //14 hidden blocks of Linear(N→N) + BatchNorm + ReLU, then Linear(N→10) output.
    FunctionStack nn = new FunctionStack("Test7",
        new Linear(true, 28 * 28, N, name: "l1 Linear"), // L1
        new BatchNormalization(true, N, name: "l1 BatchNorm"),
        new ReLU(name: "l1 ReLU"),
        new Linear(true, N, N, name: "l2 Linear"), // L2
        new BatchNormalization(true, N, name: "l2 BatchNorm"),
        new ReLU(name: "l2 ReLU"),
        new Linear(true, N, N, name: "l3 Linear"), // L3
        new BatchNormalization(true, N, name: "l3 BatchNorm"),
        new ReLU(name: "l3 ReLU"),
        new Linear(true, N, N, name: "l4 Linear"), // L4
        new BatchNormalization(true, N, name: "l4 BatchNorm"),
        new ReLU(name: "l4 ReLU"),
        new Linear(true, N, N, name: "l5 Linear"), // L5
        new BatchNormalization(true, N, name: "l5 BatchNorm"),
        new ReLU(name: "l5 ReLU"),
        new Linear(true, N, N, name: "l6 Linear"), // L6
        new BatchNormalization(true, N, name: "l6 BatchNorm"),
        new ReLU(name: "l6 ReLU"),
        new Linear(true, N, N, name: "l7 Linear"), // L7
        new BatchNormalization(true, N, name: "l7 BatchNorm"),
        new ReLU(name: "l7 ReLU"),
        new Linear(true, N, N, name: "l8 Linear"), // L8
        new BatchNormalization(true, N, name: "l8 BatchNorm"),
        new ReLU(name: "l8 ReLU"),
        new Linear(true, N, N, name: "l9 Linear"), // L9
        new BatchNormalization(true, N, name: "l9 BatchNorm"),
        new ReLU(name: "l9 ReLU"),
        new Linear(true, N, N, name: "l10 Linear"), // L10
        new BatchNormalization(true, N, name: "l10 BatchNorm"),
        new ReLU(name: "l10 ReLU"),
        new Linear(true, N, N, name: "l11 Linear"), // L11
        new BatchNormalization(true, N, name: "l11 BatchNorm"),
        new ReLU(name: "l11 ReLU"),
        new Linear(true, N, N, name: "l12 Linear"), // L12
        new BatchNormalization(true, N, name: "l12 BatchNorm"),
        new ReLU(name: "l12 ReLU"),
        new Linear(true, N, N, name: "l13 Linear"), // L13
        new BatchNormalization(true, N, name: "l13 BatchNorm"),
        new ReLU(name: "l13 ReLU"),
        new Linear(true, N, N, name: "l14 Linear"), // L14
        new BatchNormalization(true, N, name: "l14 BatchNorm"),
        new ReLU(name: "l14 ReLU"),
        new Linear(true, N, 10, name: "l15 Linear") // L15
    );
    nn.SetOptimizer(new AdaGrad());
    for (int epoch = 0; epoch < 3; epoch++) {
        // Running totals for the mean loss over the epoch so far
        Real totalLoss = 0;
        long totalLossCounter = 0;
        //Run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++) {
            RILogManager.Default?.SendDebug("epoch " + (epoch + 1) + " of 3, Batch " + i + " of " + TRAIN_DATA_COUNT);
            //Get data randomly from training data
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);
            //Learn
            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());
            totalLoss += sumLoss;
            totalLossCounter++;
            // Report progress and test the accuracy every 20 batches
            if (i % 20 == 0) {
                RILogManager.Default?.SendDebug("batch count " + i + "/" + TRAIN_DATA_COUNT);
                RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCounter);
                RILogManager.Default?.SendDebug("local loss " + sumLoss);
                RILogManager.Default?.SendDebug("Testing random data...");
                //Get data randomly from test data
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);
                //Run the test
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                RILogManager.Default?.SendDebug("Test Accuracy: " + accuracy);
            }
        }
    }
    // Persist the trained model and log its structure
    ModelIO.Save(nn, "Test7.nn");
    RILogManager.Default?.SendDebug(nn.Describe());
}
/// <summary>
/// "Test12": trains a 4-layer MNIST classifier with conditioned Decoupled Neural
/// Interfaces (cDNI). Layers 1-3 are each updated from a gradient predicted by a small
/// per-layer synthetic-gradient network (cDNI1-3, conditioned on the label — hence the
/// 256+10 input width); only Layer4 trains on the true SoftmaxCrossEntropy gradient.
/// Each cDNI is trained by MSE-regression onto the true gradient produced one step later
/// by the next layer's Backward — the Forward/Backward/Update interleaving below is
/// order-critical. Logging goes through RILogManager.
/// </summary>
public static void Run() {
    // Prepare MNIST data
    RILogManager.Default?.SendDebug("MNIST Data Loading...");
    MnistData mnistData = new MnistData(28);
    RILogManager.Default?.SendDebug("Training Start...");
    // Write the network configuration in FunctionStack
    FunctionStack Layer1 = new FunctionStack("Test12 Layer 1",
        new Linear(true, 28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(true, 256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );
    FunctionStack Layer2 = new FunctionStack("Test12 Layer 2",
        new Linear(true, 256, 256, name: "l2 Linear"),
        new BatchNormalization(true, 256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );
    FunctionStack Layer3 = new FunctionStack("Test12 Layer 3",
        new Linear(true, 256, 256, name: "l3 Linear"),
        new BatchNormalization(true, 256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );
    FunctionStack Layer4 = new FunctionStack("Test12 Layer 4",
        new Linear(true, 256, 10, name: "l4 Linear")
    );
    // Function stack itself is also stacked as Function; nn is used only for testing —
    // training updates go through the individual Layer1..Layer4 stacks.
    FunctionStack nn = new FunctionStack ("Test12", Layer1, Layer2, Layer3, Layer4 );
    // Synthetic-gradient predictors: activation (256) + one-hot label (10) in,
    // predicted 256-wide gradient out. Zero initialW → they start predicting zero.
    FunctionStack cDNI1 = new FunctionStack("Test12 DNI 1",
        new Linear(true, 256 + 10, 1024, name: "cDNI1 Linear1"),
        new BatchNormalization(true, 1024, name: "cDNI1 Norm1"),
        new ReLU(name: "cDNI1 ReLU1"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );
    FunctionStack cDNI2 = new FunctionStack("Test12 DNI 2",
        new Linear(true, 256 + 10, 1024, name: "cDNI2 Linear1"),
        new BatchNormalization(true, 1024, name: "cDNI2 Norm1"),
        new ReLU(name: "cDNI2 ReLU1"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
    );
    FunctionStack cDNI3 = new FunctionStack("Test12 DNI 3",
        new Linear(true, 256 + 10, 1024, name: "cDNI3 Linear1"),
        new BatchNormalization(true, 1024, name: "cDNI3 Norm1"),
        new ReLU(name: "cDNI3 ReLU1"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
    );
    // Each stack gets its own Adam instance
    Layer1.SetOptimizer(new Adam("Adam", 0.00003f));
    Layer2.SetOptimizer(new Adam("Adam", 0.00003f));
    Layer3.SetOptimizer(new Adam("Adam", 0.00003f));
    Layer4.SetOptimizer(new Adam("Adam", 0.00003f));
    cDNI1.SetOptimizer(new Adam("Adam", 0.00003f));
    cDNI2.SetOptimizer(new Adam("Adam", 0.00003f));
    cDNI3.SetOptimizer(new Adam("Adam", 0.00003f));
    // Describe each function stack;
    RILogManager.Default?.SendDebug(Layer1.Describe());
    RILogManager.Default?.SendDebug(Layer2.Describe());
    RILogManager.Default?.SendDebug(Layer3.Describe());
    RILogManager.Default?.SendDebug(Layer4.Describe());
    RILogManager.Default?.SendDebug(cDNI1.Describe());
    RILogManager.Default?.SendDebug(cDNI2.Describe());
    RILogManager.Default?.SendDebug(cDNI3.Describe());
    for (int epoch = 0; epoch < 10; epoch++) {
        // Total error in the whole epoch, for the model and each cDNI regressor
        Real totalLoss = 0;
        Real cDNI1totalLoss = 0;
        Real cDNI2totalLoss = 0;
        Real cDNI3totalLoss = 0;
        long totalLossCount = 0;
        long cDNI1totalLossCount = 0;
        long cDNI2totalLossCount = 0;
        long cDNI3totalLossCount = 0;
        // how many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++) {
            RILogManager.Default?.SendDebug("epoch: " + (epoch + 1) + " of 10, batch iteration: " + i + " of " + TRAIN_DATA_COUNT);
            RILogManager.Default?.ViewerSendWatch("Epoch", epoch + 1);
            RILogManager.Default?.ViewerSendWatch("Batch Iteration", i);
            // Get data randomly from the training data
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);
            // Run first tier
            NdArray[] layer1ForwardResult = Layer1.Forward(true, datasetX.Data);
            ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label);
            // Predict the gradient of the first layer from its activation + label
            NdArray[] cDNI1Result = cDNI1.Forward(true, layer1ResultDataSet.GetTrainData());
            // Inject the predicted gradient into the layer-1 output
            layer1ForwardResult[0].Grad = cDNI1Result[0].Data.ToArray();
            //Update first layer immediately — no need to wait for true backprop
            Layer1.Backward(true, layer1ForwardResult);
            layer1ForwardResult[0].ParentFunc = null; // detach so the graph can be released
            Layer1.Update();
            // Run Layer 2
            NdArray[] layer2ForwardResult = Layer2.Forward(true, layer1ResultDataSet.Result);
            ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label);
            // Predict the gradient of the second layer
            NdArray[] cDNI2Result = cDNI2.Forward(true, layer2ResultDataSet.GetTrainData());
            // Inject the predicted gradient into the layer-2 output
            layer2ForwardResult[0].Grad = cDNI2Result[0].Data.ToArray();
            //Update layer 2. Backward also produces the TRUE layer-1 gradient
            //(layer1ResultDataSet.Result[0].Grad) — the training target for cDNI1 below.
            Layer2.Backward(true, layer2ForwardResult);
            layer2ForwardResult[0].ParentFunc = null;
            //Train cDNI1: regress its prediction onto the true layer-1 gradient
            Real cDNI1loss = new MeanSquaredError().Evaluate(cDNI1Result, new NdArray(layer1ResultDataSet.Result[0].Grad, cDNI1Result[0].Shape, cDNI1Result[0].BatchCount));
            Layer2.Update();
            cDNI1.Backward(true, cDNI1Result);
            cDNI1.Update();
            cDNI1totalLoss += cDNI1loss;
            cDNI1totalLossCount++;
            //Run Third Tier (same pattern as layer 2)
            NdArray[] layer3ForwardResult = Layer3.Forward(true, layer2ResultDataSet.Result);
            ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label);
            //Predict the gradient of the third layer
            NdArray[] cDNI3Result = cDNI3.Forward(true, layer3ResultDataSet.GetTrainData());
            //Inject the predicted gradient into the layer-3 output
            layer3ForwardResult[0].Grad = cDNI3Result[0].Data.ToArray();
            //Update third layer; Backward yields the true layer-2 gradient for cDNI2
            Layer3.Backward(true, layer3ForwardResult);
            layer3ForwardResult[0].ParentFunc = null;
            //Train cDNI2 against the true layer-2 gradient
            Real cDNI2loss = new MeanSquaredError().Evaluate(cDNI2Result, new NdArray(layer2ResultDataSet.Result[0].Grad, cDNI2Result[0].Shape, cDNI2Result[0].BatchCount));
            Layer3.Update();
            cDNI2.Backward(true, cDNI2Result);
            cDNI2.Update();
            cDNI2totalLoss += cDNI2loss;
            cDNI2totalLossCount++;
            // Run Layer 4 — the only layer trained on the real task loss
            NdArray[] layer4ForwardResult = Layer4.Forward(true, layer3ResultDataSet.Result);
            Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label);
            // Backward yields the true layer-3 gradient for cDNI3
            Layer4.Backward(true, layer4ForwardResult);
            layer4ForwardResult[0].ParentFunc = null;
            totalLoss += sumLoss;
            totalLossCount++;
            // Train cDNI3 against the true layer-3 gradient
            Real cDNI3loss = new MeanSquaredError().Evaluate(cDNI3Result, new NdArray(layer3ResultDataSet.Result[0].Grad, cDNI3Result[0].Shape, cDNI3Result[0].BatchCount));
            Layer4.Update();
            cDNI3.Backward(true, cDNI3Result);
            cDNI3.Update();
            cDNI3totalLoss += cDNI3loss;
            cDNI3totalLossCount++;
            RILogManager.Default?.SendDebug("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
            RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount);
            RILogManager.Default?.SendDebug("local loss " + sumLoss);
            RILogManager.Default?.SendDebug("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount);
            RILogManager.Default?.SendDebug("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount);
            RILogManager.Default?.SendDebug("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount);
            RILogManager.Default?.SendDebug("\ncDNI1 local loss " + cDNI1loss);
            RILogManager.Default?.SendDebug("cDNI2 local loss " + cDNI2loss);
            RILogManager.Default?.SendDebug("cDNI3 local loss " + cDNI3loss);
            // Test the accuracy every 20 batches (uses the composite nn stack)
            if (i % 20 == 0) {
                RILogManager.Default?.SendDebug("\nTesting...");
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                RILogManager.Default?.SendDebug("accuracy " + accuracy);
            }
        }
    }
}
/// <summary>
/// "Test20": builds a deep MLP in a SortedFunctionStack, trains it on MNIST with AdaGrad
/// for three epochs while tracking loss/accuracy statistics (Math.NET RunningStatistics
/// and Histograms), then saves the model to test20.nn and reports the best results.
/// </summary>
public static void Run()
{
    int neuronCount = 28;
    RILogManager.Default?.SendDebug("MNIST Data Loading...");
    MnistData mnistData = new MnistData(neuronCount);

    RILogManager.Default?.SendInformation("Training Start, creating function stack.");
    SortedFunctionStack nn = new SortedFunctionStack();
    SortedList<Function> functions = new SortedList<Function>();

    ParallelOptions po = new ParallelOptions();
    po.MaxDegreeOfParallelism = 4;

    // NOTE(review): these hidden layers are added to 'functions' but 'functions' is never
    // added to 'nn' — as written, only the output Linear below is trained. Confirm whether
    // the hidden layers were meant to be appended to the network.
    for (int x = 0; x < numLayers; x++)
    {
        Application.DoEvents();
        functions.Add(new Linear(true, neuronCount * neuronCount, N, name: $"l{x} Linear"));
        functions.Add(new BatchNormalization(true, N, name: $"l{x} BatchNorm"));
        functions.Add(new ReLU(name: $"l{x} ReLU"));
        RILogManager.Default?.ViewerSendWatch("Total Layers", (x + 1));
    }

    RILogManager.Default?.SendInformation("Adding Output Layer");
    Application.DoEvents();
    nn.Add(new Linear(true, N, 10, noBias: false, name: $"l{numLayers + 1} Linear"));
    RILogManager.Default?.ViewerSendWatch("Total Layers", numLayers);

    RILogManager.Default?.SendInformation("Setting Optimizer to AdaGrad");
    nn.SetOptimizer(new AdaGrad());
    Application.DoEvents();

    RunningStatistics stats = new RunningStatistics();
    Histogram lossHistogram = new Histogram();
    Histogram accuracyHistogram = new Histogram();
    Real totalLoss = 0;
    long totalLossCounter = 0;
    Real highestAccuracy = 0;
    // Fixed: these were initialized to 0, so no positive loss could ever become the
    // "best" (smallest) — the reported best losses were always 0.
    Real bestLocalLoss = double.MaxValue;
    Real bestTotalLoss = double.MaxValue;

    for (int epoch = 0; epoch < 3; epoch++)
    {
        RILogManager.Default?.SendDebug("epoch " + (epoch + 1));
        RILogManager.Default?.SendInformation("epoch " + (epoch + 1));
        RILogManager.Default?.ViewerSendWatch("epoch", (epoch + 1));
        Application.DoEvents();

        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            Application.DoEvents();

            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, neuronCount, neuronCount);
            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());
            totalLoss += sumLoss;
            totalLossCounter++;

            stats.Push(sumLoss);
            lossHistogram.AddBucket(new Bucket(-10, 10));
            accuracyHistogram.AddBucket(new Bucket(-10.0, 10));

            // Track the smallest (best) local and running-mean losses, ignoring NaN batches.
            if (sumLoss < bestLocalLoss && !double.IsNaN(sumLoss)) { bestLocalLoss = sumLoss; }
            if (stats.Mean < bestTotalLoss && !double.IsNaN(sumLoss)) { bestTotalLoss = stats.Mean; }

            // Histogram rejects data outside its buckets; best-effort only.
            try { lossHistogram.AddData(sumLoss); }
            catch (Exception) { }

            if (i % 20 == 0)
            {
                RILogManager.Default?.ViewerSendWatch("Batch Count ", i);
                RILogManager.Default?.ViewerSendWatch("Total/Mean loss", stats.Mean);
                RILogManager.Default?.ViewerSendWatch("Local loss", sumLoss);
                // Fixed: was '", epoch " + epoch + 1', which string-concatenates the 1
                // (e.g. "epoch 21" for epoch 2) instead of adding it.
                RILogManager.Default?.SendInformation("Batch Count " + i + "/" + TRAIN_DATA_COUNT + ", epoch " + (epoch + 1));
                RILogManager.Default?.SendInformation("Total/Mean loss " + stats.Mean);
                RILogManager.Default?.SendInformation("Local loss " + sumLoss);
                Application.DoEvents();

                RILogManager.Default?.SendDebug("Testing...");
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);
                Real accuracy = Trainer.Accuracy(nn, datasetY?.Data, datasetY.Label);
                if (accuracy > highestAccuracy) { highestAccuracy = accuracy; }

                RILogManager.Default?.SendDebug("Accuracy: " + accuracy);
                RILogManager.Default?.ViewerSendWatch("Best Accuracy: ", highestAccuracy);
                RILogManager.Default?.ViewerSendWatch("Best Total Loss ", bestTotalLoss);
                RILogManager.Default?.ViewerSendWatch("Best Local Loss ", bestLocalLoss);
                Application.DoEvents();

                try { accuracyHistogram.AddData(accuracy); }
                catch (Exception) { }
            }
        }
    }

    ModelIO.Save(nn, Application.StartupPath + "\\test20.nn");
    RILogManager.Default?.SendDebug("Best Accuracy: " + highestAccuracy);
    RILogManager.Default?.SendDebug("Best Total Loss " + bestTotalLoss);
    RILogManager.Default?.SendDebug("Best Local Loss " + bestLocalLoss);
    RILogManager.Default?.ViewerSendWatch("Best Accuracy: ", highestAccuracy);
    RILogManager.Default?.ViewerSendWatch("Best Total Loss ", bestTotalLoss);
    RILogManager.Default?.ViewerSendWatch("Best Local Loss ", bestLocalLoss);
}
// Width of each hidden layer. It also works at 1000 (as in the reference link),
// but that is slow on the CPU, so 30 is used here.
const int N = 30;

/// <summary>
/// Trains a 15-layer fully-connected MNIST classifier where every hidden Linear layer is
/// followed by BatchNormalization and ReLU, optimized with AdaGrad for three epochs.
/// BatchNormalization is what makes this depth trainable — see the commented-out
/// configuration below, which does not learn.
/// </summary>
public static void Run() {
    // Prepare the MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();
    Console.WriteLine("Training Start...");
    // Write the network configuration into a FunctionStack
    FunctionStack nn = new FunctionStack(
        new Linear(28 * 28, N, name: "l1 Linear"), // L1
        new BatchNormalization(N, name: "l1 BatchNorm"),
        new ReLU(name: "l1 ReLU"),
        new Linear(N, N, name: "l2 Linear"), // L2
        new BatchNormalization(N, name: "l2 BatchNorm"),
        new ReLU(name: "l2 ReLU"),
        new Linear(N, N, name: "l3 Linear"), // L3
        new BatchNormalization(N, name: "l3 BatchNorm"),
        new ReLU(name: "l3 ReLU"),
        new Linear(N, N, name: "l4 Linear"), // L4
        new BatchNormalization(N, name: "l4 BatchNorm"),
        new ReLU(name: "l4 ReLU"),
        new Linear(N, N, name: "l5 Linear"), // L5
        new BatchNormalization(N, name: "l5 BatchNorm"),
        new ReLU(name: "l5 ReLU"),
        new Linear(N, N, name: "l6 Linear"), // L6
        new BatchNormalization(N, name: "l6 BatchNorm"),
        new ReLU(name: "l6 ReLU"),
        new Linear(N, N, name: "l7 Linear"), // L7
        new BatchNormalization(N, name: "l7 BatchNorm"),
        new ReLU(name: "l7 ReLU"),
        new Linear(N, N, name: "l8 Linear"), // L8
        new BatchNormalization(N, name: "l8 BatchNorm"),
        new ReLU(name: "l8 ReLU"),
        new Linear(N, N, name: "l9 Linear"), // L9
        new BatchNormalization(N, name: "l9 BatchNorm"),
        new ReLU(name: "l9 ReLU"),
        new Linear(N, N, name: "l10 Linear"), // L10
        new BatchNormalization(N, name: "l10 BatchNorm"),
        new ReLU(name: "l10 ReLU"),
        new Linear(N, N, name: "l11 Linear"), // L11
        new BatchNormalization(N, name: "l11 BatchNorm"),
        new ReLU(name: "l11 ReLU"),
        new Linear(N, N, name: "l12 Linear"), // L12
        new BatchNormalization(N, name: "l12 BatchNorm"),
        new ReLU(name: "l12 ReLU"),
        new Linear(N, N, name: "l13 Linear"), // L13
        new BatchNormalization(N, name: "l13 BatchNorm"),
        new ReLU(name: "l13 ReLU"),
        new Linear(N, N, name: "l14 Linear"), // L14
        new BatchNormalization(N, name: "l14 BatchNorm"),
        new ReLU(name: "l14 ReLU"),
        new Linear(N, 10, name: "l15 Linear") // L15
    );
    // With this configuration (no BatchNormalization) learning does not progress:
    //FunctionStack nn = new FunctionStack(
    //    new Linear(28 * 28, N), // L1
    //    new ReLU(),
    //    new Linear(N, N), // L2
    //    new ReLU(),
    //
    //    (layers 3-13 omitted)
    //
    //    new Linear(N, N), // L14
    //    new ReLU(),
    //    new Linear(N, 10) // L15
    //);
    // Declare the optimizer
    nn.SetOptimizer(new AdaGrad());
    // Three epochs of learning
    for (int epoch = 0; epoch < 3; epoch++) {
        Console.WriteLine("epoch " + (epoch + 1));
        // Aggregate the error over the whole epoch
        //List<Real> totalLoss = new List<Real>();
        Real totalLoss = 0;
        long totalLossCounter = 0;
        // How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++) {
            // Get data randomly from the training data
            TestDataSet datasetX = mnistData.Train.GetRandomDataSet(BATCH_DATA_COUNT);
            // Run the learning step
            Real sumLoss = Trainer.Train(nn, datasetX, new SoftmaxCrossEntropy());
            totalLoss += sumLoss;
            totalLossCounter++;
            // Test the accuracy after every 20 batches
            if (i % 20 == 0) {
                // Result output
                Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
                Console.WriteLine("total loss " + totalLoss / totalLossCounter);
                Console.WriteLine("local loss " + sumLoss);
                Console.WriteLine("");
                Console.WriteLine("Testing...");
                // Get data randomly from the test data
                TestDataSet datasetY = mnistData.Eval.GetRandomDataSet(TEST_DATA_COUNT);
                // Run the test
                Real accuracy = Trainer.Accuracy(nn, datasetY);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
/// <summary>
/// Trains a small CNN on MNIST (Train/Eval dataset API variant) and reports per-batch
/// loss, timing, and periodic test accuracy. Topology: Conv(1→32,5,pad2) → ReLU →
/// MaxPool2 → Conv(32→64,5,pad2) → ReLU → MaxPool2 → Linear(7*7*64→1024) → ReLU →
/// Dropout → Linear(1024→10), optimized with Adam. (Comments translated from Japanese.)
/// </summary>
public static void Run()
{
    Stopwatch sw = new Stopwatch();

    // Prepare the MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();

    // Write the network configuration into a FunctionStack
    FunctionStack nn = new FunctionStack(
        new Convolution2D(1, 32, 5, pad: 2, name: "l1 Conv2D", gpuEnable: true),
        new ReLU(name: "l1 ReLU"),
        //new AveragePooling(2, 2, name: "l1 AVGPooling"),
        new MaxPooling2D(2, 2, name: "l1 MaxPooling", gpuEnable: true),
        new Convolution2D(32, 64, 5, pad: 2, name: "l2 Conv2D", gpuEnable: true),
        new ReLU(name: "l2 ReLU"),
        //new AveragePooling(2, 2, name: "l2 AVGPooling"),
        new MaxPooling2D(2, 2, name: "l2 MaxPooling", gpuEnable: true),
        new Linear(7 * 7 * 64, 1024, name: "l3 Linear", gpuEnable: true),
        new ReLU(name: "l3 ReLU"),
        new Dropout(name: "l3 DropOut"),
        new Linear(1024, 10, name: "l4 Linear", gpuEnable: true)
    );

    // Declare the optimizer
    nn.SetOptimizer(new Adam());

    Console.WriteLine("Training Start...");

    // NOTE(review): starting at 1 with "epoch < 3" runs only TWO epochs, although the
    // original comment says three generations — confirm the intended epoch count.
    for (int epoch = 1; epoch < 3; epoch++)
    {
        Console.WriteLine("epoch " + epoch);

        // Running totals used to report the mean loss over the epoch so far
        Real totalLoss = 0;
        long totalLossCount = 0;

        // How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            sw.Restart();

            Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

            // Get data randomly from the training data
            TestDataSet datasetX = mnistData.Train.GetRandomDataSet(BATCH_DATA_COUNT);

            // Execute batch learning in parallel
            Real sumLoss = Trainer.Train(nn, datasetX, new SoftmaxCrossEntropy());
            totalLoss += sumLoss;
            totalLossCount++;

            // Result output
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);

            sw.Stop();
            // Fixed log message: was "time" with no separator, producing e.g. "time123.4".
            Console.WriteLine("time " + sw.Elapsed.TotalMilliseconds);

            // Test the accuracy after every 20 batches
            if (i % 20 == 0)
            {
                Console.WriteLine("\nTesting...");

                // Get data randomly from the test data
                TestDataSet datasetY = mnistData.Eval.GetRandomDataSet(TEACH_DATA_COUNT);

                // Run the test
                Real accuracy = Trainer.Accuracy(nn, datasetY);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
// Scale factor for the L2 weight-decay loss.
const Real L2_SCALE = 1e-4f;

/// <summary>
/// Trains a sparse 3-layer MLP (784→300→100→10, MaskedLinear) on MNIST using the RigL
/// sparse-training procedure: Erdos-Renyi mask initialization, MomentumSGD with a
/// piecewise-constant learning-rate schedule, L2 weight decay, and periodic RigL mask
/// updates (drop/grow) via SparseRigLOptimizer. Accuracy is evaluated on the full test
/// set at the end of each epoch.
/// </summary>
public static void Run()
{
    // Prepare the MNIST data
    Console.WriteLine("MNIST data loading...");
    MnistData<Real> mnistData = new MnistData<Real>();

    // Fetch the entire test set once for evaluation
    TestDataSet<Real> datasetY = mnistData.Eval.GetAllDataSet();

    Console.WriteLine("\nNetwork initializing...");

    // Batches per epoch, e.g. 600 = 60000 / 100.
    // (Was duplicated as 'batchPerEpoch' with the identical value; consolidated.)
    int numBatches = mnistData.Train.Length / BATCH_SIZE;

    // Learning-rate drop points, expressed in iterations
    int[] boundaries = { LR_DROP_EPOCH * numBatches, (LR_DROP_EPOCH + 20) * numBatches };

    // Per-layer target sparsity overrides (layer3 is kept dense)
    Dictionary<string, Real> customSparsities = new Dictionary<string, Real>
    {
        { "layer2", END_SPARSITY * SPARSITY_SCALE },
        { "layer3", END_SPARSITY * 0 }
    };

    MaskedLinear<Real> layer1 = new MaskedLinear<Real>(28 * 28, 300, name: "layer1", gpuEnable: true);
    MaskedLinear<Real> layer2 = new MaskedLinear<Real>(300, 100, name: "layer2", gpuEnable: true);
    MaskedLinear<Real> layer3 = new MaskedLinear<Real>(100, 10, name: "layer3", gpuEnable: true);

    // Write the network configuration into a FunctionStack
    FunctionStack<Real> nn = new FunctionStack<Real>(
        layer1,
        new ReLU<Real>(name: "l1 ReLU"),
        layer2,
        new ReLU<Real>(name: "l2 ReLU"),
        layer3
    );

    SoftmaxCrossEntropy<Real> sce = new SoftmaxCrossEntropy<Real>();

    // L2 regularization over all three weight matrices
    WeightDecay<Real> weightDecay = new WeightDecay<Real>(L2_SCALE);
    weightDecay.AddParameters(layer1.Weight, layer2.Weight, layer3.Weight);

    MomentumSGD<Real> mSGD = new MomentumSGD<Real>(LEARNING_RATE);
    mSGD.SetUp(nn);

    // RigL wrapper: drives both the inner optimizer step and the periodic mask updates
    var opt = new SparseRigLOptimizer(mSGD, MASKUPDATE_BEGIN_STEP, MASKUPDATE_END_STEP, MASKUPDATE_FREQUENCY, DROP_FRACTION, "cosine", "zeros", RIGL_ACC_SCALE);

    NdArray<Real>[] allMasks = { layer1.Mask, layer2.Mask, layer3.Mask, };
    string[] layerNames = { layer1.Name, layer2.Name, layer3.Name, };
    NdArray<Real>[] allWeights = { layer1.Weight, layer2.Weight, layer3.Weight, };

    // Initialize the masks (Erdos-Renyi distribution of the sparsity budget)
    SparseUtils.MaskInit(allMasks, layerNames, "erdos_renyi", END_SPARSITY, customSparsities);
    Console.WriteLine("[Global sparsity] " + SparseUtils.CalculateSparsity(allMasks));
    var weightSparsity = GetWeightSparsity(allMasks);
    Console.WriteLine("[Sparsity] Layer0, Layer1 : " + weightSparsity[0] + ", " + weightSparsity[1]);

    Console.WriteLine("\nTraining Start...");

    for (int i = 0; i < NUM_EPOCHS * numBatches; i++)
    {
        // Get data randomly from the training data
        TestDataSet<Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_SIZE);

        // Forward / loss / backward for one batch
        NdArray<Real> y = nn.Forward(datasetX.Data)[0];
        Real loss = sce.Evaluate(y, datasetX.Label);
        nn.Backward(y);

        weightDecay.Update();

        // Piecewise-constant LR schedule, then the RigL step (inner optimizer update
        // plus a mask drop/grow when the schedule says so)
        opt._optimizer.LearningRate = PiecewiseConstant(opt._optimizer.UpdateCount, boundaries, LEARNING_RATE);
        opt.condMaskUpdate(allMasks, allWeights);

        // Test accuracy on the last batch of each epoch
        // (parenthesized for clarity; '%' already binds tighter than '+')
        if ((i % numBatches) + 1 == numBatches)
        {
            Console.WriteLine("\nEpoch:" + Math.Floor((i + 1) / (Real)numBatches) + " Iteration:" + (i + 1) + " Testing... ");

            // Run the test over the full evaluation set
            Real accuracy = Trainer.Accuracy(nn, datasetY, new SoftmaxCrossEntropy<Real>(), out loss);
            Console.WriteLine("loss: " + loss);
            Console.WriteLine("accuracy: " + accuracy);
        }
    }
}
// MNIST accuracy tester
/// <summary>
/// Trains a deep (15-layer) batch-normalized MLP on MNIST for three epochs and
/// streams progress to a ReflectInsight viewer. Sets <c>Passed</c> whenever the
/// sampled test accuracy reaches <paramref name="accuracyThreshold"/>.
/// </summary>
/// <param name="accuracyThreshold">Accuracy in [0, 1] required for the run to count as passed.</param>
public static void Run(double accuracyThreshold = .9979D)
{
    MnistData mnistData = new MnistData(28);
    Real maxAccuracy = 0;

    // Width of each hidden layer. The reference implementation runs at 1000,
    // but that is slow on the CPU, so a smaller width is used here.
    const int N = 30;

    ReflectInsight ri = new ReflectInsight("Test21");
    ri.Enabled = true;
    RILogManager.Add("Test21", "Test21");
    RILogManager.SetDefault("Test21");

    FunctionStack nn = new FunctionStack("Test7",
        new Linear(true, 28 * 28, N, name: "l1 Linear"), // L1
        new BatchNormalization(true, N, name: "l1 BatchNorm"),
        new ReLU(name: "l1 ReLU"),
        new Linear(true, N, N, name: "l2 Linear"), // L2
        new BatchNormalization(true, N, name: "l2 BatchNorm"),
        new ReLU(name: "l2 ReLU"),
        new Linear(true, N, N, name: "l3 Linear"), // L3
        new BatchNormalization(true, N, name: "l3 BatchNorm"),
        new ReLU(name: "l3 ReLU"),
        new Linear(true, N, N, name: "l4 Linear"), // L4
        new BatchNormalization(true, N, name: "l4 BatchNorm"),
        new ReLU(name: "l4 ReLU"),
        new Linear(true, N, N, name: "l5 Linear"), // L5
        new BatchNormalization(true, N, name: "l5 BatchNorm"),
        new ReLU(name: "l5 ReLU"),
        new Linear(true, N, N, name: "l6 Linear"), // L6
        new BatchNormalization(true, N, name: "l6 BatchNorm"),
        new ReLU(name: "l6 ReLU"),
        new Linear(true, N, N, name: "l7 Linear"), // L7
        new BatchNormalization(true, N, name: "l7 BatchNorm"),
        new ReLU(name: "l7 ReLU"),
        new Linear(true, N, N, name: "l8 Linear"), // L8
        new BatchNormalization(true, N, name: "l8 BatchNorm"),
        new ReLU(name: "l8 ReLU"),
        new Linear(true, N, N, name: "l9 Linear"), // L9
        new BatchNormalization(true, N, name: "l9 BatchNorm"),
        new ReLU(name: "l9 ReLU"),
        new Linear(true, N, N, name: "l10 Linear"), // L10
        new BatchNormalization(true, N, name: "l10 BatchNorm"),
        new ReLU(name: "l10 ReLU"),
        new Linear(true, N, N, name: "l11 Linear"), // L11
        new BatchNormalization(true, N, name: "l11 BatchNorm"),
        new ReLU(name: "l11 ReLU"),
        new Linear(true, N, N, name: "l12 Linear"), // L12
        new BatchNormalization(true, N, name: "l12 BatchNorm"),
        new ReLU(name: "l12 ReLU"),
        new Linear(true, N, N, name: "l13 Linear"), // L13
        new BatchNormalization(true, N, name: "l13 BatchNorm"),
        new ReLU(name: "l13 ReLU"),
        new Linear(true, N, N, name: "l14 Linear"), // L14
        new BatchNormalization(true, N, name: "l14 BatchNorm"),
        new ReLU(name: "l14 ReLU"),
        new Linear(true, N, 10, name: "l15 Linear") // L15
    );

    // Adam hyper-parameters (0.0005 reached ~97.5%; 0.001 and 0.00146 were also tried).
    double alpha = 0.001;
    double beta1 = 0.9D;
    double beta2 = 0.999D;
    double epsilon = 1e-8;
    nn.SetOptimizer(new Adam("Adam21", alpha, beta1, beta2, epsilon));

    Stopwatch sw = new Stopwatch();
    sw.Start();

    for (int epoch = 0; epoch < 3; epoch++)
    {
        Real totalLoss = 0;
        long totalLossCount = 0;

        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);
            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());

            // BUG FIX: accumulate the loss. The original assigned
            // "totalLoss = sumLoss", which made the "Total Loss" watch
            // (totalLoss / totalLossCount) shrink toward zero instead of
            // showing the running average.
            totalLoss += sumLoss;
            totalLossCount++;

            // Sample test accuracy every 20 batches.
            if (i % 20 == 0)
            {
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label, false);
                if (accuracy > maxAccuracy)
                {
                    maxAccuracy = accuracy;
                }
                Passed = (accuracy >= accuracyThreshold);

                // Pause the stopwatch so viewer I/O is excluded from the timing.
                sw.Stop();
                ri.ViewerSendWatch("Iteration", "epoch " + (epoch + 1) + " of 3, batch " + i + " of " + TRAIN_DATA_COUNT);
                ri.ViewerSendWatch("Max Accuracy", maxAccuracy * 100 + "%");
                ri.ViewerSendWatch("Current Accuracy", accuracy * 100 + "%");
                ri.ViewerSendWatch("Total Loss ", totalLoss / totalLossCount);
                ri.ViewerSendWatch("Elapsed Time", Helpers.FormatTimeSpan(sw.Elapsed));
                ri.ViewerSendWatch("Accuracy Threshold", Passed ? "Passed" : "Not Passed");
                sw.Start();
            }
        }

        sw.Stop();
        ri.SendInformation("Total Processing Time: " + Helpers.FormatTimeSpan(sw.Elapsed));
    }
}
/// <summary>
/// Trains a 4-layer MNIST classifier using Decoupled Neural Interfaces
/// (synthetic gradients): each of the first three layers is updated from the
/// output of a companion DNI network instead of waiting for true backprop,
/// and each DNI network is in turn trained (MSE) against the true gradient
/// once it becomes available from the layer above.
/// NOTE(review): statement order here is load-bearing — each LayerN.Update()
/// is deliberately deferred until after the corresponding DNI loss has been
/// evaluated on the gradients produced by that layer's Backward.
/// </summary>
public static void Run()
{
    // Prepare MNIST data
    RILogManager.Default?.SendDebug("MNIST Data Loading...");
    MnistData mnistData = new MnistData(28);

    RILogManager.Default?.SendDebug("Training Start...");

    // Write the network configuration in FunctionStack.
    // The four layer stacks are kept separate so each can be updated
    // independently with its own synthetic gradient.
    FunctionStack Layer1 = new FunctionStack("Test11 Layer 1",
        new Linear(true, 28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(true, 256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );

    FunctionStack Layer2 = new FunctionStack("Test11 Layer 2",
        new Linear(true, 256, 256, name: "l2 Linear"),
        new BatchNormalization(true, 256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );

    FunctionStack Layer3 = new FunctionStack("Test11 Layer 3",
        new Linear(true, 256, 256, name: "l3 Linear"),
        new BatchNormalization(true, 256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );

    FunctionStack Layer4 = new FunctionStack("Test11 Layer 4",
        new Linear(true, 256, 10, name: "l4 Linear")
    );

    // Function stack itself is also stacked as Function; this combined stack
    // is used only for testing accuracy at the bottom of the batch loop.
    FunctionStack nn = new FunctionStack("Test11",
        Layer1,
        Layer2,
        Layer3,
        Layer4
    );

    // DNI networks: 256 -> 1024 -> 1024 -> 256 MLPs that predict the gradient
    // of their layer's output. The final Linear starts from an all-zero
    // initialW so the initial synthetic gradients are zero.
    FunctionStack DNI1 = new FunctionStack("Test11 DNI1",
        new Linear(true, 256, 1024, name: "DNI1 Linear1"),
        new BatchNormalization(true, 1024, name: "DNI1 Norm1"),
        new ReLU(name: "DNI1 ReLU1"),
        new Linear(true, 1024, 1024, name: "DNI1 Linear2"),
        new BatchNormalization(true, 1024, name: "DNI1 Norm2"),
        new ReLU(name: "DNI1 ReLU2"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );

    FunctionStack DNI2 = new FunctionStack("Test11 DNI2",
        new Linear(true, 256, 1024, name: "DNI2 Linear1"),
        new BatchNormalization(true, 1024, name: "DNI2 Norm1"),
        new ReLU(name: "DNI2 ReLU1"),
        new Linear(true, 1024, 1024, name: "DNI2 Linear2"),
        new BatchNormalization(true, 1024, name: "DNI2 Norm2"),
        new ReLU(name: "DNI2 ReLU2"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3")
    );

    FunctionStack DNI3 = new FunctionStack("Test11 DNI3",
        new Linear(true, 256, 1024, name: "DNI3 Linear1"),
        new BatchNormalization(true, 1024, name: "DNI3 Norm1"),
        new ReLU(name: "DNI3 ReLU1"),
        new Linear(true, 1024, 1024, name: "DNI3 Linear2"),
        new BatchNormalization(true, 1024, name: "DNI3 Norm2"),
        new ReLU(name: "DNI3 ReLU2"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3")
    );

    // Optimizers: each stack gets its own Adam instance.
    Layer1.SetOptimizer(new Adam());
    Layer2.SetOptimizer(new Adam());
    Layer3.SetOptimizer(new Adam());
    Layer4.SetOptimizer(new Adam());

    DNI1.SetOptimizer(new Adam());
    DNI2.SetOptimizer(new Adam());
    DNI3.SetOptimizer(new Adam());

    // Twenty epochs of learning.
    for (int epoch = 0; epoch < 20; epoch++)
    {
        RILogManager.Default?.SendDebug("epoch " + (epoch + 1));

        // Running loss accumulators for the classifier and each DNI network.
        Real totalLoss = 0;
        Real DNI1totalLoss = 0;
        Real DNI2totalLoss = 0;
        Real DNI3totalLoss = 0;
        long totalLossCount = 0;
        long DNI1totalLossCount = 0;
        long DNI2totalLossCount = 0;
        long DNI3totalLossCount = 0;

        // How many times to run the batch.
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            // Get data randomly from the training data.
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);

            // Run first tier.
            NdArray[] layer1ForwardResult = Layer1.Forward(true, datasetX.Data);

            // Obtain the synthetic gradient for the first layer from DNI1.
            NdArray[] DNI1Result = DNI1.Forward(true, layer1ForwardResult);

            // Apply the synthetic gradient: the DNI output *data* is used as
            // the layer output's gradient.
            layer1ForwardResult[0].Grad = DNI1Result[0].Data.ToArray();

            // Update first layer.
            Layer1.Backward(true, layer1ForwardResult);
            layer1ForwardResult[0].ParentFunc = null; // Backward was executed, so cut off the calculation graph
            Layer1.Update();

            // Run Layer 2 on Layer 1's (already updated) output.
            NdArray[] layer2ForwardResult = Layer2.Forward(true, layer1ForwardResult);

            // Obtain the synthetic gradient for the second layer from DNI2.
            NdArray[] DNI2Result = DNI2.Forward(true, layer2ForwardResult);

            // Apply the synthetic gradient of the second layer.
            layer2ForwardResult[0].Grad = DNI2Result[0].Data.ToArray();

            // Update layer 2.
            Layer2.Backward(true, layer2ForwardResult);
            layer2ForwardResult[0].ParentFunc = null;

            // Learn DNI for the first tier: Layer2's Backward has just written
            // the *true* gradient into layer1ForwardResult[0].Grad, so DNI1's
            // prediction is regressed against it with MSE.
            Real DNI1loss = new MeanSquaredError().Evaluate(DNI1Result, new NdArray(layer1ForwardResult[0].Grad, DNI1Result[0].Shape, DNI1Result[0].BatchCount));

            Layer2.Update();

            DNI1.Backward(true, DNI1Result);
            DNI1.Update();

            DNI1totalLoss += DNI1loss;
            DNI1totalLossCount++;

            // Run layer 3.
            NdArray[] layer3ForwardResult = Layer3.Forward(true, layer2ForwardResult);

            // Obtain the synthetic gradient for the third layer from DNI3.
            NdArray[] DNI3Result = DNI3.Forward(true, layer3ForwardResult);

            // Apply the synthetic gradient of the third layer.
            layer3ForwardResult[0].Grad = DNI3Result[0].Data.ToArray();

            // Update layer 3.
            Layer3.Backward(true, layer3ForwardResult);
            layer3ForwardResult[0].ParentFunc = null;

            // Run DNI learning for layer 2 against the true gradient that
            // Layer3.Backward just produced.
            Real DNI2loss = new MeanSquaredError().Evaluate(DNI2Result, new NdArray(layer2ForwardResult[0].Grad, DNI2Result[0].Shape, DNI2Result[0].BatchCount));

            Layer3.Update();

            DNI2.Backward(true, DNI2Result);
            DNI2.Update();

            DNI2totalLoss += DNI2loss;
            DNI2totalLossCount++;

            // Run layer 4 — the output layer uses the real loss, not a DNI.
            NdArray[] layer4ForwardResult = Layer4.Forward(true, layer3ForwardResult);

            // Obtain the classification loss (also seeds the true gradients).
            Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, datasetX.Label);

            // Update fourth layer.
            Layer4.Backward(true, layer4ForwardResult);
            layer4ForwardResult[0].ParentFunc = null;

            totalLoss += sumLoss;
            totalLossCount++;

            // Run DNI learning for layer 3 against the true gradient that
            // Layer4.Backward just produced.
            Real DNI3loss = new MeanSquaredError().Evaluate(DNI3Result, new NdArray(layer3ForwardResult[0].Grad, DNI3Result[0].Shape, DNI3Result[0].BatchCount));

            Layer4.Update();

            DNI3.Backward(true, DNI3Result);
            DNI3.Update();

            DNI3totalLoss += DNI3loss;
            DNI3totalLossCount++;

            RILogManager.Default?.SendDebug("batch count " + i + "/" + TRAIN_DATA_COUNT);
            RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount);
            RILogManager.Default?.SendDebug("local loss " + sumLoss);

            RILogManager.Default?.SendDebug("DNI1 total loss " + DNI1totalLoss / DNI1totalLossCount);
            RILogManager.Default?.SendDebug("DNI2 total loss " + DNI2totalLoss / DNI2totalLossCount);
            RILogManager.Default?.SendDebug("DNI3 total loss " + DNI3totalLoss / DNI3totalLossCount);

            RILogManager.Default?.SendDebug("DNI1 local loss " + DNI1loss);
            RILogManager.Default?.SendDebug("DNI2 local loss " + DNI2loss);
            RILogManager.Default?.SendDebug("DNI3 local loss " + DNI3loss);

            // Test the accuracy if you move the batch 20 times.
            if (i % 20 == 0)
            {
                RILogManager.Default?.SendDebug("Testing...");

                // Get data randomly from test data.
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);

                // Run test on the combined stack.
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                RILogManager.Default?.SendDebug("accuracy " + accuracy);
            }
        }
    }
}