/// <summary>
/// Trains a Linear / LSTM / Linear stack on mini-batches cut from the series
/// produced by <c>DataMaker</c>, logging the loss through <c>RILogManager</c>,
/// and finally calls <c>predict</c> on one freshly drawn sequence.
/// </summary>
public static void Run()
{
    // Build the full training series from the configured cycle settings.
    DataMaker maker = new DataMaker(STEPS_PER_CYCLE, NUMBER_OF_CYCLES);
    NdArray series = maker.Make();

    // The network is described layer by layer inside a FunctionStack.
    FunctionStack network = new FunctionStack("Test8",
        new Linear(true, 1, 5, name: "Linear l1"),
        new LSTM(true, 5, 5, name: "LSTM l2"),
        new Linear(true, 5, 1, name: "Linear l3")
    );

    network.SetOptimizer(new Adam());

    RILogManager.Default?.SendDebug("Training...");

    for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++)
    {
        // Draw a new mini-batch of sequences for every epoch.
        NdArray[] batch = maker.MakeMiniBatch(series, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

        Real loss = ComputeLoss(network, batch);

        // Apply the update, then reset the network state before the next batch.
        network.Update();
        network.ResetState();

        // Report every DISPLAY_EPOCH epochs, but never for epoch 0.
        bool isReportEpoch = epoch != 0 && epoch % DISPLAY_EPOCH == 0;
        if (isReportEpoch)
        {
            RILogManager.Default?.SendDebug("[{0}]training loss:\t{1}", epoch, loss);
        }
    }

    RILogManager.Default?.SendDebug("Testing...");

    NdArray[] testBatch = maker.MakeMiniBatch(series, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

    // Index of the sequence (within the test mini-batch) used for the prediction run.
    const int sampleIndex = 45;
    predict(testBatch[sampleIndex], network, PREDICTION_LENGTH);
}
/// <summary>
/// Trains a small convolutional network on random CIFAR batches, logging the
/// running loss and per-batch time through <c>RILogManager</c>, and measures
/// accuracy on a random evaluation set every 20 batches.
/// </summary>
public static void Run()
{
    Stopwatch timer = new Stopwatch();

    RILogManager.Default?.SendDebug("CIFAR Data Loading...");
    CifarData cifar = new CifarData();

    // Two Conv/ReLU/MaxPooling/Dropout stages followed by two Linear layers.
    FunctionStack network = new FunctionStack("Test18",
        new Convolution2D(true, 3, 32, 3, name: "l1 Conv2D", gpuEnable: true),
        new ReLU(name: "l1 ReLU"),
        new MaxPooling(2, name: "l1 MaxPooling", gpuEnable: false),
        new Dropout(0.25, name: "l1 DropOut"),
        new Convolution2D(true, 32, 64, 3, name: "l2 Conv2D", gpuEnable: false),
        new ReLU(name: "l2 ReLU"),
        new MaxPooling(2, 2, name: "l2 MaxPooling", gpuEnable: false),
        new Dropout(0.25, name: "l2 DropOut"),
        new Linear(true, 13 * 13 * 64, 512, name: "l3 Linear", gpuEnable: false),
        new ReLU(name: "l3 ReLU"),
        new Dropout(name: "l3 DropOut"),
        new Linear(true, 512, 10, name: "l4 Linear", gpuEnable: false)
    );

    network.SetOptimizer(new AdaDelta());

    RILogManager.Default?.SendDebug("Training Start...");

    // Runs epochs 1 and 2.
    for (int epoch = 1; epoch < 3; epoch++)
    {
        RILogManager.Default?.SendDebug("epoch " + epoch);

        // Running sum of batch losses and the number of batches seen this epoch.
        Real totalLoss = 0;
        long totalLossCount = 0;

        for (int batch = 1; batch <= TRAIN_DATA_COUNT; batch++)
        {
            timer.Restart();

            RILogManager.Default?.SendDebug("\nbatch count " + batch + "/" + TRAIN_DATA_COUNT);

            // Random training batch.
            TestData.TestDataSet datasetX = cifar.GetRandomXSet(BATCH_DATA_COUNT);

            Real sumLoss = Trainer.Train(network, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());
            totalLoss += sumLoss;
            totalLossCount++;

            RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount);
            RILogManager.Default?.SendDebug("local loss " + sumLoss);

            timer.Stop();
            RILogManager.Default?.SendDebug("time " + timer.Elapsed.TotalMilliseconds);

            // Accuracy is only measured on every 20th batch.
            if (batch % 20 != 0)
            {
                continue;
            }

            RILogManager.Default?.SendDebug("\nTesting...");

            // Random evaluation batch.
            TestDataSet datasetY = cifar.GetRandomYSet(TEACH_DATA_COUNT);

            Real accuracy = Trainer.Accuracy(network, datasetY.Data, datasetY.Label);
            RILogManager.Default?.SendDebug("accuracy " + accuracy);
        }
    }
}
/// <summary>
/// Trains a two-layer perceptron (Linear / Sigmoid / Linear) on random MNIST
/// batches for three epochs and prints loss and accuracy every 20 batches.
/// </summary>
/// <remarks>
/// The per-epoch loss is accumulated with <c>+=</c>. The previous code assigned
/// the last batch loss to the accumulator, so the printed "total loss" was
/// the last loss divided by the batch count instead of the running mean.
/// </remarks>
public static void Run()
{
    // Prepare the MNIST data.
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();

    Console.WriteLine("Training Start...");

    // The network configuration is written out inside a FunctionStack.
    FunctionStack nn = new FunctionStack(
        new Linear(28 * 28, 1024, name: "l1 Linear"),
        new Sigmoid(name: "l1 Sigmoid"),
        new Linear(1024, 10, name: "l2 Linear")
    );

    // Declare the optimizer.
    nn.SetOptimizer(new MomentumSGD());

    // Train for three epochs.
    for (int epoch = 0; epoch < 3; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Running sum of batch losses and number of batches in this epoch.
        Real totalLoss = 0;
        long totalLossCount = 0;

        // Number of batches to run per epoch.
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            // Draw a random batch from the training data.
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT);

            // Run one batch of training; the return value is the batch loss.
            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());

            // Accumulate (not overwrite) so totalLoss / totalLossCount is the mean loss.
            totalLoss += sumLoss;
            totalLossCount++;

            // Report and test the accuracy after every 20 batches.
            if (i % 20 == 0)
            {
                Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

                // Output the results.
                Console.WriteLine("total loss " + totalLoss / totalLossCount);
                Console.WriteLine("local loss " + sumLoss);

                Console.WriteLine("\nTesting...");

                // Draw a random batch from the test data.
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT);

                // Run the test.
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
/// <summary>
/// Trains a two-layer perceptron (Linear / Sigmoid / Linear) on random MNIST
/// batches for three epochs and prints loss and accuracy every 20 batches.
/// </summary>
/// <remarks>
/// The per-epoch loss is accumulated with <c>+=</c>. The previous code assigned
/// the last batch loss to the accumulator, so the printed "total loss" was
/// the last loss divided by the batch count instead of the running mean.
/// </remarks>
public static void Run()
{
    // Prepare MNIST data.
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();

    Console.WriteLine("Training Start...");

    // The network configuration is written out inside a FunctionStack.
    FunctionStack nn = new FunctionStack(
        new Linear(28 * 28, 1024, name: "l1 Linear"),
        new Sigmoid(name: "l1 Sigmoid"),
        new Linear(1024, 10, name: "l2 Linear")
    );

    // Declare the optimizer.
    nn.SetOptimizer(new MomentumSGD());

    // Train for three epochs.
    for (int epoch = 0; epoch < 3; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Running sum of batch losses and number of batches in this epoch.
        Real totalLoss = 0;
        long totalLossCount = 0;

        // Number of batches to run per epoch.
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            // Get data randomly from the training data.
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT);

            // Run one batch of training; the return value is the batch loss.
            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());

            // Accumulate (not overwrite) so totalLoss / totalLossCount is the mean loss.
            totalLoss += sumLoss;
            totalLossCount++;

            // Report and test the accuracy after every 20 batches.
            if (i % 20 == 0)
            {
                Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

                // Result output.
                Console.WriteLine("total loss " + totalLoss / totalLossCount);
                Console.WriteLine("local loss " + sumLoss);

                Console.WriteLine("\nTesting...");

                // Get data randomly from the test data.
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT);

                // Run the test.
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
/// <summary>
/// Learns XOR with a Linear / ReLU / Linear stack. Gradients of all four
/// samples are gathered before each optimizer update, and the predictions
/// for the training inputs are printed at the end.
/// </summary>
public static void Run()
{
    // Number of training iterations.
    const int iterations = 10000;

    // XOR inputs.
    Real[][] inputs =
    {
        new Real[] { 0, 0 },
        new Real[] { 1, 0 },
        new Real[] { 0, 1 },
        new Real[] { 1, 1 }
    };

    // Expected XOR output for each input above.
    Real[][] labels =
    {
        new Real[] { 0 },
        new Real[] { 1 },
        new Real[] { 1 },
        new Real[] { 0 }
    };

    // The network configuration is written out inside a FunctionStack.
    FunctionStack network = new FunctionStack(
        new Linear(2, 2, name: "l1 Linear"),
        new ReLU(name: "l1 ReLU"),
        new Linear(2, 1, name: "l2 Linear")
    );

    // Adam is the optimizer used here.
    network.SetOptimizer(new Adam());

    Console.WriteLine("Training...");

    for (int step = 0; step < iterations; step++)
    {
        // Train on every sample with MeanSquaredError, passing false as the last
        // argument so that no update happens per sample.
        for (int sample = 0; sample < inputs.Length; sample++)
        {
            Trainer.Train(network, inputs[sample], labels[sample], new MeanSquaredError(), false);
        }

        // A single update after all samples treats them as one mini-batch.
        network.Update();
    }

    // Show the training results.
    Console.WriteLine("Test Start...");

    foreach (Real[] input in inputs)
    {
        NdArray output = network.Predict(input)[0];

        // Threshold the raw output at 0.5 to obtain the predicted bit.
        int bit = output.Data[0] > 0.5 ? 1 : 0;

        Console.WriteLine(input[0] + " xor " + input[1] + " = " + bit + " " + output);
    }
}
/// <summary>
/// Learns XOR with a Linear / ReLU / Linear stack. Gradients of all four
/// samples are gathered before each optimizer update, and the predictions
/// for the training inputs are printed at the end.
/// </summary>
public static void Run()
{
    // Number of training iterations.
    const int iterations = 10000;

    // Training data (XOR inputs).
    Real[][] inputs =
    {
        new Real[] { 0, 0 },
        new Real[] { 1, 0 },
        new Real[] { 0, 1 },
        new Real[] { 1, 1 }
    };

    // Training labels (expected XOR output for each input above).
    Real[][] labels =
    {
        new Real[] { 0 },
        new Real[] { 1 },
        new Real[] { 1 },
        new Real[] { 0 }
    };

    // The network configuration is written out inside a FunctionStack.
    FunctionStack network = new FunctionStack(
        new Linear(2, 2, name: "l1 Linear"),
        new ReLU(name: "l1 ReLU"),
        new Linear(2, 1, name: "l2 Linear")
    );

    // Declare the optimizer (Adam this time).
    network.SetOptimizer(new Adam());

    // Training loop.
    Console.WriteLine("Training...");

    for (int step = 0; step < iterations; step++)
    {
        // MeanSquaredError is the loss function this time; false is passed as the
        // last argument so that no update happens per sample.
        for (int sample = 0; sample < inputs.Length; sample++)
        {
            Trainer.Train(network, inputs[sample], labels[sample], new MeanSquaredError(), false);
        }

        // Not updating after every sample lets the update act as a mini-batch update.
        network.Update();
    }

    // Show the training results.
    Console.WriteLine("Test Start...");

    foreach (Real[] input in inputs)
    {
        NdArray output = network.Predict(input)[0];

        // Threshold the raw output at 0.5 to obtain the predicted bit.
        int bit = output.Data[0] > 0.5 ? 1 : 0;

        Console.WriteLine(input[0] + " xor " + input[1] + " = " + bit + " " + output);
    }
}
/// <summary>
/// Fits one period of a sine wave with a Linear / TanhActivation / Linear
/// stack trained by SGD(0.1), printing the mean loss ten times during
/// training and the prediction for every training input afterwards.
/// </summary>
public static void Run()
{
    Real[][] inputs = new Real[N][];
    Real[][] targets = new Real[N][];

    // Prepare one period of a sine wave: N evenly spaced angles in [-PI, PI].
    for (int k = 0; k < N; k++)
    {
        Real radian = -Math.PI + Math.PI * 2.0 * k / (N - 1);
        inputs[k] = new Real[] { radian };
        targets[k] = new Real[] { Math.Sin(radian) };
    }

    // The network configuration is written out inside a FunctionStack.
    FunctionStack network = new FunctionStack(
        new Linear(1, 4, name: "l1 Linear"),
        new TanhActivation(name: "l1 Tanh"),
        new Linear(4, 1, name: "l2 Linear")
    );

    // Declare the optimizer.
    network.SetOptimizer(new SGD(0.1));

    // The loss is reported once every tenth of the total epoch count.
    int reportInterval = EPOCH / 10;

    // Training loop.
    for (int epoch = 0; epoch < EPOCH; epoch++)
    {
        // Accumulates the loss over the whole epoch.
        Real loss = 0;

        for (int sample = 0; sample < N; sample++)
        {
            // Training returns the error of the sample as its return value.
            loss += Trainer.Train(network, inputs[sample], targets[sample], new MeanSquaredError());
        }

        if (epoch % reportInterval == 0)
        {
            Console.WriteLine("loss:" + loss / N);
            Console.WriteLine("");
        }
    }

    // Show the training results.
    Console.WriteLine("Test Start...");

    foreach (Real[] input in inputs)
    {
        Console.WriteLine(input[0] + ":" + network.Predict(input)[0].Data[0]);
    }
}
/// <summary>
/// Fits one period of a sine wave with a Linear / Tanh / Linear stack
/// trained by SGD, printing the mean loss ten times during training and the
/// prediction for every training input afterwards.
/// </summary>
public static void Run()
{
    Real[][] inputs = new Real[N][];
    Real[][] targets = new Real[N][];

    // Prepare one period of a sine wave: N evenly spaced angles in [-PI, PI].
    for (int k = 0; k < N; k++)
    {
        Real radian = -Math.PI + Math.PI * 2.0 * k / (N - 1);
        inputs[k] = new Real[] { radian };
        targets[k] = new Real[] { Math.Sin(radian) };
    }

    // The network configuration is written out inside a FunctionStack.
    FunctionStack network = new FunctionStack(
        new Linear(1, 4, name: "l1 Linear"),
        new Tanh(name: "l1 Tanh"),
        new Linear(4, 1, name: "l2 Linear")
    );

    // Declare the optimizer.
    network.SetOptimizer(new SGD());

    // The loss is reported once every tenth of the total epoch count.
    int reportInterval = EPOCH / 10;

    // Training loop.
    for (int epoch = 0; epoch < EPOCH; epoch++)
    {
        // Accumulates the loss over the whole epoch.
        Real loss = 0;

        for (int sample = 0; sample < N; sample++)
        {
            // Training returns the error of the sample as its return value.
            loss += Trainer.Train(network, inputs[sample], targets[sample], new MeanSquaredError());
        }

        if (epoch % reportInterval == 0)
        {
            Console.WriteLine("loss:" + loss / N);
            Console.WriteLine("");
        }
    }

    // Show the training results.
    Console.WriteLine("Test Start...");

    foreach (Real[] input in inputs)
    {
        Console.WriteLine(input[0] + ":" + network.Predict(input)[0].Data[0]);
    }
}
/// <summary>
/// Trains a two-layer perceptron (Linear / Sigmoid / Linear) on random MNIST
/// batches for three epochs and logs loss and accuracy every 20 batches
/// through <c>RILogManager</c>.
/// </summary>
/// <remarks>
/// Two defects of the previous version are fixed here: the per-epoch loss is
/// now accumulated with <c>+=</c> (it used to be overwritten with the last
/// batch loss, so the logged "total loss" was not a mean), and the
/// "MNIST Data Loading..." message is logged once instead of twice.
/// </remarks>
public static void Run()
{
    RILogManager.Default?.SendDebug("MNIST Data Loading...");
    MnistData mnistData = new MnistData(28);

    RILogManager.Default?.SendDebug("Training Start...");

    // The network configuration is written out inside a FunctionStack.
    FunctionStack nn = new FunctionStack("Test4",
        new Linear(true, 28 * 28, 1024, name: "l1 Linear"),
        new Sigmoid(name: "l1 Sigmoid"),
        new Linear(true, 1024, 10, name: "l2 Linear")
    );

    nn.SetOptimizer(new MomentumSGD());

    // Train for three epochs.
    for (int epoch = 0; epoch < 3; epoch++)
    {
        RILogManager.Default?.SendDebug("epoch " + (epoch + 1));

        // Running sum of batch losses and number of batches in this epoch.
        Real totalLoss = 0;
        long totalLossCount = 0;

        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            // Get data randomly from the training data.
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);

            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());

            // Accumulate (not overwrite) so totalLoss / totalLossCount is the mean loss.
            totalLoss += sumLoss;
            totalLossCount++;

            // Report and test the accuracy after every 20 batches.
            if (i % 20 == 0)
            {
                RILogManager.Default?.SendDebug("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
                RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount);
                RILogManager.Default?.SendDebug("local loss " + sumLoss);

                RILogManager.Default?.SendDebug("\nTesting...");

                // Get data randomly from the test data.
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);

                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                RILogManager.Default?.SendDebug("accuracy " + accuracy);
            }
        }
    }
}
/// <summary>
/// Learns XOR with a Linear / ReLU / Linear stack optimized by AdaGrad.
/// Gradients of all four samples are gathered before each update, and the
/// predictions for the training inputs are logged through
/// <c>RILogManager</c>.
/// </summary>
public static void Run()
{
    // Number of training iterations.
    const int iterations = 10000;

    // XOR inputs.
    Real[][] inputs =
    {
        new Real[] { 0, 0 },
        new Real[] { 1, 0 },
        new Real[] { 0, 1 },
        new Real[] { 1, 1 }
    };

    // Expected XOR output for each input above.
    Real[][] labels =
    {
        new Real[] { 0 },
        new Real[] { 1 },
        new Real[] { 1 },
        new Real[] { 0 }
    };

    FunctionStack network = new FunctionStack("Test2",
        new Linear(true, 2, 2, name: "l1 Linear"),
        new ReLU(name: "l1 ReLU"),
        new Linear(true, 2, 1, name: "l2 Linear"));

    network.SetOptimizer(new AdaGrad());

    RILogManager.Default?.SendDebug("Training...");

    for (int step = 0; step < iterations; step++)
    {
        // Train on every sample with MeanSquaredError, passing false as the last
        // argument so that no update happens per sample.
        for (int sample = 0; sample < inputs.Length; sample++)
        {
            Trainer.Train(network, inputs[sample], labels[sample], new MeanSquaredError(), false);
        }

        // A single update after all samples treats them as one mini-batch.
        network.Update();
    }

    RILogManager.Default?.SendDebug("Test Start...");

    foreach (Real[] input in inputs)
    {
        NdArray output = network.Predict(true, input)[0];

        // Threshold the raw output at 0.5 to obtain the predicted bit.
        int bit = output.Data[0] > 0.5 ? 1 : 0;

        RILogManager.Default?.SendDebug($"{input[0]} xor {input[1]} = {bit} {output}");
    }
}
/// <summary>
/// Fits one period of a sine wave with a Linear / Tanh / Linear stack
/// trained by SGD, logging the mean loss ten times during training and the
/// prediction for every training input afterwards via <c>RILogManager</c>.
/// </summary>
public static void Run()
{
    Real[][] inputs = new Real[N][];
    Real[][] targets = new Real[N][];

    // Prepare one period of a sine wave: N evenly spaced angles in [-PI, PI].
    for (int k = 0; k < N; k++)
    {
        Real radian = -Math.PI + Math.PI * 2.0 * k / (N - 1);
        inputs[k] = new Real[] { radian };
        targets[k] = new Real[] { Math.Sin(radian) };
    }

    FunctionStack network = new FunctionStack("Test3",
        new Linear(true, 1, 4, name: "l1 Linear"),
        new Tanh(name: "l1 Tanh"),
        new Linear(true, 4, 1, name: "l2 Linear")
    );

    network.SetOptimizer(new SGD());

    // The loss is reported once every tenth of the total epoch count.
    int reportInterval = EPOCH / 10;

    for (int epoch = 0; epoch < EPOCH; epoch++)
    {
        // Accumulates the loss over the whole epoch.
        Real loss = 0;

        for (int sample = 0; sample < N; sample++)
        {
            // Training returns the error of the sample as its return value.
            loss += Trainer.Train(network, inputs[sample], targets[sample], new MeanSquaredError());
        }

        if (epoch % reportInterval == 0)
        {
            RILogManager.Default?.SendDebug("loss:" + loss / N);
            RILogManager.Default?.SendDebug("");
        }
    }

    RILogManager.Default?.SendDebug("Test Start...");

    foreach (Real[] input in inputs)
    {
        // The prediction stays inside the null-conditional call, so it is only
        // evaluated when a logger is available.
        RILogManager.Default?.SendDebug(input[0] + ":" + network.Predict(true, input)[0].Data[0]);
    }
}
/// <summary>
/// Trains a Linear / LSTM / Linear stack on mini-batches cut from the series
/// produced by <c>DataMaker</c>, printing the loss to the console, and
/// finally calls <c>predict</c> on one freshly drawn sequence.
/// </summary>
public static void Run()
{
    // Build the full training series from the configured cycle settings.
    DataMaker maker = new DataMaker(STEPS_PER_CYCLE, NUMBER_OF_CYCLES);
    NdArray series = maker.Make();

    // The network configuration is written out inside a FunctionStack.
    FunctionStack network = new FunctionStack(
        new Linear(1, 5, name: "Linear l1"),
        new LSTM(5, 5, name: "LSTM l2"),
        new Linear(5, 1, name: "Linear l3")
    );

    // Declare the optimizer.
    network.SetOptimizer(new Adam());

    // Training loop.
    Console.WriteLine("Training...");

    for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++)
    {
        // Draw a new mini-batch of sequences for every epoch.
        NdArray[] batch = maker.MakeMiniBatch(series, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

        Real loss = ComputeLoss(network, batch);

        // Apply the update, then reset the network state before the next batch.
        network.Update();
        network.ResetState();

        // Report every DISPLAY_EPOCH epochs, but never for epoch 0.
        bool isReportEpoch = epoch != 0 && epoch % DISPLAY_EPOCH == 0;
        if (isReportEpoch)
        {
            Console.WriteLine("[{0}]training loss:\t{1}", epoch, loss);
        }
    }

    Console.WriteLine("Testing...");

    NdArray[] testBatch = maker.MakeMiniBatch(series, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

    // Index of the sequence (within the test mini-batch) used for the prediction run.
    const int sampleIndex = 45;
    predict(testBatch[sampleIndex], network, PREDICTION_LENGTH);
}
/// <summary>
/// Builds a single-layer Convolution2D stack, compresses it, attaches an Adam
/// optimizer and runs one prediction on a 2x3x4 array filled with i / length.
/// </summary>
private static void RunAsync()
{
    NdArray input = new NdArray(new[] { 2, 3, 4 });

    // Fill the array with evenly spaced values: element i becomes i / length.
    int index = 0;
    while (index < input.Data.Length)
    {
        input.Data[index] = (float)index / input.Data.Length;
        index++;
    }

    // The stack consists of one convolution layer only.
    Function[] layers = { new Convolution2D(2, 1, 3) };
    var network = new FunctionStack(layers);

    network.Compress();

    network.SetOptimizer(new Adam());

    // The prediction result is not used further.
    var result = network.Predict(input)[0];
}
/// <summary>
/// Trains a two-stage convolutional network on random MNIST batches,
/// printing the running loss and per-batch time, and measures accuracy on a
/// random evaluation set every 20 batches.
/// </summary>
public static void Run()
{
    Stopwatch timer = new Stopwatch();

    // Prepare the MNIST data.
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnist = new MnistData();

    // The network configuration is written out inside a FunctionStack.
    FunctionStack network = new FunctionStack(
        new Convolution2D(1, 32, 5, pad: 2, name: "l1 Conv2D", gpuEnable: true),
        new ReLU(name: "l1 ReLU"),
        //new AveragePooling(2, 2, name: "l1 AVGPooling"),
        new MaxPooling2D(2, 2, name: "l1 MaxPooling", gpuEnable: true),
        new Convolution2D(32, 64, 5, pad: 2, name: "l2 Conv2D", gpuEnable: true),
        new ReLU(name: "l2 ReLU"),
        //new AveragePooling(2, 2, name: "l2 AVGPooling"),
        new MaxPooling2D(2, 2, name: "l2 MaxPooling", gpuEnable: true),
        new Linear(7 * 7 * 64, 1024, name: "l3 Linear", gpuEnable: true),
        new ReLU(name: "l3 ReLU"),
        new Dropout(name: "l3 DropOut"),
        new Linear(1024, 10, name: "l4 Linear", gpuEnable: true)
    );

    // Declare the optimizer.
    network.SetOptimizer(new Adam());

    Console.WriteLine("Training Start...");

    // Runs epochs 1 and 2.
    for (int epoch = 1; epoch < 3; epoch++)
    {
        Console.WriteLine("epoch " + epoch);

        // Running sum of batch losses and number of batches in this epoch.
        Real totalLoss = 0;
        long totalLossCount = 0;

        // Number of batches to run per epoch.
        for (int batch = 1; batch <= TRAIN_DATA_COUNT; batch++)
        {
            timer.Restart();

            Console.WriteLine("\nbatch count " + batch + "/" + TRAIN_DATA_COUNT);

            // Draw a random batch from the training data.
            TestDataSet datasetX = mnist.Train.GetRandomDataSet(BATCH_DATA_COUNT);

            // Run one batch of training; the return value is the batch loss.
            Real sumLoss = Trainer.Train(network, datasetX, new SoftmaxCrossEntropy());
            totalLoss += sumLoss;
            totalLossCount++;

            // Output the results.
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);

            timer.Stop();
            Console.WriteLine("time" + timer.Elapsed.TotalMilliseconds);

            // Accuracy is only measured on every 20th batch.
            if (batch % 20 != 0)
            {
                continue;
            }

            Console.WriteLine("\nTesting...");

            // Draw a random batch from the evaluation data.
            TestDataSet datasetY = mnist.Eval.GetRandomDataSet(TEACH_DATA_COUNT);

            // Run the test.
            Real accuracy = Trainer.Accuracy(network, datasetY);
            Console.WriteLine("accuracy " + accuracy);
        }
    }
}
// Width of every hidden layer. The original note says that 1000 (the value used
// by the referenced article) also works, but is slow on a CPU.
const int N = 30;

/// <summary>
/// Trains a 15-layer perceptron on MNIST in which every hidden Linear layer
/// is followed by BatchNormalization and ReLU, printing loss and accuracy
/// every 20 batches over three epochs.
/// </summary>
public static void Run()
{
    // Prepare the MNIST data.
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnist = new MnistData();

    Console.WriteLine("Training Start...");

    // The network configuration is written out inside a FunctionStack.
    FunctionStack network = new FunctionStack(
        // L1
        new Linear(28 * 28, N, name: "l1 Linear"),
        new BatchNormalization(N, name: "l1 BatchNorm"),
        new ReLU(name: "l1 ReLU"),
        // L2
        new Linear(N, N, name: "l2 Linear"),
        new BatchNormalization(N, name: "l2 BatchNorm"),
        new ReLU(name: "l2 ReLU"),
        // L3
        new Linear(N, N, name: "l3 Linear"),
        new BatchNormalization(N, name: "l3 BatchNorm"),
        new ReLU(name: "l3 ReLU"),
        // L4
        new Linear(N, N, name: "l4 Linear"),
        new BatchNormalization(N, name: "l4 BatchNorm"),
        new ReLU(name: "l4 ReLU"),
        // L5
        new Linear(N, N, name: "l5 Linear"),
        new BatchNormalization(N, name: "l5 BatchNorm"),
        new ReLU(name: "l5 ReLU"),
        // L6
        new Linear(N, N, name: "l6 Linear"),
        new BatchNormalization(N, name: "l6 BatchNorm"),
        new ReLU(name: "l6 ReLU"),
        // L7
        new Linear(N, N, name: "l7 Linear"),
        new BatchNormalization(N, name: "l7 BatchNorm"),
        new ReLU(name: "l7 ReLU"),
        // L8
        new Linear(N, N, name: "l8 Linear"),
        new BatchNormalization(N, name: "l8 BatchNorm"),
        new ReLU(name: "l8 ReLU"),
        // L9
        new Linear(N, N, name: "l9 Linear"),
        new BatchNormalization(N, name: "l9 BatchNorm"),
        new ReLU(name: "l9 ReLU"),
        // L10
        new Linear(N, N, name: "l10 Linear"),
        new BatchNormalization(N, name: "l10 BatchNorm"),
        new ReLU(name: "l10 ReLU"),
        // L11
        new Linear(N, N, name: "l11 Linear"),
        new BatchNormalization(N, name: "l11 BatchNorm"),
        new ReLU(name: "l11 ReLU"),
        // L12
        new Linear(N, N, name: "l12 Linear"),
        new BatchNormalization(N, name: "l12 BatchNorm"),
        new ReLU(name: "l12 ReLU"),
        // L13
        new Linear(N, N, name: "l13 Linear"),
        new BatchNormalization(N, name: "l13 BatchNorm"),
        new ReLU(name: "l13 ReLU"),
        // L14
        new Linear(N, N, name: "l14 Linear"),
        new BatchNormalization(N, name: "l14 BatchNorm"),
        new ReLU(name: "l14 ReLU"),
        // L15
        new Linear(N, 10, name: "l15 Linear")
    );

    // According to the original note, learning does not progress with the
    // following configuration (the same stack without BatchNormalization):
    //FunctionStack nn = new FunctionStack(
    //    new Linear(28 * 28, N), // L1
    //    new ReLU(),
    //    new Linear(N, N), // L2
    //    new ReLU(),
    //
    //    (omitted)
    //
    //    new Linear(N, N), // L14
    //    new ReLU(),
    //    new Linear(N, 10) // L15
    //);

    // Declare the optimizer.
    network.SetOptimizer(new AdaGrad());

    // Train for three epochs.
    for (int epoch = 0; epoch < 3; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Running sum of batch losses and number of batches in this epoch.
        Real totalLoss = 0;
        long batchesSeen = 0;

        // Number of batches to run per epoch.
        for (int batch = 1; batch <= TRAIN_DATA_COUNT; batch++)
        {
            // Draw a random batch from the training data.
            TestDataSet datasetX = mnist.Train.GetRandomDataSet(BATCH_DATA_COUNT);

            // Run the training step; the return value is the batch loss.
            Real sumLoss = Trainer.Train(network, datasetX, new SoftmaxCrossEntropy());
            totalLoss += sumLoss;
            batchesSeen++;

            // Report and test only on every 20th batch.
            if (batch % 20 != 0)
            {
                continue;
            }

            // Output the results.
            Console.WriteLine("\nbatch count " + batch + "/" + TRAIN_DATA_COUNT);
            Console.WriteLine("total loss " + totalLoss / batchesSeen);
            Console.WriteLine("local loss " + sumLoss);
            Console.WriteLine("");
            Console.WriteLine("Testing...");

            // Draw a random batch from the evaluation data.
            TestDataSet datasetY = mnist.Eval.GetRandomDataSet(TEST_DATA_COUNT);

            // Run the test.
            Real accuracy = Trainer.Accuracy(network, datasetY);
            Console.WriteLine("accuracy " + accuracy);
        }
    }
}
/// <summary>
/// Learns XOR as a two-class problem with a Linear / Sigmoid / Linear stack,
/// prints the predictions, saves the network to "test.nn", loads it back and
/// prints the predictions of the loaded network.
/// </summary>
public static void Run()
{
    // Number of training iterations.
    const int iterations = 10000;

    // Training data (XOR inputs).
    Real[][] inputs =
    {
        new Real[] { 0, 0 },
        new Real[] { 1, 0 },
        new Real[] { 0, 1 },
        new Real[] { 1, 1 }
    };

    // Training labels (class index for each input above).
    Real[][] labels =
    {
        new Real[] { 0 },
        new Real[] { 1 },
        new Real[] { 1 },
        new Real[] { 0 }
    };

    // The network configuration is written out inside a FunctionStack.
    FunctionStack network = new FunctionStack(
        new Linear(2, 2, name: "l1 Linear"),
        new Sigmoid(name: "l1 Sigmoid"),
        new Linear(2, 2, name: "l2 Linear")
    );

    // Declare the optimizer.
    network.SetOptimizer(new MomentumSGD());

    // Training loop.
    Console.WriteLine("Training...");

    for (int step = 0; step < iterations; step++)
    {
        for (int sample = 0; sample < inputs.Length; sample++)
        {
            // The loss function is specified when the training step is executed.
            Trainer.Train(network, inputs[sample], labels[sample], new SoftmaxCrossEntropy());
        }
    }

    // Show the training results.
    Console.WriteLine("Test Start...");

    foreach (Real[] sampleInput in inputs)
    {
        NdArray output = network.Predict(sampleInput)[0];

        // The predicted class is the index of the largest output value.
        int predictedClass = Array.IndexOf(output.Data, output.Data.Max());

        Console.WriteLine(sampleInput[0] + " xor " + sampleInput[1] + " = " + predictedClass + " " + output);
    }

    // Save the trained network.
    ModelIO.Save(network, "test.nn");

    // Load the trained network back.
    Function loaded = ModelIO.Load("test.nn");

    Console.WriteLine("Test Start...");

    foreach (Real[] sampleInput in inputs)
    {
        NdArray output = loaded.Predict(sampleInput)[0];

        // The predicted class is the index of the largest output value.
        int predictedClass = Array.IndexOf(output.Data, output.Data.Max());

        Console.WriteLine(sampleInput[0] + " xor " + sampleInput[1] + " = " + predictedClass + " " + output);
    }
}
/// <summary>
/// Trains a two-stage convolutional network with batch normalization on
/// random CIFAR batches, printing the running loss and per-batch time, and
/// measures accuracy on a random evaluation set every 20 batches.
/// </summary>
/// <param name="isCifar100">Forwarded to the <c>CifarData</c> constructor.</param>
/// <param name="isFineLabel">Forwarded to the <c>CifarData</c> constructor.</param>
/// <remarks>
/// The second BatchNormalization layer is named "l2 BatchNorm". It was
/// previously named "l1 BatchNorm", duplicating the name of the first one.
/// </remarks>
public static void Run(bool isCifar100 = false, bool isFineLabel = false)
{
    Stopwatch sw = new Stopwatch();

    // Prepare the CIFAR data.
    Console.WriteLine("CIFAR Data Loading...");
    CifarData cifarData = new CifarData(isCifar100, isFineLabel);

    // The network configuration is written out inside a FunctionStack.
    FunctionStack nn = new FunctionStack(
        new Convolution2D(3, 32, 3, name: "l1 Conv2D", gpuEnable: true),
        new BatchNormalization(32, name: "l1 BatchNorm"),
        new ReLU(name: "l1 ReLU"),
        new MaxPooling2D(2, name: "l1 MaxPooling", gpuEnable: true),
        new Convolution2D(32, 64, 3, name: "l2 Conv2D", gpuEnable: true),
        // Belongs to the second stage, so it carries the "l2" prefix like its neighbours.
        new BatchNormalization(64, name: "l2 BatchNorm"),
        new ReLU(name: "l2 ReLU"),
        new MaxPooling2D(2, 2, name: "l2 MaxPooling", gpuEnable: true),
        new Linear(14 * 14 * 64, 512, name: "l3 Linear", gpuEnable: true),
        new ReLU(name: "l3 ReLU"),
        // Per the original note: 100 classes for CIFAR-100, 20 for its coarse labels,
        // 10 for CIFAR-10; the count comes from cifarData.ClassCount.
        new Linear(512, cifarData.ClassCount, name: "l4 Linear", gpuEnable: true)
    );

    // Declare the optimizer.
    nn.SetOptimizer(new Adam());

    Console.WriteLine("Training Start...");

    // Runs epochs 1 and 2.
    for (int epoch = 1; epoch < 3; epoch++)
    {
        Console.WriteLine("epoch " + epoch);

        // Running sum of batch losses and number of batches in this epoch.
        Real totalLoss = 0;
        long totalLossCount = 0;

        // Number of batches to run per epoch.
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            sw.Restart();

            Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

            // Draw a random batch from the training data.
            TestDataSet datasetX = cifarData.Train.GetRandomDataSet(BATCH_DATA_COUNT);

            // Run one batch of training; the return value is the batch loss.
            Real sumLoss = Trainer.Train(nn, datasetX, new SoftmaxCrossEntropy());
            totalLoss += sumLoss;
            totalLossCount++;

            // Output the results.
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);

            sw.Stop();
            Console.WriteLine("time" + sw.Elapsed.TotalMilliseconds);

            // Test the accuracy after every 20 batches.
            if (i % 20 == 0)
            {
                Console.WriteLine("\nTesting...");

                // Draw a random batch from the evaluation data.
                TestDataSet datasetY = cifarData.Eval.GetRandomDataSet(TEACH_DATA_COUNT);

                // Run the test.
                Real accuracy = Trainer.Accuracy(nn, datasetY);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
/// <summary>
/// Trains a two-layer LSTM language model on the downloaded train/valid/test
/// text files using truncated BPTT, while calling <c>DocumentResults</c>
/// between steps and writing a histogram log to <c>LogPath</c>. Progress,
/// perplexities and the final histogram statistics are sent to
/// <c>RILogManager</c> and the console.
/// </summary>
public static void Run()
{
    // Open the histogram log and stamp it with the current time.
    _outputStream = File.Create(LogPath);
    _logWriter = new HistogramLogWriter(_outputStream);
    _logWriter.Write(DateTime.Now);

    var recorder = HistogramFactory
        .With64BitBucketSize()
        ?.WithValuesFrom(1)
        ?.WithValuesUpTo(2345678912345)
        ?.WithPrecisionOf(3)
        ?.WithThreadSafeWrites()
        ?.WithThreadSafeReads()
        ?.Create();

    var accumulatingHistogram = new LongHistogram(2345678912345, 3);

    var size = accumulatingHistogram.GetEstimatedFootprintInBytes();
    RILogManager.Default?.SendDebug("Histogram size = {0} bytes ({1:F2} MB)", size, size / 1024.0 / 1024.0);

    // Dump the histogram to the console once per unit scaling.
    RILogManager.Default?.SendDebug("Recorded latencies [in system clock ticks]");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.None, useCsvFormat: true);
    Console.WriteLine();

    RILogManager.Default?.SendDebug("Recorded latencies [in usec]");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMicroseconds, useCsvFormat: true);
    Console.WriteLine();

    RILogManager.Default?.SendDebug("Recorded latencies [in msec]");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);
    Console.WriteLine();

    RILogManager.Default?.SendDebug("Recorded latencies [in sec]");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToSeconds, useCsvFormat: true);

    DocumentResults(accumulatingHistogram, recorder);

    RILogManager.Default?.SendDebug("Build Vocabulary.");
    DocumentResults(accumulatingHistogram, recorder);

    Vocabulary vocabulary = new Vocabulary();
    DocumentResults(accumulatingHistogram, recorder);

    // Download the three corpus files.
    string trainPath = InternetFileDownloader.Download(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE);
    DocumentResults(accumulatingHistogram, recorder);

    string validPath = InternetFileDownloader.Download(DOWNLOAD_URL + VALID_FILE, VALID_FILE);
    DocumentResults(accumulatingHistogram, recorder);

    string testPath = InternetFileDownloader.Download(DOWNLOAD_URL + TEST_FILE, TEST_FILE);
    DocumentResults(accumulatingHistogram, recorder);

    // Load each file through the shared vocabulary.
    int[] trainData = vocabulary.LoadData(trainPath);
    DocumentResults(accumulatingHistogram, recorder);

    int[] validData = vocabulary.LoadData(validPath);
    DocumentResults(accumulatingHistogram, recorder);

    int[] testData = vocabulary.LoadData(testPath);
    DocumentResults(accumulatingHistogram, recorder);

    int nVocab = vocabulary.Length;

    RILogManager.Default?.SendDebug("Network Initializing.");
    FunctionStack model = new FunctionStack("Test10",
        new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"),
        new Dropout(),
        new LSTM(true, N_UNITS, N_UNITS, name: "l2 LSTM"),
        new Dropout(),
        new LSTM(true, N_UNITS, N_UNITS, name: "l3 LSTM"),
        new Dropout(),
        new Linear(true, N_UNITS, nVocab, name: "l4 Linear")
    );
    DocumentResults(accumulatingHistogram, recorder);

    // Per the original note: gradients are not simply cut off at the threshold;
    // the rate is corrected using the L2 norm of all parameters.
    GradientClipping gradientClipping = new GradientClipping(threshold: GRAD_CLIP);
    SGD sgd = new SGD(learningRate: 1);
    model.SetOptimizer(gradientClipping, sgd);
    DocumentResults(accumulatingHistogram, recorder);

    Real wholeLen = trainData.Length;
    int jump = (int)Math.Floor(wholeLen / BATCH_SIZE);
    int epoch = 0;

    // Forward results waiting to be back-propagated by the next truncated-BPTT pass.
    Stack<NdArray[]> pendingResults = new Stack<NdArray[]>();

    RILogManager.Default?.SendDebug("Train Start.");

    NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);
    NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);

    int totalIterations = jump * N_EPOCH;
    for (int i = 0; i < totalIterations; i++)
    {
        int step = i + 1;

        // Batch row b reads the corpus at offset jump * b + i; the target is the next word.
        for (int b = 0; b < BATCH_SIZE; b++)
        {
            x.Data[b] = trainData[(int)((jump * b + i) % wholeLen)];
            t.Data[b] = trainData[(int)((jump * b + i + 1) % wholeLen)];
        }

        NdArray[] result = model.Forward(true, x);
        Real sumLoss = new SoftmaxCrossEntropy().Evaluate(result, t);
        pendingResults.Push(result);
        RILogManager.Default?.SendDebug("[{0}/{1}] Loss: {2}", step, jump, sumLoss);

        // Run truncated BPTT every BPROP_LEN steps.
        if (step % BPROP_LEN == 0)
        {
            // Back-propagate the stored results, most recent first, until none remain.
            while (pendingResults.Count > 0)
            {
                RILogManager.Default?.SendDebug("backward" + pendingResults.Count);
                model.Backward(true, pendingResults.Pop());
            }

            model.Update();
            model.ResetState();
        }

        // Every `jump` steps counts as one epoch and triggers a validation run.
        if (step % jump == 0)
        {
            epoch++;
            RILogManager.Default?.SendDebug("evaluate");
            double validationPerplexity = Evaluate(model, validData);
            RILogManager.Default?.SendDebug($"validation perplexity: {validationPerplexity}");

            // From the sixth epoch on, the learning rate is divided by 1.2 each epoch.
            if (epoch >= 6)
            {
                sgd.LearningRate /= 1.2;
                RILogManager.Default?.SendDebug("learning rate =" + sgd.LearningRate);
            }
        }

        DocumentResults(accumulatingHistogram, recorder);
    }

    RILogManager.Default?.SendDebug("test start");
    double testPerplexity = Evaluate(model, testData);
    RILogManager.Default?.SendDebug("test perplexity:" + testPerplexity);
    DocumentResults(accumulatingHistogram, recorder);

    // Close the log before reading it back.
    _logWriter.Dispose();
    _outputStream.Dispose();

    RILogManager.Default?.SendDebug("Log contents");
    RILogManager.Default?.SendDebug(File.ReadAllText(LogPath));
    Console.WriteLine();

    RILogManager.Default?.SendDebug("Percentile distribution (values reported in milliseconds)");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);

    RILogManager.Default?.SendDebug("Mean: " + BytesToString(accumulatingHistogram.GetMean()) + ", StdDev: " + BytesToString(accumulatingHistogram.GetStdDeviation()));
}
/// <summary>
/// Learns XOR as a two-class problem with a Linear / Sigmoid / Linear stack,
/// prints the predictions, saves the network to "test.nn", loads it back and
/// prints the predictions of the loaded network.
/// </summary>
public static void Run()
{
    // Number of training iterations.
    const int iterations = 10000;

    // XOR inputs.
    Real[][] inputs =
    {
        new Real[] { 0, 0 },
        new Real[] { 1, 0 },
        new Real[] { 0, 1 },
        new Real[] { 1, 1 }
    };

    // Training data labels (class index for each input above).
    Real[][] labels =
    {
        new Real[] { 0 },
        new Real[] { 1 },
        new Real[] { 1 },
        new Real[] { 0 }
    };

    // The network configuration is written out inside a FunctionStack.
    FunctionStack network = new FunctionStack(
        new Linear(2, 2, name: "l1 Linear"),
        new Sigmoid(name: "l1 Sigmoid"),
        new Linear(2, 2, name: "l2 Linear")
    );

    // Optimizer.
    network.SetOptimizer(new MomentumSGD());

    Console.WriteLine("Training...");

    for (int step = 0; step < iterations; step++)
    {
        for (int sample = 0; sample < inputs.Length; sample++)
        {
            // The loss function is specified when the training step is executed.
            Trainer.Train(network, inputs[sample], labels[sample], new SoftmaxCrossEntropy());
        }
    }

    // Show the training results.
    Console.WriteLine("Test Start...");

    foreach (Real[] sampleInput in inputs)
    {
        NdArray output = network.Predict(sampleInput)[0];

        // The predicted class is the index of the largest output value.
        int predictedClass = Array.IndexOf(output.Data, output.Data.Max());

        Console.WriteLine(sampleInput[0] + " xor " + sampleInput[1] + " = " + predictedClass + " " + output);
    }

    // Save the network after learning.
    ModelIO.Save(network, "test.nn");

    // Load the saved network back.
    FunctionStack loaded = ModelIO.Load("test.nn");

    Console.WriteLine("Test Start...");

    foreach (Real[] sampleInput in inputs)
    {
        NdArray output = loaded.Predict(sampleInput)[0];

        // The predicted class is the index of the largest output value.
        int predictedClass = Array.IndexOf(output.Data, output.Data.Max());

        Console.WriteLine(sampleInput[0] + " xor " + sampleInput[1] + " = " + predictedClass + " " + output);
    }
}
/// <summary>
/// Builds a network whose layer names match a Chainer model, loads the parameters from
/// MODEL_FILE_PATH, runs one training step on a fixed 12x12 input and logs the first
/// convolution's gradients (before the update) and parameters (after the update).
/// </summary>
public static void Run()
{
    // Describe the network to be loaded; each function's name must match the variable
    // name used on the Chainer side. Set the GPU flag where it is needed.
    FunctionStack network = new FunctionStack("Test16",
        new Convolution2D(true, 1, 2, 3, name: "conv1", gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2),
        new Convolution2D(true, 2, 2, 2, name: "conv2", gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2),
        new Linear(true, 8, 2, name: "fl3"),
        new ReLU(),
        new Linear(true, 2, 2, name: "fl4")
    );

    /* Declaration on the Chainer side
     * class NN(chainer.Chain):
     *     def __init__(self):
     *         super(NN, self).__init__(
     *             conv1 = L.Convolution2D(1,2,3),
     *             conv2 = L.Convolution2D(2,2,2),
     *             fl3 = L.Linear(8,2),
     *             fl4 = L.Linear(2,2)
     *         )
     *
     *     def __call__(self, x):
     *         h_conv1 = F.relu(self.conv1(x))
     *         h_pool1 = F.max_pooling_2d(h_conv1, 2)
     *         h_conv2 = F.relu(self.conv2(h_pool1))
     *         h_pool2 = F.max_pooling_2d(h_conv2, 2)
     *         h_fc1 = F.relu(self.fl3(h_pool2))
     *         y = self.fl4(h_fc1)
     *         return y
     */

    // Load the stored parameters into the network, then use it like any other network.
    ChainerModelDataLoader.ModelLoad(MODEL_FILE_PATH, network);
    network.SetOptimizer(new SGD());

    // Fixed single-channel 12x12 input.
    Real[,,] pixels =
    {
        {
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.9, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.9, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.8, 0.5, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.3, 0.3, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
        }
    };
    NdArray input = new NdArray(pixels);

    // Teacher signal.
    Real[] target = { 0.0, 1.0 };

    // One training pass; the final 'false' argument is passed through unchanged from the
    // original call (NOTE(review): presumably suppresses the automatic update - confirm).
    Trainer.Train(network, input, target, new MeanSquaredError(), false);

    // First function in the stack, i.e. the "conv1" layer.
    Convolution2D firstConv = (Convolution2D)network.Functions[0];

    // Update consumes the gradients, so they are logged first.
    RILogManager.Default?.SendDebug("gw1");
    RILogManager.Default?.SendDebug(firstConv.Weight.ToString("Grad"));
    RILogManager.Default?.SendDebug("gb1");
    RILogManager.Default?.SendDebug(firstConv.Bias.ToString("Grad"));

    network.Update();

    RILogManager.Default?.SendDebug("w1");
    RILogManager.Default?.SendDebug(firstConv.Weight.ToString());
    RILogManager.Default?.SendDebug("b1");
    RILogManager.Default?.SendDebug(firstConv.Bias.ToString());
}
/// <summary>
/// Builds a small CNN from hand-written initial weights, runs one training step on a fixed
/// 12x12 input with SGD(0.1) and prints the first convolution's gradients (before the
/// update) and parameters (after the update).
/// </summary>
public static void Run()
{
    // Initial weights and biases for the two convolutions and the two linear layers.
    Real[,,,] conv1Weights =
    {
        { { { 1.0, 0.5, 0.0 }, { 0.5, 0.0, -0.5 }, { 0.0, -0.5, -1.0 } } },
        { { { 0.0, -0.1, 0.1 }, { -0.3, 0.4, 0.7 }, { 0.5, -0.2, 0.2 } } }
    };
    Real[] conv1Bias = { 0.5, 1.0 };

    Real[,,,] conv2Weights =
    {
        { { { -0.1, 0.6 }, { 0.3, -0.9 } }, { { 0.7, 0.9 }, { -0.2, -0.3 } } },
        { { { -0.6, -0.1 }, { 0.3, 0.3 } }, { { -0.5, 0.8 }, { 0.9, 0.1 } } }
    };
    Real[] conv2Bias = { 0.1, 0.9 };

    Real[,] linear1Weights =
    {
        { 0.5, 0.3, 0.4, 0.2, 0.6, 0.1, 0.4, 0.3 },
        { 0.6, 0.4, 0.9, 0.1, 0.5, 0.2, 0.3, 0.4 }
    };
    Real[] linear1Bias = { 0.01, 0.02 };

    Real[,] linear2Weights = { { 0.8, 0.2 }, { 0.4, 0.6 } };
    Real[] linear2Bias = { 0.02, 0.01 };

    // Input data: a single-channel 12x12 image.
    Real[,,] pixels =
    {
        {
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.9, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.9, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.8, 0.5, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.3, 0.3, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
        }
    };
    NdArray input = new NdArray(pixels);

    // Teacher signal.
    Real[] target = { 0.0, 1.0 };

    // Keep the first layer as its own instance so its contents can be inspected later.
    Convolution2D firstConv = new Convolution2D(1, 2, 3, initialW: conv1Weights, initialb: conv1Bias, name: "l2 Conv2D");

    // List the network structure in a FunctionStack.
    FunctionStack network = new FunctionStack(
        firstConv,
        new ReLU(name: "l2 ReLU"),
        new MaxPooling2D(2, 2, name: "l2 MaxPooling"),
        new Convolution2D(2, 2, 2, initialW: conv2Weights, initialb: conv2Bias, name: "l3 Conv2D"),
        new ReLU(name: "l3 ReLU"),
        new MaxPooling2D(2, 2, name: "l3 MaxPooling"),
        new Linear(8, 2, initialW: linear1Weights, initialb: linear1Bias, name: "l4 Linear"),
        new ReLU(name: "l4 ReLU"),
        new Linear(2, 2, initialW: linear2Weights, initialb: linear2Bias, name: "l5 Linear")
    );
    network.SetOptimizer(new SGD(0.1));

    // Run the training step.
    Trainer.Train(network, input, target, new MeanSquaredError(), false);

    // Running Update consumes the gradients, so print them first.
    Console.WriteLine("gw1");
    Console.WriteLine(firstConv.Weight.ToString("Grad"));
    Console.WriteLine("gb1");
    Console.WriteLine(firstConv.Bias.ToString("Grad"));

    // Apply the update.
    network.Update();

    Console.WriteLine("w1");
    Console.WriteLine(firstConv.Weight);
    Console.WriteLine("b1");
    Console.WriteLine(firstConv.Bias);
}
/// <summary>
/// Trains a two-convolution CNN on random MNIST mini-batches with Adam, printing the running
/// and per-batch loss and the batch time, and printing test accuracy every 20 batches.
/// </summary>
public static void Run()
{
    Stopwatch batchTimer = new Stopwatch();

    // Prepare the MNIST data.
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnist = new MnistData();

    // NOTE(review): confirm 13 * 13 * 64 matches the flattened output of the second
    // pooling layer for this input size and padding.
    FunctionStack network = new FunctionStack(
        new Convolution2D(1, 32, 5, pad: 2, name: "l1 Conv2D", gpuEnable: true),
        new ReLU(name: "l1 ReLU"),
        new MaxPooling(2, 2, name: "l1 MaxPooling", gpuEnable: true),
        new Convolution2D(32, 64, 5, pad: 2, name: "l2 Conv2D", gpuEnable: true),
        new ReLU(name: "l2 ReLU"),
        new MaxPooling(2, 2, name: "l2 MaxPooling", gpuEnable: true),
        new Linear(13 * 13 * 64, 1024, name: "l3 Linear", gpuEnable: true),
        new ReLU(name: "l3 ReLU"),
        new Dropout(name: "l3 DropOut"),
        new Linear(1024, 10, name: "l4 Linear", gpuEnable: true)
    );
    network.SetOptimizer(new Adam());

    Console.WriteLine("Training Start...");

    // The loop bounds give two passes (epoch = 1 and epoch = 2).
    for (int epoch = 1; epoch < 3; epoch++)
    {
        Console.WriteLine("epoch " + epoch);

        // Running loss over the whole epoch.
        Real lossSum = 0;
        long lossSamples = 0;

        for (int batch = 1; batch <= TRAIN_DATA_COUNT; batch++)
        {
            batchTimer.Restart();
            Console.WriteLine("\nbatch count " + batch + "/" + TRAIN_DATA_COUNT);

            // Draw a random mini-batch from the training data and train on it.
            TestDataSet trainBatch = mnist.GetRandomXSet(BATCH_DATA_COUNT);
            Real batchLoss = Trainer.Train(network, trainBatch.Data, trainBatch.Label, new SoftmaxCrossEntropy());
            lossSum += batchLoss;
            lossSamples++;

            Console.WriteLine("total loss " + lossSum / lossSamples);
            Console.WriteLine("local loss " + batchLoss);

            batchTimer.Stop();
            Console.WriteLine("time" + batchTimer.Elapsed.TotalMilliseconds);

            if (batch % 20 != 0)
            {
                continue;
            }

            // Every 20 batches, measure accuracy on a random draw from the test data.
            Console.WriteLine("\nTesting...");
            TestDataSet testBatch = mnist.GetRandomYSet(TEACH_DATA_COUNT);
            Real accuracy = Trainer.Accuracy(network, testBatch.Data, testBatch.Label);
            Console.WriteLine("accuracy " + accuracy);
        }
    }
}
/// <summary>
/// Builds a small CNN from fixed initial weights, performs one training step on a fixed
/// 12x12 input with a default-constructed SGD optimizer, and prints the first convolution's
/// gradients before the update and its weights and bias after the update.
/// </summary>
public static void Run()
{
    // Fixed starting parameters, one weight/bias pair per trainable layer.
    Real[,,,] w1 =
    {
        { { { 1.0, 0.5, 0.0 }, { 0.5, 0.0, -0.5 }, { 0.0, -0.5, -1.0 } } },
        { { { 0.0, -0.1, 0.1 }, { -0.3, 0.4, 0.7 }, { 0.5, -0.2, 0.2 } } }
    };
    Real[] b1 = { 0.5, 1.0 };

    Real[,,,] w2 =
    {
        { { { -0.1, 0.6 }, { 0.3, -0.9 } }, { { 0.7, 0.9 }, { -0.2, -0.3 } } },
        { { { -0.6, -0.1 }, { 0.3, 0.3 } }, { { -0.5, 0.8 }, { 0.9, 0.1 } } }
    };
    Real[] b2 = { 0.1, 0.9 };

    Real[,] w3 =
    {
        { 0.5, 0.3, 0.4, 0.2, 0.6, 0.1, 0.4, 0.3 },
        { 0.6, 0.4, 0.9, 0.1, 0.5, 0.2, 0.3, 0.4 }
    };
    Real[] b3 = { 0.01, 0.02 };

    Real[,] w4 = { { 0.8, 0.2 }, { 0.4, 0.6 } };
    Real[] b4 = { 0.02, 0.01 };

    // Input data.
    NdArray sample = new NdArray(new Real[,,]
    {
        {
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.9, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.9, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.8, 0.5, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.3, 0.3, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
        }
    });

    // Teacher signal.
    Real[] teacher = { 0.0, 1.0 };

    // The first layer is held in its own variable so that it can be inspected after training.
    Convolution2D inspectedLayer = new Convolution2D(1, 2, 3, initialW: w1, initialb: b1, name: "l2 Conv2D");

    FunctionStack stack = new FunctionStack(
        inspectedLayer,
        new ReLU(name: "l2 ReLU"),
        new MaxPooling(2, 2, name: "l2 MaxPooling"),
        new Convolution2D(2, 2, 2, initialW: w2, initialb: b2, name: "l3 Conv2D"),
        new ReLU(name: "l3 ReLU"),
        new MaxPooling(2, 2, name: "l3 MaxPooling"),
        new Linear(8, 2, initialW: w3, initialb: b3, name: "l4 Linear"),
        new ReLU(name: "l4 ReLU"),
        new Linear(2, 2, initialW: w4, initialb: b4, name: "l5 Linear")
    );

    // Per the original author's note, a default-constructed SGD uses a learning rate of 0.1.
    stack.SetOptimizer(new SGD());

    // Single training step.
    Trainer.Train(stack, sample, teacher, new MeanSquaredError(), false);

    // Gradients are consumed by Update, so they are printed beforehand.
    Console.WriteLine("gw1");
    Console.WriteLine(inspectedLayer.Weight.ToString("Grad"));
    Console.WriteLine("gb1");
    Console.WriteLine(inspectedLayer.Bias.ToString("Grad"));

    stack.Update();

    Console.WriteLine("w1");
    Console.WriteLine(inspectedLayer.Weight);
    Console.WriteLine("b1");
    Console.WriteLine(inspectedLayer.Bias);
}
/// <summary>
/// MNIST accuracy tester. Trains a 15-layer fully connected network (Linear + BatchNormalization
/// + ReLU blocks) with Adam for three epochs and, every 20 batches, measures accuracy on a random
/// test batch and publishes progress watches through ReflectInsight.
/// </summary>
/// <param name="accuracyThreshold">
/// Accuracy the most recent test batch must reach for <c>Passed</c> to be set to true.
/// </param>
public static void Run(double accuracyThreshold = .9979D)
{
    // Number of units in each hidden layer. The original author notes that the reference
    // setup uses 1000, which is slow on the CPU.
    const int N = 30;
    const int epochCount = 3;

    MnistData mnistData = new MnistData(28);
    Real maxAccuracy = 0;

    ReflectInsight ri = new ReflectInsight("Test21");
    ri.Enabled = true;
    RILogManager.Add("Test21", "Test21");
    RILogManager.SetDefault("Test21");

    FunctionStack nn = new FunctionStack("Test7",
        new Linear(true, 28 * 28, N, name: "l1 Linear"), // L1
        new BatchNormalization(true, N, name: "l1 BatchNorm"),
        new ReLU(name: "l1 ReLU"),
        new Linear(true, N, N, name: "l2 Linear"), // L2
        new BatchNormalization(true, N, name: "l2 BatchNorm"),
        new ReLU(name: "l2 ReLU"),
        new Linear(true, N, N, name: "l3 Linear"), // L3
        new BatchNormalization(true, N, name: "l3 BatchNorm"),
        new ReLU(name: "l3 ReLU"),
        new Linear(true, N, N, name: "l4 Linear"), // L4
        new BatchNormalization(true, N, name: "l4 BatchNorm"),
        new ReLU(name: "l4 ReLU"),
        new Linear(true, N, N, name: "l5 Linear"), // L5
        new BatchNormalization(true, N, name: "l5 BatchNorm"),
        new ReLU(name: "l5 ReLU"),
        new Linear(true, N, N, name: "l6 Linear"), // L6
        new BatchNormalization(true, N, name: "l6 BatchNorm"),
        new ReLU(name: "l6 ReLU"),
        new Linear(true, N, N, name: "l7 Linear"), // L7
        new BatchNormalization(true, N, name: "l7 BatchNorm"),
        new ReLU(name: "l7 ReLU"),
        new Linear(true, N, N, name: "l8 Linear"), // L8
        new BatchNormalization(true, N, name: "l8 BatchNorm"),
        new ReLU(name: "l8 ReLU"),
        new Linear(true, N, N, name: "l9 Linear"), // L9
        new BatchNormalization(true, N, name: "l9 BatchNorm"),
        new ReLU(name: "l9 ReLU"),
        new Linear(true, N, N, name: "l10 Linear"), // L10
        new BatchNormalization(true, N, name: "l10 BatchNorm"),
        new ReLU(name: "l10 ReLU"),
        new Linear(true, N, N, name: "l11 Linear"), // L11
        new BatchNormalization(true, N, name: "l11 BatchNorm"),
        new ReLU(name: "l11 ReLU"),
        new Linear(true, N, N, name: "l12 Linear"), // L12
        new BatchNormalization(true, N, name: "l12 BatchNorm"),
        new ReLU(name: "l12 ReLU"),
        new Linear(true, N, N, name: "l13 Linear"), // L13
        new BatchNormalization(true, N, name: "l13 BatchNorm"),
        new ReLU(name: "l13 ReLU"),
        new Linear(true, N, N, name: "l14 Linear"), // L14
        new BatchNormalization(true, N, name: "l14 BatchNorm"),
        new ReLU(name: "l14 ReLU"),
        new Linear(true, N, 10, name: "l15 Linear") // L15
    );

    // Adam hyper-parameters. Original author's tuning note: 0.0005 - 97.5, 0.001, 0.00146.
    double alpha = 0.001;
    double beta1 = 0.9D;
    double beta2 = 0.999D;
    double epsilon = 1e-8;
    nn.SetOptimizer(new Adam("Adam21", alpha, beta1, beta2, epsilon));

    Stopwatch sw = new Stopwatch();

    for (int epoch = 0; epoch < epochCount; epoch++)
    {
        // Resume timing at the top of every epoch. The stopwatch is stopped at the end of the
        // previous epoch; without this, batches before the next accuracy check are not timed.
        sw.Start();

        Real totalLoss = 0;
        long totalLossCount = 0;

        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);
            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());

            // Accumulate, so that totalLoss / totalLossCount is the mean loss of the epoch so far.
            totalLoss += sumLoss;
            totalLossCount++;

            if (i % 20 == 0)
            {
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label, false);
                if (accuracy > maxAccuracy)
                {
                    maxAccuracy = accuracy;
                }

                Passed = (accuracy >= accuracyThreshold);

                // Pause the stopwatch while the watches are sent, then resume it.
                sw.Stop();
                ri.ViewerSendWatch("Iteration", "epoch " + (epoch + 1) + " of " + epochCount + ", batch " + i + " of " + TRAIN_DATA_COUNT);
                ri.ViewerSendWatch("Max Accuracy", maxAccuracy * 100 + "%");
                ri.ViewerSendWatch("Current Accuracy", accuracy * 100 + "%");
                ri.ViewerSendWatch("Total Loss ", totalLoss / totalLossCount);
                ri.ViewerSendWatch("Elapsed Time", Helpers.FormatTimeSpan(sw.Elapsed));
                ri.ViewerSendWatch("Accuracy Threshold", Passed ? "Passed" : "Not Passed");
                sw.Start();
            }
        }

        // Cumulative processing time up to the end of this epoch.
        sw.Stop();
        ri.SendInformation("Total Processing Time: " + Helpers.FormatTimeSpan(sw.Elapsed));
    }
}
/// <summary>
/// Declares a network whose function names match a Chainer model, fills it with the parameters
/// stored at MODEL_FILE_PATH, runs one training step on a fixed 12x12 input and prints the first
/// convolution's gradients (before the update) and parameters (after the update).
/// </summary>
public static void Run()
{
    // The structure must mirror the model being read, and each name must equal the
    // corresponding Chainer variable name. Set the GPU flag where required.
    FunctionStack loadedNet = new FunctionStack(
        new Convolution2D(1, 2, 3, name: "conv1", gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2),
        new Convolution2D(2, 2, 2, name: "conv2", gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2),
        new Linear(8, 2, name: "fl3"),
        new ReLU(),
        new Linear(2, 2, name: "fl4")
    );

    /* Declaration in Chainer
     * class NN(chainer.Chain):
     *     def __init__(self):
     *         super(NN, self).__init__(
     *             conv1 = L.Convolution2D(1, 2, 3),
     *             conv2 = L.Convolution2D(2, 2, 2),
     *             fl3 = L.Linear(8, 2),
     *             fl4 = L.Linear(2, 2)
     *         )
     *
     *     def __call__(self, x):
     *         h_conv1 = F.relu(self.conv1(x))
     *         h_pool1 = F.max_pooling_2d(h_conv1, 2)
     *         h_conv2 = F.relu(self.conv2(h_pool1))
     *         h_pool2 = F.max_pooling_2d(h_conv2, 2)
     *         h_fc1 = F.relu(self.fl3(h_pool2))
     *         y = self.fl4(h_fc1)
     *         return y
     */

    // Read the parameters; from here on the network is used as usual.
    ChainerModelDataLoader.ModelLoad(MODEL_FILE_PATH, loadedNet);
    loadedNet.SetOptimizer(new SGD());

    // Input data.
    Real[,,] image =
    {
        {
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.9, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.9, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.8, 0.5, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.3, 0.3, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
        }
    };
    NdArray input = new NdArray(image);

    // Teacher signal.
    Real[] expected = { 0.0, 1.0 };

    // Run the training step.
    Trainer.Train(loadedNet, input, expected, new MeanSquaredError(), false);

    // Grab the first function ("conv1") so its values can be displayed.
    Convolution2D conv1 = (Convolution2D)loadedNet.Functions[0];

    // Update consumes the gradients, so print them before updating.
    Console.WriteLine("gw1");
    Console.WriteLine(conv1.Weight.ToString("Grad"));
    Console.WriteLine("gb1");
    Console.WriteLine(conv1.Bias.ToString("Grad"));

    loadedNet.Update();

    Console.WriteLine("w1");
    Console.WriteLine(conv1.Weight);
    Console.WriteLine("b1");
    Console.WriteLine(conv1.Bias);
}
/// <summary>
/// Downloads the train/validation/test corpora, builds a vocabulary, and trains a two-layer
/// LSTM language model with truncated back-propagation through time. Validation perplexity is
/// printed once per epoch and test perplexity once at the end.
/// </summary>
public static void Run()
{
    Console.WriteLine("Build Vocabulary.");
    Vocabulary vocabulary = new Vocabulary();

    string trainPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE);
    string validPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + VALID_FILE, VALID_FILE);
    string testPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE);

    // Loaded in train, validation, test order through the same vocabulary instance.
    int[] trainData = vocabulary.LoadData(trainPath);
    int[] validData = vocabulary.LoadData(validPath);
    int[] testData = vocabulary.LoadData(testPath);

    int vocabularySize = vocabulary.Length;

    Console.WriteLine("Network Initilizing.");
    FunctionStack model = new FunctionStack(
        new EmbedID(vocabularySize, N_UNITS, name: "l1 EmbedID"),
        new Dropout(),
        new LSTM(N_UNITS, N_UNITS, name: "l2 LSTM"),
        new Dropout(),
        new LSTM(N_UNITS, N_UNITS, name: "l3 LSTM"),
        new Dropout(),
        new Linear(N_UNITS, vocabularySize, name: "l4 Linear")
    );

    // Original author's note: rather than capping at the given threshold, a rate is derived
    // from the L2 norm of all parameters and used as a correction.
    GradientClipping clipping = new GradientClipping(threshold: GRAD_CLIP);
    SGD sgd = new SGD(learningRate: 1);
    model.SetOptimizer(clipping, sgd);

    Real wholeLen = trainData.Length;

    // Offset between the positions that the rows of a batch read from the training data.
    int jump = (int)Math.Floor(wholeLen / BATCH_SIZE);
    int epoch = 0;

    // Forward results waiting to be back-propagated.
    Stack<NdArray[]> pending = new Stack<NdArray[]>();

    Console.WriteLine("Train Start.");

    int totalSteps = jump * N_EPOCH;
    for (int i = 0; i < totalSteps; i++)
    {
        int step = i + 1;

        NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE);
        NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE);

        // Row 'row' reads the token at offset jump * row + i; its target is the next token.
        for (int row = 0; row < BATCH_SIZE; row++)
        {
            x.Data[row] = trainData[(int)((jump * row + i) % wholeLen)];
            t.Data[row] = trainData[(int)((jump * row + i + 1) % wholeLen)];
        }

        NdArray[] result = model.Forward(x);
        Real sumLoss = new SoftmaxCrossEntropy().Evaluate(result, t);
        pending.Push(result);
        Console.WriteLine("[{0}/{1}] Loss: {2}", step, jump, sumLoss);

        // Run truncated BPTT every BPROP_LEN steps: unwind everything pushed so far.
        if (step % BPROP_LEN == 0)
        {
            while (pending.Count > 0)
            {
                Console.WriteLine("backward" + pending.Count);
                model.Backward(pending.Pop());
            }

            model.Update();
            model.ResetState();
        }

        // End of an epoch: evaluate and, from the sixth epoch on, decay the learning rate.
        if (step % jump == 0)
        {
            epoch++;
            Console.WriteLine("evaluate");
            Console.WriteLine("validation perplexity: {0}", Evaluate(model, validData));

            if (epoch >= 6)
            {
                sgd.LearningRate /= 1.2;
                Console.WriteLine("learning rate =" + sgd.LearningRate);
            }
        }
    }

    Console.WriteLine("test start");
    Console.WriteLine("test perplexity:" + Evaluate(model, testData));
}
/// <summary>
/// Trains a four-block MNIST classifier in which the first three blocks are updated from
/// gradients produced by three auxiliary "DNI" networks, while each DNI network is trained
/// towards the gradient that later arrives from the following block. Losses are printed for
/// every batch and test accuracy every 20 batches.
/// </summary>
public static void Run()
{
    // Prepare the MNIST data.
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnist = new MnistData();

    Console.WriteLine("Training Start...");

    // The classifier, split into four separately optimized blocks.
    FunctionStack block1 = new FunctionStack(
        new Linear(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );

    FunctionStack block2 = new FunctionStack(
        new Linear(256, 256, name: "l2 Linear"),
        new BatchNormalization(256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );

    FunctionStack block3 = new FunctionStack(
        new Linear(256, 256, name: "l3 Linear"),
        new BatchNormalization(256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );

    FunctionStack block4 = new FunctionStack(
        new Linear(256, 10, name: "l4 Linear")
    );

    // A FunctionStack can itself be stacked as a function; this view is used for testing.
    FunctionStack classifier = new FunctionStack(
        block1,
        block2,
        block3,
        block4
    );

    // One gradient-predicting network per hidden block. The last Linear of each one is
    // given an explicit, newly allocated weight array.
    FunctionStack synth1 = new FunctionStack(
        new Linear(256, 1024, name: "DNI1 Linear1"),
        new BatchNormalization(1024, name: "DNI1 Nrom1"),
        new ReLU(name: "DNI1 ReLU1"),
        new Linear(1024, 1024, name: "DNI1 Linear2"),
        new BatchNormalization(1024, name: "DNI1 Nrom2"),
        new ReLU(name: "DNI1 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );

    FunctionStack synth2 = new FunctionStack(
        new Linear(256, 1024, name: "DNI2 Linear1"),
        new BatchNormalization(1024, name: "DNI2 Nrom1"),
        new ReLU(name: "DNI2 ReLU1"),
        new Linear(1024, 1024, name: "DNI2 Linear2"),
        new BatchNormalization(1024, name: "DNI2 Nrom2"),
        new ReLU(name: "DNI2 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3")
    );

    FunctionStack synth3 = new FunctionStack(
        new Linear(256, 1024, name: "DNI3 Linear1"),
        new BatchNormalization(1024, name: "DNI3 Nrom1"),
        new ReLU(name: "DNI3 ReLU1"),
        new Linear(1024, 1024, name: "DNI3 Linear2"),
        new BatchNormalization(1024, name: "DNI3 Nrom2"),
        new ReLU(name: "DNI3 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3")
    );

    // Declare an optimizer for every block and every DNI network.
    block1.SetOptimizer(new Adam());
    block2.SetOptimizer(new Adam());
    block3.SetOptimizer(new Adam());
    block4.SetOptimizer(new Adam());

    synth1.SetOptimizer(new Adam());
    synth2.SetOptimizer(new Adam());
    synth3.SetOptimizer(new Adam());

    // Train for 20 epochs.
    for (int epoch = 0; epoch < 20; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Running sums and counts for the classifier loss and the three DNI losses.
        Real lossSum = 0;
        Real synth1LossSum = 0;
        Real synth2LossSum = 0;
        Real synth3LossSum = 0;

        long lossCount = 0;
        long synth1LossCount = 0;
        long synth2LossCount = 0;
        long synth3LossCount = 0;

        // How many batches to run.
        for (int batch = 1; batch <= TRAIN_DATA_COUNT; batch++)
        {
            // Draw a random mini-batch from the training data.
            TestDataSet trainBatch = mnist.GetRandomXSet(BATCH_DATA_COUNT);

            // ---- Block 1: forward, take the gradient from DNI1, update ----
            NdArray[] out1 = block1.Forward(trainBatch.Data);
            NdArray[] predictedGrad1 = synth1.Forward(out1);
            out1[0].Grad = predictedGrad1[0].Data.ToArray();
            block1.Backward(out1);
            out1[0].ParentFunc = null; // Backward has run, so cut the computation graph here
            block1.Update();

            // ---- Block 2: forward, take the gradient from DNI2, backward ----
            NdArray[] out2 = block2.Forward(out1);
            NdArray[] predictedGrad2 = synth2.Forward(out2);
            out2[0].Grad = predictedGrad2[0].Data.ToArray();
            block2.Backward(out2);
            out2[0].ParentFunc = null;

            // Train DNI1 towards the gradient now stored on block 1's output.
            NdArray grad1Target = new NdArray(out1[0].Grad, predictedGrad1[0].Shape, predictedGrad1[0].BatchCount);
            Real synth1Loss = new MeanSquaredError().Evaluate(predictedGrad1, grad1Target);

            block2.Update();

            synth1.Backward(predictedGrad1);
            synth1.Update();

            synth1LossSum += synth1Loss;
            synth1LossCount++;

            // ---- Block 3: forward, take the gradient from DNI3, backward ----
            NdArray[] out3 = block3.Forward(out2);
            NdArray[] predictedGrad3 = synth3.Forward(out3);
            out3[0].Grad = predictedGrad3[0].Data.ToArray();
            block3.Backward(out3);
            out3[0].ParentFunc = null;

            // Train DNI2 towards the gradient now stored on block 2's output.
            NdArray grad2Target = new NdArray(out2[0].Grad, predictedGrad2[0].Shape, predictedGrad2[0].BatchCount);
            Real synth2Loss = new MeanSquaredError().Evaluate(predictedGrad2, grad2Target);

            block3.Update();

            synth2.Backward(predictedGrad2);
            synth2.Update();

            synth2LossSum += synth2Loss;
            synth2LossCount++;

            // ---- Block 4: forward, real loss against the labels, backward ----
            NdArray[] out4 = block4.Forward(out3);
            Real batchLoss = new SoftmaxCrossEntropy().Evaluate(out4, trainBatch.Label);
            block4.Backward(out4);
            out4[0].ParentFunc = null;

            lossSum += batchLoss;
            lossCount++;

            // Train DNI3 towards the gradient now stored on block 3's output.
            NdArray grad3Target = new NdArray(out3[0].Grad, predictedGrad3[0].Shape, predictedGrad3[0].BatchCount);
            Real synth3Loss = new MeanSquaredError().Evaluate(predictedGrad3, grad3Target);

            block4.Update();

            synth3.Backward(predictedGrad3);
            synth3.Update();

            synth3LossSum += synth3Loss;
            synth3LossCount++;

            // Report the results.
            Console.WriteLine("\nbatch count " + batch + "/" + TRAIN_DATA_COUNT);
            Console.WriteLine("total loss " + lossSum / lossCount);
            Console.WriteLine("local loss " + batchLoss);

            Console.WriteLine("\nDNI1 total loss " + synth1LossSum / synth1LossCount);
            Console.WriteLine("DNI2 total loss " + synth2LossSum / synth2LossCount);
            Console.WriteLine("DNI3 total loss " + synth3LossSum / synth3LossCount);

            Console.WriteLine("\nDNI1 local loss " + synth1Loss);
            Console.WriteLine("DNI2 local loss " + synth2Loss);
            Console.WriteLine("DNI3 local loss " + synth3Loss);

            // After every 20 batches, test the accuracy on a random draw from the test data.
            if (batch % 20 == 0)
            {
                Console.WriteLine("\nTesting...");
                TestDataSet testBatch = mnist.GetRandomYSet(TEST_DATA_COUNT);
                Real accuracy = Trainer.Accuracy(classifier, testBatch.Data, testBatch.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
/// <summary>
/// Entry point: loads CIFAR data, initializes the GPU backend, and trains a nine-convolution
/// network with Adam, printing the running and per-batch loss for every batch and the test
/// accuracy every 50 batches.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    // Prepare the CIFAR-10 data.
    Console.WriteLine("CIFAR Data Loading...");
    CifarData cifar = new CifarData();

    // Original author's note: check the platformId value using the method described in the
    // OpenCL / GPU setup article.
    Weaver.Initialize(ComputeDeviceTypes.Gpu, platformId: 1, deviceIndex: 0);

    // List the network structure in a FunctionStack.
    FunctionStack network = new FunctionStack(
        /* The first four convolution layers of the earlier layout were removed:
         * new Convolution2D (3, 64, 3, pad: 1, gpuEnable: true),
         * new ReLU (),
         * new Convolution2D (64, 64, 3, pad: 1, gpuEnable: true),
         * new ReLU (),
         * new MaxPooling(2, 2, gpuEnable: true),
         *
         * new Convolution2D (64, 128, 3, pad: 1, gpuEnable: true),
         * new ReLU (),
         * new Convolution2D (128, 128, 3, pad: 1, gpuEnable: true),
         * new ReLU (),
         * new MaxPooling(2, 2, gpuEnable: true),
         */

        // The shape notes below are the original author's annotations.
        // (3, 32, 32)
        new Convolution2D(3, 64, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new Convolution2D(64, 64, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new Convolution2D(64, 64, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2, gpuEnable: true),

        // (64, 16, 16)
        new Convolution2D(64, 128, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new Convolution2D(128, 128, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new Convolution2D(128, 128, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2, gpuEnable: true),

        // (128, 8, 8)
        new Convolution2D(128, 128, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new Convolution2D(128, 128, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new Convolution2D(128, 128, 3, pad: 1, gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2, gpuEnable: true),

        // (128, 4, 4)
        new Linear(128 * 4 * 4, 1024, gpuEnable: true),
        new ReLU(),
        new Dropout(0.5),
        new Linear(1024, 1024, gpuEnable: true),
        new ReLU(),
        new Dropout(0.5),
        new Linear(1024, 10, gpuEnable: true)
    );

    // Declare the optimizer.
    network.SetOptimizer(new Adam());

    Console.WriteLine("Training Start...");

    // Epochs 1 through 9.
    for (int epoch = 1; epoch < 10; epoch++)
    {
        Console.WriteLine("\nepoch " + epoch);

        // Running loss over the whole epoch.
        Real lossSum = 0;
        long lossCount = 0;

        // How many batches to run.
        for (int batch = 1; batch <= TRAIN_DATA_COUNT; batch++)
        {
            // Draw a random mini-batch from the training data and train on it.
            TestDataSet trainBatch = cifar.GetRandomXSet(BATCH_DATA_COUNT);
            Real batchLoss = Trainer.Train(network, trainBatch.Data, trainBatch.Label, new SoftmaxCrossEntropy());
            lossSum += batchLoss;
            lossCount++;

            // Report the results.
            Console.WriteLine("total loss " + lossSum / lossCount);
            Console.WriteLine("local loss " + batchLoss);

            if (batch % 50 != 0)
            {
                continue;
            }

            // After every 50 batches, test the accuracy on a random draw from the test data.
            Console.WriteLine("step: " + batch + " Testing...");
            TestDataSet testBatch = cifar.GetRandomYSet(TEACH_DATA_COUNT);
            Real accuracy = Trainer.Accuracy(network, testBatch.Data, testBatch.Label);
            Console.WriteLine("accuracy " + accuracy);
        }
    }
}
/// <summary>
/// Downloads the train/test corpora, builds a vocabulary, trains a simple recurrent language
/// model one sentence at a time (a sentence ends at the EOS id), and then scores sentences
/// found in the first 1000 test tokens, printing 2^(sum / wnum) at the end.
/// </summary>
public static void Run()
{
    // Upper bound on the number of test tokens that are scanned.
    const int maxTestTokens = 1000;

    Console.WriteLine("Build Vocabulary.");
    Vocabulary vocabulary = new Vocabulary();

    string trainPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE, TRAIN_FILE_HASH);
    string testPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE, TEST_FILE_HASH);

    int[] trainData = vocabulary.LoadData(trainPath);
    int[] testData = vocabulary.LoadData(testPath);

    int nVocab = vocabulary.Length;
    Console.WriteLine("Done.");

    Console.WriteLine("Network Initilizing.");
    FunctionStack model = new FunctionStack(
        new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"),
        new Linear(N_UNITS, N_UNITS, name: "l2 Linear"),
        new TanhActivation("l2 Tanh"),
        new Linear(N_UNITS, nVocab, name: "l3 Linear"),
        new Softmax("l3 Sonftmax")
    );
    model.SetOptimizer(new Adam());

    // Token ids of the sentence currently being collected.
    List<int> s = new List<int>();

    Console.WriteLine("Train Start.");
    SoftmaxCrossEntropy softmaxCrossEntropy = new SoftmaxCrossEntropy();

    for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++)
    {
        for (int pos = 0; pos < trainData.Length; pos++)
        {
            int id = trainData[pos];
            s.Add(id);

            if (id == vocabulary.EosID)
            {
                // The hidden state starts from a freshly allocated array for every sentence.
                // It is only needed here, so it is no longer allocated for every token.
                NdArray h = new NdArray(new Real[N_UNITS]);
                Real accumloss = 0;
                Stack<NdArray> tmp = new Stack<NdArray>();

                for (int i = 0; i < s.Count; i++)
                {
                    // Target is the next token; for the last position it is EOS.
                    int tx = i == s.Count - 1 ? vocabulary.EosID : s[i + 1];

                    // l1 EmbedID
                    NdArray l1 = model.Functions[0].Forward(s[i])[0];

                    // l2 Linear
                    NdArray l2 = model.Functions[1].Forward(h)[0];

                    // Add
                    NdArray xK = l1 + l2;

                    // l2 Tanh
                    h = model.Functions[2].Forward(xK)[0];

                    // l3 Linear
                    NdArray h2 = model.Functions[3].Forward(h)[0];

                    Real loss = softmaxCrossEntropy.Evaluate(h2, tx);
                    tmp.Push(h2);
                    accumloss += loss;
                }

                Console.WriteLine(accumloss);

                // Back-propagate the stored outputs in reverse order, one per token.
                while (tmp.Count > 0)
                {
                    model.Backward(tmp.Pop());
                }

                model.Update();
                s.Clear();
            }

            if (pos % 100 == 0)
            {
                Console.WriteLine(pos + "/" + trainData.Length + " finished");
            }
        }
    }

    Console.WriteLine("Test Start.");

    Real sum = 0;
    int wnum = 0;
    List<int> ts = new List<int>();
    bool unkWord = false;

    // Never read past the end of the test data when it holds fewer than 1000 tokens.
    int testLimit = Math.Min(maxTestTokens, testData.Length);

    for (int pos = 0; pos < testLimit; pos++)
    {
        int id = testData[pos];
        ts.Add(id);

        // NOTE(review): this compares a token id with the length of the training data; it
        // looks like an unknown-word check that may have been meant to use the vocabulary
        // size. Left unchanged; confirm the intent.
        if (id > trainData.Length)
        {
            unkWord = true;
        }

        if (id == vocabulary.EosID)
        {
            if (!unkWord)
            {
                Console.WriteLine("pos" + pos);
                Console.WriteLine("tsLen" + ts.Count);
                Console.WriteLine("sum" + sum);
                Console.WriteLine("wnum" + wnum);

                sum += CalPs(model, ts);
                wnum += ts.Count - 1;
            }
            else
            {
                // Skip this sentence and clear the flag for the next one.
                unkWord = false;
            }

            ts.Clear();
        }
    }

    Console.WriteLine(Math.Pow(2.0, sum / wnum));
}
/// <summary>
/// Builds the 15-layer fully connected "Test19" network, trains it for one epoch on
/// random MNIST batches, measures accuracy every 20 batches, and reports the best
/// accuracy and losses through <c>RILogManager</c>. The model is saved once before
/// training and once after.
/// </summary>
public static void Run()
{
    RILogManager.Default?.SendDebug("MNIST Data Loading...");
    MnistData mnistData = new MnistData(28);

    RILogManager.Default?.SendDebug("Training Start...");

    int neuronCount = 28;

    FunctionStack nn = new FunctionStack("Test19",
        new Linear(true, neuronCount * neuronCount, N, name: "l1 Linear"), // L1
        new BatchNormalization(true, N, name: "l1 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l1 LeakyReLU"),
        new Linear(true, N, N, name: "l2 Linear"), // L2
        new BatchNormalization(true, N, name: "l2 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l2 LeakyReLU"),
        new Linear(true, N, N, name: "l3 Linear"), // L3
        new BatchNormalization(true, N, name: "l3 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l3 LeakyReLU"),
        new Linear(true, N, N, name: "l4 Linear"), // L4
        new BatchNormalization(true, N, name: "l4 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l4 LeakyReLU"),
        new Linear(true, N, N, name: "l5 Linear"), // L5
        new BatchNormalization(true, N, name: "l5 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l5 LeakyReLU"),
        new Linear(true, N, N, name: "l6 Linear"), // L6
        new BatchNormalization(true, N, name: "l6 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l6 LeakyReLU"),
        new Linear(true, N, N, name: "l7 Linear"), // L7
        new BatchNormalization(true, N, name: "l7 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l7 ReLU"),
        new Linear(true, N, N, name: "l8 Linear"), // L8
        new BatchNormalization(true, N, name: "l8 BatchNorm"),
        new LeakyReLU(slope: 0.000001, name: "l8 LeakyReLU"),
        new Linear(true, N, N, name: "l9 Linear"), // L9
        new BatchNormalization(true, N, name: "l9 BatchNorm"),
        new PolynomialApproximantSteep(slope: 0.000001, name: "l9 PolynomialApproximantSteep"),
        new Linear(true, N, N, name: "l10 Linear"), // L10
        new BatchNormalization(true, N, name: "l10 BatchNorm"),
        new PolynomialApproximantSteep(slope: 0.000001, name: "l10 PolynomialApproximantSteep"),
        new Linear(true, N, N, name: "l11 Linear"), // L11
        new BatchNormalization(true, N, name: "l11 BatchNorm"),
        new PolynomialApproximantSteep(slope: 0.000001, name: "l11 PolynomialApproximantSteep"),
        new Linear(true, N, N, name: "l12 Linear"), // L12
        new BatchNormalization(true, N, name: "l12 BatchNorm"),
        new PolynomialApproximantSteep(slope: 0.000001, name: "l12 PolynomialApproximantSteep"),
        new Linear(true, N, N, name: "l13 Linear"), // L13
        new BatchNormalization(true, N, name: "l13 BatchNorm"),
        new PolynomialApproximantSteep(slope: 0.000001, name: "l13 PolynomialApproximantSteep"),
        new Linear(true, N, N, name: "l14 Linear"), // L14
        new BatchNormalization(true, N, name: "l14 BatchNorm"),
        new PolynomialApproximantSteep(slope: 0.000001, name: "l14 PolynomialApproximantSteep"),
        new Linear(true, N, 10, name: "l15 Linear") // L15
    );

    nn.SetOptimizer(new AdaGrad());

    RunningStatistics stats = new RunningStatistics();

    // Each histogram gets its single bucket once, up front. The buckets used to be
    // re-added on every batch, which only piled up identical buckets.
    Histogram lossHistogram = new Histogram();
    lossHistogram.AddBucket(new Bucket(-10, 10));
    Histogram accuracyHistogram = new Histogram();
    accuracyHistogram.AddBucket(new Bucket(-10.0, 10));

    Real highestAccuracy = 0;

    // The "best" losses are minima, so they cannot be seeded with 0 (no non-negative
    // loss would ever be smaller). The flags record whether a value has been captured;
    // if none ever is, 0 is reported.
    Real bestLocalLoss = 0;
    Real bestTotalLoss = 0;
    bool hasBestLocalLoss = false;
    bool hasBestTotalLoss = false;

    // First skeleton save
    ModelIO.Save(nn, nn.Name);

    for (int epoch = 0; epoch < 1; epoch++)
    {
        RILogManager.Default?.SendDebug("epoch " + (epoch + 1));
        RILogManager.Default?.ViewerSendWatch("epoch", (epoch + 1));

        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            RILogManager.Default?.SendInformation("batch count " + i + "/" + TRAIN_DATA_COUNT);

            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);
            Real sumLoss = Trainer.Train(nn, datasetX.Data, datasetX.Label, new SoftmaxCrossEntropy());

            stats.Push(sumLoss);

            // NaN never compares equal to anything (itself included), so "!= Double.NaN"
            // is always true; double.IsNaN is the only reliable test.
            if (!double.IsNaN(sumLoss))
            {
                if (!hasBestLocalLoss || sumLoss < bestLocalLoss)
                {
                    bestLocalLoss = sumLoss;
                    hasBestLocalLoss = true;
                }

                // The running mean itself turns NaN once any NaN loss has been pushed.
                if (!double.IsNaN(stats.Mean) && (!hasBestTotalLoss || stats.Mean < bestTotalLoss))
                {
                    bestTotalLoss = stats.Mean;
                    hasBestTotalLoss = true;
                }
            }

            try
            {
                lossHistogram.AddData(sumLoss);
            }
            catch (Exception ex)
            {
                // Best effort: a value the histogram rejects must not abort training,
                // but it should not vanish silently either.
                RILogManager.Default?.SendDebug("loss histogram skipped value " + sumLoss + ": " + ex.Message);
            }

            if (i % 20 == 0)
            {
                RILogManager.Default?.SendDebug("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
                RILogManager.Default?.SendDebug("Total/Mean loss " + stats.Mean);
                RILogManager.Default?.SendDebug("local loss " + sumLoss);

                RILogManager.Default?.SendInformation("batch count " + i + "/" + TRAIN_DATA_COUNT);
                RILogManager.Default?.ViewerSendWatch("batch count", i);
                RILogManager.Default?.ViewerSendWatch("Total/Mean loss", stats.Mean);
                RILogManager.Default?.ViewerSendWatch("local loss", sumLoss);

                RILogManager.Default?.SendDebug("");

                RILogManager.Default?.SendDebug("Testing...");
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);

                if (accuracy > highestAccuracy)
                {
                    highestAccuracy = accuracy;
                }

                RILogManager.Default?.SendDebug("Accuracy: " + accuracy);
                RILogManager.Default?.ViewerSendWatch("Accuracy", accuracy);

                try
                {
                    accuracyHistogram.AddData(accuracy);
                }
                catch (Exception ex)
                {
                    // Best effort, same as the loss histogram above.
                    RILogManager.Default?.SendDebug("accuracy histogram skipped value " + accuracy + ": " + ex.Message);
                }
            }
        }
    }

    RILogManager.Default?.SendDebug("Best Accuracy: " + highestAccuracy);
    RILogManager.Default?.SendDebug("Best Total Loss " + bestTotalLoss);
    RILogManager.Default?.SendDebug("Best Local Loss " + bestLocalLoss);

    RILogManager.Default?.ViewerSendWatch("Best Accuracy:", highestAccuracy);
    RILogManager.Default?.ViewerSendWatch("Best Total Loss", bestTotalLoss);
    RILogManager.Default?.ViewerSendWatch("Best Local Loss", bestLocalLoss);

    // Save all with training data
    ModelIO.Save(nn, nn.Name);
}