// Trains a Linear -> LSTM -> Linear stack on mini-batches produced by DataMaker,
// logs the loss every DISPLAY_EPOCH epochs (never at epoch 0), then calls
// predict on one sequence of a freshly drawn mini-batch.
public static void Run()
{
    DataMaker maker = new DataMaker(STEPS_PER_CYCLE, NUMBER_OF_CYCLES);
    NdArray series = maker.Make();

    // The layers are created in the same order in which they are stacked.
    Linear inputLayer = new Linear(true, 1, 5, name: "Linear l1");
    LSTM recurrentLayer = new LSTM(true, 5, 5, name: "LSTM l2");
    Linear outputLayer = new Linear(true, 5, 1, name: "Linear l3");

    FunctionStack network = new FunctionStack("Test8", inputLayer, recurrentLayer, outputLayer);
    network.SetOptimizer(new Adam());

    RILogManager.Default?.SendDebug("Training...");

    for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++)
    {
        NdArray[] batch = maker.MakeMiniBatch(series, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);
        Real loss = ComputeLoss(network, batch);

        // Apply the optimizer step, then reset the stack's state before the next mini-batch.
        network.Update();
        network.ResetState();

        // Nothing to report on the very first epoch or between display intervals.
        if (epoch == 0 || epoch % DISPLAY_EPOCH != 0)
        {
            continue;
        }

        RILogManager.Default?.SendDebug("[{0}]training loss:\t{1}", epoch, loss);
    }

    RILogManager.Default?.SendDebug("Testing...");

    NdArray[] evaluationBatch = maker.MakeMiniBatch(series, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

    // Fixed position inside the mini-batch of the sequence handed to predict.
    const int sampleIndex = 45;
    predict(evaluationBatch[sampleIndex], network, PREDICTION_LENGTH);
}
// Trains a small 2-2-1 network on the four XOR samples and prints its predictions.
public static void Run()
{
    // Number of training iterations
    const int iterations = 10000;

    // Training inputs
    Real[][] inputs =
    {
        new Real[] { 0, 0 },
        new Real[] { 1, 0 },
        new Real[] { 0, 1 },
        new Real[] { 1, 1 }
    };

    // Training labels, one per input row
    Real[][] targets =
    {
        new Real[] { 0 },
        new Real[] { 1 },
        new Real[] { 1 },
        new Real[] { 0 }
    };

    // The network layout is listed in a FunctionStack
    FunctionStack nn = new FunctionStack(
        new Linear(2, 2, name: "l1 Linear"),
        new ReLU(name: "l1 ReLU"),
        new Linear(2, 1, name: "l2 Linear")
    );

    // Declare the optimizer (Adam this time)
    nn.SetOptimizer(new Adam());

    // Training loop
    Console.WriteLine("Training...");

    for (int iteration = 0; iteration < iterations; iteration++)
    {
        // MeanSquaredError is the loss function; isUpdate is false for every sample ...
        for (int sample = 0; sample < inputs.Length; sample++)
        {
            Trainer.Train(nn, inputs[sample], targets[sample], new MeanSquaredError(), false);
        }

        // ... so that a single update after all samples treats them as one mini-batch.
        nn.Update();
    }

    // Show the training result
    Console.WriteLine("Test Start...");

    foreach (Real[] pair in inputs)
    {
        NdArray prediction = nn.Predict(pair)[0];

        // Outputs above 0.5 are reported as 1, everything else as 0.
        int bit = prediction.Data[0] > 0.5 ? 1 : 0;
        Console.WriteLine(pair[0] + " xor " + pair[1] + " = " + bit + " " + prediction);
    }
}
// XOR sample: fits a two-layer network to the XOR truth table and prints what it learned.
public static void Run()
{
    // How many times the whole truth table is presented
    const int passCount = 10000;

    // Truth-table inputs
    Real[][] samples =
    {
        new Real[] { 0, 0 },
        new Real[] { 1, 0 },
        new Real[] { 0, 1 },
        new Real[] { 1, 1 }
    };

    // Expected XOR output for each input row
    Real[][] expected =
    {
        new Real[] { 0 },
        new Real[] { 1 },
        new Real[] { 1 },
        new Real[] { 0 }
    };

    // Network configuration, written as a FunctionStack
    Linear hidden = new Linear(2, 2, name: "l1 Linear");
    ReLU activation = new ReLU(name: "l1 ReLU");
    Linear output = new Linear(2, 1, name: "l2 Linear");
    FunctionStack nn = new FunctionStack(hidden, activation, output);

    // Adam is the optimizer for this sample
    nn.SetOptimizer(new Adam());

    Console.WriteLine("Training...");

    for (int pass = 0; pass < passCount; pass++)
    {
        // Train on every row with MeanSquaredError, without updating after each row.
        for (int row = 0; row < samples.Length; row++)
        {
            Trainer.Train(nn, samples[row], expected[row], new MeanSquaredError(), false);
        }

        // Updating only once per pass makes the four rows act as a mini-batch.
        nn.Update();
    }

    Console.WriteLine("Test Start...");

    foreach (Real[] sample in samples)
    {
        NdArray answer = nn.Predict(sample)[0];

        // Threshold the raw output at 0.5 to obtain the printed bit.
        int thresholded = answer.Data[0] > 0.5 ? 1 : 0;
        Console.WriteLine(sample[0] + " xor " + sample[1] + " = " + thresholded + " " + answer);
    }
}
// XOR sample using AdaGrad; progress and results go to the ReflectInsight logger.
public static void Run()
{
    const int iterations = 10000;

    // XOR truth table: inputs and the matching expected outputs.
    Real[][] inputs =
    {
        new Real[] { 0, 0 },
        new Real[] { 1, 0 },
        new Real[] { 0, 1 },
        new Real[] { 1, 1 }
    };
    Real[][] targets =
    {
        new Real[] { 0 },
        new Real[] { 1 },
        new Real[] { 1 },
        new Real[] { 0 }
    };

    Linear hidden = new Linear(true, 2, 2, name: "l1 Linear");
    ReLU activation = new ReLU(name: "l1 ReLU");
    Linear output = new Linear(true, 2, 1, name: "l2 Linear");
    FunctionStack nn = new FunctionStack("Test2", hidden, activation, output);

    nn.SetOptimizer(new AdaGrad());

    RILogManager.Default?.SendDebug("Training...");

    for (int iteration = 0; iteration < iterations; iteration++)
    {
        // MeanSquaredError is the loss; no per-sample update is requested here.
        for (int sample = 0; sample < inputs.Length; sample++)
        {
            Trainer.Train(nn, inputs[sample], targets[sample], new MeanSquaredError(), false);
        }

        // One update after all samples makes them behave as a single mini-batch.
        nn.Update();
    }

    RILogManager.Default?.SendDebug("Test Start...");

    foreach (Real[] pair in inputs)
    {
        NdArray prediction = nn.Predict(true, pair)[0];

        // The raw output is thresholded at 0.5 inside the message.
        RILogManager.Default?.SendDebug($"{pair[0]} xor {pair[1]} = {(prediction.Data[0] > 0.5 ? 1 : 0)} {prediction}");
    }
}
// Runs one training pass over a batch: forward, loss evaluation, backward and,
// when isUpdate is true, an optimizer update. Returns the value produced by
// lossFunction.Evaluate.
public static Real Train(FunctionStack functionStack, NdArray input, NdArray teach, LossFunction lossFunction, bool isUpdate = true)
{
    // Forward pass; the outputs are handed to both the loss and the backward pass.
    NdArray[] outputs = functionStack.Forward(input);
    Real loss = lossFunction.Evaluate(outputs, teach);

    // Backward pass over the batch
    functionStack.Backward(outputs);

    // The caller may postpone the update, e.g. to combine several calls.
    if (!isUpdate)
    {
        return loss;
    }

    functionStack.Update();
    return loss;
}
// Performs a single batch training step on the given stack and returns the
// loss reported by lossFunction.Evaluate. The optimizer update is skipped
// when isUpdate is false.
public static Real Train(FunctionStack functionStack, NdArray input, NdArray teach, LossFunction lossFunction, bool isUpdate = true)
{
    // Forward propagation
    NdArray[] forwardOutput = functionStack.Forward(input);

    // Loss against the teaching data
    Real batchLoss = lossFunction.Evaluate(forwardOutput, teach);

    // Backward propagation starting from the forward output
    functionStack.Backward(forwardOutput);

    if (!isUpdate)
    {
        // Update deferred to the caller
        return batchLoss;
    }

    functionStack.Update();
    return batchLoss;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Runs one training pass over a batch: forward propagation, loss evaluation, backward
/// propagation and, optionally, an optimizer update.
/// </summary>
///
/// <param name="functionStack"> Stack of functions to train. </param>
/// <param name="input">         The input data. </param>
/// <param name="teach">         The teaching data passed to the loss function. </param>
/// <param name="lossFunction">  The loss function. </param>
/// <param name="isUpdate">      (Optional) True to call <c>functionStack.Update()</c> after the
///                              backward pass; false to leave the update to the caller. </param>
/// <param name="verbose">       (Optional) True to send progress messages and the resulting loss
///                              to the default ReflectInsight logger, when one is configured. </param>
///
/// <returns> The loss returned by <c>lossFunction.Evaluate</c>. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static Real Train([NotNull] FunctionStack functionStack, [CanBeNull] NdArray input, [CanBeNull] NdArray teach, [NotNull] LossFunction lossFunction, bool isUpdate = true, bool verbose = true)
{
    if (verbose)
    {
        RILogManager.Default?.EnterMethod("Training " + functionStack.Name);
    }

    Real sumLoss;
    try
    {
        if (verbose)
        {
            RILogManager.Default?.SendDebug("Forward propagation");
        }
        NdArray[] result = functionStack.Forward(verbose, input);

        if (verbose)
        {
            RILogManager.Default?.SendDebug("Evaluating loss");
        }
        sumLoss = lossFunction.Evaluate(result, teach);

        // Run Backward batch
        if (verbose)
        {
            RILogManager.Default?.SendDebug("Backward propagation");
        }
        functionStack.Backward(verbose, result);

        if (isUpdate)
        {
            if (verbose)
            {
                RILogManager.Default?.SendDebug("Updating stack");
            }
            functionStack.Update();
        }
    }
    finally
    {
        // Always close the method scope opened above, even when a step throws,
        // so every EnterMethod is matched by an ExitMethod.
        if (verbose)
        {
            RILogManager.Default?.ExitMethod("Training " + functionStack.Name);
        }
    }

    if (verbose)
    {
        RILogManager.Default?.ViewerSendWatch("Local Loss", sumLoss.ToString(), sumLoss);
    }

    return sumLoss;
}
// Trains a Linear -> LSTM -> Linear stack on mini-batches produced by DataMaker,
// prints the loss every DISPLAY_EPOCH epochs (never at epoch 0), then calls
// predict on one sequence of a freshly drawn mini-batch.
public static void Run()
{
    DataMaker generator = new DataMaker(STEPS_PER_CYCLE, NUMBER_OF_CYCLES);
    NdArray wave = generator.Make();

    // The network configuration is written as a FunctionStack.
    Linear inputLayer = new Linear(1, 5, name: "Linear l1");
    LSTM recurrentLayer = new LSTM(5, 5, name: "LSTM l2");
    Linear outputLayer = new Linear(5, 1, name: "Linear l3");
    FunctionStack network = new FunctionStack(inputLayer, recurrentLayer, outputLayer);

    // Declare the optimizer
    network.SetOptimizer(new Adam());

    // Training loop
    Console.WriteLine("Training...");

    for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++)
    {
        NdArray[] miniBatch = generator.MakeMiniBatch(wave, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);
        Real loss = ComputeLoss(network, miniBatch);

        // Apply the optimizer step, then reset the stack's state before the next mini-batch.
        network.Update();
        network.ResetState();

        // Skip reporting on the first epoch and between display intervals.
        if (epoch == 0 || epoch % DISPLAY_EPOCH != 0)
        {
            continue;
        }

        Console.WriteLine("[{0}]training loss:\t{1}", epoch, loss);
    }

    Console.WriteLine("Testing...");

    NdArray[] evaluationBatch = generator.MakeMiniBatch(wave, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

    // Fixed position inside the mini-batch of the sequence handed to predict.
    const int sampleIndex = 45;
    predict(evaluationBatch[sampleIndex], network, PREDICTION_LENGTH);
}
// Loads Chainer-trained parameters into an equivalent FunctionStack, runs a single
// training step on a fixed 12x12 input and logs the first convolution layer's
// gradients (before the update) and parameters (after the update).
public static void Run()
{
    // Input data: one channel, 12 x 12
    NdArray image = new NdArray(new Real[, , ]
    {
        {
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.9, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.9, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.8, 0.5, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.3, 0.3, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
        }
    });

    // Teaching signal
    Real[] target = { 0.0, 1.0 };

    // Describe the network to be loaded as a FunctionStack and match the parameters
    // of each function. The name of each parameterised layer must match the
    // variable name used on the Chainer side.
    FunctionStack nn = new FunctionStack("Test16",
        new Convolution2D(true, 1, 2, 3, name: "conv1", gpuEnable: true), // set the GPU flag where needed
        new ReLU(),
        new MaxPooling(2, 2),
        new Convolution2D(true, 2, 2, 2, name: "conv2", gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2),
        new Linear(true, 8, 2, name: "fl3"),
        new ReLU(),
        new Linear(true, 2, 2, name: "fl4")
    );

    /* Declaration on the Chainer side
     * class NN(chainer.Chain):
     *     def __init__(self):
     *         super(NN, self).__init__(
     *             conv1 = L.Convolution2D(1,2,3),
     *             conv2 = L.Convolution2D(2,2,2),
     *             fl3 = L.Linear(8,2),
     *             fl4 = L.Linear(2,2)
     *         )
     *
     *     def __call__(self, x):
     *         h_conv1 = F.relu(self.conv1(x))
     *         h_pool1 = F.max_pooling_2d(h_conv1, 2)
     *         h_conv2 = F.relu(self.conv2(h_pool1))
     *         h_pool2 = F.max_pooling_2d(h_conv2, 2)
     *         h_fc1 = F.relu(self.fl3(h_pool2))
     *         y = self.fl4(h_fc1)
     *         return y
     */

    // Read the parameters from the model file
    ChainerModelDataLoader.ModelLoad(MODEL_FILE_PATH, nn);

    // From here on the stack is used as usual
    nn.SetOptimizer(new SGD());

    // One training step without an immediate update
    Trainer.Train(nn, image, target, new MeanSquaredError(), false);

    // First function of the stack, kept for reporting
    Convolution2D firstConv = (Convolution2D)nn.Functions[0];

    // Update consumes the gradients, so they are logged before it runs
    RILogManager.Default?.SendDebug("gw1");
    RILogManager.Default?.SendDebug(firstConv.Weight.ToString("Grad"));
    RILogManager.Default?.SendDebug("gb1");
    RILogManager.Default?.SendDebug(firstConv.Bias.ToString("Grad"));

    nn.Update();

    RILogManager.Default?.SendDebug("w1");
    RILogManager.Default?.SendDebug(firstConv.Weight.ToString());
    RILogManager.Default?.SendDebug("b1");
    RILogManager.Default?.SendDebug(firstConv.Bias.ToString());
}
// Builds a small CNN from explicitly specified initial weights, runs a single
// training step on a fixed 12x12 input and prints the first convolution layer's
// gradients (before the update) and parameters (after the update).
public static void Run()
{
    // Initial values for every parameterised layer
    Real[,,,] conv1Weights =
    {
        { { { 1.0, 0.5, 0.0 }, { 0.5, 0.0, -0.5 }, { 0.0, -0.5, -1.0 } } },
        { { { 0.0, -0.1, 0.1 }, { -0.3, 0.4, 0.7 }, { 0.5, -0.2, 0.2 } } }
    };
    Real[] conv1Bias = { 0.5, 1.0 };

    Real[,,,] conv2Weights =
    {
        { { { -0.1, 0.6 }, { 0.3, -0.9 } }, { { 0.7, 0.9 }, { -0.2, -0.3 } } },
        { { { -0.6, -0.1 }, { 0.3, 0.3 } }, { { -0.5, 0.8 }, { 0.9, 0.1 } } }
    };
    Real[] conv2Bias = { 0.1, 0.9 };

    Real[,] linear1Weights =
    {
        { 0.5, 0.3, 0.4, 0.2, 0.6, 0.1, 0.4, 0.3 },
        { 0.6, 0.4, 0.9, 0.1, 0.5, 0.2, 0.3, 0.4 }
    };
    Real[] linear1Bias = { 0.01, 0.02 };

    Real[,] linear2Weights = { { 0.8, 0.2 }, { 0.4, 0.6 } };
    Real[] linear2Bias = { 0.02, 0.01 };

    // Input data: one channel, 12 x 12
    NdArray image = new NdArray(new Real[, , ]
    {
        {
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.9, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.9, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.8, 0.5, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.3, 0.3, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
        }
    });

    // Teaching signal
    Real[] target = { 0.0, 1.0 };

    // A layer whose contents will be inspected is held in its own variable
    Convolution2D firstConv = new Convolution2D(1, 2, 3, initialW: conv1Weights, initialb: conv1Bias, name: "l2 Conv2D");

    // The network configuration is listed in a FunctionStack.
    // (The original sample also carried commented-out AveragePooling alternatives
    // to the two MaxPooling2D layers.)
    FunctionStack nn = new FunctionStack(
        firstConv,
        new ReLU(name: "l2 ReLU"),
        new MaxPooling2D(2, 2, name: "l2 MaxPooling"),
        new Convolution2D(2, 2, 2, initialW: conv2Weights, initialb: conv2Bias, name: "l3 Conv2D"),
        new ReLU(name: "l3 ReLU"),
        new MaxPooling2D(2, 2, name: "l3 MaxPooling"),
        new Linear(8, 2, initialW: linear1Weights, initialb: linear1Bias, name: "l4 Linear"),
        new ReLU(name: "l4 ReLU"),
        new Linear(2, 2, initialW: linear2Weights, initialb: linear2Bias, name: "l5 Linear")
    );

    nn.SetOptimizer(new SGD(0.1));

    // Run one training step without an immediate update
    Trainer.Train(nn, image, target, new MeanSquaredError(), false);

    // Update consumes the gradients, so they are printed before it runs
    Console.WriteLine("gw1");
    Console.WriteLine(firstConv.Weight.ToString("Grad"));
    Console.WriteLine("gb1");
    Console.WriteLine(firstConv.Bias.ToString("Grad"));

    // Apply the update
    nn.Update();

    Console.WriteLine("w1");
    Console.WriteLine(firstConv.Weight);
    Console.WriteLine("b1");
    Console.WriteLine(firstConv.Bias);
}
// MNIST sample that trains four layer blocks with Decoupled Neural Interfaces (DNI):
// each of the first three blocks is updated immediately from a gradient predicted by
// its own DNI stack, and every DNI stack is then trained (MeanSquaredError) against
// the gradient that later arrives at that block's output.
public static void Run()
{
    // Prepare the MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();

    Console.WriteLine("Training Start...");

    // The network configuration is listed in FunctionStacks
    FunctionStack layer1 = new FunctionStack(
        new Linear(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );

    FunctionStack layer2 = new FunctionStack(
        new Linear(256, 256, name: "l2 Linear"),
        new BatchNormalization(256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );

    FunctionStack layer3 = new FunctionStack(
        new Linear(256, 256, name: "l3 Linear"),
        new BatchNormalization(256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );

    FunctionStack layer4 = new FunctionStack(
        new Linear(256, 10, name: "l4 Linear")
    );

    // A FunctionStack can itself be stacked as a Function; this one is used for testing
    FunctionStack nn = new FunctionStack(layer1, layer2, layer3, layer4);

    // Gradient predictors; the last Linear of each starts from an all-zero weight array
    FunctionStack dni1 = new FunctionStack(
        new Linear(256, 1024, name: "DNI1 Linear1"),
        new BatchNormalization(1024, name: "DNI1 Nrom1"),
        new ReLU(name: "DNI1 ReLU1"),
        new Linear(1024, 1024, name: "DNI1 Linear2"),
        new BatchNormalization(1024, name: "DNI1 Nrom2"),
        new ReLU(name: "DNI1 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );

    FunctionStack dni2 = new FunctionStack(
        new Linear(256, 1024, name: "DNI2 Linear1"),
        new BatchNormalization(1024, name: "DNI2 Nrom1"),
        new ReLU(name: "DNI2 ReLU1"),
        new Linear(1024, 1024, name: "DNI2 Linear2"),
        new BatchNormalization(1024, name: "DNI2 Nrom2"),
        new ReLU(name: "DNI2 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3")
    );

    FunctionStack dni3 = new FunctionStack(
        new Linear(256, 1024, name: "DNI3 Linear1"),
        new BatchNormalization(1024, name: "DNI3 Nrom1"),
        new ReLU(name: "DNI3 ReLU1"),
        new Linear(1024, 1024, name: "DNI3 Linear2"),
        new BatchNormalization(1024, name: "DNI3 Nrom2"),
        new ReLU(name: "DNI3 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3")
    );

    // Declare one optimizer per stack
    layer1.SetOptimizer(new Adam());
    layer2.SetOptimizer(new Adam());
    layer3.SetOptimizer(new Adam());
    layer4.SetOptimizer(new Adam());

    dni1.SetOptimizer(new Adam());
    dni2.SetOptimizer(new Adam());
    dni3.SetOptimizer(new Adam());

    // Epoch loop
    for (int epoch = 0; epoch < 20; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Running totals, restarted every epoch
        Real totalLoss = 0;
        Real dni1TotalLoss = 0;
        Real dni2TotalLoss = 0;
        Real dni3TotalLoss = 0;

        long totalLossCount = 0;
        long dni1TotalLossCount = 0;
        long dni2TotalLossCount = 0;
        long dni3TotalLossCount = 0;

        // Number of batches to run
        for (int batch = 1; batch <= TRAIN_DATA_COUNT; batch++)
        {
            // Draw a random batch from the training data
            TestDataSet trainBatch = mnistData.GetRandomXSet(BATCH_DATA_COUNT);

            // ---- Block 1: forward, predicted gradient, immediate update ----
            NdArray[] hidden1 = layer1.Forward(trainBatch.Data);
            NdArray[] predictedGrad1 = dni1.Forward(hidden1);
            hidden1[0].Grad = predictedGrad1[0].Data.ToArray();
            layer1.Backward(hidden1);
            hidden1[0].ParentFunc = null; // Backward has run, so cut the computation graph here
            layer1.Update();

            // ---- Block 2 ----
            NdArray[] hidden2 = layer2.Forward(hidden1);
            NdArray[] predictedGrad2 = dni2.Forward(hidden2);
            hidden2[0].Grad = predictedGrad2[0].Data.ToArray();
            layer2.Backward(hidden2);
            hidden2[0].ParentFunc = null;

            // Train the DNI of block 1 against the gradient now stored on hidden1
            Real dni1Loss = new MeanSquaredError().Evaluate(
                predictedGrad1,
                new NdArray(hidden1[0].Grad, predictedGrad1[0].Shape, predictedGrad1[0].BatchCount));

            layer2.Update();

            dni1.Backward(predictedGrad1);
            dni1.Update();

            dni1TotalLoss += dni1Loss;
            dni1TotalLossCount++;

            // ---- Block 3 ----
            NdArray[] hidden3 = layer3.Forward(hidden2);
            NdArray[] predictedGrad3 = dni3.Forward(hidden3);
            hidden3[0].Grad = predictedGrad3[0].Data.ToArray();
            layer3.Backward(hidden3);
            hidden3[0].ParentFunc = null;

            // Train the DNI of block 2 against the gradient now stored on hidden2
            Real dni2Loss = new MeanSquaredError().Evaluate(
                predictedGrad2,
                new NdArray(hidden2[0].Grad, predictedGrad2[0].Shape, predictedGrad2[0].BatchCount));

            layer3.Update();

            dni2.Backward(predictedGrad2);
            dni2.Update();

            dni2TotalLoss += dni2Loss;
            dni2TotalLossCount++;

            // ---- Block 4: the real loss against the labels ----
            NdArray[] logits = layer4.Forward(hidden3);
            Real sumLoss = new SoftmaxCrossEntropy().Evaluate(logits, trainBatch.Label);
            layer4.Backward(logits);
            logits[0].ParentFunc = null;

            totalLoss += sumLoss;
            totalLossCount++;

            // Train the DNI of block 3 against the gradient now stored on hidden3
            Real dni3Loss = new MeanSquaredError().Evaluate(
                predictedGrad3,
                new NdArray(hidden3[0].Grad, predictedGrad3[0].Shape, predictedGrad3[0].BatchCount));

            layer4.Update();

            dni3.Backward(predictedGrad3);
            dni3.Update();

            dni3TotalLoss += dni3Loss;
            dni3TotalLossCount++;

            // Report progress
            Console.WriteLine("\nbatch count " + batch + "/" + TRAIN_DATA_COUNT);
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);

            Console.WriteLine("\nDNI1 total loss " + dni1TotalLoss / dni1TotalLossCount);
            Console.WriteLine("DNI2 total loss " + dni2TotalLoss / dni2TotalLossCount);
            Console.WriteLine("DNI3 total loss " + dni3TotalLoss / dni3TotalLossCount);

            Console.WriteLine("\nDNI1 local loss " + dni1Loss);
            Console.WriteLine("DNI2 local loss " + dni2Loss);
            Console.WriteLine("DNI3 local loss " + dni3Loss);

            // Test the accuracy after every 20 batches
            if (batch % 20 != 0)
            {
                continue;
            }

            Console.WriteLine("\nTesting...");

            // Draw a random batch from the test data
            TestDataSet testBatch = mnistData.GetRandomYSet(TEST_DATA_COUNT);

            // Run the test on the combined stack
            Real accuracy = Trainer.Accuracy(nn, testBatch.Data, testBatch.Label);
            Console.WriteLine("accuracy " + accuracy);
        }
    }
}
// MNIST sample that trains four layer blocks with conditioned DNI (cDNI): each gradient
// predictor receives ResultDataSet.GetTrainData(), built from the block output and the
// labels, and has a 256 + 10 wide input. Each of the first three blocks is updated
// immediately from the predicted gradient, and every cDNI stack is then trained
// (MeanSquaredError) against the gradient that later arrives at that block's output.
public static void Run()
{
    // Prepare the MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();

    Console.WriteLine("Training Start...");

    // The network configuration is written as FunctionStacks
    FunctionStack layer1 = new FunctionStack(
        new Linear(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );

    FunctionStack layer2 = new FunctionStack(
        new Linear(256, 256, name: "l2 Linear"),
        new BatchNormalization(256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );

    FunctionStack layer3 = new FunctionStack(
        new Linear(256, 256, name: "l3 Linear"),
        new BatchNormalization(256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );

    FunctionStack layer4 = new FunctionStack(
        new Linear(256, 10, name: "l4 Linear")
    );

    // A FunctionStack can itself be stacked as a Function; this one is used for testing
    FunctionStack nn = new FunctionStack(layer1, layer2, layer3, layer4);

    // Gradient predictors; the last Linear of each starts from an all-zero weight array.
    // Note: the first predictor's last layer is named "DNI1 Linear3" (no leading 'c').
    FunctionStack cdni1 = new FunctionStack(
        new Linear(256 + 10, 1024, name: "cDNI1 Linear1"),
        new BatchNormalization(1024, name: "cDNI1 Nrom1"),
        new ReLU(name: "cDNI1 ReLU1"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );

    FunctionStack cdni2 = new FunctionStack(
        new Linear(256 + 10, 1024, name: "cDNI2 Linear1"),
        new BatchNormalization(1024, name: "cDNI2 Nrom1"),
        new ReLU(name: "cDNI2 ReLU1"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
    );

    FunctionStack cdni3 = new FunctionStack(
        new Linear(256 + 10, 1024, name: "cDNI3 Linear1"),
        new BatchNormalization(1024, name: "cDNI3 Nrom1"),
        new ReLU(name: "cDNI3 ReLU1"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
    );

    // Declare one optimizer per stack
    layer1.SetOptimizer(new Adam(0.00003f));
    layer2.SetOptimizer(new Adam(0.00003f));
    layer3.SetOptimizer(new Adam(0.00003f));
    layer4.SetOptimizer(new Adam(0.00003f));

    cdni1.SetOptimizer(new Adam(0.00003f));
    cdni2.SetOptimizer(new Adam(0.00003f));
    cdni3.SetOptimizer(new Adam(0.00003f));

    for (int epoch = 0; epoch < 10; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Running totals, restarted every epoch
        Real totalLoss = 0;
        Real cdni1TotalLoss = 0;
        Real cdni2TotalLoss = 0;
        Real cdni3TotalLoss = 0;

        long totalLossCount = 0;
        long cdni1TotalLossCount = 0;
        long cdni2TotalLossCount = 0;
        long cdni3TotalLossCount = 0;

        // Number of batches to run
        for (int batch = 1; batch <= TRAIN_DATA_COUNT; batch++)
        {
            // Draw a random batch from the training data
            TestDataSet trainBatch = mnistData.GetRandomXSet(BATCH_DATA_COUNT);

            // ---- Block 1: forward, predicted gradient, immediate update ----
            NdArray[] hidden1 = layer1.Forward(trainBatch.Data);
            ResultDataSet labelled1 = new ResultDataSet(hidden1, trainBatch.Label);

            NdArray[] predictedGrad1 = cdni1.Forward(labelled1.GetTrainData());
            hidden1[0].Grad = predictedGrad1[0].Data.ToArray();

            layer1.Backward(hidden1);
            hidden1[0].ParentFunc = null;
            layer1.Update();

            // ---- Block 2 ----
            NdArray[] hidden2 = layer2.Forward(labelled1.Result);
            ResultDataSet labelled2 = new ResultDataSet(hidden2, labelled1.Label);

            NdArray[] predictedGrad2 = cdni2.Forward(labelled2.GetTrainData());
            hidden2[0].Grad = predictedGrad2[0].Data.ToArray();

            layer2.Backward(hidden2);
            hidden2[0].ParentFunc = null;

            // Train the cDNI of block 1 against the gradient now stored on its result
            Real cdni1Loss = new MeanSquaredError().Evaluate(
                predictedGrad1,
                new NdArray(labelled1.Result[0].Grad, predictedGrad1[0].Shape, predictedGrad1[0].BatchCount));

            layer2.Update();

            cdni1.Backward(predictedGrad1);
            cdni1.Update();

            cdni1TotalLoss += cdni1Loss;
            cdni1TotalLossCount++;

            // ---- Block 3 ----
            NdArray[] hidden3 = layer3.Forward(labelled2.Result);
            ResultDataSet labelled3 = new ResultDataSet(hidden3, labelled2.Label);

            NdArray[] predictedGrad3 = cdni3.Forward(labelled3.GetTrainData());
            hidden3[0].Grad = predictedGrad3[0].Data.ToArray();

            layer3.Backward(hidden3);
            hidden3[0].ParentFunc = null;

            // Train the cDNI of block 2 against the gradient now stored on its result
            Real cdni2Loss = new MeanSquaredError().Evaluate(
                predictedGrad2,
                new NdArray(labelled2.Result[0].Grad, predictedGrad2[0].Shape, predictedGrad2[0].BatchCount));

            layer3.Update();

            cdni2.Backward(predictedGrad2);
            cdni2.Update();

            cdni2TotalLoss += cdni2Loss;
            cdni2TotalLossCount++;

            // ---- Block 4: the real loss against the labels ----
            NdArray[] logits = layer4.Forward(labelled3.Result);
            Real sumLoss = new SoftmaxCrossEntropy().Evaluate(logits, labelled3.Label);

            layer4.Backward(logits);
            logits[0].ParentFunc = null;

            totalLoss += sumLoss;
            totalLossCount++;

            // Train the cDNI of block 3 against the gradient now stored on its result
            Real cdni3Loss = new MeanSquaredError().Evaluate(
                predictedGrad3,
                new NdArray(labelled3.Result[0].Grad, predictedGrad3[0].Shape, predictedGrad3[0].BatchCount));

            layer4.Update();

            cdni3.Backward(predictedGrad3);
            cdni3.Update();

            cdni3TotalLoss += cdni3Loss;
            cdni3TotalLossCount++;

            // Report progress
            Console.WriteLine("\nbatch count " + batch + "/" + TRAIN_DATA_COUNT);
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);

            Console.WriteLine("\ncDNI1 total loss " + cdni1TotalLoss / cdni1TotalLossCount);
            Console.WriteLine("cDNI2 total loss " + cdni2TotalLoss / cdni2TotalLossCount);
            Console.WriteLine("cDNI3 total loss " + cdni3TotalLoss / cdni3TotalLossCount);

            Console.WriteLine("\ncDNI1 local loss " + cdni1Loss);
            Console.WriteLine("cDNI2 local loss " + cdni2Loss);
            Console.WriteLine("cDNI3 local loss " + cdni3Loss);

            // Test the accuracy after every 20 batches
            if (batch % 20 != 0)
            {
                continue;
            }

            Console.WriteLine("\nTesting...");

            // Draw a random batch from the test data
            TestDataSet testBatch = mnistData.GetRandomYSet(TEST_DATA_COUNT);

            // Run the test on the combined stack
            Real accuracy = Trainer.Accuracy(nn, testBatch.Data, testBatch.Label);
            Console.WriteLine("accuracy " + accuracy);
        }
    }
}
// Transfer-learning sample: a pre-trained VGG16 (without its output layer) is used as a
// fixed feature extractor for the Kaggle dogs-vs-cats images, and a small
// Linear + Sigmoid head is trained on the extracted 4096-element feature vectors.
//
// Fixes relative to the previous version:
//  * the dog sample of each training pair is stored in the odd slot (it used to
//    overwrite the cat sample, leaving every odd slot null);
//  * accuracy is measured against testLabel (it used trainLabel);
//  * accuracy is normalised and printed once per epoch, after all test samples
//    (it used to be divided and printed inside the per-sample loop);
//  * bitmaps are disposed after use;
//  * the GPU-enable loop now visits every remaining layer, including the last one.
public static void Main()
{
    // Check the platformId using the method described in the OpenCL / GPU setup article:
    // https://jinbeizame.hateblo.jp/entry/kelpnet_opencl_gpu
    Weaver.Initialize(ComputeDeviceTypes.Gpu, platformId: 1, deviceIndex: 0);

    // Download the pre-trained VGG model
    string modelFilePath = InternetFileDownloader.Donwload(DOWNLOAD_URL, MODEL_FILE);

    // Keep the pre-trained model as a list of Functions
    List<Function> vgg16Net = CaffemodelDataLoader.ModelLoad(modelFilePath);

    // Remove the VGG output layer and its activation function
    vgg16Net.RemoveAt(vgg16Net.Count - 1);
    vgg16Net.RemoveAt(vgg16Net.Count - 1);

    // Turn on gpuEnable for every layer type that supports it
    for (int i = 0; i < vgg16Net.Count; i++)
    {
        if (vgg16Net[i] is Convolution2D || vgg16Net[i] is Linear || vgg16Net[i] is MaxPooling)
        {
            ((IParallelizable)vgg16Net[i]).SetGpuEnable(true);
        }
    }

    // Convert the list into a FunctionStack
    FunctionStack vgg = new FunctionStack(vgg16Net.ToArray());

    // Compress the layers
    vgg.Compress();

    // New output layer and activation function
    FunctionStack nn = new FunctionStack(
        new Linear(4096, 1, gpuEnable: true),
        new Sigmoid()
    );

    // Adam is the optimizer
    nn.SetOptimizer(new Adam());

    Console.WriteLine("DataSet Loading...");

    // Arrays for the training and test data.
    // Download the data set from the URL below and place the "train" folder in
    // VGGTransfer /bin/Debug/Data :
    // https://www.kaggle.com/c/dogs-vs-cats/data
    NdArray[] trainData = new NdArray[TRAIN_DATA_LENGTH * 2];
    NdArray[] trainLabel = new NdArray[TRAIN_DATA_LENGTH * 2];
    NdArray[] testData = new NdArray[TEST_DATA_LENGTH * 2];
    NdArray[] testLabel = new NdArray[TEST_DATA_LENGTH * 2];

    for (int i = 0; i < TRAIN_DATA_LENGTH + TEST_DATA_LENGTH; i++)
    {
        // Feature vectors of the i-th cat and dog image (cat first, as before)
        NdArray catFeatures = ExtractVggFeatures(vgg, "Data/train/cat." + i + ".jpg");
        NdArray dogFeatures = ExtractVggFeatures(vgg, "Data/train/dog." + i + ".jpg");

        // The first TEST_DATA_LENGTH pairs become test data, the rest training data.
        // Each pair occupies two consecutive slots: even = cat (label 0), odd = dog (label 1).
        if (i < TEST_DATA_LENGTH)
        {
            testData[i * 2] = catFeatures;
            testLabel[i * 2] = new NdArray(new Real[] { 0 });
            testData[i * 2 + 1] = dogFeatures;
            testLabel[i * 2 + 1] = new NdArray(new Real[] { 1 });
        }
        else
        {
            int slot = (i - TEST_DATA_LENGTH) * 2;
            trainData[slot] = catFeatures;
            trainLabel[slot] = new NdArray(new Real[] { 0 });
            trainData[slot + 1] = dogFeatures;
            trainLabel[slot + 1] = new NdArray(new Real[] { 1 });
        }
    }

    Console.WriteLine("Training Start...");

    // Mini-batch buffers
    NdArray batchData = new NdArray(new[] { 4096 }, BATCH_SIZE);
    NdArray batchLabel = new NdArray(new[] { 1 }, BATCH_SIZE);

    // Loss function (mean squared error is used for this two-class problem)
    LossFunction lossFunction = new MeanSquaredError();

    // Epoch loop
    for (int epoch = 0; epoch < 10; epoch++)
    {
        // TRAIN_DATA_COUNT training steps per epoch
        for (int step = 0; step < TRAIN_DATA_COUNT; step++)
        {
            // Assemble a mini-batch
            for (int i = 0; i < BATCH_SIZE; i++)
            {
                // Random integer in [0, trainData.Length)
                int index = Mother.Dice.Next(trainData.Length);

                // Copy one sample from trainData (NdArray[]) into its place in batchData (NdArray)
                Array.Copy(trainData[index].Data, 0, batchData.Data, i * batchData.Length, batchData.Length);
                batchLabel.Data[i] = trainLabel[index].Data[0];
            }

            // Training step: forward, loss, backward, update.
            // The loss value is not reported, but Evaluate is still called between
            // Forward and Backward exactly as before.
            NdArray[] output = nn.Forward(batchData);
            lossFunction.Evaluate(output, batchLabel);
            nn.Backward(output);
            nn.Update();
        }

        // Accuracy over all test samples
        Real accuracy = 0;

        for (int i = 0; i < TEST_DATA_LENGTH * 2; i++)
        {
            NdArray[] output = nn.Predict(testData[i]);

            // A sample counts as correct when the output is within 0.5 of its label
            // (label 0 -> output < 0.5, label 1 -> output > 0.5)
            if (Math.Abs(output[0].Data[0] - testLabel[i].Data[0]) < 0.5)
            {
                accuracy += 1;
            }
        }

        // Normalise once, after every test sample has been counted
        accuracy /= TEST_DATA_LENGTH * 2.0;
        Console.WriteLine("Epoch:" + epoch + "accuracy:" + accuracy);
    }
}

// Loads the image at imagePath, draws it scaled to 224x224 into a 24bpp RGB bitmap,
// converts it to an NdArray divided by 255.0 (pixel values 0..255 become 0..1) and
// returns the first output of vgg.Predict for it. All GDI+ objects are disposed
// before returning.
// NOTE(review): assumes NdArrayConverter.Image2NdArray copies the pixel data, so the
// bitmap can be disposed once Predict has returned - confirm.
private static NdArray ExtractVggFeatures(FunctionStack vgg, string imagePath)
{
    using (Bitmap source = new Bitmap(imagePath))
    using (Bitmap resized = new Bitmap(224, 224, PixelFormat.Format24bppRgb))
    {
        using (Graphics canvas = Graphics.FromImage(resized))
        {
            canvas.DrawImage(source, 0, 0, 224, 224);
        }

        return vgg.Predict(NdArrayConverter.Image2NdArray(resized, false, true) / 255.0)[0];
    }
}
// Loads Chainer-trained parameters into an equivalent FunctionStack, runs a single
// training step on a fixed 12x12 input and prints the first convolution layer's
// gradients (before the update) and parameters (after the update).
public static void Run()
{
    // Describe the network to be loaded as a FunctionStack and match the parameters
    // of each Function. The name of each parameterised layer must match the
    // variable name used on the Chainer side.
    FunctionStack nn = new FunctionStack(
        new Convolution2D(1, 2, 3, name: "conv1", gpuEnable: true), // set the GPU flag where needed
        new ReLU(),
        new MaxPooling(2, 2),
        new Convolution2D(2, 2, 2, name: "conv2", gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2),
        new Linear(8, 2, name: "fl3"),
        new ReLU(),
        new Linear(2, 2, name: "fl4")
    );

    /* Declaration on the Chainer side
     * class NN(chainer.Chain):
     *     def __init__(self):
     *         super(NN, self).__init__(
     *             conv1 = L.Convolution2D(1,2,3),
     *             conv2 = L.Convolution2D(2,2,2),
     *             fl3 = L.Linear(8,2),
     *             fl4 = L.Linear(2,2)
     *         )
     *
     *     def __call__(self, x):
     *         h_conv1 = F.relu(self.conv1(x))
     *         h_pool1 = F.max_pooling_2d(h_conv1, 2)
     *         h_conv2 = F.relu(self.conv2(h_pool1))
     *         h_pool2 = F.max_pooling_2d(h_conv2, 2)
     *         h_fc1 = F.relu(self.fl3(h_pool2))
     *         y = self.fl4(h_fc1)
     *         return y
     */

    // Read the parameters from the model file
    ChainerModelDataLoader.ModelLoad(MODEL_FILE_PATH, nn);

    // From here on the stack is used as usual
    nn.SetOptimizer(new SGD());

    // Input data: one channel, 12 x 12
    NdArray sampleImage = new NdArray(new Real[, , ]
    {
        {
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.9, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.9, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.8, 0.5, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.3, 0.3, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
        }
    });

    // Teaching signal
    Real[] expected = { 0.0, 1.0 };

    // Run one training step without an immediate update
    Trainer.Train(nn, sampleImage, expected, new MeanSquaredError(), false);

    // Keep the first function of the stack for displaying results
    Convolution2D firstConv = (Convolution2D)nn.Functions[0];

    // Update consumes the gradients, so they are printed before it runs
    Console.WriteLine("gw1");
    Console.WriteLine(firstConv.Weight.ToString("Grad"));
    Console.WriteLine("gb1");
    Console.WriteLine(firstConv.Bias.ToString("Grad"));

    // Apply the update
    nn.Update();

    Console.WriteLine("w1");
    Console.WriteLine(firstConv.Weight);
    Console.WriteLine("b1");
    Console.WriteLine(firstConv.Bias);
}
/// <summary>
/// Trains a four-stage MNIST classifier where each of the first three stages
/// is updated from the output of a companion "cDNI" stack instead of waiting
/// for the gradient of the following stage. Each cDNI stack is in turn trained
/// with a mean-squared error against the Grad that ends up on the stage output
/// after the following stage has run Backward.
/// Progress is reported through RILogManager; accuracy is measured every 20 batches.
/// </summary>
public static void Run()
{
    // Number of passes over TRAIN_DATA_COUNT batches; also used in the progress message.
    const int epochCount = 10;

    // Learning rate shared by every Adam optimizer below.
    const float learningRate = 0.00003f;

    // Prepare MNIST data
    RILogManager.Default?.SendDebug("MNIST Data Loading...");
    MnistData mnistData = new MnistData(28);

    RILogManager.Default?.SendDebug("Training Start...");

    // Write the network configuration in FunctionStack
    FunctionStack Layer1 = new FunctionStack("Test12 Layer 1",
        new Linear(true, 28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(true, 256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );

    FunctionStack Layer2 = new FunctionStack("Test12 Layer 2",
        new Linear(true, 256, 256, name: "l2 Linear"),
        new BatchNormalization(true, 256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );

    FunctionStack Layer3 = new FunctionStack("Test12 Layer 3",
        new Linear(true, 256, 256, name: "l3 Linear"),
        new BatchNormalization(true, 256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );

    FunctionStack Layer4 = new FunctionStack("Test12 Layer 4",
        new Linear(true, 256, 10, name: "l4 Linear")
    );

    // A FunctionStack can itself be stacked as a Function; this combined
    // stack is only used for the accuracy test.
    FunctionStack nn = new FunctionStack("Test12",
        Layer1,
        Layer2,
        Layer3,
        Layer4
    );

    // The cDNI stacks take 256 + 10 inputs (built by ResultDataSet.GetTrainData())
    // and produce 256 outputs; their last layer starts from an all-zero weight matrix.
    FunctionStack cDNI1 = new FunctionStack("Test12 DNI 1",
        new Linear(true, 256 + 10, 1024, name: "cDNI1 Linear1"),
        new BatchNormalization(true, 1024, name: "cDNI1 Norm1"),
        new ReLU(name: "cDNI1 ReLU1"),
        // Was "DNI1 Linear3": aligned with the cDNI2/cDNI3 naming so Describe() output is consistent.
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI1 Linear3")
    );

    FunctionStack cDNI2 = new FunctionStack("Test12 DNI 2",
        new Linear(true, 256 + 10, 1024, name: "cDNI2 Linear1"),
        new BatchNormalization(true, 1024, name: "cDNI2 Norm1"),
        new ReLU(name: "cDNI2 ReLU1"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
    );

    FunctionStack cDNI3 = new FunctionStack("Test12 DNI 3",
        new Linear(true, 256 + 10, 1024, name: "cDNI3 Linear1"),
        new BatchNormalization(true, 1024, name: "cDNI3 Norm1"),
        new ReLU(name: "cDNI3 ReLU1"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
    );

    // Every stack is optimized independently.
    Layer1.SetOptimizer(new Adam("Adam", learningRate));
    Layer2.SetOptimizer(new Adam("Adam", learningRate));
    Layer3.SetOptimizer(new Adam("Adam", learningRate));
    Layer4.SetOptimizer(new Adam("Adam", learningRate));

    cDNI1.SetOptimizer(new Adam("Adam", learningRate));
    cDNI2.SetOptimizer(new Adam("Adam", learningRate));
    cDNI3.SetOptimizer(new Adam("Adam", learningRate));

    // Describe each function stack
    RILogManager.Default?.SendDebug(Layer1.Describe());
    RILogManager.Default?.SendDebug(Layer2.Describe());
    RILogManager.Default?.SendDebug(Layer3.Describe());
    RILogManager.Default?.SendDebug(Layer4.Describe());

    RILogManager.Default?.SendDebug(cDNI1.Describe());
    RILogManager.Default?.SendDebug(cDNI2.Describe());
    RILogManager.Default?.SendDebug(cDNI3.Describe());

    for (int epoch = 0; epoch < epochCount; epoch++)
    {
        // Running loss sums and sample counts for this epoch.
        Real totalLoss = 0;
        Real cDNI1totalLoss = 0;
        Real cDNI2totalLoss = 0;
        Real cDNI3totalLoss = 0;

        long totalLossCount = 0;
        long cDNI1totalLossCount = 0;
        long cDNI2totalLossCount = 0;
        long cDNI3totalLossCount = 0;

        // How many batches to run per epoch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            RILogManager.Default?.SendDebug("epoch: " + (epoch + 1) + " of " + epochCount + ", batch iteration: " + i + " of " + TRAIN_DATA_COUNT);
            RILogManager.Default?.ViewerSendWatch("Epoch", epoch + 1);
            RILogManager.Default?.ViewerSendWatch("Batch Iteration", i);

            // Get data randomly from the training data
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);

            // --- Stage 1 -------------------------------------------------
            NdArray[] layer1ForwardResult = Layer1.Forward(true, datasetX.Data);
            ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label);

            // Ask cDNI1 for the gradient of the stage-1 output
            NdArray[] cDNI1Result = cDNI1.Forward(true, layer1ResultDataSet.GetTrainData());

            // Use the cDNI1 output as the gradient of the stage-1 output
            layer1ForwardResult[0].Grad = cDNI1Result[0].Data.ToArray();

            // Update stage 1 right away
            Layer1.Backward(true, layer1ForwardResult);
            layer1ForwardResult[0].ParentFunc = null; // detach from the graph now that Backward has run
            Layer1.Update();

            // --- Stage 2 -------------------------------------------------
            NdArray[] layer2ForwardResult = Layer2.Forward(true, layer1ResultDataSet.Result);
            ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label);

            NdArray[] cDNI2Result = cDNI2.Forward(true, layer2ResultDataSet.GetTrainData());
            layer2ForwardResult[0].Grad = cDNI2Result[0].Data.ToArray();

            Layer2.Backward(true, layer2ForwardResult);
            layer2ForwardResult[0].ParentFunc = null;

            // Train cDNI1 against the Grad now present on the stage-1 output.
            // NOTE(review): presumably Layer2.Backward has written the propagated gradient there - confirm.
            Real cDNI1loss = new MeanSquaredError().Evaluate(cDNI1Result, new NdArray(layer1ResultDataSet.Result[0].Grad, cDNI1Result[0].Shape, cDNI1Result[0].BatchCount));

            Layer2.Update();

            cDNI1.Backward(true, cDNI1Result);
            cDNI1.Update();

            cDNI1totalLoss += cDNI1loss;
            cDNI1totalLossCount++;

            // --- Stage 3 -------------------------------------------------
            NdArray[] layer3ForwardResult = Layer3.Forward(true, layer2ResultDataSet.Result);
            ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label);

            NdArray[] cDNI3Result = cDNI3.Forward(true, layer3ResultDataSet.GetTrainData());
            layer3ForwardResult[0].Grad = cDNI3Result[0].Data.ToArray();

            Layer3.Backward(true, layer3ForwardResult);
            layer3ForwardResult[0].ParentFunc = null;

            // Train cDNI2 against the Grad now present on the stage-2 output
            Real cDNI2loss = new MeanSquaredError().Evaluate(cDNI2Result, new NdArray(layer2ResultDataSet.Result[0].Grad, cDNI2Result[0].Shape, cDNI2Result[0].BatchCount));

            Layer3.Update();

            cDNI2.Backward(true, cDNI2Result);
            cDNI2.Update();

            cDNI2totalLoss += cDNI2loss;
            cDNI2totalLossCount++;

            // --- Stage 4 (uses the real loss) ----------------------------
            NdArray[] layer4ForwardResult = Layer4.Forward(true, layer3ResultDataSet.Result);
            Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label);

            Layer4.Backward(true, layer4ForwardResult);
            layer4ForwardResult[0].ParentFunc = null;

            totalLoss += sumLoss;
            totalLossCount++;

            // Train cDNI3 against the Grad now present on the stage-3 output
            Real cDNI3loss = new MeanSquaredError().Evaluate(cDNI3Result, new NdArray(layer3ResultDataSet.Result[0].Grad, cDNI3Result[0].Shape, cDNI3Result[0].BatchCount));

            Layer4.Update();

            cDNI3.Backward(true, cDNI3Result);
            cDNI3.Update();

            cDNI3totalLoss += cDNI3loss;
            cDNI3totalLossCount++;

            // Report running averages and the losses of this batch
            RILogManager.Default?.SendDebug("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
            RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount);
            RILogManager.Default?.SendDebug("local loss " + sumLoss);

            RILogManager.Default?.SendDebug("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount);
            RILogManager.Default?.SendDebug("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount);
            RILogManager.Default?.SendDebug("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount);

            RILogManager.Default?.SendDebug("\ncDNI1 local loss " + cDNI1loss);
            RILogManager.Default?.SendDebug("cDNI2 local loss " + cDNI2loss);
            RILogManager.Default?.SendDebug("cDNI3 local loss " + cDNI3loss);

            // Measure accuracy on random test data every 20 batches
            if (i % 20 == 0)
            {
                RILogManager.Default?.SendDebug("\nTesting...");

                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);

                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                RILogManager.Default?.SendDebug("accuracy " + accuracy);
            }
        }
    }
}
/// <summary>
/// Builds a small CNN with hand-written initial weights, runs one
/// mean-squared-error training step on a fixed 12x12 sample and sends the first
/// convolution's gradients and its parameters after the update to RILogManager.
/// </summary>
public static void Run()
{
    // Initial weights and biases for each parameterised layer.
    Real[,,,] conv1W =
    {
        { { { 1.0, 0.5, 0.0 }, { 0.5, 0.0, -0.5 }, { 0.0, -0.5, -1.0 } } },
        { { { 0.0, -0.1, 0.1 }, { -0.3, 0.4, 0.7 }, { 0.5, -0.2, 0.2 } } }
    };
    Real[] conv1B = { 0.5, 1.0 };

    Real[,,,] conv2W =
    {
        { { { -0.1, 0.6 }, { 0.3, -0.9 } }, { { 0.7, 0.9 }, { -0.2, -0.3 } } },
        { { { -0.6, -0.1 }, { 0.3, 0.3 } }, { { -0.5, 0.8 }, { 0.9, 0.1 } } }
    };
    Real[] conv2B = { 0.1, 0.9 };

    Real[,] linear1W =
    {
        { 0.5, 0.3, 0.4, 0.2, 0.6, 0.1, 0.4, 0.3 },
        { 0.6, 0.4, 0.9, 0.1, 0.5, 0.2, 0.3, 0.4 }
    };
    Real[] linear1B = { 0.01, 0.02 };

    Real[,] linear2W = { { 0.8, 0.2 }, { 0.4, 0.6 } };
    Real[] linear2B = { 0.02, 0.01 };

    // Input data: one channel of 12 rows x 12 columns.
    Real[,,] pixels =
    {
        {
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.9, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.9, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.8, 0.5, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.3, 0.3, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
        }
    };
    NdArray input = new NdArray(pixels);

    // Teacher signal.
    Real[] teacher = { 0.0, 1.0 };

    // The first convolution is kept in its own variable so that its
    // parameters can be inspected after training.
    Convolution2D firstConv = new Convolution2D(true, 1, 2, 3, initialW: conv1W, initialb: conv1B, name: "l2 Conv2D");

    // Network configuration. (The original author left AveragePooling(2, 2)
    // as a commented-out alternative to each MaxPooling.)
    FunctionStack network = new FunctionStack("Test5",
        firstConv,
        new ReLU(name: "l2 ReLU"),
        new MaxPooling(2, 2, name: "l2 MaxPooling"),
        new Convolution2D(true, 2, 2, 2, initialW: conv2W, initialb: conv2B, name: "l3 Conv2D"),
        new ReLU(name: "l3 ReLU"),
        new MaxPooling(2, 2, name: "l3 MaxPooling"),
        new Linear(true, 8, 2, initialW: linear1W, initialb: linear1B, name: "l4 Linear"),
        new ReLU(name: "l4 ReLU"),
        new Linear(true, 2, 2, initialW: linear2W, initialb: linear2B, name: "l5 Linear")
    );

    // No optimizer is set here; according to the original author's note the
    // default SGD(0.1) is used when the declaration is omitted.

    // Run a single training step.
    Trainer.Train(network, input, teacher, new MeanSquaredError(), false);

    // Update() consumes the gradients, so report them before updating.
    RILogManager.Default?.SendDebug("gw1");
    RILogManager.Default?.SendDebug(firstConv.Weight.ToString("Grad"));
    RILogManager.Default?.SendDebug("gb1");
    RILogManager.Default?.SendDebug(firstConv.Bias.ToString("Grad"));

    // Apply the update, then report the resulting parameters.
    network.Update();

    RILogManager.Default?.SendDebug("w1");
    RILogManager.Default?.SendDebug(firstConv.Weight.ToString());
    RILogManager.Default?.SendDebug("b1");
    RILogManager.Default?.SendDebug(firstConv.Bias.ToString());
}
/// <summary>
/// Trains a two-layer LSTM language model with truncated back-propagation
/// through time while recording measurements through DocumentResults into a
/// histogram log written to LogPath. After training the model is evaluated on
/// the test data and the log contents plus a percentile distribution are reported.
/// </summary>
/// <remarks>
/// The log writer and its underlying stream are released in finally blocks, so
/// the log file handle is not leaked when downloading, training or evaluation throws.
/// </remarks>
public static void Run()
{
    // Pure in-memory allocation; created first so it is in scope for the final report.
    var accumulatingHistogram = new LongHistogram(2345678912345, 3);

    _outputStream = File.Create(LogPath);
    try
    {
        _logWriter = new HistogramLogWriter(_outputStream);
        try
        {
            _logWriter.Write(DateTime.Now);

            var recorder = HistogramFactory
                .With64BitBucketSize()
                ?.WithValuesFrom(1)
                ?.WithValuesUpTo(2345678912345)
                ?.WithPrecisionOf(3)
                ?.WithThreadSafeWrites()
                ?.WithThreadSafeReads()
                ?.Create();

            var size = accumulatingHistogram.GetEstimatedFootprintInBytes();
            RILogManager.Default?.SendDebug("Histogram size = {0} bytes ({1:F2} MB)", size, size / 1024.0 / 1024.0);

            // Print the histogram in each supported unit scaling.
            RILogManager.Default?.SendDebug("Recorded latencies [in system clock ticks]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.None, useCsvFormat: true);
            Console.WriteLine();

            RILogManager.Default?.SendDebug("Recorded latencies [in usec]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMicroseconds, useCsvFormat: true);
            Console.WriteLine();

            RILogManager.Default?.SendDebug("Recorded latencies [in msec]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);
            Console.WriteLine();

            RILogManager.Default?.SendDebug("Recorded latencies [in sec]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToSeconds, useCsvFormat: true);

            DocumentResults(accumulatingHistogram, recorder);

            RILogManager.Default?.SendDebug("Build Vocabulary.");
            DocumentResults(accumulatingHistogram, recorder);

            Vocabulary vocabulary = new Vocabulary();
            DocumentResults(accumulatingHistogram, recorder);

            string trainPath = InternetFileDownloader.Download(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE);
            DocumentResults(accumulatingHistogram, recorder);

            string validPath = InternetFileDownloader.Download(DOWNLOAD_URL + VALID_FILE, VALID_FILE);
            DocumentResults(accumulatingHistogram, recorder);

            string testPath = InternetFileDownloader.Download(DOWNLOAD_URL + TEST_FILE, TEST_FILE);
            DocumentResults(accumulatingHistogram, recorder);

            int[] trainData = vocabulary.LoadData(trainPath);
            DocumentResults(accumulatingHistogram, recorder);

            int[] validData = vocabulary.LoadData(validPath);
            DocumentResults(accumulatingHistogram, recorder);

            int[] testData = vocabulary.LoadData(testPath);
            DocumentResults(accumulatingHistogram, recorder);

            // Read after all three files are loaded so it covers every word seen.
            int nVocab = vocabulary.Length;

            RILogManager.Default?.SendDebug("Network Initializing.");
            FunctionStack model = new FunctionStack("Test10",
                new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"),
                new Dropout(),
                new LSTM(true, N_UNITS, N_UNITS, name: "l2 LSTM"),
                new Dropout(),
                new LSTM(true, N_UNITS, N_UNITS, name: "l3 LSTM"),
                new Dropout(),
                new Linear(true, N_UNITS, nVocab, name: "l4 Linear")
            );
            DocumentResults(accumulatingHistogram, recorder);

            // Per the original author: rather than capping at the threshold, the
            // rate is corrected using the L2 norm of all parameters.
            GradientClipping gradientClipping = new GradientClipping(threshold: GRAD_CLIP);
            SGD sgd = new SGD(learningRate: 1);
            model.SetOptimizer(gradientClipping, sgd);
            DocumentResults(accumulatingHistogram, recorder);

            Real wholeLen = trainData.Length;
            int jump = (int)Math.Floor(wholeLen / BATCH_SIZE);
            int epoch = 0;

            // Forward results waiting to be back-propagated.
            Stack<NdArray[]> backNdArrays = new Stack<NdArray[]>();

            RILogManager.Default?.SendDebug("Train Start.");
            double dVal;

            // Input and target buffers are allocated once and refilled every iteration.
            NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);
            NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);

            for (int i = 0; i < jump * N_EPOCH; i++)
            {
                // Batch entry j reads the corpus at offset jump * j + i; the
                // target is the following token (wrapping at the corpus end).
                for (int j = 0; j < BATCH_SIZE; j++)
                {
                    x.Data[j] = trainData[(int)((jump * j + i) % wholeLen)];
                    t.Data[j] = trainData[(int)((jump * j + i + 1) % wholeLen)];
                }

                NdArray[] result = model.Forward(true, x);
                Real sumLoss = new SoftmaxCrossEntropy().Evaluate(result, t);
                backNdArrays.Push(result);
                RILogManager.Default?.SendDebug("[{0}/{1}] Loss: {2}", i + 1, jump, sumLoss);

                // Run truncated BPTT every BPROP_LEN iterations
                if ((i + 1) % BPROP_LEN == 0)
                {
                    while (backNdArrays.Count > 0)
                    {
                        RILogManager.Default?.SendDebug("backward" + backNdArrays.Count);
                        model.Backward(true, backNdArrays.Pop());
                    }

                    model.Update();
                    model.ResetState();
                }

                // Every jump iterations counts as one epoch: validate and decay the rate.
                if ((i + 1) % jump == 0)
                {
                    epoch++;
                    RILogManager.Default?.SendDebug("evaluate");
                    dVal = Evaluate(model, validData);
                    RILogManager.Default?.SendDebug($"validation perplexity: {dVal}");

                    if (epoch >= 6)
                    {
                        sgd.LearningRate /= 1.2;
                        RILogManager.Default?.SendDebug("learning rate =" + sgd.LearningRate);
                    }
                }

                DocumentResults(accumulatingHistogram, recorder);
            }

            RILogManager.Default?.SendDebug("test start");
            dVal = Evaluate(model, testData);
            RILogManager.Default?.SendDebug("test perplexity:" + dVal);
            DocumentResults(accumulatingHistogram, recorder);
        }
        finally
        {
            _logWriter.Dispose();
        }
    }
    finally
    {
        _outputStream.Dispose();
    }

    // The log file is closed at this point and can be read back.
    RILogManager.Default?.SendDebug("Log contents");
    RILogManager.Default?.SendDebug(File.ReadAllText(LogPath));
    Console.WriteLine();

    RILogManager.Default?.SendDebug("Percentile distribution (values reported in milliseconds)");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);

    RILogManager.Default?.SendDebug("Mean: " + BytesToString(accumulatingHistogram.GetMean()) + ", StdDev: " + BytesToString(accumulatingHistogram.GetStdDeviation()));
}
/// <summary>
/// Downloads the train/validation/test corpora, builds a vocabulary, and
/// trains a two-layer LSTM language model with truncated back-propagation
/// through time, printing progress to the console. Validation perplexity is
/// printed every <c>jump</c> iterations and test perplexity at the end.
/// </summary>
public static void Run()
{
    Console.WriteLine("Build Vocabulary.");

    Vocabulary vocab = new Vocabulary();

    string trainFile = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE);
    string validFile = InternetFileDownloader.Donwload(DOWNLOAD_URL + VALID_FILE, VALID_FILE);
    string testFile = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE);

    int[] trainIds = vocab.LoadData(trainFile);
    int[] validIds = vocab.LoadData(validFile);
    int[] testIds = vocab.LoadData(testFile);

    // Read after all three files are loaded.
    int vocabSize = vocab.Length;

    Console.WriteLine("Network Initilizing.");
    FunctionStack model = new FunctionStack(
        new EmbedID(vocabSize, N_UNITS, name: "l1 EmbedID"),
        new Dropout(),
        new LSTM(N_UNITS, N_UNITS, name: "l2 LSTM"),
        new Dropout(),
        new LSTM(N_UNITS, N_UNITS, name: "l3 LSTM"),
        new Dropout(),
        new Linear(N_UNITS, vocabSize, name: "l4 Linear")
    );

    // Per the original author: rather than capping at the given threshold, the
    // rate is corrected using the L2 norm of all parameters.
    GradientClipping clipping = new GradientClipping(threshold: GRAD_CLIP);
    SGD sgd = new SGD(learningRate: 1);
    model.SetOptimizer(clipping, sgd);

    Real wholeLen = trainIds.Length;
    int jump = (int)Math.Floor(wholeLen / BATCH_SIZE);
    int epoch = 0;

    // Forward results waiting to be back-propagated.
    Stack<NdArray[]> pending = new Stack<NdArray[]>();

    Console.WriteLine("Train Start.");

    int totalSteps = jump * N_EPOCH;
    for (int step = 0; step < totalSteps; step++)
    {
        NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE);
        NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE);

        // Batch entry b reads the corpus at offset jump * b + step; the target
        // is the following token (wrapping at the corpus end).
        for (int b = 0; b < BATCH_SIZE; b++)
        {
            x.Data[b] = trainIds[(int)((jump * b + step) % wholeLen)];
            t.Data[b] = trainIds[(int)((jump * b + step + 1) % wholeLen)];
        }

        NdArray[] result = model.Forward(x);
        Real sumLoss = new SoftmaxCrossEntropy().Evaluate(result, t);
        pending.Push(result);
        Console.WriteLine("[{0}/{1}] Loss: {2}", step + 1, jump, sumLoss);

        // Run truncated BPTT every BPROP_LEN steps
        bool truncationPoint = (step + 1) % BPROP_LEN == 0;
        if (truncationPoint)
        {
            while (pending.Count > 0)
            {
                Console.WriteLine("backward" + pending.Count);
                model.Backward(pending.Pop());
            }

            model.Update();
            model.ResetState();
        }

        // Every jump steps counts as one epoch: validate and decay the rate.
        bool epochFinished = (step + 1) % jump == 0;
        if (epochFinished)
        {
            epoch++;

            Console.WriteLine("evaluate");
            Console.WriteLine("validation perplexity: {0}", Evaluate(model, validIds));

            if (epoch >= 6)
            {
                sgd.LearningRate /= 1.2;
                Console.WriteLine("learning rate =" + sgd.LearningRate);
            }
        }
    }

    Console.WriteLine("test start");
    Console.WriteLine("test perplexity:" + Evaluate(model, testIds));
}
/// <summary>
/// Mirrors a Chainer-defined CNN as a FunctionStack, loads its parameters from
/// MODEL_FILE_PATH, trains for one step on a fixed 12x12 sample and prints the
/// first convolution's gradients followed by its parameters after the update.
/// </summary>
public static void Run()
{
    // Write the configuration of the network to be read as a FunctionStack and
    // adjust the parameters of each function. Every name has to match the
    // corresponding variable name in Chainer.
    FunctionStack chainerNet = new FunctionStack(
        new Convolution2D(1, 2, 3, name: "conv1", gpuEnable: true), // do not forget the GPU flag if necessary
        new ReLU(),
        new MaxPooling(2, 2),
        new Convolution2D(2, 2, 2, name: "conv2", gpuEnable: true),
        new ReLU(),
        new MaxPooling(2, 2),
        new Linear(8, 2, name: "fl3"),
        new ReLU(),
        new Linear(2, 2, name: "fl4")
    );

    /* Declaration in Chainer
     * class NN(chainer.Chain):
     *     def __init__(self):
     *         super(NN, self).__init__(
     *             conv1 = L.Convolution2D(1, 2, 3),
     *             conv2 = L.Convolution2D(2, 2, 2),
     *             fl3 = L.Linear(8, 2),
     *             fl4 = L.Linear(2, 2)
     *         )
     *
     *     def __call__(self, x):
     *         h_conv1 = F.relu(self.conv1(x))
     *         h_pool1 = F.max_pooling_2d(h_conv1, 2)
     *         h_conv2 = F.relu(self.conv2(h_pool1))
     *         h_pool2 = F.max_pooling_2d(h_conv2, 2)
     *         h_fc1 = F.relu(self.fl3(h_pool2))
     *         y = self.fl4(h_fc1)
     *         return y
     */

    // Read the parameters into the stack declared above.
    ChainerModelDataLoader.ModelLoad(MODEL_FILE_PATH, chainerNet);

    // After loading, the network is used as usual.
    chainerNet.SetOptimizer(new SGD());

    // Input data: one channel of 12 rows x 12 columns.
    Real[,,] sample =
    {
        {
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.9, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.9, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.8, 0.5, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.3, 0.3, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.1, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.1, 0.7, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0 },
            { 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
        }
    };
    NdArray sampleInput = new NdArray(sample);

    // Teacher signal.
    Real[] expected = { 0.0, 1.0 };

    // Conduct one training step.
    Trainer.Train(chainerNet, sampleInput, expected, new MeanSquaredError(), false);

    // Grab the first function of the stack so its parameters can be displayed.
    Convolution2D conv1 = (Convolution2D)chainerNet.Functions[0];

    // Updating consumes the gradients, so output them first.
    Console.WriteLine("gw1");
    Console.WriteLine(conv1.Weight.ToString("Grad"));
    Console.WriteLine("gb1");
    Console.WriteLine(conv1.Bias.ToString("Grad"));

    // Update, then output the resulting parameters.
    chainerNet.Update();

    Console.WriteLine("w1");
    Console.WriteLine(conv1.Weight);
    Console.WriteLine("b1");
    Console.WriteLine(conv1.Bias);
}
/// <summary>
/// Trains a simple recurrent language model sentence by sentence (a sentence
/// ends at <c>vocabulary.EosID</c>) and then scores up to the first 1000 test
/// tokens with CalPs, printing <c>2 ^ (sum / wnum)</c> at the end.
/// </summary>
/// <remarks>
/// The test loop is bounded by the test data length, so a test set shorter
/// than 1000 tokens no longer causes an out-of-range access.
/// </remarks>
public static void Run()
{
    // Upper bound on the number of test tokens that are scored.
    const int maxTestTokens = 1000;

    Console.WriteLine("Build Vocabulary.");

    Vocabulary vocabulary = new Vocabulary();
    string trainPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE, TRAIN_FILE_HASH);
    string testPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE, TEST_FILE_HASH);

    int[] trainData = vocabulary.LoadData(trainPath);
    int[] testData = vocabulary.LoadData(testPath);

    // Read after both files are loaded.
    int nVocab = vocabulary.Length;

    Console.WriteLine("Done.");

    Console.WriteLine("Network Initilizing.");
    FunctionStack model = new FunctionStack(
        new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"),
        new Linear(N_UNITS, N_UNITS, name: "l2 Linear"),
        new TanhActivation("l2 Tanh"),
        new Linear(N_UNITS, nVocab, name: "l3 Linear"),
        new Softmax("l3 Sonftmax")
    );

    model.SetOptimizer(new Adam());

    // Word ids of the sentence currently being collected.
    List<int> s = new List<int>();

    Console.WriteLine("Train Start.");
    SoftmaxCrossEntropy softmaxCrossEntropy = new SoftmaxCrossEntropy();

    for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++)
    {
        for (int pos = 0; pos < trainData.Length; pos++)
        {
            // Hidden state; a fresh zero-initialised array is created for every
            // token, so each sentence starts from a zero state.
            NdArray h = new NdArray(new Real[N_UNITS]);

            int id = trainData[pos];
            s.Add(id);

            // A full sentence has been collected: train on it.
            if (id == vocabulary.EosID)
            {
                Real accumloss = 0;
                Stack<NdArray> tmp = new Stack<NdArray>();

                for (int i = 0; i < s.Count; i++)
                {
                    // Target is the next word; the last position targets EOS.
                    int tx = i == s.Count - 1 ? vocabulary.EosID : s[i + 1];

                    // The functions of the stack are invoked individually so the
                    // embedding and the recurrent projection can be summed.
                    // l1 EmbedID
                    NdArray l1 = model.Functions[0].Forward(s[i])[0];
                    // l2 Linear
                    NdArray l2 = model.Functions[1].Forward(h)[0];
                    // Add
                    NdArray xK = l1 + l2;
                    // l2 Tanh
                    h = model.Functions[2].Forward(xK)[0];
                    // l3 Linear
                    NdArray h2 = model.Functions[3].Forward(h)[0];

                    Real loss = softmaxCrossEntropy.Evaluate(h2, tx);
                    tmp.Push(h2);
                    accumloss += loss;
                }

                Console.WriteLine(accumloss);

                // Back-propagate every step, last step first.
                while (tmp.Count > 0)
                {
                    model.Backward(tmp.Pop());
                }

                model.Update();
                s.Clear();
            }

            if (pos % 100 == 0)
            {
                Console.WriteLine(pos + "/" + trainData.Length + " finished");
            }
        }
    }

    Console.WriteLine("Test Start.");

    Real sum = 0;
    int wnum = 0;
    List<int> ts = new List<int>();
    bool unkWord = false;

    // Never read past the end of the test data.
    int testLimit = Math.Min(maxTestTokens, testData.Length);
    for (int pos = 0; pos < testLimit; pos++)
    {
        int id = testData[pos];
        ts.Add(id);

        // NOTE(review): this compares a word id with the number of training
        // tokens; presumably a vocabulary-size bound was intended - confirm.
        if (id > trainData.Length)
        {
            unkWord = true;
        }

        if (id == vocabulary.EosID)
        {
            if (!unkWord)
            {
                Console.WriteLine("pos" + pos);
                Console.WriteLine("tsLen" + ts.Count);
                Console.WriteLine("sum" + sum);
                Console.WriteLine("wnum" + wnum);

                sum += CalPs(model, ts);
                wnum += ts.Count - 1;
            }
            else
            {
                // Sentences flagged as containing an unknown word are skipped.
                unkWord = false;
            }

            ts.Clear();
        }
    }

    Console.WriteLine(Math.Pow(2.0, sum / wnum));
}
/// <summary>
/// Trains a four-stage MNIST classifier for 20 epochs. Each of the first three
/// stages is updated immediately from the output of a companion "DNI" stack,
/// and each DNI stack is trained with a mean-squared error against the Grad
/// found on the stage output after the following stage has run Backward.
/// Losses are reported per batch and accuracy every 20 batches.
/// </summary>
public static void Run()
{
    // Prepare MNIST data
    RILogManager.Default?.SendDebug("MNIST Data Loading...");
    MnistData mnist = new MnistData(28);

    RILogManager.Default?.SendDebug("Training Start...");

    // The classifier, split into four separately optimised stages.
    FunctionStack stage1 = new FunctionStack("Test11 Layer 1",
        new Linear(true, 28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(true, 256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );

    FunctionStack stage2 = new FunctionStack("Test11 Layer 2",
        new Linear(true, 256, 256, name: "l2 Linear"),
        new BatchNormalization(true, 256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );

    FunctionStack stage3 = new FunctionStack("Test11 Layer 3",
        new Linear(true, 256, 256, name: "l3 Linear"),
        new BatchNormalization(true, 256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );

    FunctionStack stage4 = new FunctionStack("Test11 Layer 4",
        new Linear(true, 256, 10, name: "l4 Linear")
    );

    // A FunctionStack can be stacked as a Function; the combined network is
    // only used for the accuracy test.
    FunctionStack fullNetwork = new FunctionStack("Test11",
        stage1,
        stage2,
        stage3,
        stage4
    );

    // DNI stacks: 256 inputs, 256 outputs, last layer starting from all-zero weights.
    FunctionStack dni1 = new FunctionStack("Test11 DNI1",
        new Linear(true, 256, 1024, name: "DNI1 Linear1"),
        new BatchNormalization(true, 1024, name: "DNI1 Norm1"),
        new ReLU(name: "DNI1 ReLU1"),
        new Linear(true, 1024, 1024, name: "DNI1 Linear2"),
        new BatchNormalization(true, 1024, name: "DNI1 Norm2"),
        new ReLU(name: "DNI1 ReLU2"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );

    FunctionStack dni2 = new FunctionStack("Test11 DNI2",
        new Linear(true, 256, 1024, name: "DNI2 Linear1"),
        new BatchNormalization(true, 1024, name: "DNI2 Norm1"),
        new ReLU(name: "DNI2 ReLU1"),
        new Linear(true, 1024, 1024, name: "DNI2 Linear2"),
        new BatchNormalization(true, 1024, name: "DNI2 Norm2"),
        new ReLU(name: "DNI2 ReLU2"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3")
    );

    FunctionStack dni3 = new FunctionStack("Test11 DNI3",
        new Linear(true, 256, 1024, name: "DNI3 Linear1"),
        new BatchNormalization(true, 1024, name: "DNI3 Norm1"),
        new ReLU(name: "DNI3 ReLU1"),
        new Linear(true, 1024, 1024, name: "DNI3 Linear2"),
        new BatchNormalization(true, 1024, name: "DNI3 Norm2"),
        new ReLU(name: "DNI3 ReLU2"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3")
    );

    // One Adam optimizer per stack.
    stage1.SetOptimizer(new Adam());
    stage2.SetOptimizer(new Adam());
    stage3.SetOptimizer(new Adam());
    stage4.SetOptimizer(new Adam());

    dni1.SetOptimizer(new Adam());
    dni2.SetOptimizer(new Adam());
    dni3.SetOptimizer(new Adam());

    for (int epoch = 0; epoch < 20; epoch++)
    {
        RILogManager.Default?.SendDebug("epoch " + (epoch + 1));

        // Running loss sums and sample counts for this epoch.
        Real lossSum = 0;
        Real dni1LossSum = 0;
        Real dni2LossSum = 0;
        Real dni3LossSum = 0;

        long lossCount = 0;
        long dni1LossCount = 0;
        long dni2LossCount = 0;
        long dni3LossCount = 0;

        // How many batches to run per epoch
        for (int batch = 1; batch < TRAIN_DATA_COUNT + 1; batch++)
        {
            // Get data randomly from the training data
            TestDataSet trainBatch = mnist.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);

            // --- Stage 1 -------------------------------------------------
            NdArray[] out1 = stage1.Forward(true, trainBatch.Data);

            // Ask DNI1 for the gradient of the stage-1 output and apply it
            NdArray[] dni1Out = dni1.Forward(true, out1);
            out1[0].Grad = dni1Out[0].Data.ToArray();

            // Update stage 1 right away
            stage1.Backward(true, out1);
            out1[0].ParentFunc = null; // Backward has run, so cut the computation graph here
            stage1.Update();

            // --- Stage 2 -------------------------------------------------
            NdArray[] out2 = stage2.Forward(true, out1);

            NdArray[] dni2Out = dni2.Forward(true, out2);
            out2[0].Grad = dni2Out[0].Data.ToArray();

            stage2.Backward(true, out2);
            out2[0].ParentFunc = null;

            // Train DNI1 against the Grad now present on the stage-1 output.
            // NOTE(review): presumably stage2.Backward has written the propagated gradient there - confirm.
            NdArray dni1Target = new NdArray(out1[0].Grad, dni1Out[0].Shape, dni1Out[0].BatchCount);
            Real dni1Loss = new MeanSquaredError().Evaluate(dni1Out, dni1Target);

            stage2.Update();

            dni1.Backward(true, dni1Out);
            dni1.Update();

            dni1LossSum += dni1Loss;
            dni1LossCount++;

            // --- Stage 3 -------------------------------------------------
            NdArray[] out3 = stage3.Forward(true, out2);

            NdArray[] dni3Out = dni3.Forward(true, out3);
            out3[0].Grad = dni3Out[0].Data.ToArray();

            stage3.Backward(true, out3);
            out3[0].ParentFunc = null;

            // Train DNI2 against the Grad now present on the stage-2 output
            NdArray dni2Target = new NdArray(out2[0].Grad, dni2Out[0].Shape, dni2Out[0].BatchCount);
            Real dni2Loss = new MeanSquaredError().Evaluate(dni2Out, dni2Target);

            stage3.Update();

            dni2.Backward(true, dni2Out);
            dni2.Update();

            dni2LossSum += dni2Loss;
            dni2LossCount++;

            // --- Stage 4 (uses the real loss) ----------------------------
            NdArray[] out4 = stage4.Forward(true, out3);
            Real batchLoss = new SoftmaxCrossEntropy().Evaluate(out4, trainBatch.Label);

            stage4.Backward(true, out4);
            out4[0].ParentFunc = null;

            lossSum += batchLoss;
            lossCount++;

            // Train DNI3 against the Grad now present on the stage-3 output
            NdArray dni3Target = new NdArray(out3[0].Grad, dni3Out[0].Shape, dni3Out[0].BatchCount);
            Real dni3Loss = new MeanSquaredError().Evaluate(dni3Out, dni3Target);

            stage4.Update();

            dni3.Backward(true, dni3Out);
            dni3.Update();

            dni3LossSum += dni3Loss;
            dni3LossCount++;

            // Report running averages and the losses of this batch
            RILogManager.Default?.SendDebug("batch count " + batch + "/" + TRAIN_DATA_COUNT);
            RILogManager.Default?.SendDebug("total loss " + lossSum / lossCount);
            RILogManager.Default?.SendDebug("local loss " + batchLoss);

            RILogManager.Default?.SendDebug("DNI1 total loss " + dni1LossSum / dni1LossCount);
            RILogManager.Default?.SendDebug("DNI2 total loss " + dni2LossSum / dni2LossCount);
            RILogManager.Default?.SendDebug("DNI3 total loss " + dni3LossSum / dni3LossCount);

            RILogManager.Default?.SendDebug("DNI1 local loss " + dni1Loss);
            RILogManager.Default?.SendDebug("DNI2 local loss " + dni2Loss);
            RILogManager.Default?.SendDebug("DNI3 local loss " + dni3Loss);

            // Test the accuracy after every 20 batches
            if (batch % 20 != 0)
            {
                continue;
            }

            RILogManager.Default?.SendDebug("Testing...");

            // Get data randomly from the test data
            TestDataSet testBatch = mnist.GetRandomYSet(TEST_DATA_COUNT, 28);

            // Run the test on the combined network
            Real accuracy = Trainer.Accuracy(fullNetwork, testBatch.Data, testBatch.Label);
            RILogManager.Default?.SendDebug("accuracy " + accuracy);
        }
    }
}