public static void Run()
{
    //Number of training iterations
    const int learningCount = 10000;

    //Training data
    Real[][] trainData =
    {
        new Real[] { 0, 0 },
        new Real[] { 1, 0 },
        new Real[] { 0, 1 },
        new Real[] { 1, 1 }
    };

    //Training data labels
    Real[][] trainLabel =
    {
        new Real[] { 0 },
        new Real[] { 1 },
        new Real[] { 1 },
        new Real[] { 0 }
    };

    //Describe the network configuration in a FunctionStack
    FunctionStack<Real> nn = new FunctionStack<Real>(
        new Linear<Real>(2, 2, name: "l1 Linear"),
        new Sigmoid<Real>(name: "l1 Sigmoid"),
        new Linear<Real>(2, 1, name: "l2 Linear")
    );

    //Declare the optimizer (Adam in this case)
    Adam<Real> adam = new Adam<Real>();
    adam.SetUp(nn);

    //Training loop
    Console.WriteLine("Training...");

    for (int i = 0; i < learningCount; i++)
    {
        //Use MeanSquaredError as the loss function
        Trainer.Train(nn, trainData[0], trainLabel[0], new MeanSquaredError<Real>());
        Trainer.Train(nn, trainData[1], trainLabel[1], new MeanSquaredError<Real>());
        Trainer.Train(nn, trainData[2], trainLabel[2], new MeanSquaredError<Real>());
        Trainer.Train(nn, trainData[3], trainLabel[3], new MeanSquaredError<Real>());

        //By not updating after every training call, the gradients of the four
        //samples accumulate and are applied together as one mini-batch
        adam.Update();
    }

    //Show the training results
    Console.WriteLine("Test Start...");
    foreach (Real[] val in trainData)
    {
        NdArray<Real> result = nn.Predict(val)[0];
        Console.WriteLine(val[0] + " xor " + val[1] + " = " + (result.Data[0] > 0.5 ? 1 : 0) + " " + result);
    }
}
public static void Run()
{
    DataMaker dataMaker = new DataMaker(STEPS_PER_CYCLE, NUMBER_OF_CYCLES);
    NdArray<Real> trainData = dataMaker.Make();

    //Describe the network configuration in a FunctionStack
    FunctionStack<Real> model = new FunctionStack<Real>(
        new Linear<Real>(1, 5, name: "Linear l1"),
        new LSTM<Real>(5, 5, name: "LSTM l2"),
        new Linear<Real>(5, 1, name: "Linear l3")
    );

    //Declare the optimizer
    Adam<Real> adam = new Adam<Real>();
    adam.SetUp(model);

    //Training loop
    Console.WriteLine("Training...");
    for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++)
    {
        NdArray<Real>[] sequences = dataMaker.MakeMiniBatch(trainData, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

        Real loss = ComputeLoss(model, sequences);

        adam.Update();

        model.ResetState();

        if (epoch != 0 && epoch % DISPLAY_EPOCH == 0)
        {
            Console.WriteLine("[{0}]training loss:\t{1}", epoch, loss);
        }
    }

    Console.WriteLine("Testing...");
    NdArray<Real>[] testSequences = dataMaker.MakeMiniBatch(trainData, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

    int sample_index = 45;
    predict(testSequences[sample_index], model, PREDICTION_LENGTH);
}
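//The ComputeLoss helper called above is not shown in this excerpt. The following
//is a minimal sketch of what it plausibly does, assuming a MeanSquaredError per
//time step and backpropagation through time via model.Backward, and assuming an
//NdArray<Real>(shape, batchCount) constructor; the sample's actual implementation
//may differ.
static Real ComputeLoss(FunctionStack<Real> model, NdArray<Real>[] sequences)
{
    //Accumulate the error over the whole sequence
    Real totalLoss = 0;
    NdArray<Real> x = new NdArray<Real>(new[] { 1 }, MINI_BATCH_SIZE);
    NdArray<Real> t = new NdArray<Real>(new[] { 1 }, MINI_BATCH_SIZE);

    Stack<NdArray<Real>> backNdArrays = new Stack<NdArray<Real>>();

    for (int i = 0; i < LENGTH_OF_SEQUENCE - 1; i++)
    {
        //Input is step i, target is step i + 1 of every sequence in the batch
        for (int j = 0; j < MINI_BATCH_SIZE; j++)
        {
            x.Data[j] = sequences[j].Data[i];
            t.Data[j] = sequences[j].Data[i + 1];
        }

        NdArray<Real> result = model.Forward(x)[0];
        totalLoss += new MeanSquaredError<Real>().Evaluate(result, t);
        backNdArrays.Push(result);
    }

    //Backpropagate through time, newest step first
    while (backNdArrays.Count > 0)
    {
        model.Backward(backNdArrays.Pop());
    }

    return totalLoss / (LENGTH_OF_SEQUENCE - 1);
}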
public static void Run()
{
    //Prepare the MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData<Real> mnistData = new MnistData<Real>();

    Console.WriteLine("Training Start...");

    //Describe the network configuration in FunctionStacks
    FunctionStack<Real> Layer1 = new FunctionStack<Real>(
        new Linear<Real>(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization<Real>(256, name: "l1 Norm"),
        new ReLU<Real>(name: "l1 ReLU")
    );

    FunctionStack<Real> Layer2 = new FunctionStack<Real>(
        new Linear<Real>(256, 256, name: "l2 Linear"),
        new BatchNormalization<Real>(256, name: "l2 Norm"),
        new ReLU<Real>(name: "l2 ReLU")
    );

    FunctionStack<Real> Layer3 = new FunctionStack<Real>(
        new Linear<Real>(256, 256, name: "l3 Linear"),
        new BatchNormalization<Real>(256, name: "l3 Norm"),
        new ReLU<Real>(name: "l3 ReLU")
    );

    FunctionStack<Real> Layer4 = new FunctionStack<Real>(
        new Linear<Real>(256, 10, name: "l4 Linear")
    );

    //A FunctionStack can itself be stacked as a Function
    FunctionStack<Real> nn = new FunctionStack<Real>
    (
        Layer1,
        Layer2,
        Layer3,
        Layer4
    );

    FunctionStack<Real> DNI1 = new FunctionStack<Real>(
        new Linear<Real>(256, 1024, name: "DNI1 Linear1"),
        new BatchNormalization<Real>(1024, name: "DNI1 Norm1"),
        new ReLU<Real>(name: "DNI1 ReLU1"),
        new Linear<Real>(1024, 1024, name: "DNI1 Linear2"),
        new BatchNormalization<Real>(1024, name: "DNI1 Norm2"),
        new ReLU<Real>(name: "DNI1 ReLU2"),
        new Linear<Real>(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );

    FunctionStack<Real> DNI2 = new FunctionStack<Real>(
        new Linear<Real>(256, 1024, name: "DNI2 Linear1"),
        new BatchNormalization<Real>(1024, name: "DNI2 Norm1"),
        new ReLU<Real>(name: "DNI2 ReLU1"),
        new Linear<Real>(1024, 1024, name: "DNI2 Linear2"),
        new BatchNormalization<Real>(1024, name: "DNI2 Norm2"),
        new ReLU<Real>(name: "DNI2 ReLU2"),
        new Linear<Real>(1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3")
    );

    FunctionStack<Real> DNI3 = new FunctionStack<Real>(
        new Linear<Real>(256, 1024, name: "DNI3 Linear1"),
        new BatchNormalization<Real>(1024, name: "DNI3 Norm1"),
        new ReLU<Real>(name: "DNI3 ReLU1"),
        new Linear<Real>(1024, 1024, name: "DNI3 Linear2"),
        new BatchNormalization<Real>(1024, name: "DNI3 Norm2"),
        new ReLU<Real>(name: "DNI3 ReLU2"),
        new Linear<Real>(1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3")
    );

    //Declare the optimizers
    Adam<Real> L1adam = new Adam<Real>();
    Adam<Real> L2adam = new Adam<Real>();
    Adam<Real> L3adam = new Adam<Real>();
    Adam<Real> L4adam = new Adam<Real>();

    L1adam.SetUp(Layer1);
    L2adam.SetUp(Layer2);
    L3adam.SetUp(Layer3);
    L4adam.SetUp(Layer4);

    Adam<Real> DNI1adam = new Adam<Real>();
    Adam<Real> DNI2adam = new Adam<Real>();
    Adam<Real> DNI3adam = new Adam<Real>();

    DNI1adam.SetUp(DNI1);
    DNI2adam.SetUp(DNI2);
    DNI3adam.SetUp(DNI3);

    //Training loop over generations (epochs)
    for (int epoch = 0; epoch < 20; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        Real totalLoss = 0;
        Real DNI1totalLoss = 0;
        Real DNI2totalLoss = 0;
        Real DNI3totalLoss = 0;

        long totalLossCount = 0;
        long DNI1totalLossCount = 0;
        long DNI2totalLossCount = 0;
        long DNI3totalLossCount = 0;

        //How many batches to run
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            //Fetch random data from the training set
            TestDataSet<Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_DATA_COUNT);

            //Run the first layer
            NdArray<Real> layer1ForwardResult = Layer1.Forward(datasetX.Data)[0];

            //Get the first layer's gradient (predicted by DNI1)
            NdArray<Real> DNI1Result = DNI1.Forward(layer1ForwardResult)[0];

            //Apply the first layer's gradient
            layer1ForwardResult.Grad = DNI1Result.Data.ToArray();

            //Update the first layer
            Layer1.Backward(layer1ForwardResult);
            layer1ForwardResult.ParentFunc = null; //Backward has run, so cut the computation graph

            L1adam.Update();

            //Run the second layer
            NdArray<Real> layer2ForwardResult = Layer2.Forward(layer1ForwardResult)[0];

            //Get the second layer's gradient
            NdArray<Real> DNI2Result = DNI2.Forward(layer2ForwardResult)[0];

            //Apply the second layer's gradient
            layer2ForwardResult.Grad = DNI2Result.Data.ToArray();

            //Update the second layer
            Layer2.Backward(layer2ForwardResult);
            layer2ForwardResult.ParentFunc = null;

            //Train the DNI for the first layer
            Real DNI1loss = new MeanSquaredError<Real>().Evaluate(DNI1Result, new NdArray<Real>(layer1ForwardResult.Grad, DNI1Result.Shape, DNI1Result.BatchCount));

            L2adam.Update();

            DNI1.Backward(DNI1Result);
            DNI1adam.Update();

            DNI1totalLoss += DNI1loss;
            DNI1totalLossCount++;

            //Run the third layer
            NdArray<Real> layer3ForwardResult = Layer3.Forward(layer2ForwardResult)[0];

            //Get the third layer's gradient
            NdArray<Real> DNI3Result = DNI3.Forward(layer3ForwardResult)[0];

            //Apply the third layer's gradient
            layer3ForwardResult.Grad = DNI3Result.Data.ToArray();

            //Update the third layer
            Layer3.Backward(layer3ForwardResult);
            layer3ForwardResult.ParentFunc = null;

            //Train the DNI for the second layer
            Real DNI2loss = new MeanSquaredError<Real>().Evaluate(DNI2Result, new NdArray<Real>(layer2ForwardResult.Grad, DNI2Result.Shape, DNI2Result.BatchCount));

            L3adam.Update();

            DNI2.Backward(DNI2Result);
            DNI2adam.Update();

            DNI2totalLoss += DNI2loss;
            DNI2totalLossCount++;

            //Run the fourth layer
            NdArray<Real> layer4ForwardResult = Layer4.Forward(layer3ForwardResult)[0];

            //Get the fourth layer's gradient (the loss evaluation sets it)
            Real sumLoss = new SoftmaxCrossEntropy<Real>().Evaluate(layer4ForwardResult, datasetX.Label);

            //Update the fourth layer
            Layer4.Backward(layer4ForwardResult);
            layer4ForwardResult.ParentFunc = null;

            totalLoss += sumLoss;
            totalLossCount++;

            //Train the DNI for the third layer
            Real DNI3loss = new MeanSquaredError<Real>().Evaluate(DNI3Result, new NdArray<Real>(layer3ForwardResult.Grad, DNI3Result.Shape, DNI3Result.BatchCount));

            L4adam.Update();

            DNI3.Backward(DNI3Result);
            DNI3adam.Update();

            DNI3totalLoss += DNI3loss;
            DNI3totalLossCount++;

            Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

            //Output the results
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);

            Console.WriteLine("\nDNI1 total loss " + DNI1totalLoss / DNI1totalLossCount);
            Console.WriteLine("DNI2 total loss " + DNI2totalLoss / DNI2totalLossCount);
            Console.WriteLine("DNI3 total loss " + DNI3totalLoss / DNI3totalLossCount);

            Console.WriteLine("\nDNI1 local loss " + DNI1loss);
            Console.WriteLine("DNI2 local loss " + DNI2loss);
            Console.WriteLine("DNI3 local loss " + DNI3loss);

            //Test the accuracy every 20 batches
            if (i % 20 == 0)
            {
                Console.WriteLine("\nTesting...");

                //Fetch random data from the test set
                TestDataSet<Real> datasetY = mnistData.Eval.GetRandomDataSet(TEST_DATA_COUNT);

                //Run the test
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
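//For reference, the synthetic-gradient (DNI) pattern used above, distilled to two
//layers. This is an illustrative sketch mirroring the sample's call order, with
//hypothetical shapes and names (DniPatternSketch, h1, predictedGrad, etc. are
//invented for this sketch): each layer is updated immediately from a gradient
//*predicted* by its DNI network, and the DNI network is later regressed onto the
//true gradient once the next layer's Backward has delivered it.
static void DniPatternSketch(NdArray<Real> input, NdArray<int> label)
{
    FunctionStack<Real> layer1 = new FunctionStack<Real>(new Linear<Real>(8, 8));
    FunctionStack<Real> layer2 = new FunctionStack<Real>(new Linear<Real>(8, 10));
    FunctionStack<Real> dni1 = new FunctionStack<Real>(new Linear<Real>(8, 8));

    Adam<Real> layer1Adam = new Adam<Real>(); layer1Adam.SetUp(layer1);
    Adam<Real> layer2Adam = new Adam<Real>(); layer2Adam.SetUp(layer2);
    Adam<Real> dni1Adam = new Adam<Real>(); dni1Adam.SetUp(dni1);

    //Forward through layer1 and update it at once from the gradient predicted by dni1
    NdArray<Real> h1 = layer1.Forward(input)[0];
    NdArray<Real> predictedGrad = dni1.Forward(h1)[0];
    h1.Grad = predictedGrad.Data.ToArray();
    layer1.Backward(h1);
    h1.ParentFunc = null; //cut the graph: layer1 no longer waits on downstream layers
    layer1Adam.Update();

    //Run the rest of the network; its Backward fills h1.Grad with the true gradient
    NdArray<Real> h2 = layer2.Forward(h1)[0];
    new SoftmaxCrossEntropy<Real>().Evaluate(h2, label);
    layer2.Backward(h2);
    layer2Adam.Update();

    //Regress dni1 onto the true gradient that has now arrived in h1.Grad
    new MeanSquaredError<Real>().Evaluate(predictedGrad,
        new NdArray<Real>(h1.Grad, predictedGrad.Shape, predictedGrad.BatchCount));
    dni1.Backward(predictedGrad);
    dni1Adam.Update();
}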
public void AdamRandomTest()
{
    Python.Initialize();
    Chainer.Initialize();

    int inputCount = Mother.Dice.Next(2, 50);
    int outputCount = Mother.Dice.Next(2, 50);
    int batchCount = Mother.Dice.Next(1, 5);

    Real[,] input = Initializer.GetRandomValues<Real[,]>(batchCount, inputCount);
    Real[,] dummyGy = Initializer.GetRandomValues<Real[,]>(batchCount, outputCount);
    Real[,] w = Initializer.GetRandomValues<Real[,]>(outputCount, inputCount);
    Real[] b = Initializer.GetRandomValues<Real[]>(outputCount);

    float alpha = (float)Mother.Dice.NextDouble(); //0.001f
    float beta1 = (float)Mother.Dice.NextDouble(); //0.9f
    float beta2 = (float)Mother.Dice.NextDouble(); //0.999f
    float eps = (float)Mother.Dice.NextDouble(); //1e-08f
    float eta = (float)Mother.Dice.NextDouble(); //1.0f

    //Chainer
    NChainer.Linear<Real> cLinear = new NChainer.Linear<Real>(inputCount, outputCount, false, w, b);
    NChainer.Adam<Real> cAdam = new NChainer.Adam<Real>(alpha, beta1, beta2, eps, eta);
    cAdam.Setup(cLinear);

    Variable<Real> cX = new Variable<Real>(input);
    Variable<Real> cY = cLinear.Forward(cX);

    cY.Grad = dummyGy;
    cY.Backward();

    cAdam.Update();

    //KelpNet
    KelpNet.CL.Linear<Real> linear = new KelpNet.CL.Linear<Real>(inputCount, outputCount, false, w, b);
    KelpNet.Adam<Real> adam = new Adam<Real>(alpha, beta1, beta2, eps, eta);
    adam.SetUp(linear);

    NdArray<Real> x = new NdArray<Real>(input, asBatch: true);
    NdArray<Real> y = linear.Forward(x)[0];

    y.Grad = dummyGy.Flatten();
    y.Backward();

    adam.Update();

    Real[] cW = ((Real[,])cLinear.W.Data).Flatten();
    Real[] cb = (Real[])cLinear.b.Data;

    //Allowable tolerance
    Real delta = 0.00001f;

    //Check the updated W
    Assert.AreEqual(cW.Length, linear.Weight.Data.Length);
    for (int i = 0; i < linear.Weight.Data.Length; i++)
    {
        Assert.AreEqual(cW[i], linear.Weight.Data[i], delta);
    }

    //Check the updated b
    Assert.AreEqual(cb.Length, linear.Bias.Data.Length);
    for (int i = 0; i < linear.Bias.Data.Length; i++)
    {
        Assert.AreEqual(cb[i], linear.Bias.Data[i], delta);
    }
}
public static void Run()
{
    Console.WriteLine("Build Vocabulary.");

    Vocabulary vocabulary = new Vocabulary();

    string trainPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE, TRAIN_FILE_HASH);
    string testPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE, TEST_FILE_HASH);

    int[] trainData = vocabulary.LoadData(trainPath);
    int[] testData = vocabulary.LoadData(testPath);

    int nVocab = vocabulary.Length;

    Console.WriteLine("Done.");

    Console.WriteLine("Network Initializing.");
    FunctionStack<Real> model = new FunctionStack<Real>(
        new EmbedID<Real>(nVocab, N_UNITS, name: "l1 EmbedID"),
        new Linear<Real>(N_UNITS, N_UNITS, name: "l2 Linear"),
        new TanhActivation<Real>("l2 Tanh"),
        new Linear<Real>(N_UNITS, nVocab, name: "l3 Linear"),
        new Softmax<Real>("l3 Softmax")
    );

    Adam<Real> adam = new Adam<Real>();
    adam.SetUp(model);

    List<int> s = new List<int>();

    Console.WriteLine("Train Start.");
    SoftmaxCrossEntropy<Real> softmaxCrossEntropy = new SoftmaxCrossEntropy<Real>();
    for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++)
    {
        for (int pos = 0; pos < trainData.Length; pos++)
        {
            NdArray<Real> h = new NdArray<Real>(new Real[N_UNITS]);

            int id = trainData[pos];
            s.Add(id);

            if (id == vocabulary.EosID)
            {
                Real accumloss = 0;
                Stack<NdArray<Real>> tmp = new Stack<NdArray<Real>>();

                for (int i = 0; i < s.Count; i++)
                {
                    NdArray<int> tx = new NdArray<int>(i == s.Count - 1 ? new[] { vocabulary.EosID } : new[] { s[i + 1] });

                    //l1 EmbedID
                    NdArray<Real> l1 = model.Functions[0].Forward(s[i])[0];

                    //l2 Linear
                    NdArray<Real> l2 = model.Functions[1].Forward(h)[0];

                    //Add
                    NdArray<Real> xK = l1 + l2;

                    //l2 Tanh
                    h = model.Functions[2].Forward(xK)[0];

                    //l3 Linear
                    NdArray<Real> h2 = model.Functions[3].Forward(h)[0];

                    Real loss = softmaxCrossEntropy.Evaluate(h2, tx);
                    tmp.Push(h2);
                    accumloss += loss;
                }

                Console.WriteLine(accumloss);

                for (int i = 0; i < s.Count; i++)
                {
                    model.Backward(tmp.Pop());
                }

                adam.Update();
                s.Clear();
            }

            if (pos % 100 == 0)
            {
                Console.WriteLine(pos + "/" + trainData.Length + " finished");
            }
        }
    }

    Console.WriteLine("Test Start.");

    Real sum = 0;
    int wnum = 0;
    List<int> ts = new List<int>();
    bool unkWord = false;

    for (int pos = 0; pos < 1000; pos++)
    {
        int id = testData[pos];
        ts.Add(id);

        if (id > trainData.Length)
        {
            unkWord = true;
        }

        if (id == vocabulary.EosID)
        {
            if (!unkWord)
            {
                Console.WriteLine("pos" + pos);
                Console.WriteLine("tsLen" + ts.Count);
                Console.WriteLine("sum" + sum);
                Console.WriteLine("wnum" + wnum);

                sum += CalPs(model, ts);
                wnum += ts.Count - 1;
            }
            else
            {
                unkWord = false;
            }

            ts.Clear();
        }
    }

    Console.WriteLine(Math.Pow(2.0f, sum / wnum));
}
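//The CalPs helper called above is not shown in this excerpt. The following is a
//minimal sketch of what it plausibly computes, reusing the forward pattern from
//the training loop: the summed negative log2-probability of each next word, so
//that the final Math.Pow(2.0f, sum / wnum) is the per-word perplexity. The body
//is an assumption, not the sample's actual implementation.
static Real CalPs(FunctionStack<Real> model, List<int> s)
{
    Real ps = 0;
    NdArray<Real> h = new NdArray<Real>(new Real[N_UNITS]);

    for (int i = 1; i < s.Count; i++)
    {
        //Same recurrence as the training loop: embed the previous word and mix it
        //with the hidden state
        NdArray<Real> l1 = model.Functions[0].Forward(s[i - 1])[0];
        NdArray<Real> l2 = model.Functions[1].Forward(h)[0];
        h = model.Functions[2].Forward(l1 + l2)[0];
        NdArray<Real> h2 = model.Functions[3].Forward(h)[0];

        //l3 Softmax turns the logits into probabilities
        NdArray<Real> yv = model.Functions[4].Forward(h2)[0];

        //Accumulate -log2 P(next word)
        ps -= (Real)Math.Log(yv.Data[s[i]], 2.0);
    }

    return ps;
}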
public void AdamRandomTest()
{
    Python.Initialize();
    Chainer.Initialize();

    int inputCount = Mother.Dice.Next(2, 50);
    int outputCount = Mother.Dice.Next(2, 50);
    int batchCount = Mother.Dice.Next(1, 5);

    Real[,] input = (Real[,])Initializer.GetRealNdArray(new[] { batchCount, inputCount });
    Real[,] dummyGy = (Real[,])Initializer.GetRealNdArray(new[] { batchCount, outputCount });
    Real[,] w = (Real[,])Initializer.GetRealNdArray(new[] { outputCount, inputCount });
    Real[] b = Initializer.GetRealArray(outputCount);

    float alpha = (float)Mother.Dice.NextDouble(); //0.001f
    float beta1 = (float)Mother.Dice.NextDouble(); //0.9f
    float beta2 = (float)Mother.Dice.NextDouble(); //0.999f
    float eps = (float)Mother.Dice.NextDouble(); //1e-08f
    float eta = (float)Mother.Dice.NextDouble(); //1.0f

    //Chainer
    NChainer.Linear<Real> cLinear = new NChainer.Linear<Real>(inputCount, outputCount, false, Real.ToBaseNdArray(w), Real.ToBaseArray(b));
    NChainer.Adam<Real> cAdam = new NChainer.Adam<Real>(alpha, beta1, beta2, eps, eta);
    cAdam.Setup(cLinear);

    Variable<Real> cX = new Variable<Real>(Real.ToBaseNdArray(input));
    Variable<Real> cY = cLinear.Forward(cX);

    cY.Grad = Real.ToBaseNdArray(dummyGy);
    cY.Backward();

    cAdam.Update();

    //KelpNet
    KelpNet.CL.Linear linear = new KelpNet.CL.Linear(inputCount, outputCount, false, w, b);
    KelpNet.Adam adam = new Adam(alpha, beta1, beta2, eps, eta);
    adam.SetUp(linear);

    NdArray x = new NdArray(Real.ToRealArray(input), new[] { inputCount }, batchCount);
    NdArray y = linear.Forward(x)[0];

    y.Grad = Real.ToRealArray(dummyGy);
    y.Backward();

    adam.Update();

    Real[] cW = Real.ToRealArray((Real[,])cLinear.W.Data);
    Real[] cb = (Real[])cLinear.b.Data;

    //Allowable tolerance
    double delta = 0.00001;

    //Check the updated W
    Assert.AreEqual(cW.Length, linear.Weight.Data.Length);
    for (int i = 0; i < linear.Weight.Data.Length; i++)
    {
        Assert.AreEqual(cW[i], linear.Weight.Data[i], delta);
    }

    //Check the updated b
    Assert.AreEqual(cb.Length, linear.Bias.Data.Length);
    for (int i = 0; i < linear.Bias.Data.Length; i++)
    {
        Assert.AreEqual(cb[i], linear.Bias.Data[i], delta);
    }
}