static double Evaluate(FunctionStack model, int[] dataset)
{
    FunctionStack predictModel = (FunctionStack)model.Clone();
    predictModel.ResetState();

    Real totalLoss = 0;
    long totalLossCount = 0;

    for (int i = 0; i < dataset.Length - 1; i++)
    {
        NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE);
        NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE);

        for (int j = 0; j < BATCH_SIZE; j++)
        {
            x.Data[j] = dataset[j + i];
            t.Data[j] = dataset[j + i + 1];
        }

        Real sumLoss = new SoftmaxCrossEntropy().Evaluate(predictModel.Forward(x), t);
        totalLoss += sumLoss;
        totalLossCount++;
    }

    // Calculate perplexity
    return Math.Exp(totalLoss / (totalLossCount - 1));
}
static Real Evaluate(FunctionStack<Real> model, int[] dataset)
{
    FunctionStack<Real> predictModel = DeepCopyHelper<Real>.DeepCopy(model);
    predictModel.ResetState();

    Real totalLoss = 0;
    long totalLossCount = 0;

    for (int i = 0; i < dataset.Length - 1; i++)
    {
        NdArray<Real> x = new NdArray<Real>(new[] { 1 }, BATCH_SIZE);
        NdArray<int> t = new NdArray<int>(new[] { 1 }, BATCH_SIZE);

        for (int j = 0; j < BATCH_SIZE; j++)
        {
            x.Data[j] = dataset[j + i];
            t.Data[j] = dataset[j + i + 1];
        }

        NdArray<Real> result = predictModel.Forward(x)[0];
        Real sumLoss = new SoftmaxCrossEntropy<Real>().Evaluate(result, t);
        totalLoss += sumLoss;
        totalLossCount++;
    }

    // Calculate perplexity
    return Math.Exp(totalLoss / (totalLossCount - 1));
}
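Both Evaluate overloads report perplexity, i.e. the exponential of the mean per-step softmax cross-entropy over the evaluation run; note that the code above divides by totalLossCount - 1 rather than by the number of accumulated losses. As a reference restatement in math:

\mathrm{perplexity} = \exp\!\left(\frac{1}{N} \sum_{i=1}^{N} \ell_{\mathrm{CE}}^{(i)}\right)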
public void SoftmaxCrossEntropy()
{
    var softmax = new Const<double>(1.0, "softmax");
    var y = new Const<double>(1.0, "y");
    var op = new SoftmaxCrossEntropy<double>(softmax, y);

    var xml = op.ToXml();
    var deserialized = SerializationExtensions.FromXml<double>(xml) as SoftmaxCrossEntropy<double>;

    Assert.IsNotNull(deserialized);
    Assert.AreEqual(2, deserialized.Parents.Count);
    Assert.AreEqual("softmax", (deserialized.Parents[0] as Const<double>).Name);
    Assert.AreEqual("y", (deserialized.Parents[1] as Const<double>).Name);
}
public static void Run() { Console.WriteLine("Build Vocabulary."); Vocabulary vocabulary = new Vocabulary(); string trainPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE, TRAIN_FILE_HASH); string testPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE, TEST_FILE_HASH); int[] trainData = vocabulary.LoadData(trainPath); int[] testData = vocabulary.LoadData(testPath); int nVocab = vocabulary.Length; Console.WriteLine("Done."); Console.WriteLine("Network Initilizing."); FunctionStack model = new FunctionStack( new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"), new Linear(N_UNITS, N_UNITS, name: "l2 Linear"), new TanhActivation("l2 Tanh"), new Linear(N_UNITS, nVocab, name: "l3 Linear"), new Softmax("l3 Sonftmax") ); model.SetOptimizer(new Adam()); List <int> s = new List <int>(); Console.WriteLine("Train Start."); SoftmaxCrossEntropy softmaxCrossEntropy = new SoftmaxCrossEntropy(); for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++) { for (int pos = 0; pos < trainData.Length; pos++) { NdArray h = new NdArray(new Real[N_UNITS]); int id = trainData[pos]; s.Add(id); if (id == vocabulary.EosID) { Real accumloss = 0; Stack <NdArray> tmp = new Stack <NdArray>(); for (int i = 0; i < s.Count; i++) { int tx = i == s.Count - 1 ? vocabulary.EosID : s[i + 1]; //l1 EmbedID NdArray l1 = model.Functions[0].Forward(s[i])[0]; //l2 Linear NdArray l2 = model.Functions[1].Forward(h)[0]; //Add NdArray xK = l1 + l2; //l2 Tanh h = model.Functions[2].Forward(xK)[0]; //l3 Linear NdArray h2 = model.Functions[3].Forward(h)[0]; Real loss = softmaxCrossEntropy.Evaluate(h2, tx); tmp.Push(h2); accumloss += loss; } Console.WriteLine(accumloss); for (int i = 0; i < s.Count; i++) { model.Backward(tmp.Pop()); } model.Update(); s.Clear(); } if (pos % 100 == 0) { Console.WriteLine(pos + "/" + trainData.Length + " finished"); } } } Console.WriteLine("Test Start."); Real sum = 0; int wnum = 0; List <int> ts = new List <int>(); bool unkWord = false; for (int pos = 0; pos < 1000; pos++) { int id = testData[pos]; ts.Add(id); if (id > trainData.Length) { unkWord = true; } if (id == vocabulary.EosID) { if (!unkWord) { Console.WriteLine("pos" + pos); Console.WriteLine("tsLen" + ts.Count); Console.WriteLine("sum" + sum); Console.WriteLine("wnum" + wnum); sum += CalPs(model, ts); wnum += ts.Count - 1; } else { unkWord = false; } ts.Clear(); } } Console.WriteLine(Math.Pow(2.0, sum / wnum)); }
/// <summary> /// This sample shows how to serialize and deserialize a ConvNetSharp.Flow network /// 1) Graph creation /// 2) Dummy Training (only use a single data point) /// 3) Serialization /// 4) Deserialization /// </summary> private static void Main() { var cns = new ConvNetSharp <double>(); // 1) Graph creation var input = cns.PlaceHolder("x"); // input var dense1 = cns.Dense(input, 20) + cns.Variable(BuilderInstance <double> .Volume.From(new double[20].Populate(0.1), new Shape(20)), "bias1", true); var relu = cns.Relu(dense1); var dense2 = cns.Dense(relu, 10) + cns.Variable(new Shape(10), "bias2", true); var softmax = cns.Softmax(dense2); // output var output = cns.PlaceHolder("y"); // ground truth var cost = new SoftmaxCrossEntropy <double>(cns, softmax, output); var x = BuilderInstance <double> .Volume.From(new[] { 0.3, -0.5 }, new Shape(2)); var y = BuilderInstance <double> .Volume.From(new[] { 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, new Shape(10)); var dico = new Dictionary <string, Volume <double> > { { "x", x }, { "y", y } }; var count = 0; var optimizer = new GradientDescentOptimizer <double>(cns, 0.01); using (var session = new Session <double>()) { session.Differentiate(cost); // computes dCost/dW at every node of the graph // 2) Dummy Training (only use a single data point) double currentCost; do { currentCost = Math.Abs(session.Run(cost, dico, false).ToArray().Sum()); Console.WriteLine($"cost: {currentCost}"); session.Run(optimizer, dico); count++; } while (currentCost > 1e-2); Console.WriteLine($"{count}"); // Forward pass with original network var result = session.Run(softmax, new Dictionary <string, Volume <double> > { { "x", x } }); Console.WriteLine("probability that x is class 0: " + result.Get(0)); } // 3) Serialization softmax.Save("MyNetwork"); // 4) Deserialization var deserialized = SerializationExtensions.Load <double>("MyNetwork", false)[0]; // first element is the model (second element is the cost if it was saved along) using (var session = new Session <double>()) { // Forward pass with deserialized network var result = session.Run(deserialized, new Dictionary <string, Volume <double> > { { "x", x } }); Console.WriteLine("probability that x is class 0: " + result.Get(0)); // This should give exactly the same result as previous network evaluation } Console.ReadLine(); }
public static void Run()
{
    // Prepare the MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();

    Console.WriteLine("Training Start...");

    // Write the network configuration into FunctionStacks
    FunctionStack Layer1 = new FunctionStack(
        new Linear(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );

    FunctionStack Layer2 = new FunctionStack(
        new Linear(256, 256, name: "l2 Linear"),
        new BatchNormalization(256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );

    FunctionStack Layer3 = new FunctionStack(
        new Linear(256, 256, name: "l3 Linear"),
        new BatchNormalization(256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );

    FunctionStack Layer4 = new FunctionStack(
        new Linear(256, 10, name: "l4 Linear")
    );

    // A FunctionStack can itself be stacked as a Function
    FunctionStack nn = new FunctionStack
    (
        Layer1,
        Layer2,
        Layer3,
        Layer4
    );

    FunctionStack DNI1 = new FunctionStack(
        new Linear(256, 1024, name: "DNI1 Linear1"),
        new BatchNormalization(1024, name: "DNI1 Norm1"),
        new ReLU(name: "DNI1 ReLU1"),
        new Linear(1024, 1024, name: "DNI1 Linear2"),
        new BatchNormalization(1024, name: "DNI1 Norm2"),
        new ReLU(name: "DNI1 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );

    FunctionStack DNI2 = new FunctionStack(
        new Linear(256, 1024, name: "DNI2 Linear1"),
        new BatchNormalization(1024, name: "DNI2 Norm1"),
        new ReLU(name: "DNI2 ReLU1"),
        new Linear(1024, 1024, name: "DNI2 Linear2"),
        new BatchNormalization(1024, name: "DNI2 Norm2"),
        new ReLU(name: "DNI2 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3")
    );

    FunctionStack DNI3 = new FunctionStack(
        new Linear(256, 1024, name: "DNI3 Linear1"),
        new BatchNormalization(1024, name: "DNI3 Norm1"),
        new ReLU(name: "DNI3 ReLU1"),
        new Linear(1024, 1024, name: "DNI3 Linear2"),
        new BatchNormalization(1024, name: "DNI3 Norm2"),
        new ReLU(name: "DNI3 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3")
    );

    // Declare the optimizers
    Layer1.SetOptimizer(new Adam());
    Layer2.SetOptimizer(new Adam());
    Layer3.SetOptimizer(new Adam());
    Layer4.SetOptimizer(new Adam());

    DNI1.SetOptimizer(new Adam());
    DNI2.SetOptimizer(new Adam());
    DNI3.SetOptimizer(new Adam());

    // Three-generation learning
    for (int epoch = 0; epoch < 20; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        Real totalLoss = 0;
        Real DNI1totalLoss = 0;
        Real DNI2totalLoss = 0;
        Real DNI3totalLoss = 0;
        long totalLossCount = 0;
        long DNI1totalLossCount = 0;
        long DNI2totalLossCount = 0;
        long DNI3totalLossCount = 0;

        // How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            // Get data randomly from the training data
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT);

            // Run the first layer
            NdArray[] layer1ForwardResult = Layer1.Forward(datasetX.Data);

            // Get the gradient of the first layer
            NdArray[] DNI1Result = DNI1.Forward(layer1ForwardResult);

            // Apply the gradient of the first layer
            layer1ForwardResult[0].Grad = DNI1Result[0].Data.ToArray();

            // Update the first layer
            Layer1.Backward(layer1ForwardResult);
            layer1ForwardResult[0].ParentFunc = null; // Backward has run, so cut the computation graph
            Layer1.Update();

            // Run the second layer
            NdArray[] layer2ForwardResult = Layer2.Forward(layer1ForwardResult);

            // Get the gradient of the second layer
            NdArray[] DNI2Result = DNI2.Forward(layer2ForwardResult);

            // Apply the gradient of the second layer
            layer2ForwardResult[0].Grad = DNI2Result[0].Data.ToArray();

            // Update the second layer
            Layer2.Backward(layer2ForwardResult);
            layer2ForwardResult[0].ParentFunc = null;

            // Train the DNI for the first layer
            Real DNI1loss = new MeanSquaredError().Evaluate(DNI1Result, new NdArray(layer1ForwardResult[0].Grad, DNI1Result[0].Shape, DNI1Result[0].BatchCount));

            Layer2.Update();

            DNI1.Backward(DNI1Result);
            DNI1.Update();

            DNI1totalLoss += DNI1loss;
            DNI1totalLossCount++;

            // Run the third layer
            NdArray[] layer3ForwardResult = Layer3.Forward(layer2ForwardResult);

            // Get the gradient of the third layer
            NdArray[] DNI3Result = DNI3.Forward(layer3ForwardResult);

            // Apply the gradient of the third layer
            layer3ForwardResult[0].Grad = DNI3Result[0].Data.ToArray();

            // Update the third layer
            Layer3.Backward(layer3ForwardResult);
            layer3ForwardResult[0].ParentFunc = null;

            // Train the DNI for the second layer
            Real DNI2loss = new MeanSquaredError().Evaluate(DNI2Result, new NdArray(layer2ForwardResult[0].Grad, DNI2Result[0].Shape, DNI2Result[0].BatchCount));

            Layer3.Update();

            DNI2.Backward(DNI2Result);
            DNI2.Update();

            DNI2totalLoss += DNI2loss;
            DNI2totalLossCount++;

            // Run the fourth layer
            NdArray[] layer4ForwardResult = Layer4.Forward(layer3ForwardResult);

            // Get the gradient of the fourth layer
            Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, datasetX.Label);

            // Update the fourth layer
            Layer4.Backward(layer4ForwardResult);
            layer4ForwardResult[0].ParentFunc = null;

            totalLoss += sumLoss;
            totalLossCount++;

            // Train the DNI for the third layer
            Real DNI3loss = new MeanSquaredError().Evaluate(DNI3Result, new NdArray(layer3ForwardResult[0].Grad, DNI3Result[0].Shape, DNI3Result[0].BatchCount));

            Layer4.Update();

            DNI3.Backward(DNI3Result);
            DNI3.Update();

            DNI3totalLoss += DNI3loss;
            DNI3totalLossCount++;

            Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

            // Output results
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);
            Console.WriteLine("\nDNI1 total loss " + DNI1totalLoss / DNI1totalLossCount);
            Console.WriteLine("DNI2 total loss " + DNI2totalLoss / DNI2totalLossCount);
            Console.WriteLine("DNI3 total loss " + DNI3totalLoss / DNI3totalLossCount);
            Console.WriteLine("\nDNI1 local loss " + DNI1loss);
            Console.WriteLine("DNI2 local loss " + DNI2loss);
            Console.WriteLine("DNI3 local loss " + DNI3loss);

            // Test accuracy after every 20 batches
            if (i % 20 == 0)
            {
                Console.WriteLine("\nTesting...");

                // Get data randomly from the test data
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT);

                // Run the test
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
public static void Run() { Console.WriteLine("Build Vocabulary."); Vocabulary vocabulary = new Vocabulary(); string trainPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE); string validPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + VALID_FILE, VALID_FILE); string testPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE); int[] trainData = vocabulary.LoadData(trainPath); int[] validData = vocabulary.LoadData(validPath); int[] testData = vocabulary.LoadData(testPath); int nVocab = vocabulary.Length; Console.WriteLine("Network Initilizing."); FunctionStack model = new FunctionStack( new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"), new Dropout(), new LSTM(N_UNITS, N_UNITS, name: "l2 LSTM"), new Dropout(), new LSTM(N_UNITS, N_UNITS, name: "l3 LSTM"), new Dropout(), new Linear(N_UNITS, nVocab, name: "l4 Linear") ); //与えられたthresholdで頭打ちではなく、全パラメータのL2Normからレートを取り補正を行う GradientClipping gradientClipping = new GradientClipping(threshold: GRAD_CLIP); SGD sgd = new SGD(learningRate: 1); model.SetOptimizer(gradientClipping, sgd); Real wholeLen = trainData.Length; int jump = (int)Math.Floor(wholeLen / BATCH_SIZE); int epoch = 0; Stack <NdArray[]> backNdArrays = new Stack <NdArray[]>(); Console.WriteLine("Train Start."); for (int i = 0; i < jump * N_EPOCH; i++) { NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE); NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE); for (int j = 0; j < BATCH_SIZE; j++) { x.Data[j] = trainData[(int)((jump * j + i) % wholeLen)]; t.Data[j] = trainData[(int)((jump * j + i + 1) % wholeLen)]; } NdArray[] result = model.Forward(x); Real sumLoss = new SoftmaxCrossEntropy().Evaluate(result, t); backNdArrays.Push(result); Console.WriteLine("[{0}/{1}] Loss: {2}", i + 1, jump, sumLoss); //Run truncated BPTT if ((i + 1) % BPROP_LEN == 0) { for (int j = 0; backNdArrays.Count > 0; j++) { Console.WriteLine("backward" + backNdArrays.Count); model.Backward(backNdArrays.Pop()); } model.Update(); model.ResetState(); } if ((i + 1) % jump == 0) { epoch++; Console.WriteLine("evaluate"); Console.WriteLine("validation perplexity: {0}", Evaluate(model, validData)); if (epoch >= 6) { sgd.LearningRate /= 1.2; Console.WriteLine("learning rate =" + sgd.LearningRate); } } } Console.WriteLine("test start"); Console.WriteLine("test perplexity:" + Evaluate(model, testData)); }
private static void FacePresence()
{
    var batchSize = 1000;
    int width = 32;
    int height = 32;

    BuilderInstance<float>.Volume = new VolumeBuilder(); // For GPU

    var imageLoader = new ImageLoader();
    var randomImageLoader = new ImageLoader(true, 2);

    // Load Dataset - Faces
    var faces1 = LfwCropLoader.LoadDataset(@"..\..\..\Dataset\lfwcrop_grey", width, height);
    var faces2 = imageLoader.LoadDataset(@"..\..\..\Dataset\custom\faces", width, height); // dump your own face images here

    // Load Dataset - Non-faces
    var nonFaces1 = randomImageLoader.LoadDataset(@"..\..\..\Dataset\scene_categories", width, height);
    var nonFaces2 = randomImageLoader.LoadDataset(@"..\..\..\Dataset\TextureDatabase", width, height);
    var nonFaces3 = randomImageLoader.LoadDataset(@"..\..\..\Dataset\cars_brad_bg", width, height);
    var nonFaces4 = randomImageLoader.LoadDataset(@"..\..\..\Dataset\houses", width, height);
    var nonFaces5 = imageLoader.LoadDataset(@"..\..\..\Dataset\custom\non_faces", width, height); // dump your own non-face images here

    var facesDataset = new FaceDetectionDataset(width, height);
    facesDataset.TrainSet.AddRange(faces1);
    facesDataset.TrainSet.AddRange(faces2);
    facesDataset.TrainSet.AddRange(nonFaces1);
    facesDataset.TrainSet.AddRange(nonFaces2);
    facesDataset.TrainSet.AddRange(nonFaces3);
    facesDataset.TrainSet.AddRange(nonFaces4);
    facesDataset.TrainSet.AddRange(nonFaces5);

    Console.WriteLine(" Done.");

    ConvNetSharp<float> cns;

    // Model
    Op<float> softmax = null;
    if (File.Exists("FaceDetection.json"))
    {
        Console.WriteLine("Loading model from disk...");
        softmax = SerializationExtensions.Load<float>("FaceDetection", false)[0]; // first element is the model (second element is the cost if it was saved along)
        cns = softmax.Graph; // Deserialization creates its own graph that we have to use. TODO: make it simpler in ConvNetSharp
    }
    else
    {
        cns = new ConvNetSharp<float>();
    }

    var x = cns.PlaceHolder("x");
    var dropProb = cns.PlaceHolder("dropProb");

    if (softmax == null)
    {
        // Inspired by https://github.com/PCJohn/FaceDetect
        var layer1 = cns.Relu(cns.Conv(x, 5, 5, 4, 2) + cns.Variable(new Shape(1, 1, 4, 1), "bias1", true));
        var layer2 = cns.Relu(cns.Conv(layer1, 3, 3, 16, 2) + cns.Variable(new Shape(1, 1, 16, 1), "bias2", true));
        var layer3 = cns.Relu(cns.Conv(layer2, 3, 3, 32) + cns.Variable(new Shape(1, 1, 32, 1), "bias3", true));
        var flatten = cns.Flatten(layer3);
        var dense1 = cns.Dropout(cns.Relu(cns.Dense(flatten, 600)) + cns.Variable(new Shape(1, 1, 600, 1), "bias4", true), dropProb);
        var dense2 = cns.Dense(dense1, 2) + cns.Variable(new Shape(1, 1, 2, 1), "bias5", true);
        softmax = cns.Softmax(dense2);
    }

    var y = cns.PlaceHolder("y");

    // Cost
    var cost = new SoftmaxCrossEntropy<float>(cns, softmax, y);

    // Optimizer
    var optimizer = new AdamOptimizer<float>(cns, 1e-4f, 0.9f, 0.999f, 1e-16f);

    //if (File.Exists("loss.csv"))
    //{
    //    File.Delete("loss.csv");
    //}

    Volume<float> trainingProb = 0.5f;
    Volume<float> testingProb = 0.0f;

    // Training
    using (var session = new Session<float>())
    {
        session.Differentiate(cost); // computes dCost/dW at every node of the graph

        var iteration = 0;
        double currentCost;
        do
        {
            var batch = facesDataset.GetBatch(batchSize);
            var input = batch.Item1;
            var output = batch.Item2;

            var dico = new Dictionary<string, Volume<float>> { { "x", input }, { "y", output }, { "dropProb", trainingProb } };

            var stopwatch = Stopwatch.StartNew();
            // session.Run(softmax, dico);
            Debug.WriteLine(stopwatch.ElapsedMilliseconds);

            currentCost = session.Run(cost, dico);
            Console.WriteLine($"cost: {currentCost}");
            File.AppendAllLines("loss.csv", new[] { currentCost.ToString(CultureInfo.InvariantCulture) });

            session.Run(optimizer, dico);

            if (iteration++ % 100 == 0)
            {
                // Test on random pictures
                var test = facesDataset.GetBatch(100);
                dico = new Dictionary<string, Volume<float>> { { "x", test.Item1 }, { "dropProb", testingProb } };

                var result = session.Run(softmax, dico);

                int correct = 0;
                for (int i = 0; i < 100; i++)
                {
                    var class0Prob = result.Get(0, 0, 0, i);
                    var class1Prob = result.Get(0, 0, 1, i);

                    if ((test.Item3[i].IsFace && class1Prob > class0Prob) || (!test.Item3[i].IsFace && class0Prob > class1Prob))
                    {
                        correct++;
                    }
                }

                Console.WriteLine($"Test: {correct}%");
                File.AppendAllLines("accuracy.csv", new[] { correct.ToString() });

                var filename = test.Item3[0].Filename;

                softmax.Save("FaceDetection");
            }
        } while (currentCost > 1e-5 && !Console.KeyAvailable);

        softmax.Save("FaceDetection");
    }
}
public static void Run()
{
    _outputStream = File.Create(LogPath);
    _logWriter = new HistogramLogWriter(_outputStream);
    _logWriter.Write(DateTime.Now);

    var recorder = HistogramFactory
        .With64BitBucketSize()
        ?.WithValuesFrom(1)
        ?.WithValuesUpTo(2345678912345)
        ?.WithPrecisionOf(3)
        ?.WithThreadSafeWrites()
        ?.WithThreadSafeReads()
        ?.Create();

    var accumulatingHistogram = new LongHistogram(2345678912345, 3);

    var size = accumulatingHistogram.GetEstimatedFootprintInBytes();
    RILogManager.Default?.SendDebug("Histogram size = {0} bytes ({1:F2} MB)", size, size / 1024.0 / 1024.0);

    RILogManager.Default?.SendDebug("Recorded latencies [in system clock ticks]");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.None, useCsvFormat: true);
    Console.WriteLine();

    RILogManager.Default?.SendDebug("Recorded latencies [in usec]");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMicroseconds, useCsvFormat: true);
    Console.WriteLine();

    RILogManager.Default?.SendDebug("Recorded latencies [in msec]");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);
    Console.WriteLine();

    RILogManager.Default?.SendDebug("Recorded latencies [in sec]");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToSeconds, useCsvFormat: true);

    DocumentResults(accumulatingHistogram, recorder);

    RILogManager.Default?.SendDebug("Build Vocabulary.");
    DocumentResults(accumulatingHistogram, recorder);

    Vocabulary vocabulary = new Vocabulary();
    DocumentResults(accumulatingHistogram, recorder);

    string trainPath = InternetFileDownloader.Download(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE);
    DocumentResults(accumulatingHistogram, recorder);

    string validPath = InternetFileDownloader.Download(DOWNLOAD_URL + VALID_FILE, VALID_FILE);
    DocumentResults(accumulatingHistogram, recorder);

    string testPath = InternetFileDownloader.Download(DOWNLOAD_URL + TEST_FILE, TEST_FILE);
    DocumentResults(accumulatingHistogram, recorder);

    int[] trainData = vocabulary.LoadData(trainPath);
    DocumentResults(accumulatingHistogram, recorder);

    int[] validData = vocabulary.LoadData(validPath);
    DocumentResults(accumulatingHistogram, recorder);

    int[] testData = vocabulary.LoadData(testPath);
    DocumentResults(accumulatingHistogram, recorder);

    int nVocab = vocabulary.Length;

    RILogManager.Default?.SendDebug("Network Initializing.");
    FunctionStack model = new FunctionStack("Test10",
        new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"),
        new Dropout(),
        new LSTM(true, N_UNITS, N_UNITS, name: "l2 LSTM"),
        new Dropout(),
        new LSTM(true, N_UNITS, N_UNITS, name: "l3 LSTM"),
        new Dropout(),
        new Linear(true, N_UNITS, nVocab, name: "l4 Linear")
    );
    DocumentResults(accumulatingHistogram, recorder);

    // Instead of clipping each value at the given threshold, scale by a rate derived from the L2 norm of all parameters
    GradientClipping gradientClipping = new GradientClipping(threshold: GRAD_CLIP);
    SGD sgd = new SGD(learningRate: 1);
    model.SetOptimizer(gradientClipping, sgd);
    DocumentResults(accumulatingHistogram, recorder);

    Real wholeLen = trainData.Length;
    int jump = (int)Math.Floor(wholeLen / BATCH_SIZE);
    int epoch = 0;

    Stack<NdArray[]> backNdArrays = new Stack<NdArray[]>();

    RILogManager.Default?.SendDebug("Train Start.");
    double dVal;
    NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);
    NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);

    for (int i = 0; i < jump * N_EPOCH; i++)
    {
        for (int j = 0; j < BATCH_SIZE; j++)
        {
            x.Data[j] = trainData[(int)((jump * j + i) % wholeLen)];
            t.Data[j] = trainData[(int)((jump * j + i + 1) % wholeLen)];
        }

        NdArray[] result = model.Forward(true, x);
        Real sumLoss = new SoftmaxCrossEntropy().Evaluate(result, t);
        backNdArrays.Push(result);

        RILogManager.Default?.SendDebug("[{0}/{1}] Loss: {2}", i + 1, jump, sumLoss);

        // Run truncated BPTT
        if ((i + 1) % BPROP_LEN == 0)
        {
            for (int j = 0; backNdArrays.Count > 0; j++)
            {
                RILogManager.Default?.SendDebug("backward" + backNdArrays.Count);
                model.Backward(true, backNdArrays.Pop());
            }

            model.Update();
            model.ResetState();
        }

        if ((i + 1) % jump == 0)
        {
            epoch++;
            RILogManager.Default?.SendDebug("evaluate");
            dVal = Evaluate(model, validData);
            RILogManager.Default?.SendDebug($"validation perplexity: {dVal}");

            if (epoch >= 6)
            {
                sgd.LearningRate /= 1.2;
                RILogManager.Default?.SendDebug("learning rate =" + sgd.LearningRate);
            }
        }

        DocumentResults(accumulatingHistogram, recorder);
    }

    RILogManager.Default?.SendDebug("test start");
    dVal = Evaluate(model, testData);
    RILogManager.Default?.SendDebug("test perplexity:" + dVal);
    DocumentResults(accumulatingHistogram, recorder);

    _logWriter.Dispose();
    _outputStream.Dispose();

    RILogManager.Default?.SendDebug("Log contents");
    RILogManager.Default?.SendDebug(File.ReadAllText(LogPath));
    Console.WriteLine();

    RILogManager.Default?.SendDebug("Percentile distribution (values reported in milliseconds)");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);

    RILogManager.Default?.SendDebug("Mean: " + BytesToString(accumulatingHistogram.GetMean()) + ", StdDev: " + BytesToString(accumulatingHistogram.GetStdDeviation()));
}
const Real L2_SCALE = 1e-4f; // L2 loss scale

public static void Run()
{
    // Prepare the MNIST data
    Console.WriteLine("MNIST data loading...");
    MnistData<Real> mnistData = new MnistData<Real>();

    // Get the full data set from the test data
    TestDataSet<Real> datasetY = mnistData.Eval.GetAllDataSet();

    Console.WriteLine("\nNetwork initializing...");

    int numBatches = mnistData.Train.Length / BATCH_SIZE; // 600 = 60000 / 100
    int batchPerEpoch = mnistData.Train.Length / BATCH_SIZE;
    int[] boundaries = { LR_DROP_EPOCH * batchPerEpoch, (LR_DROP_EPOCH + 20) * batchPerEpoch };

    Dictionary<string, Real> customSparsities = new Dictionary<string, Real>
    {
        { "layer2", END_SPARSITY * SPARSITY_SCALE },
        { "layer3", END_SPARSITY * 0 }
    };

    MaskedLinear<Real> layer1 = new MaskedLinear<Real>(28 * 28, 300, name: "layer1", gpuEnable: true);
    MaskedLinear<Real> layer2 = new MaskedLinear<Real>(300, 100, name: "layer2", gpuEnable: true);
    MaskedLinear<Real> layer3 = new MaskedLinear<Real>(100, 10, name: "layer3", gpuEnable: true);

    // Write the network configuration into a FunctionStack
    FunctionStack<Real> nn = new FunctionStack<Real>(
        layer1,
        new ReLU<Real>(name: "l1 ReLU"),
        layer2,
        new ReLU<Real>(name: "l2 ReLU"),
        layer3
    );

    SoftmaxCrossEntropy<Real> sce = new SoftmaxCrossEntropy<Real>();

    WeightDecay<Real> weightDecay = new WeightDecay<Real>(L2_SCALE);
    weightDecay.AddParameters(layer1.Weight, layer2.Weight, layer3.Weight);

    MomentumSGD<Real> mSGD = new MomentumSGD<Real>(LEARNING_RATE);
    mSGD.SetUp(nn);

    var opt = new SparseRigLOptimizer(mSGD, MASKUPDATE_BEGIN_STEP, MASKUPDATE_END_STEP, MASKUPDATE_FREQUENCY, DROP_FRACTION, "cosine", "zeros", RIGL_ACC_SCALE);

    NdArray<Real>[] allMasks = { layer1.Mask, layer2.Mask, layer3.Mask, };
    string[] layerNames = { layer1.Name, layer2.Name, layer3.Name, };
    NdArray<Real>[] allWeights = { layer1.Weight, layer2.Weight, layer3.Weight, };

    // Initialize the masks
    SparseUtils.MaskInit(allMasks, layerNames, "erdos_renyi", END_SPARSITY, customSparsities);

    Console.WriteLine("[Global sparsity] " + SparseUtils.CalculateSparsity(allMasks));
    var weightSparsity = GetWeightSparsity(allMasks);
    Console.WriteLine("[Sparsity] Layer0, Layer1 : " + weightSparsity[0] + ", " + weightSparsity[1]);

    Console.WriteLine("\nTraining Start...");

    // Start training
    for (int i = 0; i < NUM_EPOCHS * numBatches; i++)
    {
        // Get data randomly from the training data
        TestDataSet<Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_SIZE);

        // Run batch training
        NdArray<Real> y = nn.Forward(datasetX.Data)[0];
        Real loss = sce.Evaluate(y, datasetX.Label);
        nn.Backward(y);

        weightDecay.Update();
        opt._optimizer.LearningRate = PiecewiseConstant(opt._optimizer.UpdateCount, boundaries, LEARNING_RATE);
        opt.condMaskUpdate(allMasks, allWeights);

        //// Output results every 10 iterations
        //if (i % 10 + 1 == 10)
        //{
        //    Console.WriteLine("\nbatch count:" + (i + 1) + " (lr:" + opt._optimizer.LearningRate + ")");
        //    Console.WriteLine("loss " + loss);
        //}

        // Test accuracy
        if (i % numBatches + 1 == numBatches)
        {
            Console.WriteLine("\nEpoch:" + Math.Floor((i + 1) / (Real)numBatches) + " Iteration:" + (i + 1) + " Testing... ");

            // Run the test
            Real accuracy = Trainer.Accuracy(nn, datasetY, new SoftmaxCrossEntropy<Real>(), out loss);
            Console.WriteLine("loss: " + loss);
            Console.WriteLine("accuracy: " + accuracy);
        }
    }
}
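PiecewiseConstant is called above but not defined in this listing. A minimal sketch consistent with the call site, assuming the schedule simply holds the base rate and drops it at each boundary (the 10x decay factor is an illustrative assumption, not taken from the sample):

static Real PiecewiseConstant(long updateCount, int[] boundaries, Real baseRate)
{
    Real rate = baseRate;
    foreach (int boundary in boundaries)
    {
        if (updateCount >= boundary)
        {
            rate /= 10; // assumed decay factor, for illustration only
        }
    }
    return rate;
}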
static void Main(string[] args)
{
    Console.WriteLine("MNIST Test");

    int seed;
    using (var rng = new RNGCryptoServiceProvider())
    {
        var buffer = new byte[sizeof(int)];
        rng.GetBytes(buffer);
        seed = BitConverter.ToInt32(buffer, 0);
    }

    RandomProvider.SetSeed(seed);

    var assembly = Assembly.GetExecutingAssembly();
    var filename = "CNN.xml";
    var serializer = new DataContractSerializer(typeof(IEnumerable<Layer>), new Type[]
    {
        typeof(Convolution),
        typeof(BatchNormalization),
        typeof(Activation),
        typeof(ReLU),
        typeof(MaxPooling),
        typeof(FullyConnected),
        typeof(Dropout),
        typeof(Softmax)
    });
    var trainingList = new List<ValueTuple<double[], double[]>>();
    var testList = new List<ValueTuple<double[], double[]>>();
    var accuracyList = new List<double>();
    var lossList = new List<double>();
    var logPath = "Log.csv";
    var channels = 1;
    var imageWidth = 28;
    var imageHeight = 28;
    var filters = 30;
    var filterWidth = 5;
    var filterHeight = 5;
    var poolWidth = 2;
    var poolHeight = 2;
    var activationMapWidth = Convolution.GetActivationMapLength(imageWidth, filterWidth);
    var activationMapHeight = Convolution.GetActivationMapLength(imageHeight, filterHeight);
    var outputWidth = MaxPooling.GetOutputLength(activationMapWidth, poolWidth);
    var outputHeight = MaxPooling.GetOutputLength(activationMapHeight, poolHeight);
    Model model;

    using (Stream imagesStream = assembly.GetManifestResourceStream("MNISTTest.train-images.idx3-ubyte"),
                  labelsStream = assembly.GetManifestResourceStream("MNISTTest.train-labels.idx1-ubyte"))
    {
        foreach (var image in MnistImage.Load(imagesStream, labelsStream).Take(1000))
        {
            // One-hot encode the label
            var t = new double[10];
            for (int i = 0; i < 10; i++)
            {
                t[i] = i == image.Label ? 1.0 : 0.0;
            }

            trainingList.Add(ValueTuple.Create<double[], double[]>(image.Normalize(), t));
        }
    }

    using (Stream imagesStream = assembly.GetManifestResourceStream("MNISTTest.t10k-images.idx3-ubyte"),
                  labelsStream = assembly.GetManifestResourceStream("MNISTTest.t10k-labels.idx1-ubyte"))
    {
        foreach (var image in MnistImage.Load(imagesStream, labelsStream).Take(1000))
        {
            // One-hot encode the label
            var t = new double[10];
            for (int i = 0; i < 10; i++)
            {
                t[i] = i == image.Label ? 1.0 : 0.0;
            }

            testList.Add(ValueTuple.Create<double[], double[]>(image.Normalize(), t));
        }
    }

    if (File.Exists(filename))
    {
        using (XmlReader xmlReader = XmlReader.Create(filename))
        {
            model = new Model((IEnumerable<Layer>)serializer.ReadObject(xmlReader));
        }
    }
    else
    {
        int epochs = 50;
        int iterations = 1;
        ILossFunction lossFunction = new SoftmaxCrossEntropy();

        model = new Model(
            new Convolution(channels, imageWidth, imageHeight, filters, filterWidth, filterHeight, (fanIn, fanOut) => Initializers.HeNormal(fanIn),
            new Activation(new ReLU(),
            new MaxPooling(filters, activationMapWidth, activationMapHeight, poolWidth, poolHeight,
            new FullyConnected(filters * outputWidth * outputHeight, (fanIn, fanOut) => Initializers.HeNormal(fanIn),
            new Activation(new ReLU(),
            new Dropout(0.5,
            new FullyConnected(100, (fanIn, fanOut) => Initializers.GlorotNormal(fanIn, fanOut),
            new Dropout(10, 0.5)))))))));
        //model.WeightDecayRate = 0.1;

        model.Stepped += (sender, e) =>
        {
            double tptn = 0.0;

            trainingList.ForEach(x =>
            {
                if (ArgMax(model.Predict(x.Item1)) == ArgMax(x.Item2))
                {
                    tptn += 1.0;
                }
            });

            var accuracy = tptn / trainingList.Count;
            var loss = model.GetLoss(trainingList, lossFunction);

            accuracyList.Add(accuracy);
            lossList.Add(loss);

            Console.WriteLine("Epoch {0}/{1}", iterations, epochs);
            Console.WriteLine("Accuracy: {0}, Loss: {1}", accuracy, loss);

            iterations++;
        };

        Console.WriteLine("Training...");

        var stopwatch = Stopwatch.StartNew();
        model.Fit(trainingList, epochs, 100, new Adam(), lossFunction);
        stopwatch.Stop();

        Console.WriteLine("Done ({0}).", stopwatch.Elapsed.ToString());
    }

    double testTptn = 0.0;

    testList.ForEach(x =>
    {
        var vector = model.Predict(x.Item1);
        var i = ArgMax(vector);
        var j = ArgMax(x.Item2);

        if (i == j && Math.Round(vector[i]) == x.Item2[j])
        {
            testTptn += 1.0;
        }
    });

    Console.WriteLine("Accuracy: {0}", testTptn / testList.Count);

    if (accuracyList.Count > 0)
    {
        var logDictionary = new Dictionary<string, IEnumerable<double>>();
        logDictionary.Add("Accuracy", accuracyList);
        logDictionary.Add("Loss", lossList);
        ToCsv(logPath, logDictionary);
        Console.WriteLine("Saved log to {0}...", logPath);
    }

    XmlWriterSettings settings = new XmlWriterSettings();
    settings.Indent = true;
    settings.Encoding = new System.Text.UTF8Encoding(false);

    using (XmlWriter xmlWriter = XmlWriter.Create(filename, settings))
    {
        serializer.WriteObject(xmlWriter, model.Layers);
        xmlWriter.Flush();
    }
}
public static void Run()
{
    // Prepare the MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnistData = new MnistData();

    Console.WriteLine("Training Start...");

    // Write the network configuration into FunctionStacks
    FunctionStack Layer1 = new FunctionStack(
        new Linear(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );

    FunctionStack Layer2 = new FunctionStack(
        new Linear(256, 256, name: "l2 Linear"),
        new BatchNormalization(256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );

    FunctionStack Layer3 = new FunctionStack(
        new Linear(256, 256, name: "l3 Linear"),
        new BatchNormalization(256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );

    FunctionStack Layer4 = new FunctionStack(
        new Linear(256, 10, name: "l4 Linear")
    );

    // A FunctionStack can itself be stacked as a Function
    FunctionStack nn = new FunctionStack
    (
        Layer1,
        Layer2,
        Layer3,
        Layer4
    );

    FunctionStack cDNI1 = new FunctionStack(
        new Linear(256 + 10, 1024, name: "cDNI1 Linear1"),
        new BatchNormalization(1024, name: "cDNI1 Norm1"),
        new ReLU(name: "cDNI1 ReLU1"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI1 Linear3")
    );

    FunctionStack cDNI2 = new FunctionStack(
        new Linear(256 + 10, 1024, name: "cDNI2 Linear1"),
        new BatchNormalization(1024, name: "cDNI2 Norm1"),
        new ReLU(name: "cDNI2 ReLU1"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
    );

    FunctionStack cDNI3 = new FunctionStack(
        new Linear(256 + 10, 1024, name: "cDNI3 Linear1"),
        new BatchNormalization(1024, name: "cDNI3 Norm1"),
        new ReLU(name: "cDNI3 ReLU1"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
    );

    // Declare the optimizers
    Layer1.SetOptimizer(new Adam(0.00003f));
    Layer2.SetOptimizer(new Adam(0.00003f));
    Layer3.SetOptimizer(new Adam(0.00003f));
    Layer4.SetOptimizer(new Adam(0.00003f));

    cDNI1.SetOptimizer(new Adam(0.00003f));
    cDNI2.SetOptimizer(new Adam(0.00003f));
    cDNI3.SetOptimizer(new Adam(0.00003f));

    for (int epoch = 0; epoch < 10; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Total error across the whole run
        Real totalLoss = 0;
        Real cDNI1totalLoss = 0;
        Real cDNI2totalLoss = 0;
        Real cDNI3totalLoss = 0;
        long totalLossCount = 0;
        long cDNI1totalLossCount = 0;
        long cDNI2totalLossCount = 0;
        long cDNI3totalLossCount = 0;

        // How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            // Get data randomly from the training data
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT);

            // Run the first layer
            NdArray[] layer1ForwardResult = Layer1.Forward(datasetX.Data);
            ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label);

            // Get the gradient of the first layer
            NdArray[] cDNI1Result = cDNI1.Forward(layer1ResultDataSet.GetTrainData());

            // Apply the gradient of the first layer
            layer1ForwardResult[0].Grad = cDNI1Result[0].Data.ToArray();

            // Update the first layer
            Layer1.Backward(layer1ForwardResult);
            layer1ForwardResult[0].ParentFunc = null;
            Layer1.Update();

            // Run the second layer
            NdArray[] layer2ForwardResult = Layer2.Forward(layer1ResultDataSet.Result);
            ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label);

            // Get the gradient of the second layer
            NdArray[] cDNI2Result = cDNI2.Forward(layer2ResultDataSet.GetTrainData());

            // Apply the gradient of the second layer
            layer2ForwardResult[0].Grad = cDNI2Result[0].Data.ToArray();

            // Update the second layer
            Layer2.Backward(layer2ForwardResult);
            layer2ForwardResult[0].ParentFunc = null;

            // Train the cDNI for the first layer
            Real cDNI1loss = new MeanSquaredError().Evaluate(cDNI1Result, new NdArray(layer1ResultDataSet.Result[0].Grad, cDNI1Result[0].Shape, cDNI1Result[0].BatchCount));

            Layer2.Update();

            cDNI1.Backward(cDNI1Result);
            cDNI1.Update();

            cDNI1totalLoss += cDNI1loss;
            cDNI1totalLossCount++;

            // Run the third layer
            NdArray[] layer3ForwardResult = Layer3.Forward(layer2ResultDataSet.Result);
            ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label);

            // Get the gradient of the third layer
            NdArray[] cDNI3Result = cDNI3.Forward(layer3ResultDataSet.GetTrainData());

            // Apply the gradient of the third layer
            layer3ForwardResult[0].Grad = cDNI3Result[0].Data.ToArray();

            // Update the third layer
            Layer3.Backward(layer3ForwardResult);
            layer3ForwardResult[0].ParentFunc = null;

            // Train the cDNI for the second layer
            Real cDNI2loss = new MeanSquaredError().Evaluate(cDNI2Result, new NdArray(layer2ResultDataSet.Result[0].Grad, cDNI2Result[0].Shape, cDNI2Result[0].BatchCount));

            Layer3.Update();

            cDNI2.Backward(cDNI2Result);
            cDNI2.Update();

            cDNI2totalLoss += cDNI2loss;
            cDNI2totalLossCount++;

            // Run the fourth layer
            NdArray[] layer4ForwardResult = Layer4.Forward(layer3ResultDataSet.Result);

            // Get the gradient of the fourth layer
            Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label);

            // Update the fourth layer
            Layer4.Backward(layer4ForwardResult);
            layer4ForwardResult[0].ParentFunc = null;

            totalLoss += sumLoss;
            totalLossCount++;

            // Train the cDNI for the third layer
            Real cDNI3loss = new MeanSquaredError().Evaluate(cDNI3Result, new NdArray(layer3ResultDataSet.Result[0].Grad, cDNI3Result[0].Shape, cDNI3Result[0].BatchCount));

            Layer4.Update();

            cDNI3.Backward(cDNI3Result);
            cDNI3.Update();

            cDNI3totalLoss += cDNI3loss;
            cDNI3totalLossCount++;

            Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

            // Output results
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);
            Console.WriteLine("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount);
            Console.WriteLine("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount);
            Console.WriteLine("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount);
            Console.WriteLine("\ncDNI1 local loss " + cDNI1loss);
            Console.WriteLine("cDNI2 local loss " + cDNI2loss);
            Console.WriteLine("cDNI3 local loss " + cDNI3loss);

            // Test accuracy after every 20 batches
            if (i % 20 == 0)
            {
                Console.WriteLine("\nTesting...");

                // Get data randomly from the test data
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT);

                // Run the test
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
public static void Run() { // Prepare MNIST data RILogManager.Default?.SendDebug("MNIST Data Loading..."); MnistData mnistData = new MnistData(28); RILogManager.Default?.SendDebug("Training Start..."); // Write the network configuration in FunctionStack FunctionStack Layer1 = new FunctionStack("Test12 Layer 1", new Linear(true, 28 * 28, 256, name: "l1 Linear"), new BatchNormalization(true, 256, name: "l1 Norm"), new ReLU(name: "l1 ReLU") ); FunctionStack Layer2 = new FunctionStack("Test12 Layer 2", new Linear(true, 256, 256, name: "l2 Linear"), new BatchNormalization(true, 256, name: "l2 Norm"), new ReLU(name: "l2 ReLU") ); FunctionStack Layer3 = new FunctionStack("Test12 Layer 3", new Linear(true, 256, 256, name: "l3 Linear"), new BatchNormalization(true, 256, name: "l3 Norm"), new ReLU(name: "l3 ReLU") ); FunctionStack Layer4 = new FunctionStack("Test12 Layer 4", new Linear(true, 256, 10, name: "l4 Linear") ); // Function stack itself is also stacked as Function FunctionStack nn = new FunctionStack ("Test12", Layer1, Layer2, Layer3, Layer4 ); FunctionStack cDNI1 = new FunctionStack("Test12 DNI 1", new Linear(true, 256 + 10, 1024, name: "cDNI1 Linear1"), new BatchNormalization(true, 1024, name: "cDNI1 Norm1"), new ReLU(name: "cDNI1 ReLU1"), new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3") ); FunctionStack cDNI2 = new FunctionStack("Test12 DNI 2", new Linear(true, 256 + 10, 1024, name: "cDNI2 Linear1"), new BatchNormalization(true, 1024, name: "cDNI2 Norm1"), new ReLU(name: "cDNI2 ReLU1"), new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3") ); FunctionStack cDNI3 = new FunctionStack("Test12 DNI 3", new Linear(true, 256 + 10, 1024, name: "cDNI3 Linear1"), new BatchNormalization(true, 1024, name: "cDNI3 Norm1"), new ReLU(name: "cDNI3 ReLU1"), new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3") ); Layer1.SetOptimizer(new Adam("Adam", 0.00003f)); Layer2.SetOptimizer(new Adam("Adam", 0.00003f)); Layer3.SetOptimizer(new Adam("Adam", 0.00003f)); Layer4.SetOptimizer(new Adam("Adam", 0.00003f)); cDNI1.SetOptimizer(new Adam("Adam", 0.00003f)); cDNI2.SetOptimizer(new Adam("Adam", 0.00003f)); cDNI3.SetOptimizer(new Adam("Adam", 0.00003f)); // Describe each function stack; RILogManager.Default?.SendDebug(Layer1.Describe()); RILogManager.Default?.SendDebug(Layer2.Describe()); RILogManager.Default?.SendDebug(Layer3.Describe()); RILogManager.Default?.SendDebug(Layer4.Describe()); RILogManager.Default?.SendDebug(cDNI1.Describe()); RILogManager.Default?.SendDebug(cDNI2.Describe()); RILogManager.Default?.SendDebug(cDNI3.Describe()); for (int epoch = 0; epoch < 10; epoch++) { // Total error in the whole Real totalLoss = 0; Real cDNI1totalLoss = 0; Real cDNI2totalLoss = 0; Real cDNI3totalLoss = 0; long totalLossCount = 0; long cDNI1totalLossCount = 0; long cDNI2totalLossCount = 0; long cDNI3totalLossCount = 0; // how many times to run the batch for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++) { RILogManager.Default?.SendDebug("epoch: " + (epoch + 1) + " of 10, batch iteration: " + i + " of " + TRAIN_DATA_COUNT); RILogManager.Default?.ViewerSendWatch("Epoch", epoch + 1); RILogManager.Default?.ViewerSendWatch("Batch Iteration", i); // Get data randomly from the training data TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28); // Run first tier NdArray[] layer1ForwardResult = Layer1.Forward(true, datasetX.Data); ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label); 
// Obtain the slope of the first layer NdArray[] cDNI1Result = cDNI1.Forward(true, layer1ResultDataSet.GetTrainData()); // Apply the slope of the first layer layer1ForwardResult[0].Grad = cDNI1Result[0].Data.ToArray(); //Update first layer Layer1.Backward(true, layer1ForwardResult); layer1ForwardResult[0].ParentFunc = null; Layer1.Update(); // Run Layer 2 NdArray[] layer2ForwardResult = Layer2.Forward(true, layer1ResultDataSet.Result); ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label); // Get the inclination of the second layer NdArray[] cDNI2Result = cDNI2.Forward(true, layer2ResultDataSet.GetTrainData()); // Apply the slope of the second layer layer2ForwardResult[0].Grad = cDNI2Result[0].Data.ToArray(); //Update layer 2 Layer2.Backward(true, layer2ForwardResult); layer2ForwardResult[0].ParentFunc = null; //Perform learning of first layer cDNI Real cDNI1loss = new MeanSquaredError().Evaluate(cDNI1Result, new NdArray(layer1ResultDataSet.Result[0].Grad, cDNI1Result[0].Shape, cDNI1Result[0].BatchCount)); Layer2.Update(); cDNI1.Backward(true, cDNI1Result); cDNI1.Update(); cDNI1totalLoss += cDNI1loss; cDNI1totalLossCount++; //Run Third Tier NdArray[] layer3ForwardResult = Layer3.Forward(true, layer2ResultDataSet.Result); ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label); //Get the inclination of the third layer NdArray[] cDNI3Result = cDNI3.Forward(true, layer3ResultDataSet.GetTrainData()); //Apply the inclination of the third layer layer3ForwardResult[0].Grad = cDNI3Result[0].Data.ToArray(); //Update third layer Layer3.Backward(true, layer3ForwardResult); layer3ForwardResult[0].ParentFunc = null; //Perform learning of cDNI for layer 2 Real cDNI2loss = new MeanSquaredError().Evaluate(cDNI2Result, new NdArray(layer2ResultDataSet.Result[0].Grad, cDNI2Result[0].Shape, cDNI2Result[0].BatchCount)); Layer3.Update(); cDNI2.Backward(true, cDNI2Result); cDNI2.Update(); cDNI2totalLoss += cDNI2loss; cDNI2totalLossCount++; NdArray[] layer4ForwardResult = Layer4.Forward(true, layer3ResultDataSet.Result); Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label); Layer4.Backward(true, layer4ForwardResult); layer4ForwardResult[0].ParentFunc = null; totalLoss += sumLoss; totalLossCount++; Real cDNI3loss = new MeanSquaredError().Evaluate(cDNI3Result, new NdArray(layer3ResultDataSet.Result[0].Grad, cDNI3Result[0].Shape, cDNI3Result[0].BatchCount)); Layer4.Update(); cDNI3.Backward(true, cDNI3Result); cDNI3.Update(); cDNI3totalLoss += cDNI3loss; cDNI3totalLossCount++; RILogManager.Default?.SendDebug("\nbatch count " + i + "/" + TRAIN_DATA_COUNT); RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount); RILogManager.Default?.SendDebug("local loss " + sumLoss); RILogManager.Default?.SendDebug("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount); RILogManager.Default?.SendDebug("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount); RILogManager.Default?.SendDebug("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount); RILogManager.Default?.SendDebug("\ncDNI1 local loss " + cDNI1loss); RILogManager.Default?.SendDebug("cDNI2 local loss " + cDNI2loss); RILogManager.Default?.SendDebug("cDNI3 local loss " + cDNI3loss); if (i % 20 == 0) { RILogManager.Default?.SendDebug("\nTesting..."); TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28); Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label); 
RILogManager.Default?.SendDebug("accuracy " + accuracy); } } } }
public static void Run() { Console.WriteLine("Build Vocabulary."); Vocabulary vocabulary = new Vocabulary(); string trainPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE, TRAIN_FILE_HASH); string validPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + VALID_FILE, VALID_FILE, VALID_FILE_HASH); string testPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE, TEST_FILE_HASH); int[] trainData = vocabulary.LoadData(trainPath); int[] validData = vocabulary.LoadData(validPath); int[] testData = vocabulary.LoadData(testPath); int nVocab = vocabulary.Length; Console.WriteLine("Network Initilizing."); FunctionStack <Real> model = new FunctionStack <Real>( new EmbedID <Real>(nVocab, N_UNITS, name: "l1 EmbedID"), new Dropout <Real>(), new LSTM <Real>(N_UNITS, N_UNITS, name: "l2 LSTM"), new Dropout <Real>(), new LSTM <Real>(N_UNITS, N_UNITS, name: "l3 LSTM"), new Dropout <Real>(), new Linear <Real>(N_UNITS, nVocab, name: "l4 Linear") ); for (int i = 0; i < model.Functions.Length; i++) { for (int j = 0; j < model.Functions[i].Parameters.Length; j++) { for (int k = 0; k < model.Functions[i].Parameters[j].Data.Length; k++) { model.Functions[i].Parameters[j].Data[k] = ((Real)Mother.Dice.NextDouble() * 2.0f - 1.0f) / 10.0f; } } } //与えられたthresholdで頭打ちではなく、全パラメータのL2Normからレートを取り補正を行う GradientClipping <Real> gradientClipping = new GradientClipping <Real>(threshold: GRAD_CLIP); SGD <Real> sgd = new SGD <Real>(learningRate: 0.1f); gradientClipping.SetUp(model); sgd.SetUp(model); Real wholeLen = trainData.Length; int jump = (int)Math.Floor(wholeLen / BATCH_SIZE); int epoch = 0; Console.WriteLine("Train Start."); for (int i = 0; i < jump * N_EPOCH; i++) { NdArray <Real> x = new NdArray <Real>(new[] { 1 }, BATCH_SIZE); NdArray <int> t = new NdArray <int>(new[] { 1 }, BATCH_SIZE); for (int j = 0; j < BATCH_SIZE; j++) { x.Data[j] = trainData[(int)((jump * j + i) % wholeLen)]; t.Data[j] = trainData[(int)((jump * j + i + 1) % wholeLen)]; } NdArray <Real> result = model.Forward(x)[0]; Real sumLoss = new SoftmaxCrossEntropy <Real>().Evaluate(result, t); Console.WriteLine("[{0}/{1}] Loss: {2}", i + 1, jump, sumLoss); model.Backward(result); //Run truncated BPTT if ((i + 1) % BPROP_LEN == 0) { gradientClipping.Update(); sgd.Update(); model.ResetState(); } if ((i + 1) % jump == 0) { epoch++; Console.WriteLine("evaluate"); Console.WriteLine("validation perplexity: {0}", Evaluate(model, validData)); if (epoch >= 6) { sgd.LearningRate /= 1.2f; Console.WriteLine("learning rate =" + sgd.LearningRate); } } } Console.WriteLine("test start"); Console.WriteLine("test perplexity:" + Evaluate(model, testData)); }
public static void Run()
{
    // Prepare the MNIST data
    Console.WriteLine("MNIST Data Loading...");
    MnistData<Real> mnistData = new MnistData<Real>();

    Console.WriteLine("Training Start...");

    // Write the network configuration into FunctionStacks
    FunctionStack<Real> Layer1 = new FunctionStack<Real>(
        new Linear<Real>(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization<Real>(256, name: "l1 Norm"),
        new ReLU<Real>(name: "l1 ReLU")
    );

    FunctionStack<Real> Layer2 = new FunctionStack<Real>(
        new Linear<Real>(256, 256, name: "l2 Linear"),
        new BatchNormalization<Real>(256, name: "l2 Norm"),
        new ReLU<Real>(name: "l2 ReLU")
    );

    FunctionStack<Real> Layer3 = new FunctionStack<Real>(
        new Linear<Real>(256, 256, name: "l3 Linear"),
        new BatchNormalization<Real>(256, name: "l3 Norm"),
        new ReLU<Real>(name: "l3 ReLU")
    );

    FunctionStack<Real> Layer4 = new FunctionStack<Real>(
        new Linear<Real>(256, 10, name: "l4 Linear")
    );

    // A FunctionStack can itself be stacked as a Function
    FunctionStack<Real> nn = new FunctionStack<Real>
    (
        Layer1,
        Layer2,
        Layer3,
        Layer4
    );

    FunctionStack<Real> cDNI1 = new FunctionStack<Real>(
        new Linear<Real>(256 + 10, 1024, name: "cDNI1 Linear1"),
        new BatchNormalization<Real>(1024, name: "cDNI1 Norm1"),
        new ReLU<Real>(name: "cDNI1 ReLU1"),
        new Linear<Real>(1024, 256, initialW: new Real[1024, 256], name: "cDNI1 Linear3")
    );

    FunctionStack<Real> cDNI2 = new FunctionStack<Real>(
        new Linear<Real>(256 + 10, 1024, name: "cDNI2 Linear1"),
        new BatchNormalization<Real>(1024, name: "cDNI2 Norm1"),
        new ReLU<Real>(name: "cDNI2 ReLU1"),
        new Linear<Real>(1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
    );

    FunctionStack<Real> cDNI3 = new FunctionStack<Real>(
        new Linear<Real>(256 + 10, 1024, name: "cDNI3 Linear1"),
        new BatchNormalization<Real>(1024, name: "cDNI3 Norm1"),
        new ReLU<Real>(name: "cDNI3 ReLU1"),
        new Linear<Real>(1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
    );

    // Declare the optimizers
    Adam<Real> L1adam = new Adam<Real>(0.00003f);
    Adam<Real> L2adam = new Adam<Real>(0.00003f);
    Adam<Real> L3adam = new Adam<Real>(0.00003f);
    Adam<Real> L4adam = new Adam<Real>(0.00003f);

    L1adam.SetUp(Layer1);
    L2adam.SetUp(Layer2);
    L3adam.SetUp(Layer3);
    L4adam.SetUp(Layer4);

    Adam<Real> cDNI1adam = new Adam<Real>(0.00003f);
    Adam<Real> cDNI2adam = new Adam<Real>(0.00003f);
    Adam<Real> cDNI3adam = new Adam<Real>(0.00003f);

    cDNI1adam.SetUp(cDNI1);
    cDNI2adam.SetUp(cDNI2);
    cDNI3adam.SetUp(cDNI3);

    for (int epoch = 0; epoch < 10; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Total error across the whole run
        Real totalLoss = 0;
        Real cDNI1totalLoss = 0;
        Real cDNI2totalLoss = 0;
        Real cDNI3totalLoss = 0;
        long totalLossCount = 0;
        long cDNI1totalLossCount = 0;
        long cDNI2totalLossCount = 0;
        long cDNI3totalLossCount = 0;

        // How many times to run the batch
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            // Get data randomly from the training data
            TestDataSet<Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_DATA_COUNT);

            // Run the first layer
            NdArray<Real> layer1ForwardResult = Layer1.Forward(datasetX.Data)[0];
            ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label);

            // Get the gradient of the first layer
            NdArray<Real> cDNI1Result = cDNI1.Forward(layer1ResultDataSet.GetTrainData())[0];

            // Apply the gradient of the first layer
            layer1ForwardResult.Grad = cDNI1Result.Data.ToArray();

            // Update the first layer
            Layer1.Backward(layer1ForwardResult);
            layer1ForwardResult.ParentFunc = null;
            L1adam.Update();

            // Run the second layer
            NdArray<Real> layer2ForwardResult = Layer2.Forward(layer1ResultDataSet.Result)[0];
            ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label);

            // Get the gradient of the second layer
            NdArray<Real> cDNI2Result = cDNI2.Forward(layer2ResultDataSet.GetTrainData())[0];

            // Apply the gradient of the second layer
            layer2ForwardResult.Grad = cDNI2Result.Data.ToArray();

            // Update the second layer
            Layer2.Backward(layer2ForwardResult);
            layer2ForwardResult.ParentFunc = null;

            // Train the cDNI for the first layer
            Real cDNI1loss = new MeanSquaredError<Real>().Evaluate(cDNI1Result, new NdArray<Real>(layer1ResultDataSet.Result.Grad, cDNI1Result.Shape, cDNI1Result.BatchCount));

            L2adam.Update();

            cDNI1.Backward(cDNI1Result);
            cDNI1adam.Update();

            cDNI1totalLoss += cDNI1loss;
            cDNI1totalLossCount++;

            // Run the third layer
            NdArray<Real> layer3ForwardResult = Layer3.Forward(layer2ResultDataSet.Result)[0];
            ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label);

            // Get the gradient of the third layer
            NdArray<Real> cDNI3Result = cDNI3.Forward(layer3ResultDataSet.GetTrainData())[0];

            // Apply the gradient of the third layer
            layer3ForwardResult.Grad = cDNI3Result.Data.ToArray();

            // Update the third layer
            Layer3.Backward(layer3ForwardResult);
            layer3ForwardResult.ParentFunc = null;

            // Train the cDNI for the second layer
            Real cDNI2loss = new MeanSquaredError<Real>().Evaluate(cDNI2Result, new NdArray<Real>(layer2ResultDataSet.Result.Grad, cDNI2Result.Shape, cDNI2Result.BatchCount));

            L3adam.Update();

            cDNI2.Backward(cDNI2Result);
            cDNI2adam.Update();

            cDNI2totalLoss += cDNI2loss;
            cDNI2totalLossCount++;

            // Run the fourth layer
            NdArray<Real> layer4ForwardResult = Layer4.Forward(layer3ResultDataSet.Result)[0];

            // Get the gradient of the fourth layer
            Real sumLoss = new SoftmaxCrossEntropy<Real>().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label);

            // Update the fourth layer
            Layer4.Backward(layer4ForwardResult);
            layer4ForwardResult.ParentFunc = null;

            totalLoss += sumLoss;
            totalLossCount++;

            // Train the cDNI for the third layer
            Real cDNI3loss = new MeanSquaredError<Real>().Evaluate(cDNI3Result, new NdArray<Real>(layer3ResultDataSet.Result.Grad, cDNI3Result.Shape, cDNI3Result.BatchCount));

            L4adam.Update();

            cDNI3.Backward(cDNI3Result);
            cDNI3adam.Update();

            cDNI3totalLoss += cDNI3loss;
            cDNI3totalLossCount++;

            Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);

            // Output results
            Console.WriteLine("total loss " + totalLoss / totalLossCount);
            Console.WriteLine("local loss " + sumLoss);
            Console.WriteLine("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount);
            Console.WriteLine("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount);
            Console.WriteLine("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount);
            Console.WriteLine("\ncDNI1 local loss " + cDNI1loss);
            Console.WriteLine("cDNI2 local loss " + cDNI2loss);
            Console.WriteLine("cDNI3 local loss " + cDNI3loss);

            // Test accuracy after every 20 batches
            if (i % 20 == 0)
            {
                Console.WriteLine("\nTesting...");

                // Get data randomly from the test data
                TestDataSet<Real> datasetY = mnistData.Eval.GetRandomDataSet(TEST_DATA_COUNT);

                // Run the test
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                Console.WriteLine("accuracy " + accuracy);
            }
        }
    }
}
public static void Run() { // Prepare MNIST data RILogManager.Default?.SendDebug("MNIST Data Loading..."); MnistData mnistData = new MnistData(28); RILogManager.Default?.SendDebug("Training Start..."); // Write the network configuration in FunctionStack FunctionStack Layer1 = new FunctionStack("Test11 Layer 1", new Linear(true, 28 * 28, 256, name: "l1 Linear"), new BatchNormalization(true, 256, name: "l1 Norm"), new ReLU(name: "l1 ReLU") ); FunctionStack Layer2 = new FunctionStack("Test11 Layer 2", new Linear(true, 256, 256, name: "l2 Linear"), new BatchNormalization(true, 256, name: "l2 Norm"), new ReLU(name: "l2 ReLU") ); FunctionStack Layer3 = new FunctionStack("Test11 Layer 3", new Linear(true, 256, 256, name: "l3 Linear"), new BatchNormalization(true, 256, name: "l3 Norm"), new ReLU(name: "l3 ReLU") ); FunctionStack Layer4 = new FunctionStack("Test11 Layer 4", new Linear(true, 256, 10, name: "l4 Linear") ); // Function stack itself is also stacked as Function FunctionStack nn = new FunctionStack ("Test11", Layer1, Layer2, Layer3, Layer4 ); FunctionStack DNI1 = new FunctionStack("Test11 DNI1", new Linear(true, 256, 1024, name: "DNI1 Linear1"), new BatchNormalization(true, 1024, name: "DNI1 Norm1"), new ReLU(name: "DNI1 ReLU1"), new Linear(true, 1024, 1024, name: "DNI1 Linear2"), new BatchNormalization(true, 1024, name: "DNI1 Norm2"), new ReLU(name: "DNI1 ReLU2"), new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3") ); FunctionStack DNI2 = new FunctionStack("Test11 DNI2", new Linear(true, 256, 1024, name: "DNI2 Linear1"), new BatchNormalization(true, 1024, name: "DNI2 Norm1"), new ReLU(name: "DNI2 ReLU1"), new Linear(true, 1024, 1024, name: "DNI2 Linear2"), new BatchNormalization(true, 1024, name: "DNI2 Norm2"), new ReLU(name: "DNI2 ReLU2"), new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3") ); FunctionStack DNI3 = new FunctionStack("Test11 DNI3", new Linear(true, 256, 1024, name: "DNI3 Linear1"), new BatchNormalization(true, 1024, name: "DNI3 Norm1"), new ReLU(name: "DNI3 ReLU1"), new Linear(true, 1024, 1024, name: "DNI3 Linear2"), new BatchNormalization(true, 1024, name: "DNI3 Norm2"), new ReLU(name: "DNI3 ReLU2"), new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3") ); //optimizer Layer1.SetOptimizer(new Adam()); Layer2.SetOptimizer(new Adam()); Layer3.SetOptimizer(new Adam()); Layer4.SetOptimizer(new Adam()); DNI1.SetOptimizer(new Adam()); DNI2.SetOptimizer(new Adam()); DNI3.SetOptimizer(new Adam()); // Three generations learning for (int epoch = 0; epoch < 20; epoch++) { RILogManager.Default?.SendDebug("epoch " + (epoch + 1)); Real totalLoss = 0; Real DNI1totalLoss = 0; Real DNI2totalLoss = 0; Real DNI3totalLoss = 0; long totalLossCount = 0; long DNI1totalLossCount = 0; long DNI2totalLossCount = 0; long DNI3totalLossCount = 0; // how many times to run the batch for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++) { // Get data randomly from the training data TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28); // Run first tier NdArray[] layer1ForwardResult = Layer1.Forward(true, datasetX.Data); // Obtain the slope of the first layer NdArray[] DNI1Result = DNI1.Forward(true, layer1ForwardResult); // Apply the slope of the first layer layer1ForwardResult[0].Grad = DNI1Result[0].Data.ToArray(); // Update first layer Layer1.Backward(true, layer1ForwardResult); layer1ForwardResult[0].ParentFunc = null; // Backward was executed and cut off calculation graph Layer1.Update(); // Run Layer 
2 NdArray[] layer2ForwardResult = Layer2.Forward(true, layer1ForwardResult); // Get the inclination of the second layer NdArray[] DNI2Result = DNI2.Forward(true, layer2ForwardResult); // Apply the slope of the second layer layer2ForwardResult[0].Grad = DNI2Result[0].Data.ToArray(); // Update layer 2 Layer2.Backward(true, layer2ForwardResult); layer2ForwardResult[0].ParentFunc = null; // Learn DNI for first tier Real DNI1loss = new MeanSquaredError().Evaluate(DNI1Result, new NdArray(layer1ForwardResult[0].Grad, DNI1Result[0].Shape, DNI1Result[0].BatchCount)); Layer2.Update(); DNI1.Backward(true, DNI1Result); DNI1.Update(); DNI1totalLoss += DNI1loss; DNI1totalLossCount++; // run layer 3 NdArray[] layer3ForwardResult = Layer3.Forward(true, layer2ForwardResult); // Get the inclination of the third layer NdArray[] DNI3Result = DNI3.Forward(true, layer3ForwardResult); // Apply the slope of the third layer layer3ForwardResult[0].Grad = DNI3Result[0].Data.ToArray(); // Update layer 3 Layer3.Backward(true, layer3ForwardResult); layer3ForwardResult[0].ParentFunc = null; // Run DNI learning for layer 2 Real DNI2loss = new MeanSquaredError().Evaluate(DNI2Result, new NdArray(layer2ForwardResult[0].Grad, DNI2Result[0].Shape, DNI2Result[0].BatchCount)); Layer3.Update(); DNI2.Backward(true, DNI2Result); DNI2.Update(); DNI2totalLoss += DNI2loss; DNI2totalLossCount++; // run layer 4 NdArray[] layer4ForwardResult = Layer4.Forward(true, layer3ForwardResult); // Obtain the slope of the fourth layer Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, datasetX.Label); // Update fourth layer Layer4.Backward(true, layer4ForwardResult); layer4ForwardResult[0].ParentFunc = null; totalLoss += sumLoss; totalLossCount++; // Run DNI learning for layer 3 Real DNI3loss = new MeanSquaredError().Evaluate(DNI3Result, new NdArray(layer3ForwardResult[0].Grad, DNI3Result[0].Shape, DNI3Result[0].BatchCount)); Layer4.Update(); DNI3.Backward(true, DNI3Result); DNI3.Update(); DNI3totalLoss += DNI3loss; DNI3totalLossCount++; RILogManager.Default?.SendDebug("batch count " + i + "/" + TRAIN_DATA_COUNT); RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount); RILogManager.Default?.SendDebug("local loss " + sumLoss); RILogManager.Default?.SendDebug("DNI1 total loss " + DNI1totalLoss / DNI1totalLossCount); RILogManager.Default?.SendDebug("DNI2 total loss " + DNI2totalLoss / DNI2totalLossCount); RILogManager.Default?.SendDebug("DNI3 total loss " + DNI3totalLoss / DNI3totalLossCount); RILogManager.Default?.SendDebug("DNI1 local loss " + DNI1loss); RILogManager.Default?.SendDebug("DNI2 local loss " + DNI2loss); RILogManager.Default?.SendDebug("DNI3 local loss " + DNI3loss); // Test the accuracy if you move the batch 20 times if (i % 20 == 0) { RILogManager.Default?.SendDebug("Testing..."); // Get data randomly from test data TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28); // Run test Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label); RILogManager.Default?.SendDebug("accuracy " + accuracy); } } } }