/// <summary>
/// Feeds every consecutive pair of time steps of the mini-batch through the model,
/// accumulates the mean squared error, and then back-propagates the stored outputs
/// in reverse order (last step first).
/// </summary>
/// <param name="model">Network to run; validated as non-null.</param>
/// <param name="sequences">Sequences indexed by mini-batch entry; validated as non-null.</param>
/// <returns>The accumulated loss divided by (LENGTH_OF_SEQUENCE - 1).</returns>
static Real ComputeLoss(FunctionStack model, NdArray[] sequences)
{
    Ensure.Argument(model).NotNull();
    Ensure.Argument(sequences).NotNull();

    // Number of (input, target) pairs: each step predicts the following element.
    var stepCount = LENGTH_OF_SEQUENCE - 1;

    // Running total of the error over all steps.
    Real lossSum = 0;

    var inputBatch = new NdArray(new[] { 1 }, MINI_BATCH_SIZE, (Function)null);
    var targetBatch = new NdArray(new[] { 1 }, MINI_BATCH_SIZE, (Function)null);

    // Outputs are kept so that Backward can be run newest-first afterwards.
    var pendingOutputs = new Stack<NdArray[]>();

    for (int step = 0; step < stepCount; step++)
    {
        for (int sample = 0; sample < MINI_BATCH_SIZE; sample++)
        {
            NdArray sequence = sequences[sample];
            inputBatch.Data[sample] = sequence.Data[step];
            targetBatch.Data[sample] = sequence.Data[step + 1];
        }

        NdArray[] output = model.Forward(true, inputBatch);
        lossSum += new MeanSquaredError().Evaluate(output, targetBatch);
        pendingOutputs.Push(output);
    }

    while (pendingOutputs.Count > 0)
    {
        model.Backward(true, pendingOutputs.Pop());
    }

    return lossSum / stepCount;
}
/// <summary>
/// Runs the model over each time step of the mini-batch (input = element i,
/// target = element i + 1), sums the mean squared error, and back-propagates
/// the collected outputs from the last step to the first.
/// </summary>
/// <param name="model">Network to evaluate and back-propagate through.</param>
/// <param name="sequences">Sequences indexed by mini-batch entry.</param>
/// <returns>The summed loss divided by (LENGTH_OF_SEQUENCE - 1).</returns>
static Real ComputeLoss(FunctionStack model, NdArray[] sequences)
{
    // Aggregate error over the whole sequence.
    Real accumulatedLoss = 0;

    NdArray input = new NdArray(new[] { 1 }, MINI_BATCH_SIZE);
    NdArray target = new NdArray(new[] { 1 }, MINI_BATCH_SIZE);
    Stack<NdArray[]> outputs = new Stack<NdArray[]>();

    var pairCount = LENGTH_OF_SEQUENCE - 1;

    for (int t = 0; t < pairCount; t++)
    {
        // Fill the batch buffers for this time step.
        for (int b = 0; b < MINI_BATCH_SIZE; b++)
        {
            input.Data[b] = sequences[b].Data[t];
            target.Data[b] = sequences[b].Data[t + 1];
        }

        NdArray[] stepOutput = model.Forward(input);
        accumulatedLoss += new MeanSquaredError().Evaluate(stepOutput, target);
        outputs.Push(stepOutput);
    }

    // Unwind the stack so the most recent step is back-propagated first.
    while (outputs.Count > 0)
    {
        NdArray[] stepOutput = outputs.Pop();
        model.Backward(stepOutput);
    }

    return accumulatedLoss / pairCount;
}
/// <summary>
/// Performs one training pass on a batch: forward, loss evaluation, backward,
/// and (optionally) a parameter update.
/// </summary>
/// <param name="functionStack">Network to train.</param>
/// <param name="input">Batch input passed to Forward.</param>
/// <param name="teach">Teaching (target) data passed to the loss function.</param>
/// <param name="lossFunction">Loss used to evaluate the forward result.</param>
/// <param name="isUpdate">When false, Update() is skipped after the backward pass.</param>
/// <returns>The value returned by <paramref name="lossFunction"/>.</returns>
public static Real Train(FunctionStack functionStack, NdArray input, NdArray teach, LossFunction lossFunction, bool isUpdate = true)
{
    // Forward pass; the output is kept because Backward starts from it.
    NdArray[] forwardOutput = functionStack.Forward(input);

    // The loss is evaluated before the backward pass.
    Real loss = lossFunction.Evaluate(forwardOutput, teach);

    // Backward pass for the batch.
    functionStack.Backward(forwardOutput);

    if (!isUpdate)
    {
        return loss;
    }

    // Apply the update.
    functionStack.Update();
    return loss;
}
/// <summary>
/// Trains the stack on a single batch: runs Forward, evaluates the loss against
/// the teaching data, runs Backward, and calls Update() unless disabled.
/// </summary>
/// <param name="functionStack">Network to train.</param>
/// <param name="input">Batch input.</param>
/// <param name="teach">Teaching (target) data.</param>
/// <param name="lossFunction">Loss function evaluated on the forward result.</param>
/// <param name="isUpdate">True to call Update() after the backward pass.</param>
/// <returns>The loss reported by <paramref name="lossFunction"/>.</returns>
public static Real Train(FunctionStack functionStack, NdArray input, NdArray teach, LossFunction lossFunction, bool isUpdate = true)
{
    NdArray[] prediction = functionStack.Forward(input);
    Real batchLoss = lossFunction.Evaluate(prediction, teach);

    // Back-propagate starting from the forward result.
    functionStack.Backward(prediction);

    if (isUpdate)
    {
        functionStack.Update();
    }

    return batchLoss;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Performs one training pass on a batch: forward propagation, loss evaluation,
/// backward propagation and, optionally, a parameter update.
/// </summary>
///
/// <param name="functionStack"> Stack of functions to train. </param>
/// <param name="input">         The input data. </param>
/// <param name="teach">         The teaching data. </param>
/// <param name="lossFunction">  The loss function. </param>
/// <param name="isUpdate">      (Optional) True to call Update() on the stack after the
///                              backward pass. </param>
/// <param name="verbose">       (Optional) True to emit log messages through
///                              RILogManager.Default and to request verbose
///                              forward/backward passes. </param>
///
/// <returns> The loss returned by <paramref name="lossFunction"/>. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static Real Train([NotNull] FunctionStack functionStack, [CanBeNull] NdArray input, [CanBeNull] NdArray teach, [NotNull] LossFunction lossFunction, bool isUpdate = true, bool verbose = true)
{
    if (verbose)
    {
        RILogManager.Default?.EnterMethod("Training " + functionStack.Name);
    }

    Real sumLoss;
    try
    {
        if (verbose)
        {
            RILogManager.Default?.SendDebug("Forward propagation");
        }

        NdArray[] result = functionStack.Forward(verbose, input);

        if (verbose)
        {
            RILogManager.Default?.SendDebug("Evaluating loss");
        }

        sumLoss = lossFunction.Evaluate(result, teach);

        // Run Backward batch
        if (verbose)
        {
            RILogManager.Default?.SendDebug("Backward propagation");
        }

        functionStack.Backward(verbose, result);

        if (isUpdate)
        {
            if (verbose)
            {
                RILogManager.Default?.SendDebug("Updating stack");
            }

            functionStack.Update();
        }
    }
    finally
    {
        // Always balance EnterMethod, even when a step above throws, so the
        // log's method scope is not left open.
        if (verbose)
        {
            RILogManager.Default?.ExitMethod("Training " + functionStack.Name);
        }
    }

    if (verbose)
    {
        RILogManager.Default?.ViewerSendWatch("Local Loss", sumLoss.ToString(), sumLoss);
    }

    return sumLoss;
}
/// <summary>
/// Runs the model over each time step of the mini-batch (input = element i,
/// target = element i + 1), adding the mean squared error of every step and
/// back-propagating immediately after each step.
/// </summary>
/// <param name="model">Network to evaluate and back-propagate through.</param>
/// <param name="sequences">Sequences indexed by mini-batch entry.</param>
/// <returns>The summed loss divided by (LENGTH_OF_SEQUENCE - 1).</returns>
static Real ComputeLoss(FunctionStack<Real> model, NdArray<Real>[] sequences)
{
    // Aggregate error over the whole sequence.
    Real lossSum = 0;

    var input = new NdArray<Real>(new[] { 1 }, MINI_BATCH_SIZE);
    var target = new NdArray<Real>(new[] { 1 }, MINI_BATCH_SIZE);

    var pairCount = LENGTH_OF_SEQUENCE - 1;

    for (int step = 0; step < pairCount; step++)
    {
        for (int sample = 0; sample < MINI_BATCH_SIZE; sample++)
        {
            NdArray<Real> sequence = sequences[sample];
            input.Data[sample] = sequence.Data[step];
            target.Data[sample] = sequence.Data[step + 1];
        }

        // Only the first output of the stack is used.
        NdArray<Real>[] outputs = model.Forward(input);
        NdArray<Real> prediction = outputs[0];

        lossSum += new MeanSquaredError<Real>().Evaluate(prediction, target);
        model.Backward(prediction);
    }

    return lossSum / pairCount;
}
/// <summary>
/// Trains a four-stage MNIST classifier in which the first three stages are updated
/// from gradients predicted by auxiliary "DNI" networks instead of waiting for the
/// gradient of the following stage. Each DNI network is in turn trained against the
/// gradient that the following stage writes back. Progress is printed after every
/// batch and accuracy is measured every 20 batches.
/// </summary>
public static void Run()
{
    // Prepare the MNIST data.
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnist = new MnistData();

    Console.WriteLine("Training Start...");

    // Describe the network as separate FunctionStacks, one per stage.
    FunctionStack stage1 = new FunctionStack(
        new Linear(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );

    FunctionStack stage2 = new FunctionStack(
        new Linear(256, 256, name: "l2 Linear"),
        new BatchNormalization(256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );

    FunctionStack stage3 = new FunctionStack(
        new Linear(256, 256, name: "l3 Linear"),
        new BatchNormalization(256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );

    FunctionStack stage4 = new FunctionStack(
        new Linear(256, 10, name: "l4 Linear")
    );

    // A FunctionStack can itself be stacked as a Function; this combined
    // network is only used for the accuracy test.
    FunctionStack fullNetwork = new FunctionStack(stage1, stage2, stage3, stage4);

    // Gradient predictors, one for each of the first three stages.
    FunctionStack predictor1 = new FunctionStack(
        new Linear(256, 1024, name: "DNI1 Linear1"),
        new BatchNormalization(1024, name: "DNI1 Nrom1"),
        new ReLU(name: "DNI1 ReLU1"),
        new Linear(1024, 1024, name: "DNI1 Linear2"),
        new BatchNormalization(1024, name: "DNI1 Nrom2"),
        new ReLU(name: "DNI1 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );

    FunctionStack predictor2 = new FunctionStack(
        new Linear(256, 1024, name: "DNI2 Linear1"),
        new BatchNormalization(1024, name: "DNI2 Nrom1"),
        new ReLU(name: "DNI2 ReLU1"),
        new Linear(1024, 1024, name: "DNI2 Linear2"),
        new BatchNormalization(1024, name: "DNI2 Nrom2"),
        new ReLU(name: "DNI2 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3")
    );

    FunctionStack predictor3 = new FunctionStack(
        new Linear(256, 1024, name: "DNI3 Linear1"),
        new BatchNormalization(1024, name: "DNI3 Nrom1"),
        new ReLU(name: "DNI3 ReLU1"),
        new Linear(1024, 1024, name: "DNI3 Linear2"),
        new BatchNormalization(1024, name: "DNI3 Nrom2"),
        new ReLU(name: "DNI3 ReLU2"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3")
    );

    // Declare the optimizers.
    stage1.SetOptimizer(new Adam());
    stage2.SetOptimizer(new Adam());
    stage3.SetOptimizer(new Adam());
    stage4.SetOptimizer(new Adam());

    predictor1.SetOptimizer(new Adam());
    predictor2.SetOptimizer(new Adam());
    predictor3.SetOptimizer(new Adam());

    // Train for 20 epochs.
    for (int epoch = 0; epoch < 20; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Running totals for this epoch.
        Real lossTotal = 0;
        Real predictor1LossTotal = 0;
        Real predictor2LossTotal = 0;
        Real predictor3LossTotal = 0;

        long lossCount = 0;
        long predictor1LossCount = 0;
        long predictor2LossCount = 0;
        long predictor3LossCount = 0;

        // Number of batches to run.
        for (int batch = 1; batch < TRAIN_DATA_COUNT + 1; batch++)
        {
            // Draw a random batch from the training data.
            TestDataSet trainBatch = mnist.GetRandomXSet(BATCH_DATA_COUNT);

            // ---- Stage 1 ----
            NdArray[] out1 = stage1.Forward(trainBatch.Data);

            // Predict the gradient of stage 1's output and apply it.
            NdArray[] predicted1 = predictor1.Forward(out1);
            out1[0].Grad = predicted1[0].Data.ToArray();

            // Update stage 1.
            stage1.Backward(out1);
            out1[0].ParentFunc = null; // Backward has run, so cut the computation graph here.
            stage1.Update();

            // ---- Stage 2 ----
            NdArray[] out2 = stage2.Forward(out1);

            // Predict the gradient of stage 2's output and apply it.
            NdArray[] predicted2 = predictor2.Forward(out2);
            out2[0].Grad = predicted2[0].Data.ToArray();

            // Back-propagate stage 2.
            stage2.Backward(out2);
            out2[0].ParentFunc = null;

            // Train the stage-1 predictor against the gradient now stored on out1.
            NdArray target1 = new NdArray(out1[0].Grad, predicted1[0].Shape, predicted1[0].BatchCount);
            Real predictor1Loss = new MeanSquaredError().Evaluate(predicted1, target1);

            stage2.Update();

            predictor1.Backward(predicted1);
            predictor1.Update();

            predictor1LossTotal += predictor1Loss;
            predictor1LossCount++;

            // ---- Stage 3 ----
            NdArray[] out3 = stage3.Forward(out2);

            // Predict the gradient of stage 3's output and apply it.
            NdArray[] predicted3 = predictor3.Forward(out3);
            out3[0].Grad = predicted3[0].Data.ToArray();

            // Back-propagate stage 3.
            stage3.Backward(out3);
            out3[0].ParentFunc = null;

            // Train the stage-2 predictor against the gradient now stored on out2.
            NdArray target2 = new NdArray(out2[0].Grad, predicted2[0].Shape, predicted2[0].BatchCount);
            Real predictor2Loss = new MeanSquaredError().Evaluate(predicted2, target2);

            stage3.Update();

            predictor2.Backward(predicted2);
            predictor2.Update();

            predictor2LossTotal += predictor2Loss;
            predictor2LossCount++;

            // ---- Stage 4 ----
            NdArray[] out4 = stage4.Forward(out3);

            // The last stage uses the real loss against the labels.
            Real batchLoss = new SoftmaxCrossEntropy().Evaluate(out4, trainBatch.Label);

            // Back-propagate stage 4.
            stage4.Backward(out4);
            out4[0].ParentFunc = null;

            lossTotal += batchLoss;
            lossCount++;

            // Train the stage-3 predictor against the gradient now stored on out3.
            NdArray target3 = new NdArray(out3[0].Grad, predicted3[0].Shape, predicted3[0].BatchCount);
            Real predictor3Loss = new MeanSquaredError().Evaluate(predicted3, target3);

            stage4.Update();

            predictor3.Backward(predicted3);
            predictor3.Update();

            predictor3LossTotal += predictor3Loss;
            predictor3LossCount++;

            Console.WriteLine("\nbatch count " + batch + "/" + TRAIN_DATA_COUNT);

            // Report results.
            Console.WriteLine("total loss " + lossTotal / lossCount);
            Console.WriteLine("local loss " + batchLoss);

            Console.WriteLine("\nDNI1 total loss " + predictor1LossTotal / predictor1LossCount);
            Console.WriteLine("DNI2 total loss " + predictor2LossTotal / predictor2LossCount);
            Console.WriteLine("DNI3 total loss " + predictor3LossTotal / predictor3LossCount);

            Console.WriteLine("\nDNI1 local loss " + predictor1Loss);
            Console.WriteLine("DNI2 local loss " + predictor2Loss);
            Console.WriteLine("DNI3 local loss " + predictor3Loss);

            // Only test accuracy after every 20th batch.
            if (batch % 20 != 0)
            {
                continue;
            }

            Console.WriteLine("\nTesting...");

            // Draw a random batch from the test data.
            TestDataSet testBatch = mnist.GetRandomYSet(TEST_DATA_COUNT);

            // Run the test.
            Real accuracy = Trainer.Accuracy(fullNetwork, testBatch.Data, testBatch.Label);
            Console.WriteLine("accuracy " + accuracy);
        }
    }
}
/// <summary>
/// Downloads the train/validation/test corpora, builds the vocabulary, and trains an
/// EmbedID + two-layer LSTM language model with truncated back-propagation through
/// time. Validation perplexity is printed every <c>jump</c> iterations and test
/// perplexity at the end.
/// </summary>
public static void Run()
{
    Console.WriteLine("Build Vocabulary.");

    Vocabulary vocab = new Vocabulary();

    string trainFile = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE);
    string validFile = InternetFileDownloader.Donwload(DOWNLOAD_URL + VALID_FILE, VALID_FILE);
    string testFile = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE);

    int[] trainTokens = vocab.LoadData(trainFile);
    int[] validTokens = vocab.LoadData(validFile);
    int[] testTokens = vocab.LoadData(testFile);

    int vocabSize = vocab.Length;

    Console.WriteLine("Network Initilizing.");
    FunctionStack model = new FunctionStack(
        new EmbedID(vocabSize, N_UNITS, name: "l1 EmbedID"),
        new Dropout(),
        new LSTM(N_UNITS, N_UNITS, name: "l2 LSTM"),
        new Dropout(),
        new LSTM(N_UNITS, N_UNITS, name: "l3 LSTM"),
        new Dropout(),
        new Linear(N_UNITS, vocabSize, name: "l4 Linear")
    );

    // Rather than capping at the given threshold, the correction rate is taken
    // from the L2 norm of all parameters.
    GradientClipping clipping = new GradientClipping(threshold: GRAD_CLIP);
    SGD sgd = new SGD(learningRate: 1);
    model.SetOptimizer(clipping, sgd);

    Real wholeLen = trainTokens.Length;

    // Distance between the read positions of neighbouring batch entries.
    int jump = (int)Math.Floor(wholeLen / BATCH_SIZE);
    int epoch = 0;

    // Forward outputs waiting to be back-propagated.
    Stack<NdArray[]> pending = new Stack<NdArray[]>();

    Console.WriteLine("Train Start.");

    int totalIterations = jump * N_EPOCH;
    for (int iter = 0; iter < totalIterations; iter++)
    {
        NdArray input = new NdArray(new[] { 1 }, BATCH_SIZE);
        NdArray target = new NdArray(new[] { 1 }, BATCH_SIZE);

        for (int b = 0; b < BATCH_SIZE; b++)
        {
            // Each batch entry reads from its own offset; the target is the next token.
            input.Data[b] = trainTokens[(int)((jump * b + iter) % wholeLen)];
            target.Data[b] = trainTokens[(int)((jump * b + iter + 1) % wholeLen)];
        }

        NdArray[] output = model.Forward(input);
        Real loss = new SoftmaxCrossEntropy().Evaluate(output, target);
        pending.Push(output);
        Console.WriteLine("[{0}/{1}] Loss: {2}", iter + 1, jump, loss);

        // Run truncated BPTT
        bool truncationPoint = (iter + 1) % BPROP_LEN == 0;
        if (truncationPoint)
        {
            while (pending.Count > 0)
            {
                Console.WriteLine("backward" + pending.Count);
                model.Backward(pending.Pop());
            }

            model.Update();
            model.ResetState();
        }

        bool epochBoundary = (iter + 1) % jump == 0;
        if (epochBoundary)
        {
            epoch++;
            Console.WriteLine("evaluate");
            Console.WriteLine("validation perplexity: {0}", Evaluate(model, validTokens));

            // From the sixth epoch on, shrink the learning rate.
            if (epoch >= 6)
            {
                sgd.LearningRate /= 1.2;
                Console.WriteLine("learning rate =" + sgd.LearningRate);
            }
        }
    }

    Console.WriteLine("test start");
    Console.WriteLine("test perplexity:" + Evaluate(model, testTokens));
}
/// <summary>
/// Trains an EmbedID + two-layer LSTM language model with truncated back-propagation
/// through time while recording results into a histogram log at <c>LogPath</c>.
/// After training, the log contents and the percentile distribution are reported.
/// </summary>
/// <remarks>
/// The log writer and its underlying stream are released in <c>finally</c> blocks so
/// the log file handle is not leaked when downloading, loading or training throws.
/// </remarks>
public static void Run()
{
    // Assigned inside the try block, read again after the log has been closed.
    LongHistogram accumulatingHistogram;

    _outputStream = File.Create(LogPath);
    try
    {
        _logWriter = new HistogramLogWriter(_outputStream);
        try
        {
            _logWriter.Write(DateTime.Now);

            var recorder = HistogramFactory
                .With64BitBucketSize()
                ?.WithValuesFrom(1)
                ?.WithValuesUpTo(2345678912345)
                ?.WithPrecisionOf(3)
                ?.WithThreadSafeWrites()
                ?.WithThreadSafeReads()
                ?.Create();

            accumulatingHistogram = new LongHistogram(2345678912345, 3);

            var size = accumulatingHistogram.GetEstimatedFootprintInBytes();
            RILogManager.Default?.SendDebug("Histogram size = {0} bytes ({1:F2} MB)", size, size / 1024.0 / 1024.0);

            RILogManager.Default?.SendDebug("Recorded latencies [in system clock ticks]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.None, useCsvFormat: true);
            Console.WriteLine();

            RILogManager.Default?.SendDebug("Recorded latencies [in usec]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMicroseconds, useCsvFormat: true);
            Console.WriteLine();

            RILogManager.Default?.SendDebug("Recorded latencies [in msec]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);
            Console.WriteLine();

            RILogManager.Default?.SendDebug("Recorded latencies [in sec]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToSeconds, useCsvFormat: true);

            DocumentResults(accumulatingHistogram, recorder);

            RILogManager.Default?.SendDebug("Build Vocabulary.");
            DocumentResults(accumulatingHistogram, recorder);

            Vocabulary vocabulary = new Vocabulary();
            DocumentResults(accumulatingHistogram, recorder);

            string trainPath = InternetFileDownloader.Download(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE);
            DocumentResults(accumulatingHistogram, recorder);

            string validPath = InternetFileDownloader.Download(DOWNLOAD_URL + VALID_FILE, VALID_FILE);
            DocumentResults(accumulatingHistogram, recorder);

            string testPath = InternetFileDownloader.Download(DOWNLOAD_URL + TEST_FILE, TEST_FILE);
            DocumentResults(accumulatingHistogram, recorder);

            int[] trainData = vocabulary.LoadData(trainPath);
            DocumentResults(accumulatingHistogram, recorder);

            int[] validData = vocabulary.LoadData(validPath);
            DocumentResults(accumulatingHistogram, recorder);

            int[] testData = vocabulary.LoadData(testPath);
            DocumentResults(accumulatingHistogram, recorder);

            int nVocab = vocabulary.Length;

            RILogManager.Default?.SendDebug("Network Initializing.");
            FunctionStack model = new FunctionStack("Test10",
                new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"),
                new Dropout(),
                new LSTM(true, N_UNITS, N_UNITS, name: "l2 LSTM"),
                new Dropout(),
                new LSTM(true, N_UNITS, N_UNITS, name: "l3 LSTM"),
                new Dropout(),
                new Linear(true, N_UNITS, nVocab, name: "l4 Linear")
            );
            DocumentResults(accumulatingHistogram, recorder);

            // Do not cease at the given threshold, correct the rate by taking the rate from L2Norm of all parameters
            GradientClipping gradientClipping = new GradientClipping(threshold: GRAD_CLIP);
            SGD sgd = new SGD(learningRate: 1);
            model.SetOptimizer(gradientClipping, sgd);
            DocumentResults(accumulatingHistogram, recorder);

            Real wholeLen = trainData.Length;

            // Distance between the read positions of neighbouring batch entries.
            int jump = (int)Math.Floor(wholeLen / BATCH_SIZE);
            int epoch = 0;

            // Forward outputs waiting to be back-propagated.
            Stack<NdArray[]> backNdArrays = new Stack<NdArray[]>();

            RILogManager.Default?.SendDebug("Train Start.");
            double dVal;

            // Batch buffers are allocated once and refilled on every iteration.
            NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);
            NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);

            for (int i = 0; i < jump * N_EPOCH; i++)
            {
                for (int j = 0; j < BATCH_SIZE; j++)
                {
                    // Each batch entry reads from its own offset; the target is the next token.
                    x.Data[j] = trainData[(int)((jump * j + i) % wholeLen)];
                    t.Data[j] = trainData[(int)((jump * j + i + 1) % wholeLen)];
                }

                NdArray[] result = model.Forward(true, x);
                Real sumLoss = new SoftmaxCrossEntropy().Evaluate(result, t);
                backNdArrays.Push(result);
                RILogManager.Default?.SendDebug("[{0}/{1}] Loss: {2}", i + 1, jump, sumLoss);

                // Run truncated BPTT
                if ((i + 1) % BPROP_LEN == 0)
                {
                    while (backNdArrays.Count > 0)
                    {
                        RILogManager.Default?.SendDebug("backward" + backNdArrays.Count);
                        model.Backward(true, backNdArrays.Pop());
                    }

                    model.Update();
                    model.ResetState();
                }

                if ((i + 1) % jump == 0)
                {
                    epoch++;
                    RILogManager.Default?.SendDebug("evaluate");
                    dVal = Evaluate(model, validData);
                    RILogManager.Default?.SendDebug($"validation perplexity: {dVal}");

                    // From the sixth epoch on, shrink the learning rate.
                    if (epoch >= 6)
                    {
                        sgd.LearningRate /= 1.2;
                        RILogManager.Default?.SendDebug("learning rate =" + sgd.LearningRate);
                    }
                }

                DocumentResults(accumulatingHistogram, recorder);
            }

            RILogManager.Default?.SendDebug("test start");
            dVal = Evaluate(model, testData);
            RILogManager.Default?.SendDebug("test perplexity:" + dVal);
            DocumentResults(accumulatingHistogram, recorder);
        }
        finally
        {
            // Close the writer before its stream, on success and on failure alike.
            _logWriter.Dispose();
        }
    }
    finally
    {
        _outputStream.Dispose();
    }

    // The log file is closed at this point, so it can be read back.
    RILogManager.Default?.SendDebug("Log contents");
    RILogManager.Default?.SendDebug(File.ReadAllText(LogPath));
    Console.WriteLine();
    RILogManager.Default?.SendDebug("Percentile distribution (values reported in milliseconds)");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);

    RILogManager.Default?.SendDebug("Mean: " + BytesToString(accumulatingHistogram.GetMean()) + ", StdDev: " + BytesToString(accumulatingHistogram.GetStdDeviation()));
}
/// <summary>
/// Trains a four-stage MNIST classifier in which the first three stages are updated
/// from gradients predicted by auxiliary "cDNI" networks. Each cDNI network receives
/// the training data produced by a <c>ResultDataSet</c> built from the stage output
/// and the labels, and is itself trained against the gradient that the following
/// stage writes back. Progress is printed after every batch and accuracy is measured
/// every 20 batches.
/// </summary>
public static void Run()
{
    // Prepare the MNIST data.
    Console.WriteLine("MNIST Data Loading...");
    MnistData<Real> mnist = new MnistData<Real>();

    Console.WriteLine("Training Start...");

    // Describe the network as separate FunctionStacks, one per stage.
    FunctionStack<Real> stage1 = new FunctionStack<Real>(
        new Linear<Real>(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization<Real>(256, name: "l1 Norm"),
        new ReLU<Real>(name: "l1 ReLU")
    );

    FunctionStack<Real> stage2 = new FunctionStack<Real>(
        new Linear<Real>(256, 256, name: "l2 Linear"),
        new BatchNormalization<Real>(256, name: "l2 Norm"),
        new ReLU<Real>(name: "l2 ReLU")
    );

    FunctionStack<Real> stage3 = new FunctionStack<Real>(
        new Linear<Real>(256, 256, name: "l3 Linear"),
        new BatchNormalization<Real>(256, name: "l3 Norm"),
        new ReLU<Real>(name: "l3 ReLU")
    );

    FunctionStack<Real> stage4 = new FunctionStack<Real>(
        new Linear<Real>(256, 10, name: "l4 Linear")
    );

    // A FunctionStack can itself be stacked as a Function; this combined
    // network is only used for the accuracy test.
    FunctionStack<Real> fullNetwork = new FunctionStack<Real>(stage1, stage2, stage3, stage4);

    // Gradient predictors; their input is 256 + 10 wide.
    FunctionStack<Real> predictor1 = new FunctionStack<Real>(
        new Linear<Real>(256 + 10, 1024, name: "cDNI1 Linear1"),
        new BatchNormalization<Real>(1024, name: "cDNI1 Nrom1"),
        new ReLU<Real>(name: "cDNI1 ReLU1"),
        new Linear<Real>(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );

    FunctionStack<Real> predictor2 = new FunctionStack<Real>(
        new Linear<Real>(256 + 10, 1024, name: "cDNI2 Linear1"),
        new BatchNormalization<Real>(1024, name: "cDNI2 Nrom1"),
        new ReLU<Real>(name: "cDNI2 ReLU1"),
        new Linear<Real>(1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
    );

    FunctionStack<Real> predictor3 = new FunctionStack<Real>(
        new Linear<Real>(256 + 10, 1024, name: "cDNI3 Linear1"),
        new BatchNormalization<Real>(1024, name: "cDNI3 Nrom1"),
        new ReLU<Real>(name: "cDNI3 ReLU1"),
        new Linear<Real>(1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
    );

    // Declare the optimizers for the four stages.
    Adam<Real> stage1Adam = new Adam<Real>(0.00003f);
    Adam<Real> stage2Adam = new Adam<Real>(0.00003f);
    Adam<Real> stage3Adam = new Adam<Real>(0.00003f);
    Adam<Real> stage4Adam = new Adam<Real>(0.00003f);

    stage1Adam.SetUp(stage1);
    stage2Adam.SetUp(stage2);
    stage3Adam.SetUp(stage3);
    stage4Adam.SetUp(stage4);

    // Declare the optimizers for the gradient predictors.
    Adam<Real> predictor1Adam = new Adam<Real>(0.00003f);
    Adam<Real> predictor2Adam = new Adam<Real>(0.00003f);
    Adam<Real> predictor3Adam = new Adam<Real>(0.00003f);

    predictor1Adam.SetUp(predictor1);
    predictor2Adam.SetUp(predictor2);
    predictor3Adam.SetUp(predictor3);

    for (int epoch = 0; epoch < 10; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Running totals for this epoch.
        Real lossTotal = 0;
        Real predictor1LossTotal = 0;
        Real predictor2LossTotal = 0;
        Real predictor3LossTotal = 0;

        long lossCount = 0;
        long predictor1LossCount = 0;
        long predictor2LossCount = 0;
        long predictor3LossCount = 0;

        // Number of batches to run.
        for (int batch = 1; batch < TRAIN_DATA_COUNT + 1; batch++)
        {
            // Draw a random batch from the training data.
            TestDataSet<Real> trainBatch = mnist.Train.GetRandomDataSet(BATCH_DATA_COUNT);

            // ---- Stage 1 ----
            NdArray<Real> out1 = stage1.Forward(trainBatch.Data)[0];
            ResultDataSet set1 = new ResultDataSet(out1, trainBatch.Label);

            // Predict the gradient of stage 1's output and apply it.
            NdArray<Real> predicted1 = predictor1.Forward(set1.GetTrainData())[0];
            out1.Grad = predicted1.Data.ToArray();

            // Update stage 1.
            stage1.Backward(out1);
            out1.ParentFunc = null;
            stage1Adam.Update();

            // ---- Stage 2 ----
            NdArray<Real> out2 = stage2.Forward(set1.Result)[0];
            ResultDataSet set2 = new ResultDataSet(out2, set1.Label);

            // Predict the gradient of stage 2's output and apply it.
            NdArray<Real> predicted2 = predictor2.Forward(set2.GetTrainData())[0];
            out2.Grad = predicted2.Data.ToArray();

            // Back-propagate stage 2.
            stage2.Backward(out2);
            out2.ParentFunc = null;

            // Train the stage-1 predictor against the gradient now stored on set1.Result.
            NdArray<Real> target1 = new NdArray<Real>(set1.Result.Grad, predicted1.Shape, predicted1.BatchCount);
            Real predictor1Loss = new MeanSquaredError<Real>().Evaluate(predicted1, target1);

            stage2Adam.Update();

            predictor1.Backward(predicted1);
            predictor1Adam.Update();

            predictor1LossTotal += predictor1Loss;
            predictor1LossCount++;

            // ---- Stage 3 ----
            NdArray<Real> out3 = stage3.Forward(set2.Result)[0];
            ResultDataSet set3 = new ResultDataSet(out3, set2.Label);

            // Predict the gradient of stage 3's output and apply it.
            NdArray<Real> predicted3 = predictor3.Forward(set3.GetTrainData())[0];
            out3.Grad = predicted3.Data.ToArray();

            // Back-propagate stage 3.
            stage3.Backward(out3);
            out3.ParentFunc = null;

            // Train the stage-2 predictor against the gradient now stored on set2.Result.
            NdArray<Real> target2 = new NdArray<Real>(set2.Result.Grad, predicted2.Shape, predicted2.BatchCount);
            Real predictor2Loss = new MeanSquaredError<Real>().Evaluate(predicted2, target2);

            stage3Adam.Update();

            predictor2.Backward(predicted2);
            predictor2Adam.Update();

            predictor2LossTotal += predictor2Loss;
            predictor2LossCount++;

            // ---- Stage 4 ----
            NdArray<Real> out4 = stage4.Forward(set3.Result)[0];

            // The last stage uses the real loss against the labels.
            Real batchLoss = new SoftmaxCrossEntropy<Real>().Evaluate(out4, set3.Label);

            // Back-propagate stage 4.
            stage4.Backward(out4);
            out4.ParentFunc = null;

            lossTotal += batchLoss;
            lossCount++;

            // Train the stage-3 predictor against the gradient now stored on set3.Result.
            NdArray<Real> target3 = new NdArray<Real>(set3.Result.Grad, predicted3.Shape, predicted3.BatchCount);
            Real predictor3Loss = new MeanSquaredError<Real>().Evaluate(predicted3, target3);

            stage4Adam.Update();

            predictor3.Backward(predicted3);
            predictor3Adam.Update();

            predictor3LossTotal += predictor3Loss;
            predictor3LossCount++;

            Console.WriteLine("\nbatch count " + batch + "/" + TRAIN_DATA_COUNT);

            // Report results.
            Console.WriteLine("total loss " + lossTotal / lossCount);
            Console.WriteLine("local loss " + batchLoss);

            Console.WriteLine("\ncDNI1 total loss " + predictor1LossTotal / predictor1LossCount);
            Console.WriteLine("cDNI2 total loss " + predictor2LossTotal / predictor2LossCount);
            Console.WriteLine("cDNI3 total loss " + predictor3LossTotal / predictor3LossCount);

            Console.WriteLine("\ncDNI1 local loss " + predictor1Loss);
            Console.WriteLine("cDNI2 local loss " + predictor2Loss);
            Console.WriteLine("cDNI3 local loss " + predictor3Loss);

            // Only test accuracy after every 20th batch.
            if (batch % 20 != 0)
            {
                continue;
            }

            Console.WriteLine("\nTesting...");

            // Draw a random batch from the evaluation data.
            TestDataSet<Real> testBatch = mnist.Eval.GetRandomDataSet(TEST_DATA_COUNT);

            // Run the test.
            Real accuracy = Trainer.Accuracy(fullNetwork, testBatch.Data, testBatch.Label);
            Console.WriteLine("accuracy " + accuracy);
        }
    }
}
/// <summary>
/// Trains a four-stage MNIST classifier in which the first three stages are updated
/// from gradients predicted by auxiliary "cDNI" networks. Each cDNI network receives
/// the training data produced by a <c>ResultDataSet</c> built from the stage output
/// and the labels, and is itself trained against the gradient that the following
/// stage writes back. Progress is printed after every batch and accuracy is measured
/// every 20 batches.
/// </summary>
public static void Run()
{
    // Prepare MNIST data.
    Console.WriteLine("MNIST Data Loading...");
    MnistData mnist = new MnistData();

    Console.WriteLine("Training Start...");

    // The network configuration, written as one FunctionStack per stage.
    FunctionStack stage1 = new FunctionStack(
        new Linear(28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
    );

    FunctionStack stage2 = new FunctionStack(
        new Linear(256, 256, name: "l2 Linear"),
        new BatchNormalization(256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
    );

    FunctionStack stage3 = new FunctionStack(
        new Linear(256, 256, name: "l3 Linear"),
        new BatchNormalization(256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
    );

    FunctionStack stage4 = new FunctionStack(
        new Linear(256, 10, name: "l4 Linear")
    );

    // A FunctionStack can itself be stacked as a Function; this combined
    // network is only used for the accuracy test.
    FunctionStack fullNetwork = new FunctionStack(stage1, stage2, stage3, stage4);

    // Gradient predictors; their input is 256 + 10 wide.
    FunctionStack predictor1 = new FunctionStack(
        new Linear(256 + 10, 1024, name: "cDNI1 Linear1"),
        new BatchNormalization(1024, name: "cDNI1 Nrom1"),
        new ReLU(name: "cDNI1 ReLU1"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
    );

    FunctionStack predictor2 = new FunctionStack(
        new Linear(256 + 10, 1024, name: "cDNI2 Linear1"),
        new BatchNormalization(1024, name: "cDNI2 Nrom1"),
        new ReLU(name: "cDNI2 ReLU1"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
    );

    FunctionStack predictor3 = new FunctionStack(
        new Linear(256 + 10, 1024, name: "cDNI3 Linear1"),
        new BatchNormalization(1024, name: "cDNI3 Nrom1"),
        new ReLU(name: "cDNI3 ReLU1"),
        new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
    );

    // Declare the optimizers.
    stage1.SetOptimizer(new Adam(0.00003f));
    stage2.SetOptimizer(new Adam(0.00003f));
    stage3.SetOptimizer(new Adam(0.00003f));
    stage4.SetOptimizer(new Adam(0.00003f));

    predictor1.SetOptimizer(new Adam(0.00003f));
    predictor2.SetOptimizer(new Adam(0.00003f));
    predictor3.SetOptimizer(new Adam(0.00003f));

    for (int epoch = 0; epoch < 10; epoch++)
    {
        Console.WriteLine("epoch " + (epoch + 1));

        // Running totals for this epoch.
        Real lossTotal = 0;
        Real predictor1LossTotal = 0;
        Real predictor2LossTotal = 0;
        Real predictor3LossTotal = 0;

        long lossCount = 0;
        long predictor1LossCount = 0;
        long predictor2LossCount = 0;
        long predictor3LossCount = 0;

        // How many batches to run.
        for (int batch = 1; batch < TRAIN_DATA_COUNT + 1; batch++)
        {
            // Get data randomly from the training data.
            TestDataSet trainBatch = mnist.GetRandomXSet(BATCH_DATA_COUNT);

            // ---- Stage 1 ----
            NdArray[] out1 = stage1.Forward(trainBatch.Data);
            ResultDataSet set1 = new ResultDataSet(out1, trainBatch.Label);

            // Predict the gradient of stage 1's output and apply it.
            NdArray[] predicted1 = predictor1.Forward(set1.GetTrainData());
            out1[0].Grad = predicted1[0].Data.ToArray();

            // Update stage 1.
            stage1.Backward(out1);
            out1[0].ParentFunc = null;
            stage1.Update();

            // ---- Stage 2 ----
            NdArray[] out2 = stage2.Forward(set1.Result);
            ResultDataSet set2 = new ResultDataSet(out2, set1.Label);

            // Predict the gradient of stage 2's output and apply it.
            NdArray[] predicted2 = predictor2.Forward(set2.GetTrainData());
            out2[0].Grad = predicted2[0].Data.ToArray();

            // Back-propagate stage 2.
            stage2.Backward(out2);
            out2[0].ParentFunc = null;

            // Train the stage-1 predictor against the gradient now stored on set1.Result.
            NdArray target1 = new NdArray(set1.Result[0].Grad, predicted1[0].Shape, predicted1[0].BatchCount);
            Real predictor1Loss = new MeanSquaredError().Evaluate(predicted1, target1);

            stage2.Update();

            predictor1.Backward(predicted1);
            predictor1.Update();

            predictor1LossTotal += predictor1Loss;
            predictor1LossCount++;

            // ---- Stage 3 ----
            NdArray[] out3 = stage3.Forward(set2.Result);
            ResultDataSet set3 = new ResultDataSet(out3, set2.Label);

            // Predict the gradient of stage 3's output and apply it.
            NdArray[] predicted3 = predictor3.Forward(set3.GetTrainData());
            out3[0].Grad = predicted3[0].Data.ToArray();

            // Back-propagate stage 3.
            stage3.Backward(out3);
            out3[0].ParentFunc = null;

            // Train the stage-2 predictor against the gradient now stored on set2.Result.
            NdArray target2 = new NdArray(set2.Result[0].Grad, predicted2[0].Shape, predicted2[0].BatchCount);
            Real predictor2Loss = new MeanSquaredError().Evaluate(predicted2, target2);

            stage3.Update();

            predictor2.Backward(predicted2);
            predictor2.Update();

            predictor2LossTotal += predictor2Loss;
            predictor2LossCount++;

            // ---- Stage 4 ----
            NdArray[] out4 = stage4.Forward(set3.Result);

            // The last stage uses the real loss against the labels.
            Real batchLoss = new SoftmaxCrossEntropy().Evaluate(out4, set3.Label);

            // Back-propagate stage 4.
            stage4.Backward(out4);
            out4[0].ParentFunc = null;

            lossTotal += batchLoss;
            lossCount++;

            // Train the stage-3 predictor against the gradient now stored on set3.Result.
            NdArray target3 = new NdArray(set3.Result[0].Grad, predicted3[0].Shape, predicted3[0].BatchCount);
            Real predictor3Loss = new MeanSquaredError().Evaluate(predicted3, target3);

            stage4.Update();

            predictor3.Backward(predicted3);
            predictor3.Update();

            predictor3LossTotal += predictor3Loss;
            predictor3LossCount++;

            Console.WriteLine("\nbatch count " + batch + "/" + TRAIN_DATA_COUNT);

            // Result output.
            Console.WriteLine("total loss " + lossTotal / lossCount);
            Console.WriteLine("local loss " + batchLoss);

            Console.WriteLine("\ncDNI1 total loss " + predictor1LossTotal / predictor1LossCount);
            Console.WriteLine("cDNI2 total loss " + predictor2LossTotal / predictor2LossCount);
            Console.WriteLine("cDNI3 total loss " + predictor3LossTotal / predictor3LossCount);

            Console.WriteLine("\ncDNI1 local loss " + predictor1Loss);
            Console.WriteLine("cDNI2 local loss " + predictor2Loss);
            Console.WriteLine("cDNI3 local loss " + predictor3Loss);

            // Only test accuracy after every 20th batch.
            if (batch % 20 != 0)
            {
                continue;
            }

            Console.WriteLine("\nTesting...");

            // Get data randomly from the test data.
            TestDataSet testBatch = mnist.GetRandomYSet(TEST_DATA_COUNT);

            // Run the test.
            Real accuracy = Trainer.Accuracy(fullNetwork, testBatch.Data, testBatch.Label);
            Console.WriteLine("accuracy " + accuracy);
        }
    }
}
/// <summary>
/// Transfer-learning sample: runs cat/dog images through a pretrained VGG16
/// (with its output layer and final activation removed) to obtain 4096-wide
/// feature vectors, then trains a small Linear + Sigmoid head on those features
/// and prints the test accuracy after every epoch.
/// </summary>
/// <remarks>
/// Expects the Kaggle dogs-vs-cats images under <c>Data/train</c>
/// (https://www.kaggle.com/c/dogs-vs-cats/data); the original note says to place
/// the <c>train</c> folder in <c>VGGTransfer/bin/Debug/Data</c>.
/// </remarks>
public static void Main()
{
    // Check the platformId as described in the OpenCL / GPU setup article:
    // https://jinbeizame.hateblo.jp/entry/kelpnet_opencl_gpu
    Weaver.Initialize(ComputeDeviceTypes.Gpu, platformId: 1, deviceIndex: 0);

    // Download the pretrained VGG model.
    string modelFilePath = InternetFileDownloader.Donwload(DOWNLOAD_URL, MODEL_FILE);

    // Load the pretrained model as a list of Functions.
    List<Function> vgg16Net = CaffemodelDataLoader.ModelLoad(modelFilePath);

    // Remove VGG's output layer and its activation function.
    vgg16Net.RemoveAt(vgg16Net.Count - 1);
    vgg16Net.RemoveAt(vgg16Net.Count - 1);

    // Enable GPU execution on every layer type that supports it.
    // NOTE(review): the loop bound skips the last remaining function - confirm this is intended.
    for (int i = 0; i < vgg16Net.Count - 1; i++)
    {
        if (vgg16Net[i] is Convolution2D || vgg16Net[i] is Linear || vgg16Net[i] is MaxPooling)
        {
            ((IParallelizable)vgg16Net[i]).SetGpuEnable(true);
        }
    }

    // Convert the list into a FunctionStack and compress the layers.
    FunctionStack vgg = new FunctionStack(vgg16Net.ToArray());
    vgg.Compress();

    // New output layer and activation that will actually be trained.
    FunctionStack nn = new FunctionStack(
        new Linear(4096, 1, gpuEnable: true),
        new Sigmoid()
    );

    // Use Adam as the optimizer.
    nn.SetOptimizer(new Adam());

    Console.WriteLine("DataSet Loading...");

    // Two samples (one cat, one dog) are stored per image index.
    NdArray[] trainData = new NdArray[TRAIN_DATA_LENGTH * 2];
    NdArray[] trainLabel = new NdArray[TRAIN_DATA_LENGTH * 2];
    NdArray[] testData = new NdArray[TEST_DATA_LENGTH * 2];
    NdArray[] testLabel = new NdArray[TEST_DATA_LENGTH * 2];

    for (int i = 0; i < TRAIN_DATA_LENGTH + TEST_DATA_LENGTH; i++)
    {
        // Bitmaps hold native GDI+ resources, so every one of them is disposed
        // as soon as its features have been extracted.
        using (Bitmap baseCatImage = new Bitmap("Data/train/cat." + i + ".jpg"))
        using (Bitmap baseDogImage = new Bitmap("Data/train/dog." + i + ".jpg"))
        using (Bitmap catImage = new Bitmap(224, 224, PixelFormat.Format24bppRgb))
        using (Bitmap dogImage = new Bitmap(224, 224, PixelFormat.Format24bppRgb))
        {
            // Draw the source images, resized to 224x224, into the target bitmaps.
            using (Graphics gCat = Graphics.FromImage(catImage))
            using (Graphics gDog = Graphics.FromImage(dogImage))
            {
                gCat.DrawImage(baseCatImage, 0, 0, 224, 224);
                gDog.DrawImage(baseDogImage, 0, 0, 224, 224);
            }

            // Pixel values are 0..255; dividing by 255 normalizes them to 0..1.
            // The VGG output for the image is what gets stored as the sample.
            NdArray catFeature = vgg.Predict(NdArrayConverter.Image2NdArray(catImage, false, true) / 255.0)[0];
            NdArray dogFeature = vgg.Predict(NdArrayConverter.Image2NdArray(dogImage, false, true) / 255.0)[0];

            // The first TEST_DATA_LENGTH indices fill the test set, the rest the training set.
            bool isTestSample = i < TEST_DATA_LENGTH;
            NdArray[] dataTarget = isTestSample ? testData : trainData;
            NdArray[] labelTarget = isTestSample ? testLabel : trainLabel;
            int slot = (isTestSample ? i : i - TEST_DATA_LENGTH) * 2;

            // Cat goes to the even slot (label 0), dog to the following odd slot (label 1).
            // The dog must not reuse the cat's slot, otherwise the cat sample is lost
            // and the odd slot stays null.
            dataTarget[slot] = catFeature;
            labelTarget[slot] = new NdArray(new Real[] { 0 });
            dataTarget[slot + 1] = dogFeature;
            labelTarget[slot + 1] = new NdArray(new Real[] { 1 });
        }
    }

    Console.WriteLine("Training Start...");

    // Mini-batch buffers.
    NdArray batchData = new NdArray(new[] { 4096 }, BATCH_SIZE);
    NdArray batchLabel = new NdArray(new[] { 1 }, BATCH_SIZE);

    // Loss function (mean squared error is used for this binary classification).
    LossFunction lossFunction = new MeanSquaredError();

    for (int epoch = 0; epoch < 10; epoch++)
    {
        // Each epoch runs TRAIN_DATA_COUNT mini-batch steps
        // (per the original note: training-set size / batch size).
        for (int step = 0; step < TRAIN_DATA_COUNT; step++)
        {
            // Assemble a mini-batch from randomly chosen training samples.
            for (int i = 0; i < BATCH_SIZE; i++)
            {
                int index = Mother.Dice.Next(trainData.Length);

                // Copy one sample (NdArray) into its position inside the batch NdArray.
                Array.Copy(trainData[index].Data, 0, batchData.Data, i * batchData.Length, batchData.Length);
                batchLabel.Data[i] = trainLabel[index].Data[0];
            }

            // Forward pass, loss evaluation, backward pass, parameter update.
            NdArray[] output = nn.Forward(batchData);
            lossFunction.Evaluate(output, batchLabel);
            nn.Backward(output);
            nn.Update();
        }

        // Accuracy over the whole test set.
        Real accuracy = 0;
        for (int i = 0; i < TEST_DATA_LENGTH * 2; i++)
        {
            NdArray[] prediction = nn.Predict(testData[i]);

            // A prediction counts as correct when it is within 0.5 of the test label
            // (label 0 with output < 0.5, or label 1 with output > 0.5).
            if (Math.Abs(prediction[0].Data[0] - testLabel[i].Data[0]) < 0.5)
            {
                accuracy += 1;
            }
        }

        // Convert the hit count into a ratio once, after all samples were scored.
        accuracy /= TEST_DATA_LENGTH * 2.0;
        Console.WriteLine("Epoch:" + epoch + "accuracy:" + accuracy);
    }
}
/// <summary>
/// Trains a four-stage MNIST classifier in which stages 1-3 are updated with
/// gradients predicted by auxiliary networks (cDNI1-cDNI3) instead of gradients
/// propagated back from the final loss. Progress and losses are reported through
/// <c>RILogManager</c>; accuracy is measured every 20 batches.
/// </summary>
/// <remarks>
/// The order of the Forward / Backward / Update calls below is significant: each
/// cDNI loss is evaluated only after the following stage has run its Backward,
/// and before that stage's Update. Do not reorder statements.
/// </remarks>
public static void Run()
{
    // Prepare MNIST data
    RILogManager.Default?.SendDebug("MNIST Data Loading...");
    MnistData mnistData = new MnistData(28);
    RILogManager.Default?.SendDebug("Training Start...");

    // The classifier is split into four separately optimized stages:
    // three Linear + BatchNormalization + ReLU stages and a final Linear stage with 10 outputs.
    FunctionStack Layer1 = new FunctionStack("Test12 Layer 1",
        new Linear(true, 28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(true, 256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
        );
    FunctionStack Layer2 = new FunctionStack("Test12 Layer 2",
        new Linear(true, 256, 256, name: "l2 Linear"),
        new BatchNormalization(true, 256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
        );
    FunctionStack Layer3 = new FunctionStack("Test12 Layer 3",
        new Linear(true, 256, 256, name: "l3 Linear"),
        new BatchNormalization(true, 256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
        );
    FunctionStack Layer4 = new FunctionStack("Test12 Layer 4",
        new Linear(true, 256, 10, name: "l4 Linear")
        );

    // A FunctionStack can itself be stacked as a Function; this combined stack
    // is only used for the accuracy test at the bottom of the batch loop.
    FunctionStack nn = new FunctionStack
        ("Test12",
        Layer1,
        Layer2,
        Layer3,
        Layer4
        );

    // Gradient-predicting networks, one per hidden stage. Their input width is
    // 256 + 10 and their output width is 256. The last Linear of each starts
    // from an all-zero weight matrix (new Real[1024, 256]).
    // NOTE(review): the last layer of cDNI1 is named "DNI1 Linear3", without the
    // "c" prefix its siblings use - confirm whether anything depends on that name.
    FunctionStack cDNI1 = new FunctionStack("Test12 DNI 1",
        new Linear(true, 256 + 10, 1024, name: "cDNI1 Linear1"),
        new BatchNormalization(true, 1024, name: "cDNI1 Norm1"),
        new ReLU(name: "cDNI1 ReLU1"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
        );
    FunctionStack cDNI2 = new FunctionStack("Test12 DNI 2",
        new Linear(true, 256 + 10, 1024, name: "cDNI2 Linear1"),
        new BatchNormalization(true, 1024, name: "cDNI2 Norm1"),
        new ReLU(name: "cDNI2 ReLU1"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
        );
    FunctionStack cDNI3 = new FunctionStack("Test12 DNI 3",
        new Linear(true, 256 + 10, 1024, name: "cDNI3 Linear1"),
        new BatchNormalization(true, 1024, name: "cDNI3 Norm1"),
        new ReLU(name: "cDNI3 ReLU1"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
        );

    // Every stage and every cDNI module gets its own Adam optimizer instance.
    Layer1.SetOptimizer(new Adam("Adam", 0.00003f));
    Layer2.SetOptimizer(new Adam("Adam", 0.00003f));
    Layer3.SetOptimizer(new Adam("Adam", 0.00003f));
    Layer4.SetOptimizer(new Adam("Adam", 0.00003f));
    cDNI1.SetOptimizer(new Adam("Adam", 0.00003f));
    cDNI2.SetOptimizer(new Adam("Adam", 0.00003f));
    cDNI3.SetOptimizer(new Adam("Adam", 0.00003f));

    // Log a description of each function stack
    RILogManager.Default?.SendDebug(Layer1.Describe());
    RILogManager.Default?.SendDebug(Layer2.Describe());
    RILogManager.Default?.SendDebug(Layer3.Describe());
    RILogManager.Default?.SendDebug(Layer4.Describe());
    RILogManager.Default?.SendDebug(cDNI1.Describe());
    RILogManager.Default?.SendDebug(cDNI2.Describe());
    RILogManager.Default?.SendDebug(cDNI3.Describe());

    // The "of 10" in the log message below mirrors this loop bound.
    for (int epoch = 0; epoch < 10; epoch++)
    {
        // Running loss sums and sample counts for this epoch (reset every epoch)
        Real totalLoss = 0;
        Real cDNI1totalLoss = 0;
        Real cDNI2totalLoss = 0;
        Real cDNI3totalLoss = 0;
        long totalLossCount = 0;
        long cDNI1totalLossCount = 0;
        long cDNI2totalLossCount = 0;
        long cDNI3totalLossCount = 0;

        // Number of batches per epoch; i is 1-based so it can be logged directly
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            RILogManager.Default?.SendDebug("epoch: " + (epoch + 1) + " of 10, batch iteration: " + i + " of " + TRAIN_DATA_COUNT);
            RILogManager.Default?.ViewerSendWatch("Epoch", epoch + 1);
            RILogManager.Default?.ViewerSendWatch("Batch Iteration", i);

            // Get data randomly from the training data
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);

            // Run stage 1 and pair its output with the labels
            NdArray[] layer1ForwardResult = Layer1.Forward(true, datasetX.Data);
            ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label);

            // Predict the gradient for stage 1's output.
            // presumably GetTrainData() combines the stage output with the labels
            // (the cDNI input width is 256 + 10) - confirm against ResultDataSet
            NdArray[] cDNI1Result = cDNI1.Forward(true, layer1ResultDataSet.GetTrainData());

            // Use a copy of the predicted values as the gradient of stage 1's output
            layer1ForwardResult[0].Grad = cDNI1Result[0].Data.ToArray();

            // Update stage 1 immediately from the predicted gradient
            Layer1.Backward(true, layer1ForwardResult);
            // Clear the output's link to the function that produced it
            layer1ForwardResult[0].ParentFunc = null;
            Layer1.Update();

            // Run stage 2
            NdArray[] layer2ForwardResult = Layer2.Forward(true, layer1ResultDataSet.Result);
            ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label);

            // Predict the gradient for stage 2's output
            NdArray[] cDNI2Result = cDNI2.Forward(true, layer2ResultDataSet.GetTrainData());

            // Use a copy of the predicted values as the gradient of stage 2's output
            layer2ForwardResult[0].Grad = cDNI2Result[0].Data.ToArray();

            // Backward through stage 2 (its Update is deferred until after the cDNI1 loss)
            Layer2.Backward(true, layer2ForwardResult);
            layer2ForwardResult[0].ParentFunc = null;

            // Train cDNI1: mean squared error between its prediction and the Grad now
            // stored on stage 1's result (evaluated after Layer2.Backward has run)
            Real cDNI1loss = new MeanSquaredError().Evaluate(cDNI1Result, new NdArray(layer1ResultDataSet.Result[0].Grad, cDNI1Result[0].Shape, cDNI1Result[0].BatchCount));

            Layer2.Update();

            cDNI1.Backward(true, cDNI1Result);
            cDNI1.Update();

            cDNI1totalLoss += cDNI1loss;
            cDNI1totalLossCount++;

            // Run stage 3
            NdArray[] layer3ForwardResult = Layer3.Forward(true, layer2ResultDataSet.Result);
            ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label);

            // Predict the gradient for stage 3's output
            NdArray[] cDNI3Result = cDNI3.Forward(true, layer3ResultDataSet.GetTrainData());

            // Use a copy of the predicted values as the gradient of stage 3's output
            layer3ForwardResult[0].Grad = cDNI3Result[0].Data.ToArray();

            // Backward through stage 3 (its Update is deferred until after the cDNI2 loss)
            Layer3.Backward(true, layer3ForwardResult);
            layer3ForwardResult[0].ParentFunc = null;

            // Train cDNI2 against the Grad now stored on stage 2's result
            Real cDNI2loss = new MeanSquaredError().Evaluate(cDNI2Result, new NdArray(layer2ResultDataSet.Result[0].Grad, cDNI2Result[0].Shape, cDNI2Result[0].BatchCount));

            Layer3.Update();

            cDNI2.Backward(true, cDNI2Result);
            cDNI2.Update();

            cDNI2totalLoss += cDNI2loss;
            cDNI2totalLossCount++;

            // Run stage 4; this is the only stage driven by the real classification loss
            NdArray[] layer4ForwardResult = Layer4.Forward(true, layer3ResultDataSet.Result);
            Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label);

            // Backward through stage 4 (its Update is deferred until after the cDNI3 loss)
            Layer4.Backward(true, layer4ForwardResult);
            layer4ForwardResult[0].ParentFunc = null;

            totalLoss += sumLoss;
            totalLossCount++;

            // Train cDNI3 against the Grad now stored on stage 3's result
            Real cDNI3loss = new MeanSquaredError().Evaluate(cDNI3Result, new NdArray(layer3ResultDataSet.Result[0].Grad, cDNI3Result[0].Shape, cDNI3Result[0].BatchCount));

            Layer4.Update();

            cDNI3.Backward(true, cDNI3Result);
            cDNI3.Update();

            cDNI3totalLoss += cDNI3loss;
            cDNI3totalLossCount++;

            // Report epoch-average ("total") and current-batch ("local") losses
            RILogManager.Default?.SendDebug("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
            RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount);
            RILogManager.Default?.SendDebug("local loss " + sumLoss);

            RILogManager.Default?.SendDebug("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount);
            RILogManager.Default?.SendDebug("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount);
            RILogManager.Default?.SendDebug("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount);

            RILogManager.Default?.SendDebug("\ncDNI1 local loss " + cDNI1loss);
            RILogManager.Default?.SendDebug("cDNI2 local loss " + cDNI2loss);
            RILogManager.Default?.SendDebug("cDNI3 local loss " + cDNI3loss);

            // Every 20 batches, measure accuracy of the combined stack on random test data
            if (i % 20 == 0)
            {
                RILogManager.Default?.SendDebug("\nTesting...");
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                RILogManager.Default?.SendDebug("accuracy " + accuracy);
            }
        }
    }
}
/// <summary>
/// Trains a word-level language model (EmbedID, two LSTM layers with dropout,
/// Linear output) on the downloaded training file using truncated
/// back-propagation through time, prints the validation perplexity after every
/// pass over the data, and finally prints the test perplexity.
/// </summary>
public static void Run()
{
    Console.WriteLine("Build Vocabulary.");

    Vocabulary vocabulary = new Vocabulary();

    string trainPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE, TRAIN_FILE_HASH);
    string validPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + VALID_FILE, VALID_FILE, VALID_FILE_HASH);
    string testPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE, TEST_FILE_HASH);

    int[] trainData = vocabulary.LoadData(trainPath);
    int[] validData = vocabulary.LoadData(validPath);
    int[] testData = vocabulary.LoadData(testPath);

    // Read after all three files are loaded, so it covers every word seen in any of them.
    int nVocab = vocabulary.Length;

    Console.WriteLine("Network Initilizing.");
    FunctionStack<Real> model = new FunctionStack<Real>(
        new EmbedID<Real>(nVocab, N_UNITS, name: "l1 EmbedID"),
        new Dropout<Real>(),
        new LSTM<Real>(N_UNITS, N_UNITS, name: "l2 LSTM"),
        new Dropout<Real>(),
        new LSTM<Real>(N_UNITS, N_UNITS, name: "l3 LSTM"),
        new Dropout<Real>(),
        new Linear<Real>(N_UNITS, nVocab, name: "l4 Linear")
    );

    // Overwrite every parameter with a uniform random value in [-0.1, 0.1).
    for (int i = 0; i < model.Functions.Length; i++)
    {
        for (int j = 0; j < model.Functions[i].Parameters.Length; j++)
        {
            for (int k = 0; k < model.Functions[i].Parameters[j].Data.Length; k++)
            {
                model.Functions[i].Parameters[j].Data[k] = ((Real)Mother.Dice.NextDouble() * 2.0f - 1.0f) / 10.0f;
            }
        }
    }

    // Rather than capping each value at the given threshold, the correction rate
    // is derived from the L2 norm of all parameters (per the original author's note).
    GradientClipping<Real> gradientClipping = new GradientClipping<Real>(threshold: GRAD_CLIP);
    SGD<Real> sgd = new SGD<Real>(learningRate: 0.1f);
    gradientClipping.SetUp(model);
    sgd.SetUp(model);

    // Positions are kept as integers: doing this arithmetic in Real would lose
    // precision once positions exceed the type's exact-integer range and would
    // then silently select the wrong tokens.
    int wholeLen = trainData.Length;

    // Batch lane j reads the corpus starting at offset jump * j.
    int jump = wholeLen / BATCH_SIZE;
    int epoch = 0;

    Console.WriteLine("Train Start.");

    for (int i = 0; i < jump * N_EPOCH; i++)
    {
        NdArray<Real> x = new NdArray<Real>(new[] { 1 }, BATCH_SIZE);
        NdArray<int> t = new NdArray<int>(new[] { 1 }, BATCH_SIZE);

        // Input is the current token of each lane, target is the token that follows it;
        // positions wrap around at the end of the corpus.
        for (int j = 0; j < BATCH_SIZE; j++)
        {
            x.Data[j] = trainData[(jump * j + i) % wholeLen];
            t.Data[j] = trainData[(jump * j + i + 1) % wholeLen];
        }

        NdArray<Real> result = model.Forward(x)[0];
        Real sumLoss = new SoftmaxCrossEntropy<Real>().Evaluate(result, t);
        Console.WriteLine("[{0}/{1}] Loss: {2}", i + 1, jump, sumLoss);
        model.Backward(result);

        // Run truncated BPTT: apply the accumulated gradients and reset the
        // recurrent state every BPROP_LEN steps.
        if ((i + 1) % BPROP_LEN == 0)
        {
            gradientClipping.Update();
            sgd.Update();
            model.ResetState();
        }

        // One full pass over the data has been completed.
        if ((i + 1) % jump == 0)
        {
            epoch++;
            Console.WriteLine("evaluate");
            Console.WriteLine("validation perplexity: {0}", Evaluate(model, validData));

            // From the sixth pass on, decay the learning rate after every pass.
            if (epoch >= 6)
            {
                sgd.LearningRate /= 1.2f;
                Console.WriteLine("learning rate =" + sgd.LearningRate);
            }
        }
    }

    Console.WriteLine("test start");
    Console.WriteLine("test perplexity:" + Evaluate(model, testData));
}
const Real L2_SCALE = 1e-4f; // scale of the L2 (weight decay) penalty

/// <summary>
/// Trains a three-layer masked (sparse) MLP on MNIST with momentum SGD wrapped
/// in a <c>SparseRigLOptimizer</c>, and prints loss and accuracy on the full
/// evaluation set at the end of every epoch.
/// </summary>
public static void Run()
{
    // Load the MNIST data.
    Console.WriteLine("MNIST data loading...");
    MnistData<Real> mnistData = new MnistData<Real>();

    // The complete evaluation set is reused for every test pass.
    TestDataSet<Real> datasetY = mnistData.Eval.GetAllDataSet();

    Console.WriteLine("\nNetwork initializing...");

    // Number of mini-batches that make up one epoch (original note: 600 = 60000 / 100).
    int batchesPerEpoch = mnistData.Train.Length / BATCH_SIZE;

    // Update counts at which the learning-rate schedule switches to its next value.
    int[] boundaries = new int[]
    {
        LR_DROP_EPOCH * batchesPerEpoch,
        (LR_DROP_EPOCH + 20) * batchesPerEpoch
    };

    // Per-layer sparsity overrides, keyed by layer name.
    Dictionary<string, Real> customSparsities = new Dictionary<string, Real>();
    customSparsities.Add("layer2", END_SPARSITY * SPARSITY_SCALE);
    customSparsities.Add("layer3", END_SPARSITY * 0);

    MaskedLinear<Real> layer1 = new MaskedLinear<Real>(28 * 28, 300, name: "layer1", gpuEnable: true);
    MaskedLinear<Real> layer2 = new MaskedLinear<Real>(300, 100, name: "layer2", gpuEnable: true);
    MaskedLinear<Real> layer3 = new MaskedLinear<Real>(100, 10, name: "layer3", gpuEnable: true);

    // Describe the network as a FunctionStack.
    FunctionStack<Real> nn = new FunctionStack<Real>(
        layer1,
        new ReLU<Real>(name: "l1 ReLU"),
        layer2,
        new ReLU<Real>(name: "l2 ReLU"),
        layer3
    );

    SoftmaxCrossEntropy<Real> sce = new SoftmaxCrossEntropy<Real>();

    // Weight decay is registered for the three weight matrices only.
    WeightDecay<Real> weightDecay = new WeightDecay<Real>(L2_SCALE);
    weightDecay.AddParameters(layer1.Weight, layer2.Weight, layer3.Weight);

    MomentumSGD<Real> mSGD = new MomentumSGD<Real>(LEARNING_RATE);
    mSGD.SetUp(nn);

    SparseRigLOptimizer opt = new SparseRigLOptimizer(
        mSGD,
        MASKUPDATE_BEGIN_STEP,
        MASKUPDATE_END_STEP,
        MASKUPDATE_FREQUENCY,
        DROP_FRACTION,
        "cosine",
        "zeros",
        RIGL_ACC_SCALE);

    // Parallel arrays: entry k of each belongs to the same layer.
    NdArray<Real>[] allMasks = new NdArray<Real>[] { layer1.Mask, layer2.Mask, layer3.Mask };
    string[] layerNames = new string[] { layer1.Name, layer2.Name, layer3.Name };
    NdArray<Real>[] allWeights = new NdArray<Real>[] { layer1.Weight, layer2.Weight, layer3.Weight };

    // Initialize the masks.
    SparseUtils.MaskInit(allMasks, layerNames, "erdos_renyi", END_SPARSITY, customSparsities);

    Console.WriteLine("[Global sparsity] " + SparseUtils.CalculateSparsity(allMasks));
    var weightSparsity = GetWeightSparsity(allMasks);
    Console.WriteLine("[Sparsity] Layer0, Layer1 : " + weightSparsity[0] + ", " + weightSparsity[1]);

    Console.WriteLine("\nTraining Start...");

    // Start training; the iteration counter is 1-based.
    int totalIterations = NUM_EPOCHS * batchesPerEpoch;
    for (int iteration = 1; iteration <= totalIterations; iteration++)
    {
        // Draw a random mini-batch from the training data.
        TestDataSet<Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_SIZE);

        // Forward pass, loss evaluation, backward pass.
        NdArray<Real> y = nn.Forward(datasetX.Data)[0];
        sce.Evaluate(y, datasetX.Label);
        nn.Backward(y);

        weightDecay.Update();

        // Piecewise-constant learning-rate schedule driven by the optimizer's update count.
        opt._optimizer.LearningRate = PiecewiseConstant(opt._optimizer.UpdateCount, boundaries, LEARNING_RATE);
        opt.condMaskUpdate(allMasks, allWeights);

        // (A per-10-batches print of the training loss used to live here and is disabled.)

        // Test the accuracy on the last iteration of every epoch.
        if (iteration % batchesPerEpoch == 0)
        {
            Console.WriteLine("\nEpoch:" + Math.Floor(iteration / (Real)batchesPerEpoch) + " Iteration:" + iteration + " Testing... ");

            // Run the test.
            Real testLoss;
            Real accuracy = Trainer.Accuracy(nn, datasetY, new SoftmaxCrossEntropy<Real>(), out testLoss);

            Console.WriteLine("loss: " + testLoss);
            Console.WriteLine("accuracy: " + accuracy);
        }
    }
}
/// <summary>
/// Trains a simple recurrent language model (EmbedID + Linear recurrence + Tanh,
/// followed by a Linear output layer) one sentence at a time, then scores up to
/// the first 1000 test tokens sentence by sentence and prints
/// <c>2 ^ (sum / wnum)</c> as the result.
/// </summary>
public static void Run()
{
    Console.WriteLine("Build Vocabulary.");

    Vocabulary vocabulary = new Vocabulary();
    string trainPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE, TRAIN_FILE_HASH);
    string testPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE, TEST_FILE_HASH);

    int[] trainData = vocabulary.LoadData(trainPath);
    int[] testData = vocabulary.LoadData(testPath);

    int nVocab = vocabulary.Length;

    Console.WriteLine("Done.");

    Console.WriteLine("Network Initilizing.");
    FunctionStack model = new FunctionStack(
        new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"),
        new Linear(N_UNITS, N_UNITS, name: "l2 Linear"),
        new TanhActivation("l2 Tanh"),
        new Linear(N_UNITS, nVocab, name: "l3 Linear"),
        new Softmax("l3 Sonftmax")
    );

    model.SetOptimizer(new Adam());

    // Token ids of the sentence currently being collected.
    List<int> s = new List<int>();

    Console.WriteLine("Train Start.");
    SoftmaxCrossEntropy softmaxCrossEntropy = new SoftmaxCrossEntropy();
    for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++)
    {
        for (int pos = 0; pos < trainData.Length; pos++)
        {
            int id = trainData[pos];
            s.Add(id);

            // A complete sentence has been collected: train on it.
            if (id == vocabulary.EosID)
            {
                // The hidden state starts from zeros for every sentence. It is only
                // needed here, so it is no longer allocated for every single token.
                NdArray h = new NdArray(new Real[N_UNITS]);

                Real accumloss = 0;
                Stack<NdArray> tmp = new Stack<NdArray>();

                for (int i = 0; i < s.Count; i++)
                {
                    // Target is the next token; the last position targets EOS.
                    int tx = i == s.Count - 1 ? vocabulary.EosID : s[i + 1];

                    // l1 EmbedID
                    NdArray l1 = model.Functions[0].Forward(s[i])[0];

                    // l2 Linear (applied to the previous hidden state)
                    NdArray l2 = model.Functions[1].Forward(h)[0];

                    // Add
                    NdArray xK = l1 + l2;

                    // l2 Tanh
                    h = model.Functions[2].Forward(xK)[0];

                    // l3 Linear
                    NdArray h2 = model.Functions[3].Forward(h)[0];

                    Real loss = softmaxCrossEntropy.Evaluate(h2, tx);
                    tmp.Push(h2);
                    accumloss += loss;
                }

                Console.WriteLine(accumloss);

                // Back-propagate the per-step outputs in reverse order (last step first).
                while (tmp.Count > 0)
                {
                    model.Backward(tmp.Pop());
                }

                model.Update();
                s.Clear();
            }

            if (pos % 100 == 0)
            {
                Console.WriteLine(pos + "/" + trainData.Length + " finished");
            }
        }
    }

    Console.WriteLine("Test Start.");

    Real sum = 0;
    int wnum = 0;
    List<int> ts = new List<int>();
    bool unkWord = false;

    // Only the first 1000 test tokens are scored; the bound is clamped so that a
    // shorter test set no longer throws IndexOutOfRangeException.
    int testLength = Math.Min(1000, testData.Length);
    for (int pos = 0; pos < testLength; pos++)
    {
        int id = testData[pos];
        ts.Add(id);

        // NOTE(review): this compares a word id against the number of training
        // tokens; it looks like it was meant to detect words unseen in training - confirm.
        if (id > trainData.Length)
        {
            unkWord = true;
        }

        if (id == vocabulary.EosID)
        {
            // Sentences flagged as containing an unknown word are skipped.
            if (!unkWord)
            {
                Console.WriteLine("pos" + pos);
                Console.WriteLine("tsLen" + ts.Count);
                Console.WriteLine("sum" + sum);
                Console.WriteLine("wnum" + wnum);

                sum += CalPs(model, ts);
                wnum += ts.Count - 1;
            }
            else
            {
                unkWord = false;
            }

            ts.Clear();
        }
    }

    Console.WriteLine(Math.Pow(2.0, sum / wnum));
}
/// <summary>
/// Trains a four-stage MNIST classifier in which stages 1-3 are updated with
/// gradients predicted by auxiliary networks (DNI1-DNI3) instead of gradients
/// propagated back from the final loss. Progress and losses are reported through
/// <c>RILogManager</c>; accuracy is measured every 20 batches.
/// </summary>
/// <remarks>
/// The order of the Forward / Backward / Update calls below is significant: each
/// DNI loss is evaluated only after the following stage has run its Backward,
/// and before that stage's Update. Do not reorder statements.
/// </remarks>
public static void Run()
{
    // Prepare MNIST data
    RILogManager.Default?.SendDebug("MNIST Data Loading...");
    MnistData mnistData = new MnistData(28);
    RILogManager.Default?.SendDebug("Training Start...");

    // The classifier is split into four separately optimized stages:
    // three Linear + BatchNormalization + ReLU stages and a final Linear stage with 10 outputs.
    FunctionStack Layer1 = new FunctionStack("Test11 Layer 1",
        new Linear(true, 28 * 28, 256, name: "l1 Linear"),
        new BatchNormalization(true, 256, name: "l1 Norm"),
        new ReLU(name: "l1 ReLU")
        );
    FunctionStack Layer2 = new FunctionStack("Test11 Layer 2",
        new Linear(true, 256, 256, name: "l2 Linear"),
        new BatchNormalization(true, 256, name: "l2 Norm"),
        new ReLU(name: "l2 ReLU")
        );
    FunctionStack Layer3 = new FunctionStack("Test11 Layer 3",
        new Linear(true, 256, 256, name: "l3 Linear"),
        new BatchNormalization(true, 256, name: "l3 Norm"),
        new ReLU(name: "l3 ReLU")
        );
    FunctionStack Layer4 = new FunctionStack("Test11 Layer 4",
        new Linear(true, 256, 10, name: "l4 Linear")
        );

    // A FunctionStack can itself be stacked as a Function; this combined stack
    // is only used for the accuracy test at the bottom of the batch loop.
    FunctionStack nn = new FunctionStack
        ("Test11",
        Layer1,
        Layer2,
        Layer3,
        Layer4
        );

    // Gradient-predicting networks, one per hidden stage: 256 inputs, two 1024-wide
    // hidden blocks, 256 outputs. The last Linear of each starts from an all-zero
    // weight matrix (new Real[1024, 256]).
    FunctionStack DNI1 = new FunctionStack("Test11 DNI1",
        new Linear(true, 256, 1024, name: "DNI1 Linear1"),
        new BatchNormalization(true, 1024, name: "DNI1 Norm1"),
        new ReLU(name: "DNI1 ReLU1"),
        new Linear(true, 1024, 1024, name: "DNI1 Linear2"),
        new BatchNormalization(true, 1024, name: "DNI1 Norm2"),
        new ReLU(name: "DNI1 ReLU2"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
        );
    FunctionStack DNI2 = new FunctionStack("Test11 DNI2",
        new Linear(true, 256, 1024, name: "DNI2 Linear1"),
        new BatchNormalization(true, 1024, name: "DNI2 Norm1"),
        new ReLU(name: "DNI2 ReLU1"),
        new Linear(true, 1024, 1024, name: "DNI2 Linear2"),
        new BatchNormalization(true, 1024, name: "DNI2 Norm2"),
        new ReLU(name: "DNI2 ReLU2"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3")
        );
    FunctionStack DNI3 = new FunctionStack("Test11 DNI3",
        new Linear(true, 256, 1024, name: "DNI3 Linear1"),
        new BatchNormalization(true, 1024, name: "DNI3 Norm1"),
        new ReLU(name: "DNI3 ReLU1"),
        new Linear(true, 1024, 1024, name: "DNI3 Linear2"),
        new BatchNormalization(true, 1024, name: "DNI3 Norm2"),
        new ReLU(name: "DNI3 ReLU2"),
        new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3")
        );

    // Every stage and every DNI module gets its own Adam optimizer instance
    Layer1.SetOptimizer(new Adam());
    Layer2.SetOptimizer(new Adam());
    Layer3.SetOptimizer(new Adam());
    Layer4.SetOptimizer(new Adam());
    DNI1.SetOptimizer(new Adam());
    DNI2.SetOptimizer(new Adam());
    DNI3.SetOptimizer(new Adam());

    // Train for 20 epochs
    for (int epoch = 0; epoch < 20; epoch++)
    {
        RILogManager.Default?.SendDebug("epoch " + (epoch + 1));

        // Running loss sums and sample counts for this epoch (reset every epoch)
        Real totalLoss = 0;
        Real DNI1totalLoss = 0;
        Real DNI2totalLoss = 0;
        Real DNI3totalLoss = 0;
        long totalLossCount = 0;
        long DNI1totalLossCount = 0;
        long DNI2totalLossCount = 0;
        long DNI3totalLossCount = 0;

        // Number of batches per epoch; i is 1-based so it can be logged directly
        for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
        {
            // Get data randomly from the training data
            TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);

            // Run stage 1
            NdArray[] layer1ForwardResult = Layer1.Forward(true, datasetX.Data);

            // Predict the gradient for stage 1's output
            NdArray[] DNI1Result = DNI1.Forward(true, layer1ForwardResult);

            // Use a copy of the predicted values as the gradient of stage 1's output
            layer1ForwardResult[0].Grad = DNI1Result[0].Data.ToArray();

            // Update stage 1 immediately from the predicted gradient
            Layer1.Backward(true, layer1ForwardResult);
            layer1ForwardResult[0].ParentFunc = null; // Backward has run, so cut the computation graph here
            Layer1.Update();

            // Run stage 2
            NdArray[] layer2ForwardResult = Layer2.Forward(true, layer1ForwardResult);

            // Predict the gradient for stage 2's output
            NdArray[] DNI2Result = DNI2.Forward(true, layer2ForwardResult);

            // Use a copy of the predicted values as the gradient of stage 2's output
            layer2ForwardResult[0].Grad = DNI2Result[0].Data.ToArray();

            // Backward through stage 2 (its Update is deferred until after the DNI1 loss)
            Layer2.Backward(true, layer2ForwardResult);
            layer2ForwardResult[0].ParentFunc = null;

            // Train DNI1: mean squared error between its prediction and the Grad now
            // stored on stage 1's output (evaluated after Layer2.Backward has run)
            Real DNI1loss = new MeanSquaredError().Evaluate(DNI1Result, new NdArray(layer1ForwardResult[0].Grad, DNI1Result[0].Shape, DNI1Result[0].BatchCount));

            Layer2.Update();

            DNI1.Backward(true, DNI1Result);
            DNI1.Update();

            DNI1totalLoss += DNI1loss;
            DNI1totalLossCount++;

            // Run stage 3
            NdArray[] layer3ForwardResult = Layer3.Forward(true, layer2ForwardResult);

            // Predict the gradient for stage 3's output
            NdArray[] DNI3Result = DNI3.Forward(true, layer3ForwardResult);

            // Use a copy of the predicted values as the gradient of stage 3's output
            layer3ForwardResult[0].Grad = DNI3Result[0].Data.ToArray();

            // Backward through stage 3 (its Update is deferred until after the DNI2 loss)
            Layer3.Backward(true, layer3ForwardResult);
            layer3ForwardResult[0].ParentFunc = null;

            // Train DNI2 against the Grad now stored on stage 2's output
            Real DNI2loss = new MeanSquaredError().Evaluate(DNI2Result, new NdArray(layer2ForwardResult[0].Grad, DNI2Result[0].Shape, DNI2Result[0].BatchCount));

            Layer3.Update();

            DNI2.Backward(true, DNI2Result);
            DNI2.Update();

            DNI2totalLoss += DNI2loss;
            DNI2totalLossCount++;

            // Run stage 4
            NdArray[] layer4ForwardResult = Layer4.Forward(true, layer3ForwardResult);

            // Stage 4 is the only stage driven by the real classification loss
            Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, datasetX.Label);

            // Backward through stage 4 (its Update is deferred until after the DNI3 loss)
            Layer4.Backward(true, layer4ForwardResult);
            layer4ForwardResult[0].ParentFunc = null;

            totalLoss += sumLoss;
            totalLossCount++;

            // Train DNI3 against the Grad now stored on stage 3's output
            Real DNI3loss = new MeanSquaredError().Evaluate(DNI3Result, new NdArray(layer3ForwardResult[0].Grad, DNI3Result[0].Shape, DNI3Result[0].BatchCount));

            Layer4.Update();

            DNI3.Backward(true, DNI3Result);
            DNI3.Update();

            DNI3totalLoss += DNI3loss;
            DNI3totalLossCount++;

            // Report epoch-average ("total") and current-batch ("local") losses
            RILogManager.Default?.SendDebug("batch count " + i + "/" + TRAIN_DATA_COUNT);
            RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount);
            RILogManager.Default?.SendDebug("local loss " + sumLoss);

            RILogManager.Default?.SendDebug("DNI1 total loss " + DNI1totalLoss / DNI1totalLossCount);
            RILogManager.Default?.SendDebug("DNI2 total loss " + DNI2totalLoss / DNI2totalLossCount);
            RILogManager.Default?.SendDebug("DNI3 total loss " + DNI3totalLoss / DNI3totalLossCount);

            RILogManager.Default?.SendDebug("DNI1 local loss " + DNI1loss);
            RILogManager.Default?.SendDebug("DNI2 local loss " + DNI2loss);
            RILogManager.Default?.SendDebug("DNI3 local loss " + DNI3loss);

            // Every 20 batches, measure accuracy of the combined stack
            if (i % 20 == 0)
            {
                RILogManager.Default?.SendDebug("Testing...");

                // Get data randomly from test data
                TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);

                // Run test
                Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                RILogManager.Default?.SendDebug("accuracy " + accuracy);
            }
        }
    }
}