public static void Run() { //Create a target filter (in practice this would be an unknown value) Deconvolution2D decon_core = new Deconvolution2D(1, 1, 15, 1, 7, gpuEnable: true) { Weight = { Data = MakeOneCore() } }; Deconvolution2D model = new Deconvolution2D(1, 1, 15, 1, 7, gpuEnable: true); SGD optimizer = new SGD(learningRate: 0.00005); //diverges if the learning rate is too large model.SetOptimizer(optimizer); MeanSquaredError meanSquaredError = new MeanSquaredError(); //The original sample trains on the same image every time; changed here to training closer to practice for (int i = 0; i < 11; i++) { //Generate an image with randomly placed dots NdArray img_p = getRandomImage(); //Produce the training image with the target filter NdArray[] img_core = decon_core.Forward(img_p); //Produce an image with the untrained filter NdArray[] img_y = model.Forward(img_p); Real loss = meanSquaredError.Evaluate(img_y, img_core); model.Backward(img_y); model.Update(); Console.WriteLine("epoch" + i + " : " + loss); } }
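The helpers MakeOneCore and getRandomImage are not included in the snippet above. A plausible minimal reconstruction, assuming a Gaussian-style blur kernel and a single-channel image dotted with a few bright pixels; only the 15x15 kernel size is implied by the Deconvolution2D arguments, the 128x128 image size is a guess:

//Hypothetical reconstruction of the missing helpers; not from the original source.
static Real[] MakeOneCore()
{
    const int kSize = 15; //matches the kernel size passed to Deconvolution2D
    Real[] w = new Real[kSize * kSize];
    for (int y = 0; y < kSize; y++)
    {
        for (int x = 0; x < kSize; x++)
        {
            double dx = x - kSize / 2, dy = y - kSize / 2;
            //Gaussian falloff from the kernel center
            w[y * kSize + x] = (Real)Math.Exp(-(dx * dx + dy * dy) / 18.0);
        }
    }
    return w;
}

static NdArray getRandomImage(int size = 128) //image size is an assumption
{
    Real[] img = new Real[size * size];
    Random rnd = new Random();
    for (int i = 0; i < 10; i++) //ten randomly placed bright dots
    {
        img[rnd.Next(img.Length)] = 1;
    }
    return new NdArray(img, new[] { 1, size, size }, 1); //data, shape, batch count
}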
static Real ComputeLoss(FunctionStack model, NdArray[] sequences) { //Aggregate the loss over the whole sequence Real totalLoss = 0; NdArray x = new NdArray(new[] { 1 }, MINI_BATCH_SIZE); NdArray t = new NdArray(new[] { 1 }, MINI_BATCH_SIZE); Stack <NdArray[]> backNdArrays = new Stack <NdArray[]>(); for (int i = 0; i < LENGTH_OF_SEQUENCE - 1; i++) { for (int j = 0; j < MINI_BATCH_SIZE; j++) { x.Data[j] = sequences[j].Data[i]; t.Data[j] = sequences[j].Data[i + 1]; } NdArray[] result = model.Forward(x); totalLoss += new MeanSquaredError().Evaluate(result, t); backNdArrays.Push(result); } for (int i = 0; backNdArrays.Count > 0; i++) { model.Backward(backNdArrays.Pop()); } return(totalLoss / (LENGTH_OF_SEQUENCE - 1)); }
static Real ComputeLoss(FunctionStack model, NdArray[] sequences) { Ensure.Argument(model).NotNull(); Ensure.Argument(sequences).NotNull(); // Aggregate the loss over the whole sequence Real totalLoss = 0; NdArray x = new NdArray(new[] { 1 }, MINI_BATCH_SIZE, (Function)null); NdArray t = new NdArray(new[] { 1 }, MINI_BATCH_SIZE, (Function)null); Stack <NdArray[]> backNdArrays = new Stack <NdArray[]>(); for (int i = 0; i < LENGTH_OF_SEQUENCE - 1; i++) { for (int j = 0; j < MINI_BATCH_SIZE; j++) { x.Data[j] = sequences[j].Data[i]; t.Data[j] = sequences[j].Data[i + 1]; } NdArray[] result = model.Forward(true, x); totalLoss += new MeanSquaredError().Evaluate(result, t); backNdArrays.Push(result); } for (int i = 0; backNdArrays.Count > 0; i++) { model.Backward(true, backNdArrays.Pop()); } return(totalLoss / (LENGTH_OF_SEQUENCE - 1)); }
public static void Run() { //Create the target filter (in practice this would be an unknown value) Deconvolution2D decon_core = new Deconvolution2D(1, 1, 15, 1, 7, gpuEnable: true) { Weight = { Data = MakeOneCore() } }; Deconvolution2D model = new Deconvolution2D(1, 1, 15, 1, 7, gpuEnable: true); SGD optimizer = new SGD(learningRate: 0.00005); //diverges if the learning rate is too large model.SetOptimizer(optimizer); MeanSquaredError meanSquaredError = new MeanSquaredError(); //The original source trains on the same training image every time; changed here to training closer to practice for (int i = 0; i < 11; i++) { //Generate an image with randomly placed dots NdArray img_p = getRandomImage(); //Produce the training image with the target filter NdArray[] img_core = decon_core.Forward(img_p); //Produce an image with the untrained filter NdArray[] img_y = model.Forward(img_p); Real loss = meanSquaredError.Evaluate(img_y, img_core); model.Backward(img_y); model.Update(); Console.WriteLine("epoch" + i + " : " + loss); } }
public void LinkのParameterがoptimizerで更新される() { var optimizer = new optimizers.SGD(lr: 0.001f); var link = new SimpleLink(); optimizer.Setup(link); var loss = MeanSquaredError.ForwardStatic( link.Forward(new Variable(Matrix <float> .Build.DenseOfArray(new float[, ] { { 1, 1, 1 } }).Transpose())), new Variable(Matrix <float> .Build.DenseOfArray(new float[, ] { { 1, 2, 3 } }).Transpose()) ); var before = link.constParam.Value; optimizer.ZeroGrads(); loss.Backward(); optimizer.Update(); var after = link.constParam.Value; Helper.AssertMatrixNotAlmostEqual(before, after, delta: 0); }
static void FourthNN() { var r = new Random(); var data = new Tensor((Matrix) new double[, ] { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 } }, true); var target = new Tensor((Matrix) new double[, ] { { 0 }, { 1 }, { 0 }, { 1 } }, true); var seq = new Sequential(); seq.Layers.Add(new Linear(2, 3, r)); seq.Layers.Add(new Linear(3, 1, r)); var sgd = new StochasticGradientDescent(seq.Parameters, 0.1f); var mse = new MeanSquaredError(); for (var i = 0; i < 10; i++) { var pred = seq.Forward(data); var loss = mse.Forward(pred, target); loss.Backward(new Tensor(Matrix.Ones(loss.Data.X, loss.Data.Y))); sgd.Step(); Console.WriteLine($"Epoch: {i} Loss: {loss}"); } }
public static void Run() { // Create a target filter (in practice this would be an unknown value) Deconvolution2D decon_core = new Deconvolution2D(true, 1, 1, 15, 1, 7, gpuEnable: true) { Weight = { Data = MakeOneCore() } }; Deconvolution2D model = new Deconvolution2D(true, 1, 1, 15, 1, 7, gpuEnable: true); SGD optimizer = new SGD(learningRate: 0.00005); // diverges if the learning rate is too large model.SetOptimizer(optimizer); MeanSquaredError meanSquaredError = new MeanSquaredError(); // The original sample trains on the same image every time; changed here to training closer to practice for (int i = 0; i < 11; i++) { // Generate an image with randomly placed dots NdArray img_p = getRandomImage(); // Produce the training image with the target filter NdArray[] img_core = decon_core.Forward(true, img_p); // Produce an image with the untrained filter NdArray[] img_y = model.Forward(true, img_p); Real loss = meanSquaredError.Evaluate(img_y, img_core); model.Backward(true, img_y); model.Update(); RILogManager.Default?.SendDebug("epoch" + i + " : " + loss); } }
/// <summary> /// Add a set of evaluation metrics to the set of observations. /// </summary> /// <param name="metrics">The observed regression evaluation metric</param> void IMetricsStatistics <RegressionMetrics> .Add(RegressionMetrics metrics) { MeanAbsoluteError.Add(metrics.MeanAbsoluteError); MeanSquaredError.Add(metrics.MeanSquaredError); RootMeanSquaredError.Add(metrics.RootMeanSquaredError); LossFunction.Add(metrics.LossFunction); RSquared.Add(metrics.RSquared); }
public void ReturnsLossFunction() { var graph = new TFGraph(); var context = new ModelCompilationContext(graph); var predictions = graph.Placeholder(TFDataType.Double, new TFShape(-1, 10)); var actuals = graph.Placeholder(TFDataType.Double, new TFShape(-1, 10)); var loss = new MeanSquaredError().Compile(context, predictions, actuals); loss.Should().NotBeNull(); }
// Start is called before the first frame update IEnumerator Start() { var r = new System.Random(2); var x = (Matrix) new double[1000, 1]; Matrix.MatrixLoop((i, j) => { x[i, 0] = i; }, x.X, x.Y); var y = (Matrix) new double[1000, 1]; Matrix.MatrixLoop((i, j) => { y[i, 0] = i * 12 + 15 + r.Next(10); }, x.X, x.Y); // var x = new double[,] { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 } }; // var y = new double[,] { { 0 }, { 1 }, { 0 }, { 1 } }; var X = new Tensor(x, true); var Y = new Tensor(y, true); var seq = new Sequential(); seq.Layers.Add(new Linear(1, 1, r)); var sgd = new StochasticGradientDescent(seq.Parameters, 0.001); var mse = new MeanSquaredError(); for (var i = 0; i < 10000; i++) { yield return(null); var pred = seq.Forward(X); print(pred.Data.Size); var loss = mse.Forward(pred, Y); loss.Backward(); sgd.Step(); print($"Epoch: {i} Loss: {loss.Data[0, 0]}"); print(Y); print(pred); } print(seq.Forward(new Tensor(x))); }
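The synthetic data above is y = 12x + 15 + r.Next(10), so a well-trained Linear(1, 1) layer should approach a weight of about 12 and a bias of about 19.5 (15 plus the mean 4.5 of the uniform noise). A dependency-free closed-form check one could run beside the SGD loop; FitLine is a hypothetical helper, not part of the library:

//Ordinary least squares in closed form; requires System.Linq for Average().
static (double slope, double intercept) FitLine(double[] x, double[] y)
{
    double mx = x.Average(), my = y.Average();
    double cov = 0, var = 0;
    for (int i = 0; i < x.Length; i++)
    {
        cov += (x[i] - mx) * (y[i] - my); //covariance accumulator
        var += (x[i] - mx) * (x[i] - mx); //variance accumulator
    }
    double slope = cov / var;
    return (slope, my - slope * mx); //slope ~12, intercept ~19.5 for the data above
}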
private void Start() { r = new System.Random(seed); seq = new Sequential(); seq.Layers.Add(new Linear(4, 100, r)); seq.Layers.Add(new ReLuLayer()); seq.Layers.Add(new Linear(100, 10, r)); seq.Layers.Add(new ReLuLayer()); seq.Layers.Add(new Linear(10, 1, r)); sgd = new StochasticGradientDescent(seq.Parameters, learningRate); mse = new MeanSquaredError(); cartPole = GetComponent <DirectMovementCartPole>(); }
static void Main(string[] argv) { modshogun.init_shogun_with_defaults(); int N = 100; DoubleMatrix ground_truth = randn(1, N); DoubleMatrix predicted = randn(1, N); Labels ground_truth_labels = new Labels(ground_truth); Labels predicted_labels = new Labels(predicted); MeanSquaredError evaluator = new MeanSquaredError(); double mse = evaluator.evaluate(predicted_labels, ground_truth_labels); Console.WriteLine(mse); modshogun.exit_shogun(); }
public void MseTests() { var solution = DataFrame.FromCsvData(@"1.0 2.0 3"); var submission = DataFrame.FromCsvData(@"2.0 3.0 4"); var scoreKeeper = new MeanSquaredError(); Assert.AreEqual(1.0, scoreKeeper.Score(solution, submission)); var solutionStream = Koalas.CsvReader.StringToStream("1.0\n2.0\n3"); var submissionStream = Koalas.CsvReader.StringToStream("3.0\n4.0\n5"); Assert.AreEqual(4.0, Evaluate.Metric(solutionStream, submissionStream, "mse")); Assert.AreEqual(4.0, Evaluate.Metric(solutionStream, submissionStream, "MSE")); Assert.AreEqual(4.0, Evaluate.Metric(solutionStream, submissionStream, "mean squared error")); }
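The expected scores follow directly from the definition of MSE: for solution {1, 2, 3} and submission {2, 3, 4} every difference is 1, so the mean of the squared differences is 1.0; against {3, 4, 5} every difference is 2, giving 4.0. A dependency-free sketch of the computation, including the per-element gradient that the training examples elsewhere on this page rely on (note that some frameworks scale by 1/2n instead of 1/n):

//Minimal MSE: forward value plus gradient with respect to the predictions.
static double Mse(double[] pred, double[] target, double[] grad)
{
    double sum = 0;
    for (int i = 0; i < pred.Length; i++)
    {
        double diff = pred[i] - target[i];
        sum += diff * diff;
        grad[i] = 2 * diff / pred.Length; //d(MSE)/d(pred_i)
    }
    return sum / pred.Length;
}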
public void Iterationを回すと最適値になる() { var optimizer = new optimizers.SGD(lr: 0.05f); var link = new SimpleLink(); optimizer.Setup(link); var loss = MeanSquaredError.ForwardStatic( link.Forward(new Variable(Matrix <float> .Build.DenseOfArray(new float[, ] { { 1, 1, 1 } }).Transpose())), new Variable(Matrix <float> .Build.DenseOfArray(new float[, ] { { 1, 2, 3 } }).Transpose()) ); Assert.Greater(loss.Value[0, 0], 0.1f); var converge = false; for (int i = 0; i < 100; i++) { var lossEach = MeanSquaredError.ForwardStatic( link.Forward(new Variable(Matrix <float> .Build.DenseOfArray(new float[, ] { { 1, 1, 1 } }).Transpose())), new Variable(Matrix <float> .Build.DenseOfArray(new float[, ] { { 1, 2, 3 } }).Transpose()) ); if (lossEach.Value[0, 0] < 0.1f) { converge = true; break; } optimizer.ZeroGrads(); lossEach.Backward(); optimizer.Update(); } Assert.True(converge); }
static void Main(string[] args) { Operations K = new Operations(); //Load array to the tensor NDArray x = new NDArray(3, 3); x.Load(2, 4, 6, 1, 3, 5, 2, 3, 5); x.Print("Load X Values"); NDArray y = new NDArray(3, 1); y.Load(20, 15, 15); y.Print("Load Y Values"); //Create two layers, one with 6 neurons and another with 1 FullyConnected fc1 = new FullyConnected(3, 6, "relu"); FullyConnected fc2 = new FullyConnected(6, 1, "relu"); //Connect input by passing data from one layer to another fc1.Forward(x); fc2.Forward(fc1.Output); var preds = fc2.Output; preds.Print("Predictions"); //Calculate the mean square error cost between the predicted and expected values BaseCost cost = new MeanSquaredError(); var costValues = cost.Forward(preds, y); costValues.Print("MSE Cost"); //Calculate the mean absolute metric value for the predicted vs expected values BaseMetric metric = new MeanAbsoluteError(); var metricValues = metric.Calculate(preds, y); metricValues.Print("MAE Metric"); Console.ReadLine(); }
static Real ComputeLoss(FunctionStack <Real> model, NdArray <Real>[] sequences) { //Aggregate the loss over the whole sequence Real totalLoss = 0; NdArray <Real> x = new NdArray <Real>(new[] { 1 }, MINI_BATCH_SIZE); NdArray <Real> t = new NdArray <Real>(new[] { 1 }, MINI_BATCH_SIZE); for (int i = 0; i < LENGTH_OF_SEQUENCE - 1; i++) { for (int j = 0; j < MINI_BATCH_SIZE; j++) { x.Data[j] = sequences[j].Data[i]; t.Data[j] = sequences[j].Data[i + 1]; } NdArray <Real> result = model.Forward(x)[0]; totalLoss += new MeanSquaredError <Real>().Evaluate(result, t); model.Backward(result); } return(totalLoss / (LENGTH_OF_SEQUENCE - 1)); }
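MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE, and the sequences argument come from the surrounding sample (a sequence-prediction demo). A plausible sketch of how such sequences could be generated, with hypothetical constant values and an assumed NdArray<Real>(Real[]) constructor:

//Hypothetical constants and data generator; values are illustrative only.
const int MINI_BATCH_SIZE = 100;
const int LENGTH_OF_SEQUENCE = 50;

static NdArray<Real>[] MakeSequences(Random rnd)
{
    NdArray<Real>[] sequences = new NdArray<Real>[MINI_BATCH_SIZE];
    for (int j = 0; j < MINI_BATCH_SIZE; j++)
    {
        Real[] data = new Real[LENGTH_OF_SEQUENCE];
        for (int i = 0; i < LENGTH_OF_SEQUENCE; i++)
        {
            //one period of a sine wave plus a little uniform noise
            data[i] = (Real)(Math.Sin(2.0 * Math.PI * i / LENGTH_OF_SEQUENCE) + 0.1 * rnd.NextDouble());
        }
        sequences[j] = new NdArray<Real>(data);
    }
    return sequences;
}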
internal static HandleRef getCPtr(MeanSquaredError obj) { return((obj == null) ? new HandleRef(null, IntPtr.Zero) : obj.swigCPtr); }
public void chainer_pythonと同じ値になる() { var chain = new VerySmallChain(); var optimizer = new chainer.optimizers.Adam(); var input = new Variable(builder.DenseOfArray(new float[, ] { { 4, 3, 2 } })); var target = new Variable(builder.DenseOfArray(new float[, ] { { 100 } })); optimizer.Setup(chain); Helper.AssertMatrixAlmostEqual(chain.fc._Params["W"].Value, builder.DenseOfArray(new float[, ] { { -1, 0, 1 } })); Helper.AssertMatrixAlmostEqual(chain.fc._Params["b"].Value, builder.DenseOfArray(new float[, ] { { 1 } })); var loss = MeanSquaredError.ForwardStatic( chain.Forward(input), target ); Helper.AssertMatrixAlmostEqual( loss.Value, builder.DenseOfArray(new float[, ] { { 10201 } }), delta: 0.01f ); optimizer.ZeroGrads(); loss.Backward(); optimizer.Update(); loss = MeanSquaredError.ForwardStatic( chain.Forward(input), target ); Helper.AssertMatrixAlmostEqual( loss.Value, builder.DenseOfArray(new float[, ] { { 10198.9794921875f } }), delta: 0.01f ); optimizer.ZeroGrads(); loss.Backward(); optimizer.Update(); loss = MeanSquaredError.ForwardStatic( chain.Forward(input), target ); Helper.AssertMatrixAlmostEqual( loss.Value, builder.DenseOfArray(new float[, ] { { 10196.9609375f } }), delta: 0.01f ); for (int i = 0; i < 100; i++) { loss = MeanSquaredError.ForwardStatic( chain.Forward(input), target ); optimizer.ZeroGrads(); loss.Backward(); optimizer.Update(); } loss = MeanSquaredError.ForwardStatic( chain.Forward(input), target ); Helper.AssertMatrixAlmostEqual( loss.Value, builder.DenseOfArray(new float[, ] { { 9996.3515625f } }), delta: 0.01f ); }
internal static HandleRef getCPtr(MeanSquaredError obj) { return (obj == null) ? new HandleRef(null, IntPtr.Zero) : obj.swigCPtr; }
public static void Run() { // Prepare MNIST data RILogManager.Default?.SendDebug("MNIST Data Loading..."); MnistData mnistData = new MnistData(28); RILogManager.Default?.SendDebug("Training Start..."); // Write the network configuration in FunctionStack FunctionStack Layer1 = new FunctionStack("Test12 Layer 1", new Linear(true, 28 * 28, 256, name: "l1 Linear"), new BatchNormalization(true, 256, name: "l1 Norm"), new ReLU(name: "l1 ReLU") ); FunctionStack Layer2 = new FunctionStack("Test12 Layer 2", new Linear(true, 256, 256, name: "l2 Linear"), new BatchNormalization(true, 256, name: "l2 Norm"), new ReLU(name: "l2 ReLU") ); FunctionStack Layer3 = new FunctionStack("Test12 Layer 3", new Linear(true, 256, 256, name: "l3 Linear"), new BatchNormalization(true, 256, name: "l3 Norm"), new ReLU(name: "l3 ReLU") ); FunctionStack Layer4 = new FunctionStack("Test12 Layer 4", new Linear(true, 256, 10, name: "l4 Linear") ); // A FunctionStack can itself be stacked as a Function FunctionStack nn = new FunctionStack ("Test12", Layer1, Layer2, Layer3, Layer4 ); FunctionStack cDNI1 = new FunctionStack("Test12 DNI 1", new Linear(true, 256 + 10, 1024, name: "cDNI1 Linear1"), new BatchNormalization(true, 1024, name: "cDNI1 Norm1"), new ReLU(name: "cDNI1 ReLU1"), new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3") ); FunctionStack cDNI2 = new FunctionStack("Test12 DNI 2", new Linear(true, 256 + 10, 1024, name: "cDNI2 Linear1"), new BatchNormalization(true, 1024, name: "cDNI2 Norm1"), new ReLU(name: "cDNI2 ReLU1"), new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3") ); FunctionStack cDNI3 = new FunctionStack("Test12 DNI 3", new Linear(true, 256 + 10, 1024, name: "cDNI3 Linear1"), new BatchNormalization(true, 1024, name: "cDNI3 Norm1"), new ReLU(name: "cDNI3 ReLU1"), new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3") ); Layer1.SetOptimizer(new Adam("Adam", 0.00003f)); Layer2.SetOptimizer(new Adam("Adam", 0.00003f)); Layer3.SetOptimizer(new Adam("Adam", 0.00003f)); Layer4.SetOptimizer(new Adam("Adam", 0.00003f)); cDNI1.SetOptimizer(new Adam("Adam", 0.00003f)); cDNI2.SetOptimizer(new Adam("Adam", 0.00003f)); cDNI3.SetOptimizer(new Adam("Adam", 0.00003f)); // Describe each function stack RILogManager.Default?.SendDebug(Layer1.Describe()); RILogManager.Default?.SendDebug(Layer2.Describe()); RILogManager.Default?.SendDebug(Layer3.Describe()); RILogManager.Default?.SendDebug(Layer4.Describe()); RILogManager.Default?.SendDebug(cDNI1.Describe()); RILogManager.Default?.SendDebug(cDNI2.Describe()); RILogManager.Default?.SendDebug(cDNI3.Describe()); for (int epoch = 0; epoch < 10; epoch++) { // Aggregate the total loss Real totalLoss = 0; Real cDNI1totalLoss = 0; Real cDNI2totalLoss = 0; Real cDNI3totalLoss = 0; long totalLossCount = 0; long cDNI1totalLossCount = 0; long cDNI2totalLossCount = 0; long cDNI3totalLossCount = 0; // How many times to run a batch for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++) { RILogManager.Default?.SendDebug("epoch: " + (epoch + 1) + " of 10, batch iteration: " + i + " of " + TRAIN_DATA_COUNT); RILogManager.Default?.ViewerSendWatch("Epoch", epoch + 1); RILogManager.Default?.ViewerSendWatch("Batch Iteration", i); // Randomly sample from the training data TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28); // Run the first layer NdArray[] layer1ForwardResult = Layer1.Forward(true, datasetX.Data); ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label); 
// Get the estimated gradient for the first layer NdArray[] cDNI1Result = cDNI1.Forward(true, layer1ResultDataSet.GetTrainData()); // Apply the estimated gradient to the first layer layer1ForwardResult[0].Grad = cDNI1Result[0].Data.ToArray(); //Update the first layer Layer1.Backward(true, layer1ForwardResult); layer1ForwardResult[0].ParentFunc = null; Layer1.Update(); // Run the second layer NdArray[] layer2ForwardResult = Layer2.Forward(true, layer1ResultDataSet.Result); ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label); // Get the estimated gradient for the second layer NdArray[] cDNI2Result = cDNI2.Forward(true, layer2ResultDataSet.GetTrainData()); // Apply the estimated gradient to the second layer layer2ForwardResult[0].Grad = cDNI2Result[0].Data.ToArray(); //Update the second layer Layer2.Backward(true, layer2ForwardResult); layer2ForwardResult[0].ParentFunc = null; //Train the cDNI for the first layer Real cDNI1loss = new MeanSquaredError().Evaluate(cDNI1Result, new NdArray(layer1ResultDataSet.Result[0].Grad, cDNI1Result[0].Shape, cDNI1Result[0].BatchCount)); Layer2.Update(); cDNI1.Backward(true, cDNI1Result); cDNI1.Update(); cDNI1totalLoss += cDNI1loss; cDNI1totalLossCount++; //Run the third layer NdArray[] layer3ForwardResult = Layer3.Forward(true, layer2ResultDataSet.Result); ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label); //Get the estimated gradient for the third layer NdArray[] cDNI3Result = cDNI3.Forward(true, layer3ResultDataSet.GetTrainData()); //Apply the estimated gradient to the third layer layer3ForwardResult[0].Grad = cDNI3Result[0].Data.ToArray(); //Update the third layer Layer3.Backward(true, layer3ForwardResult); layer3ForwardResult[0].ParentFunc = null; //Train the cDNI for the second layer Real cDNI2loss = new MeanSquaredError().Evaluate(cDNI2Result, new NdArray(layer2ResultDataSet.Result[0].Grad, cDNI2Result[0].Shape, cDNI2Result[0].BatchCount)); Layer3.Update(); cDNI2.Backward(true, cDNI2Result); cDNI2.Update(); cDNI2totalLoss += cDNI2loss; cDNI2totalLossCount++; //Run the fourth layer NdArray[] layer4ForwardResult = Layer4.Forward(true, layer3ResultDataSet.Result); Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label); Layer4.Backward(true, layer4ForwardResult); layer4ForwardResult[0].ParentFunc = null; totalLoss += sumLoss; totalLossCount++; //Train the cDNI for the third layer Real cDNI3loss = new MeanSquaredError().Evaluate(cDNI3Result, new NdArray(layer3ResultDataSet.Result[0].Grad, cDNI3Result[0].Shape, cDNI3Result[0].BatchCount)); Layer4.Update(); cDNI3.Backward(true, cDNI3Result); cDNI3.Update(); cDNI3totalLoss += cDNI3loss; cDNI3totalLossCount++; RILogManager.Default?.SendDebug("\nbatch count " + i + "/" + TRAIN_DATA_COUNT); RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount); RILogManager.Default?.SendDebug("local loss " + sumLoss); RILogManager.Default?.SendDebug("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount); RILogManager.Default?.SendDebug("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount); RILogManager.Default?.SendDebug("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount); RILogManager.Default?.SendDebug("\ncDNI1 local loss " + cDNI1loss); RILogManager.Default?.SendDebug("cDNI2 local loss " + cDNI2loss); RILogManager.Default?.SendDebug("cDNI3 local loss " + cDNI3loss); if (i % 20 == 0) { RILogManager.Default?.SendDebug("\nTesting..."); TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28); Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label); 
RILogManager.Default?.SendDebug("accuracy " + accuracy); } } } }
public static void Main() { // Check your platformId using the method described in this article on setting up OpenCL/GPU // https://jinbeizame.hateblo.jp/entry/kelpnet_opencl_gpu Weaver.Initialize(ComputeDeviceTypes.Gpu, platformId: 1, deviceIndex: 0); // Download the pretrained VGG model from the web string modelFilePath = InternetFileDownloader.Donwload(DOWNLOAD_URL, MODEL_FILE); // Load the pretrained model as a list of Functions List <Function> vgg16Net = CaffemodelDataLoader.ModelLoad(modelFilePath); // Remove VGG's output layer and its activation function vgg16Net.RemoveAt(vgg16Net.Count() - 1); vgg16Net.RemoveAt(vgg16Net.Count() - 1); // Set gpuEnable to true on each VGG Function for (int i = 0; i < vgg16Net.Count - 1; i++) { // Switch layers that support the GPU over to GPU mode if (vgg16Net[i] is Convolution2D || vgg16Net[i] is Linear || vgg16Net[i] is MaxPooling) { ((IParallelizable)vgg16Net[i]).SetGpuEnable(true); } } // Convert the VGG list into a FunctionStack FunctionStack vgg = new FunctionStack(vgg16Net.ToArray()); // Compress the layers vgg.Compress(); // Prepare a new output layer and its activation function FunctionStack nn = new FunctionStack( new Linear(4096, 1, gpuEnable: true), new Sigmoid() ); // Set Adam as the optimizer nn.SetOptimizer(new Adam()); Console.WriteLine("DataSet Loading..."); // Prepare NdArrays for the training and test data // Download the dataset from the URL below and place the train folder in VGGTransfer/bin/Debug/Data. // https://www.kaggle.com/c/dogs-vs-cats/data NdArray[] trainData = new NdArray[TRAIN_DATA_LENGTH * 2]; NdArray[] trainLabel = new NdArray[TRAIN_DATA_LENGTH * 2]; NdArray[] testData = new NdArray[TEST_DATA_LENGTH * 2]; NdArray[] testLabel = new NdArray[TEST_DATA_LENGTH * 2]; for (int i = 0; i < TRAIN_DATA_LENGTH + TEST_DATA_LENGTH; i++) { // Load the dog and cat images Bitmap baseCatImage = new Bitmap("Data/train/cat." + i + ".jpg"); Bitmap baseDogImage = new Bitmap("Data/train/dog." + i + ".jpg"); // Define Bitmaps to hold the resized images Bitmap catImage = new Bitmap(224, 224, PixelFormat.Format24bppRgb); Bitmap dogImage = new Bitmap(224, 224, PixelFormat.Format24bppRgb); // Convert them to Graphics objects Graphics gCat = Graphics.FromImage(catImage); Graphics gDog = Graphics.FromImage(dogImage); // Draw the base images, resized, into catImage and dogImage gCat.DrawImage(baseCatImage, 0, 0, 224, 224); gDog.DrawImage(baseDogImage, 0, 0, 224, 224); // Dispose the Graphics objects and free their memory gCat.Dispose(); gDog.Dispose(); // Store the data into the training and test sets: the first TEST_DATA_LENGTH images go to the test set, the rest to the training set // Pixel values range from 0 to 255, so divide by 255 to normalize to 0 ~ 1 if (i < TEST_DATA_LENGTH) { // Convert each Image to an NdArray, feed it through vgg, and store the output features as input data testData[i * 2] = vgg.Predict(NdArrayConverter.Image2NdArray(catImage, false, true) / 255.0)[0]; testLabel[i * 2] = new NdArray(new Real[] { 0 }); testData[i * 2 + 1] = vgg.Predict(NdArrayConverter.Image2NdArray(dogImage, false, true) / 255.0)[0]; testLabel[i * 2 + 1] = new NdArray(new Real[] { 1 }); } else { trainData[(i - TEST_DATA_LENGTH) * 2] = vgg.Predict(NdArrayConverter.Image2NdArray(catImage, false, true) / 255.0)[0]; trainLabel[(i - TEST_DATA_LENGTH) * 2] = new NdArray(new Real[] { 0 }); trainData[(i - TEST_DATA_LENGTH) * 2 + 1] = vgg.Predict(NdArrayConverter.Image2NdArray(dogImage, false, true) / 255.0)[0]; trainLabel[(i - TEST_DATA_LENGTH) * 2 + 1] = new NdArray(new Real[] { 1 }); } } Console.WriteLine("Training Start..."); // Define NdArrays for the mini-batch NdArray batchData = new NdArray(new[] { 4096 }, BATCH_SIZE); NdArray batchLabel = new NdArray(new[] { 1 }, BATCH_SIZE); // Define the loss function (binary classification here, so mean squared error (MSE)) LossFunction lossFunction = new MeanSquaredError(); // Loop over the epochs for (int epoch = 0; epoch < 10; epoch++) { // Within one epoch, train once per mini-batch, TRAIN_DATA_COUNT times for (int step = 0; step < TRAIN_DATA_COUNT; step++) { // Prepare a mini-batch for (int i = 0; i < BATCH_SIZE; i++) { // Draw a random integer from 0 to training data size - 1 int index = Mother.Dice.Next(trainData.Length); // Copy trainData (NdArray[]) into the batchData (NdArray) layout Array.Copy(trainData[index].Data, 0, batchData.Data, i * batchData.Length, batchData.Length); batchLabel.Data[i] = trainLabel[index].Data[0]; } // Train (forward pass, loss computation, backward pass, update) NdArray[] output = nn.Forward(batchData); Real loss = lossFunction.Evaluate(output, batchLabel); nn.Backward(output); nn.Update(); } // Compute the recognition accuracy over all the test data Real accuracy = 0; for (int i = 0; i < TEST_DATA_LENGTH * 2; i++) { NdArray[] output = nn.Predict(testData[i]); // Count a sample as recognized correctly when |output - label| < 0.5 // (output < 0.5 when the label is 0, output > 0.5 when the label is 1) if (Math.Abs(output[0].Data[0] - testLabel[i].Data[0]) < 0.5) { accuracy += 1; } } accuracy /= TEST_DATA_LENGTH * 2.0; Console.WriteLine("Epoch: " + epoch + " accuracy: " + accuracy); } }
public static void Run() { //Prepare MNIST data Console.WriteLine("MNIST Data Loading..."); MnistData mnistData = new MnistData(); Console.WriteLine("Training Start..."); //Write the network configuration into a FunctionStack FunctionStack Layer1 = new FunctionStack( new Linear(28 * 28, 256, name: "l1 Linear"), new BatchNormalization(256, name: "l1 Norm"), new ReLU(name: "l1 ReLU") ); FunctionStack Layer2 = new FunctionStack( new Linear(256, 256, name: "l2 Linear"), new BatchNormalization(256, name: "l2 Norm"), new ReLU(name: "l2 ReLU") ); FunctionStack Layer3 = new FunctionStack( new Linear(256, 256, name: "l3 Linear"), new BatchNormalization(256, name: "l3 Norm"), new ReLU(name: "l3 ReLU") ); FunctionStack Layer4 = new FunctionStack( new Linear(256, 10, name: "l4 Linear") ); //A FunctionStack can itself be stacked as a Function FunctionStack nn = new FunctionStack ( Layer1, Layer2, Layer3, Layer4 ); FunctionStack cDNI1 = new FunctionStack( new Linear(256 + 10, 1024, name: "cDNI1 Linear1"), new BatchNormalization(1024, name: "cDNI1 Nrom1"), new ReLU(name: "cDNI1 ReLU1"), new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3") ); FunctionStack cDNI2 = new FunctionStack( new Linear(256 + 10, 1024, name: "cDNI2 Linear1"), new BatchNormalization(1024, name: "cDNI2 Nrom1"), new ReLU(name: "cDNI2 ReLU1"), new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3") ); FunctionStack cDNI3 = new FunctionStack( new Linear(256 + 10, 1024, name: "cDNI3 Linear1"), new BatchNormalization(1024, name: "cDNI3 Nrom1"), new ReLU(name: "cDNI3 ReLU1"), new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3") ); //Declare the optimizers Layer1.SetOptimizer(new Adam(0.00003f)); Layer2.SetOptimizer(new Adam(0.00003f)); Layer3.SetOptimizer(new Adam(0.00003f)); Layer4.SetOptimizer(new Adam(0.00003f)); cDNI1.SetOptimizer(new Adam(0.00003f)); cDNI2.SetOptimizer(new Adam(0.00003f)); cDNI3.SetOptimizer(new Adam(0.00003f)); for (int epoch = 0; epoch < 10; epoch++) { Console.WriteLine("epoch " + (epoch + 1)); //Aggregate the total loss Real totalLoss = 0; Real cDNI1totalLoss = 0; Real cDNI2totalLoss = 0; Real cDNI3totalLoss = 0; long totalLossCount = 0; long cDNI1totalLossCount = 0; long cDNI2totalLossCount = 0; long cDNI3totalLossCount = 0; //How many times to run a batch for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++) { //Randomly sample from the training data TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT); //Run the first layer NdArray[] layer1ForwardResult = Layer1.Forward(datasetX.Data); ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label); //Get the estimated gradient for the first layer NdArray[] cDNI1Result = cDNI1.Forward(layer1ResultDataSet.GetTrainData()); //Apply the estimated gradient to the first layer layer1ForwardResult[0].Grad = cDNI1Result[0].Data.ToArray(); //Update the first layer Layer1.Backward(layer1ForwardResult); layer1ForwardResult[0].ParentFunc = null; Layer1.Update(); //Run the second layer NdArray[] layer2ForwardResult = Layer2.Forward(layer1ResultDataSet.Result); ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label); //Get the estimated gradient for the second layer NdArray[] cDNI2Result = cDNI2.Forward(layer2ResultDataSet.GetTrainData()); //Apply the estimated gradient to the second layer layer2ForwardResult[0].Grad = cDNI2Result[0].Data.ToArray(); //Update the second layer Layer2.Backward(layer2ForwardResult); layer2ForwardResult[0].ParentFunc = null; //Train the cDNI for the first layer Real cDNI1loss = new MeanSquaredError().Evaluate(cDNI1Result, new NdArray(layer1ResultDataSet.Result[0].Grad, cDNI1Result[0].Shape, cDNI1Result[0].BatchCount)); Layer2.Update(); cDNI1.Backward(cDNI1Result); cDNI1.Update(); cDNI1totalLoss += cDNI1loss; cDNI1totalLossCount++; //Run the third layer NdArray[] layer3ForwardResult = Layer3.Forward(layer2ResultDataSet.Result); ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label); //Get the estimated gradient for the third layer NdArray[] cDNI3Result = cDNI3.Forward(layer3ResultDataSet.GetTrainData()); //Apply the estimated gradient to the third layer layer3ForwardResult[0].Grad = cDNI3Result[0].Data.ToArray(); //Update the third layer Layer3.Backward(layer3ForwardResult); layer3ForwardResult[0].ParentFunc = null; //Train the cDNI for the second layer Real cDNI2loss = new MeanSquaredError().Evaluate(cDNI2Result, new NdArray(layer2ResultDataSet.Result[0].Grad, cDNI2Result[0].Shape, cDNI2Result[0].BatchCount)); Layer3.Update(); cDNI2.Backward(cDNI2Result); cDNI2.Update(); cDNI2totalLoss += cDNI2loss; cDNI2totalLossCount++; //Run the fourth layer NdArray[] layer4ForwardResult = Layer4.Forward(layer3ResultDataSet.Result); //Evaluate the loss at the fourth layer Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label); //Update the fourth layer Layer4.Backward(layer4ForwardResult); layer4ForwardResult[0].ParentFunc = null; totalLoss += sumLoss; totalLossCount++; //Train the cDNI for the third layer Real cDNI3loss = new MeanSquaredError().Evaluate(cDNI3Result, new NdArray(layer3ResultDataSet.Result[0].Grad, cDNI3Result[0].Shape, cDNI3Result[0].BatchCount)); Layer4.Update(); cDNI3.Backward(cDNI3Result); cDNI3.Update(); cDNI3totalLoss += cDNI3loss; cDNI3totalLossCount++; Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT); //Output the results Console.WriteLine("total loss " + totalLoss / totalLossCount); Console.WriteLine("local loss " + sumLoss); Console.WriteLine("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount); Console.WriteLine("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount); Console.WriteLine("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount); Console.WriteLine("\ncDNI1 local loss " + cDNI1loss); Console.WriteLine("cDNI2 local loss " + cDNI2loss); Console.WriteLine("cDNI3 local loss " + cDNI3loss); //Test the accuracy every 20 batches if (i % 20 == 0) { Console.WriteLine("\nTesting..."); //Randomly sample from the test data TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT); //Run the test Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label); Console.WriteLine("accuracy " + accuracy); } } } }
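The ResultDataSet class used by the cDNI samples above is not shown. From its usage (each cDNI takes a 256 + 10-wide input), it evidently bundles a layer's output with the batch labels and concatenates the activations with a 10-class one-hot encoding. A plausible minimal reconstruction; the field types and the one-hot layout are assumptions:

//Hypothetical reconstruction of the helper class used by the cDNI samples above.
class ResultDataSet
{
    public readonly NdArray[] Result; //activations coming out of a layer
    public readonly NdArray Label;    //class labels for the batch (type assumed)

    public ResultDataSet(NdArray[] result, NdArray label)
    {
        Result = result;
        Label = label;
    }

    //Concatenate the 256-dim activation with a 10-dim one-hot label,
    //producing the 256 + 10 input expected by the cDNI stacks.
    public NdArray[] GetTrainData()
    {
        NdArray x = Result[0];
        int width = x.Shape[0] + 10;
        Real[] data = new Real[width * x.BatchCount];
        for (int b = 0; b < x.BatchCount; b++)
        {
            Array.Copy(x.Data, b * x.Shape[0], data, b * width, x.Shape[0]);
            data[b * width + x.Shape[0] + (int)Label.Data[b]] = 1; //one-hot slot
        }
        return new[] { new NdArray(data, new[] { width }, x.BatchCount) };
    }
}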
public void RnnLSTMRandomTest() { Python.Initialize(); Chainer.Initialize(); Real[,] input = { { 1.0f }, { 3.0f }, { 5.0f }, { 7.0f }, { 9.0f } }; Real[,] teach = { { 3.0f }, { 5.0f }, { 7.0f }, { 9.0f }, { 11.0f } }; Real[,] input2 = { { 3.0f }, { 5.0f }, { 7.0f }, { 9.0f }, { 11.0f } }; Real[,] teach2 = { { 5.0f }, { 7.0f }, { 9.0f }, { 11.0f }, { 13.0f } }; int outputCount = 1; int inputCount = 1; int hiddenCount = 2; Real[,] upwardInit = Initializer.GetRandomValues <Real[, ]>(hiddenCount, hiddenCount); Real[,] lateralInit = Initializer.GetRandomValues <Real[, ]>(hiddenCount, hiddenCount); Real[,,] biasInit = Initializer.GetRandomValues <Real[, , ]>(1, hiddenCount, 1); Real[,,] forgetBiasInit = Initializer.GetRandomValues <Real[, , ]>(1, hiddenCount, 1); //Chainer Real[,] w1 = Initializer.GetRandomValues <Real[, ]>(hiddenCount, inputCount); Real[] b1 = Initializer.GetRandomValues <Real[]>(hiddenCount); //Chainer Linear <Real> cLinear1 = new Linear <Real>(inputCount, hiddenCount, false, w1, b1); NChainer.LSTM <Real> cLstm = new NChainer.LSTM <Real>(hiddenCount, hiddenCount, lateralInit, upwardInit, biasInit, forgetBiasInit); Real[,] w2 = Initializer.GetRandomValues <Real[, ]>(outputCount, hiddenCount); Real[] b2 = Initializer.GetRandomValues <Real[]>(outputCount); Linear <Real> cLinear2 = new Linear <Real>(hiddenCount, outputCount, false, w2, b2); Variable <Real> cX1 = new Variable <Real>(input); Variable <Real> cY11 = cLinear1.Forward(cX1); Variable <Real> cY12 = cLstm.Forward(cY11); Variable <Real> cY13 = cLinear2.Forward(cY12); Variable <Real> cT = new Variable <Real>(teach); Variable <Real> cLoss = new NChainer.MeanSquaredError <Real>().Forward(cY13, cT); cLoss.Backward(); //KelpNet CL.Linear <Real> linear1 = new CL.Linear <Real>(inputCount, hiddenCount, false, w1, b1); LSTM <Real> lstm = new LSTM <Real>(hiddenCount, hiddenCount, lateralInit, upwardInit, biasInit, forgetBiasInit); CL.Linear <Real> linear2 = new CL.Linear <Real>(hiddenCount, outputCount, false, w2, b2); NdArray <Real> x1 = new NdArray <Real>(input, asBatch: true); NdArray <Real> y11 = linear1.Forward(x1)[0]; NdArray <Real> y12 = lstm.Forward(y11)[0]; NdArray <Real> y13 = linear2.Forward(y12)[0]; NdArray <Real> t = new NdArray <Real>(teach, asBatch: true); NdArray <Real> loss = new MeanSquaredError <Real>().Evaluate(y13, t); y13.Backward(); Real[] cY11data = ((Real[, ])cY11.Data).Flatten(); Real[] cY12data = ((Real[, ])cY12.Data).Flatten(); Real[] cY13data = ((Real[, ])cY13.Data).Flatten(); Real[] cXgrad = ((Real[, ])cX1.Grad).Flatten(); Real[] cupwardWGrad = ((Real[, ])cLstm.upward.W.Grad).Flatten(); Real[] cupwardbGrad = (Real[])cLstm.upward.b.Grad; //Set the tolerance Real delta = 0.00001f; //y11 Assert.AreEqual(cY11data.Length, y11.Data.Length); for (int i = 0; i < cY11data.Length; i++) { Assert.AreEqual(cY11data[i], y11.Data[i], delta); } //y12 Assert.AreEqual(cY12data.Length, y12.Data.Length); for (int i = 0; i < cY12data.Length; i++) { Assert.AreEqual(cY12data[i], y12.Data[i], delta); } //y13 Assert.AreEqual(cY13data.Length, y13.Data.Length); for (int i = 0; i < cY13data.Length; i++) { Assert.AreEqual(cY13data[i], y13.Data[i], delta); } //Widen the tolerance delta = 0.0001f; //loss Assert.AreEqual(cLoss.Data[0], loss.Data[0], delta); //x.Grad Assert.AreEqual(cXgrad.Length, x1.Grad.Length); for (int i = 0; i < cXgrad.Length; i++) { Assert.AreEqual(cXgrad[i], x1.Grad[i], delta); } Real[] cWgrad11 = ((Real[, ])cLinear1.W.Grad).Flatten(); Real[] cbgrad11 = (Real[])cLinear1.b.Grad; //W.grad Assert.AreEqual(cWgrad11.Length, 
linear1.Weight.Grad.Length); for (int i = 0; i < linear1.Weight.Grad.Length; i++) { Assert.AreEqual(cWgrad11[i], linear1.Weight.Grad[i], delta); } //b.grad Assert.AreEqual(cbgrad11.Length, linear1.Bias.Grad.Length); for (int i = 0; i < linear1.Bias.Grad.Length; i++) { Assert.AreEqual(cbgrad11[i], linear1.Bias.Grad[i], delta); } Real[] cWgrad12 = ((Real[, ])cLinear2.W.Grad).Flatten(); Real[] cbgrad12 = (Real[])cLinear2.b.Grad; //W.grad Assert.AreEqual(cWgrad12.Length, linear2.Weight.Grad.Length); for (int i = 0; i < linear2.Weight.Grad.Length; i++) { Assert.AreEqual(cWgrad12[i], linear2.Weight.Grad[i], delta); } //b.grad Assert.AreEqual(cbgrad12.Length, linear2.Bias.Grad.Length); for (int i = 0; i < linear2.Bias.Grad.Length; i++) { Assert.AreEqual(cbgrad12[i], linear2.Bias.Grad[i], delta); } //W.grad int wLen = lstm.upward.Weight.Grad.Length; Assert.AreEqual(cupwardWGrad.Length, lstm.upward.Weight.Grad.Length); for (int i = 0; i < wLen; i++) { Assert.AreEqual(cupwardWGrad[i + wLen * 0], lstm.upward.Weight.Grad[i], delta); } //b.grad int bLen = lstm.upward.Bias.Length; Assert.AreEqual(cupwardbGrad.Length, lstm.upward.Bias.Grad.Length); for (int i = 0; i < bLen; i++) { Assert.AreEqual(cupwardbGrad[i + wLen * 0], lstm.upward.Bias.Grad[i], delta); } //Second pass Variable <Real> cX2 = new Variable <Real>(input2); Variable <Real> cY21 = cLinear1.Forward(cX2); Variable <Real> cY22 = cLstm.Forward(cY21); Variable <Real> cY23 = cLinear2.Forward(cY22); Variable <Real> cT2 = new Variable <Real>(teach2); Variable <Real> cLoss2 = new NChainer.MeanSquaredError <Real>().Forward(cY23, cT2); //KelpNet NdArray <Real> x2 = new NdArray <Real>(input2, asBatch: true); NdArray <Real> y21 = linear1.Forward(x2)[0]; NdArray <Real> y22 = lstm.Forward(y21)[0]; NdArray <Real> y23 = linear2.Forward(y22)[0]; NdArray <Real> t2 = new NdArray <Real>(teach2, asBatch: true); NdArray <Real> loss2 = new MeanSquaredError <Real>().Evaluate(y23, t2); Assert.AreEqual(cLoss2.Data[0], loss2.Data[0], delta); //Run Backward cLoss2.Backward(); y23.Backward(); Real[] cYdata21 = ((Real[, ])cY21.Data).Flatten(); Real[] cYdata22 = ((Real[, ])cY22.Data).Flatten(); Real[] cYdata23 = ((Real[, ])cY23.Data).Flatten(); Real[] cXgrad2 = ((Real[, ])cX2.Grad).Flatten(); Real[] cupwardWGrad2 = ((Real[, ])cLstm.upward.W.Grad).Flatten(); Real[] cupwardbGrad2 = (Real[])cLstm.upward.b.Grad; Real[] clateralWGrad = ((Real[, ])cLstm.lateral.W.Grad).Flatten(); //y21 Assert.AreEqual(cYdata21.Length, y21.Data.Length); for (int i = 0; i < cYdata21.Length; i++) { Assert.AreEqual(cYdata21[i], y21.Data[i], delta); } //y22 Assert.AreEqual(cYdata22.Length, y22.Data.Length); for (int i = 0; i < cYdata22.Length; i++) { Assert.AreEqual(cYdata22[i], y22.Data[i], delta); } //y23 Assert.AreEqual(cYdata23.Length, y23.Data.Length); for (int i = 0; i < cYdata23.Length; i++) { Assert.AreEqual(cYdata23[i], y23.Data[i], delta); } //x.Grad Assert.AreEqual(cXgrad2.Length, x2.Grad.Length); for (int i = 0; i < cXgrad2.Length; i++) { Assert.AreEqual(cXgrad2[i], x2.Grad[i], delta); } //The error is considerably larger after passing through many functions, so widen the tolerance delta = 1.0f; Real[] cWgrad22 = ((Real[, ])cLinear2.W.Grad).Flatten(); Real[] cbgrad22 = (Real[])cLinear2.b.Grad; //W.grad Assert.AreEqual(cWgrad22.Length, linear2.Weight.Grad.Length); for (int i = 0; i < linear2.Weight.Grad.Length; i++) { Assert.AreEqual(cWgrad22[i], linear2.Weight.Grad[i], delta); } //b.grad Assert.AreEqual(cbgrad22.Length, linear2.Bias.Grad.Length); for (int i = 0; i < linear2.Bias.Grad.Length; i++) { Assert.AreEqual(cbgrad22[i], linear2.Bias.Grad[i], delta); } delta = 2.0f; 
//W.grad Assert.AreEqual(clateralWGrad.Length, lstm.lateral.Weight.Grad.Length); for (int i = 0; i < clateralWGrad.Length; i++) { Assert.AreEqual(clateralWGrad[i + wLen * 0], lstm.lateral.Weight.Grad[i], delta); } for (int i = 0; i < wLen; i++) { Assert.AreEqual(cupwardWGrad2[i + wLen * 0], lstm.upward.Weight.Grad[i], delta); } //b.grad for (int i = 0; i < bLen; i++) { Assert.AreEqual(cupwardbGrad2[i + wLen * 0], lstm.upward.Bias.Grad[i], delta); } delta = 20.0f; Real[] cWgrad21 = ((Real[, ])cLinear1.W.Grad).Flatten(); Real[] cbgrad21 = (Real[])cLinear1.b.Grad; //W.grad Assert.AreEqual(cWgrad21.Length, linear1.Weight.Grad.Length); for (int i = 0; i < linear1.Weight.Grad.Length; i++) { Assert.AreEqual(cWgrad21[i], linear1.Weight.Grad[i], delta); } //b.grad Assert.AreEqual(cbgrad21.Length, linear1.Bias.Grad.Length); for (int i = 0; i < linear1.Bias.Grad.Length; i++) { Assert.AreEqual(cbgrad21[i], linear1.Bias.Grad[i], delta); } }
public static void Run() { //Prepare the MNIST data Console.WriteLine("MNIST Data Loading..."); MnistData mnistData = new MnistData(); Console.WriteLine("Training Start..."); //Write the network configuration into a FunctionStack FunctionStack Layer1 = new FunctionStack( new Linear(28 * 28, 256, name: "l1 Linear"), new BatchNormalization(256, name: "l1 Norm"), new ReLU(name: "l1 ReLU") ); FunctionStack Layer2 = new FunctionStack( new Linear(256, 256, name: "l2 Linear"), new BatchNormalization(256, name: "l2 Norm"), new ReLU(name: "l2 ReLU") ); FunctionStack Layer3 = new FunctionStack( new Linear(256, 256, name: "l3 Linear"), new BatchNormalization(256, name: "l3 Norm"), new ReLU(name: "l3 ReLU") ); FunctionStack Layer4 = new FunctionStack( new Linear(256, 10, name: "l4 Linear") ); //A FunctionStack can itself be stacked as a Function FunctionStack nn = new FunctionStack ( Layer1, Layer2, Layer3, Layer4 ); FunctionStack DNI1 = new FunctionStack( new Linear(256, 1024, name: "DNI1 Linear1"), new BatchNormalization(1024, name: "DNI1 Nrom1"), new ReLU(name: "DNI1 ReLU1"), new Linear(1024, 1024, name: "DNI1 Linear2"), new BatchNormalization(1024, name: "DNI1 Nrom2"), new ReLU(name: "DNI1 ReLU2"), new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3") ); FunctionStack DNI2 = new FunctionStack( new Linear(256, 1024, name: "DNI2 Linear1"), new BatchNormalization(1024, name: "DNI2 Nrom1"), new ReLU(name: "DNI2 ReLU1"), new Linear(1024, 1024, name: "DNI2 Linear2"), new BatchNormalization(1024, name: "DNI2 Nrom2"), new ReLU(name: "DNI2 ReLU2"), new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3") ); FunctionStack DNI3 = new FunctionStack( new Linear(256, 1024, name: "DNI3 Linear1"), new BatchNormalization(1024, name: "DNI3 Nrom1"), new ReLU(name: "DNI3 ReLU1"), new Linear(1024, 1024, name: "DNI3 Linear2"), new BatchNormalization(1024, name: "DNI3 Nrom2"), new ReLU(name: "DNI3 ReLU2"), new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3") ); //Declare the optimizers Layer1.SetOptimizer(new Adam()); Layer2.SetOptimizer(new Adam()); Layer3.SetOptimizer(new Adam()); Layer4.SetOptimizer(new Adam()); DNI1.SetOptimizer(new Adam()); DNI2.SetOptimizer(new Adam()); DNI3.SetOptimizer(new Adam()); //Three-generation learning for (int epoch = 0; epoch < 20; epoch++) { Console.WriteLine("epoch " + (epoch + 1)); Real totalLoss = 0; Real DNI1totalLoss = 0; Real DNI2totalLoss = 0; Real DNI3totalLoss = 0; long totalLossCount = 0; long DNI1totalLossCount = 0; long DNI2totalLossCount = 0; long DNI3totalLossCount = 0; //How many times to run a batch for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++) { //Randomly sample from the training data TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT); //Run the first layer NdArray[] layer1ForwardResult = Layer1.Forward(datasetX.Data); //Get the estimated gradient for the first layer NdArray[] DNI1Result = DNI1.Forward(layer1ForwardResult); //Apply the estimated gradient to the first layer layer1ForwardResult[0].Grad = DNI1Result[0].Data.ToArray(); //Update the first layer Layer1.Backward(layer1ForwardResult); layer1ForwardResult[0].ParentFunc = null; //Backward has been run, so cut the computation graph Layer1.Update(); //Run the second layer NdArray[] layer2ForwardResult = Layer2.Forward(layer1ForwardResult); //Get the estimated gradient for the second layer NdArray[] DNI2Result = DNI2.Forward(layer2ForwardResult); //Apply the estimated gradient to the second layer layer2ForwardResult[0].Grad = DNI2Result[0].Data.ToArray(); //Update the second layer Layer2.Backward(layer2ForwardResult); layer2ForwardResult[0].ParentFunc = null; //Train the DNI for the first layer Real DNI1loss = new MeanSquaredError().Evaluate(DNI1Result, new NdArray(layer1ForwardResult[0].Grad, DNI1Result[0].Shape, DNI1Result[0].BatchCount)); Layer2.Update(); DNI1.Backward(DNI1Result); DNI1.Update(); 
DNI1totalLoss += DNI1loss; DNI1totalLossCount++; //Run the third layer NdArray[] layer3ForwardResult = Layer3.Forward(layer2ForwardResult); //Get the estimated gradient for the third layer NdArray[] DNI3Result = DNI3.Forward(layer3ForwardResult); //Apply the estimated gradient to the third layer layer3ForwardResult[0].Grad = DNI3Result[0].Data.ToArray(); //Update the third layer Layer3.Backward(layer3ForwardResult); layer3ForwardResult[0].ParentFunc = null; //Train the DNI for the second layer Real DNI2loss = new MeanSquaredError().Evaluate(DNI2Result, new NdArray(layer2ForwardResult[0].Grad, DNI2Result[0].Shape, DNI2Result[0].BatchCount)); Layer3.Update(); DNI2.Backward(DNI2Result); DNI2.Update(); DNI2totalLoss += DNI2loss; DNI2totalLossCount++; //Run the fourth layer NdArray[] layer4ForwardResult = Layer4.Forward(layer3ForwardResult); //Evaluate the loss at the fourth layer Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, datasetX.Label); //Update the fourth layer Layer4.Backward(layer4ForwardResult); layer4ForwardResult[0].ParentFunc = null; totalLoss += sumLoss; totalLossCount++; //Train the DNI for the third layer Real DNI3loss = new MeanSquaredError().Evaluate(DNI3Result, new NdArray(layer3ForwardResult[0].Grad, DNI3Result[0].Shape, DNI3Result[0].BatchCount)); Layer4.Update(); DNI3.Backward(DNI3Result); DNI3.Update(); DNI3totalLoss += DNI3loss; DNI3totalLossCount++; Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT); //Output the results Console.WriteLine("total loss " + totalLoss / totalLossCount); Console.WriteLine("local loss " + sumLoss); Console.WriteLine("\nDNI1 total loss " + DNI1totalLoss / DNI1totalLossCount); Console.WriteLine("DNI2 total loss " + DNI2totalLoss / DNI2totalLossCount); Console.WriteLine("DNI3 total loss " + DNI3totalLoss / DNI3totalLossCount); Console.WriteLine("\nDNI1 local loss " + DNI1loss); Console.WriteLine("DNI2 local loss " + DNI2loss); Console.WriteLine("DNI3 local loss " + DNI3loss); //Test the accuracy every 20 batches if (i % 20 == 0) { Console.WriteLine("\nTesting..."); //Randomly sample from the test data TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT); //Run the test Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label); Console.WriteLine("accuracy " + accuracy); } } } }
public static void Run() { //MNISTのデータを用意する Console.WriteLine("MNIST Data Loading..."); MnistData <Real> mnistData = new MnistData <Real>(); Console.WriteLine("Training Start..."); //ネットワークの構成を FunctionStack に書き連ねる FunctionStack <Real> Layer1 = new FunctionStack <Real>( new Linear <Real>(28 * 28, 256, name: "l1 Linear"), new BatchNormalization <Real>(256, name: "l1 Norm"), new ReLU <Real>(name: "l1 ReLU") ); FunctionStack <Real> Layer2 = new FunctionStack <Real>( new Linear <Real>(256, 256, name: "l2 Linear"), new BatchNormalization <Real>(256, name: "l2 Norm"), new ReLU <Real>(name: "l2 ReLU") ); FunctionStack <Real> Layer3 = new FunctionStack <Real>( new Linear <Real>(256, 256, name: "l3 Linear"), new BatchNormalization <Real>(256, name: "l3 Norm"), new ReLU <Real>(name: "l3 ReLU") ); FunctionStack <Real> Layer4 = new FunctionStack <Real>( new Linear <Real>(256, 10, name: "l4 Linear") ); //FunctionStack自身もFunctionとして積み上げられる FunctionStack <Real> nn = new FunctionStack <Real> ( Layer1, Layer2, Layer3, Layer4 ); FunctionStack <Real> cDNI1 = new FunctionStack <Real>( new Linear <Real>(256 + 10, 1024, name: "cDNI1 Linear1"), new BatchNormalization <Real>(1024, name: "cDNI1 Nrom1"), new ReLU <Real>(name: "cDNI1 ReLU1"), new Linear <Real>(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3") ); FunctionStack <Real> cDNI2 = new FunctionStack <Real>( new Linear <Real>(256 + 10, 1024, name: "cDNI2 Linear1"), new BatchNormalization <Real>(1024, name: "cDNI2 Nrom1"), new ReLU <Real>(name: "cDNI2 ReLU1"), new Linear <Real>(1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3") ); FunctionStack <Real> cDNI3 = new FunctionStack <Real>( new Linear <Real>(256 + 10, 1024, name: "cDNI3 Linear1"), new BatchNormalization <Real>(1024, name: "cDNI3 Nrom1"), new ReLU <Real>(name: "cDNI3 ReLU1"), new Linear <Real>(1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3") ); //optimizerを宣言 //optimizerを宣言 Adam <Real> L1adam = new Adam <Real>(0.00003f); Adam <Real> L2adam = new Adam <Real>(0.00003f); Adam <Real> L3adam = new Adam <Real>(0.00003f); Adam <Real> L4adam = new Adam <Real>(0.00003f); L1adam.SetUp(Layer1); L2adam.SetUp(Layer2); L3adam.SetUp(Layer3); L4adam.SetUp(Layer4); Adam <Real> cDNI1adam = new Adam <Real>(0.00003f); Adam <Real> cDNI2adam = new Adam <Real>(0.00003f); Adam <Real> cDNI3adam = new Adam <Real>(0.00003f); cDNI1adam.SetUp(cDNI1); cDNI2adam.SetUp(cDNI2); cDNI3adam.SetUp(cDNI3); for (int epoch = 0; epoch < 10; epoch++) { Console.WriteLine("epoch " + (epoch + 1)); //全体での誤差を集計 Real totalLoss = 0; Real cDNI1totalLoss = 0; Real cDNI2totalLoss = 0; Real cDNI3totalLoss = 0; long totalLossCount = 0; long cDNI1totalLossCount = 0; long cDNI2totalLossCount = 0; long cDNI3totalLossCount = 0; //何回バッチを実行するか for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++) { //訓練データからランダムにデータを取得 TestDataSet <Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_DATA_COUNT); //第一層を実行 NdArray <Real> layer1ForwardResult = Layer1.Forward(datasetX.Data)[0]; ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label); //第一層の傾きを取得 NdArray <Real> cDNI1Result = cDNI1.Forward(layer1ResultDataSet.GetTrainData())[0]; //第一層の傾きを適用 layer1ForwardResult.Grad = cDNI1Result.Data.ToArray(); //第一層を更新 Layer1.Backward(layer1ForwardResult); layer1ForwardResult.ParentFunc = null; L1adam.Update(); //第二層を実行 NdArray <Real> layer2ForwardResult = Layer2.Forward(layer1ResultDataSet.Result)[0]; ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label); 
//第二層の傾きを取得 NdArray <Real> cDNI2Result = cDNI2.Forward(layer2ResultDataSet.GetTrainData())[0]; //第二層の傾きを適用 layer2ForwardResult.Grad = cDNI2Result.Data.ToArray(); //第二層を更新 Layer2.Backward(layer2ForwardResult); layer2ForwardResult.ParentFunc = null; //第一層用のcDNIの学習を実行 Real cDNI1loss = new MeanSquaredError <Real>().Evaluate(cDNI1Result, new NdArray <Real>(layer1ResultDataSet.Result.Grad, cDNI1Result.Shape, cDNI1Result.BatchCount)); L2adam.Update(); cDNI1.Backward(cDNI1Result); cDNI1adam.Update(); cDNI1totalLoss += cDNI1loss; cDNI1totalLossCount++; //第三層を実行 NdArray <Real> layer3ForwardResult = Layer3.Forward(layer2ResultDataSet.Result)[0]; ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label); //第三層の傾きを取得 NdArray <Real> cDNI3Result = cDNI3.Forward(layer3ResultDataSet.GetTrainData())[0]; //第三層の傾きを適用 layer3ForwardResult.Grad = cDNI3Result.Data.ToArray(); //第三層を更新 Layer3.Backward(layer3ForwardResult); layer3ForwardResult.ParentFunc = null; //第二層用のcDNIの学習を実行 Real cDNI2loss = new MeanSquaredError <Real>().Evaluate(cDNI2Result, new NdArray <Real>(layer2ResultDataSet.Result.Grad, cDNI2Result.Shape, cDNI2Result.BatchCount)); L3adam.Update(); cDNI2.Backward(cDNI2Result); cDNI2adam.Update(); cDNI2totalLoss += cDNI2loss; cDNI2totalLossCount++; //第四層を実行 NdArray <Real> layer4ForwardResult = Layer4.Forward(layer3ResultDataSet.Result)[0]; //第四層の傾きを取得 Real sumLoss = new SoftmaxCrossEntropy <Real>().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label); //第四層を更新 Layer4.Backward(layer4ForwardResult); layer4ForwardResult.ParentFunc = null; totalLoss += sumLoss; totalLossCount++; //第三層用のcDNIの学習を実行 Real cDNI3loss = new MeanSquaredError <Real>().Evaluate(cDNI3Result, new NdArray <Real>(layer3ResultDataSet.Result.Grad, cDNI3Result.Shape, cDNI3Result.BatchCount)); L4adam.Update(); cDNI3.Backward(cDNI3Result); cDNI3adam.Update(); cDNI3totalLoss += cDNI3loss; cDNI3totalLossCount++; Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT); //結果出力 Console.WriteLine("total loss " + totalLoss / totalLossCount); Console.WriteLine("local loss " + sumLoss); Console.WriteLine("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount); Console.WriteLine("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount); Console.WriteLine("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount); Console.WriteLine("\ncDNI1 local loss " + cDNI1loss); Console.WriteLine("cDNI2 local loss " + cDNI2loss); Console.WriteLine("cDNI3 local loss " + cDNI3loss); //20回バッチを動かしたら精度をテストする if (i % 20 == 0) { Console.WriteLine("\nTesting..."); //テストデータからランダムにデータを取得 TestDataSet <Real> datasetY = mnistData.Eval.GetRandomDataSet(TEST_DATA_COUNT); //テストを実行 Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label); Console.WriteLine("accuracy " + accuracy); } } } }
public static void Run() { // Prepare MNIST data RILogManager.Default?.SendDebug("MNIST Data Loading..."); MnistData mnistData = new MnistData(28); RILogManager.Default?.SendDebug("Training Start..."); // Write the network configuration in FunctionStack FunctionStack Layer1 = new FunctionStack("Test11 Layer 1", new Linear(true, 28 * 28, 256, name: "l1 Linear"), new BatchNormalization(true, 256, name: "l1 Norm"), new ReLU(name: "l1 ReLU") ); FunctionStack Layer2 = new FunctionStack("Test11 Layer 2", new Linear(true, 256, 256, name: "l2 Linear"), new BatchNormalization(true, 256, name: "l2 Norm"), new ReLU(name: "l2 ReLU") ); FunctionStack Layer3 = new FunctionStack("Test11 Layer 3", new Linear(true, 256, 256, name: "l3 Linear"), new BatchNormalization(true, 256, name: "l3 Norm"), new ReLU(name: "l3 ReLU") ); FunctionStack Layer4 = new FunctionStack("Test11 Layer 4", new Linear(true, 256, 10, name: "l4 Linear") ); // A FunctionStack can itself be stacked as a Function FunctionStack nn = new FunctionStack ("Test11", Layer1, Layer2, Layer3, Layer4 ); FunctionStack DNI1 = new FunctionStack("Test11 DNI1", new Linear(true, 256, 1024, name: "DNI1 Linear1"), new BatchNormalization(true, 1024, name: "DNI1 Norm1"), new ReLU(name: "DNI1 ReLU1"), new Linear(true, 1024, 1024, name: "DNI1 Linear2"), new BatchNormalization(true, 1024, name: "DNI1 Norm2"), new ReLU(name: "DNI1 ReLU2"), new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3") ); FunctionStack DNI2 = new FunctionStack("Test11 DNI2", new Linear(true, 256, 1024, name: "DNI2 Linear1"), new BatchNormalization(true, 1024, name: "DNI2 Norm1"), new ReLU(name: "DNI2 ReLU1"), new Linear(true, 1024, 1024, name: "DNI2 Linear2"), new BatchNormalization(true, 1024, name: "DNI2 Norm2"), new ReLU(name: "DNI2 ReLU2"), new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3") ); FunctionStack DNI3 = new FunctionStack("Test11 DNI3", new Linear(true, 256, 1024, name: "DNI3 Linear1"), new BatchNormalization(true, 1024, name: "DNI3 Norm1"), new ReLU(name: "DNI3 ReLU1"), new Linear(true, 1024, 1024, name: "DNI3 Linear2"), new BatchNormalization(true, 1024, name: "DNI3 Norm2"), new ReLU(name: "DNI3 ReLU2"), new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3") ); // Declare the optimizers Layer1.SetOptimizer(new Adam()); Layer2.SetOptimizer(new Adam()); Layer3.SetOptimizer(new Adam()); Layer4.SetOptimizer(new Adam()); DNI1.SetOptimizer(new Adam()); DNI2.SetOptimizer(new Adam()); DNI3.SetOptimizer(new Adam()); // Three-generation learning for (int epoch = 0; epoch < 20; epoch++) { RILogManager.Default?.SendDebug("epoch " + (epoch + 1)); Real totalLoss = 0; Real DNI1totalLoss = 0; Real DNI2totalLoss = 0; Real DNI3totalLoss = 0; long totalLossCount = 0; long DNI1totalLossCount = 0; long DNI2totalLossCount = 0; long DNI3totalLossCount = 0; // How many times to run a batch for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++) { // Randomly sample from the training data TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28); // Run the first layer NdArray[] layer1ForwardResult = Layer1.Forward(true, datasetX.Data); // Get the estimated gradient for the first layer NdArray[] DNI1Result = DNI1.Forward(true, layer1ForwardResult); // Apply the estimated gradient to the first layer layer1ForwardResult[0].Grad = DNI1Result[0].Data.ToArray(); // Update the first layer Layer1.Backward(true, layer1ForwardResult); layer1ForwardResult[0].ParentFunc = null; // Backward has been run, so cut the computation graph Layer1.Update(); // Run the second layer NdArray[] layer2ForwardResult = Layer2.Forward(true, layer1ForwardResult); // Get the estimated gradient for the second layer NdArray[] DNI2Result = DNI2.Forward(true, layer2ForwardResult); // Apply the estimated gradient to the second layer layer2ForwardResult[0].Grad = DNI2Result[0].Data.ToArray(); // Update the second layer Layer2.Backward(true, layer2ForwardResult); layer2ForwardResult[0].ParentFunc = null; // Train the DNI for the first layer Real DNI1loss = new MeanSquaredError().Evaluate(DNI1Result, new NdArray(layer1ForwardResult[0].Grad, DNI1Result[0].Shape, DNI1Result[0].BatchCount)); Layer2.Update(); DNI1.Backward(true, DNI1Result); DNI1.Update(); DNI1totalLoss += DNI1loss; DNI1totalLossCount++; // Run the third layer NdArray[] layer3ForwardResult = Layer3.Forward(true, layer2ForwardResult); // Get the estimated gradient for the third layer NdArray[] DNI3Result = DNI3.Forward(true, layer3ForwardResult); // Apply the estimated gradient to the third layer layer3ForwardResult[0].Grad = DNI3Result[0].Data.ToArray(); // Update the third layer Layer3.Backward(true, layer3ForwardResult); layer3ForwardResult[0].ParentFunc = null; // Train the DNI for the second layer Real DNI2loss = new MeanSquaredError().Evaluate(DNI2Result, new NdArray(layer2ForwardResult[0].Grad, DNI2Result[0].Shape, DNI2Result[0].BatchCount)); Layer3.Update(); DNI2.Backward(true, DNI2Result); DNI2.Update(); DNI2totalLoss += DNI2loss; DNI2totalLossCount++; // Run the fourth layer NdArray[] layer4ForwardResult = Layer4.Forward(true, layer3ForwardResult); // Evaluate the loss at the fourth layer Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, datasetX.Label); // Update the fourth layer Layer4.Backward(true, layer4ForwardResult); layer4ForwardResult[0].ParentFunc = null; totalLoss += sumLoss; totalLossCount++; // Train the DNI for the third layer Real DNI3loss = new MeanSquaredError().Evaluate(DNI3Result, new NdArray(layer3ForwardResult[0].Grad, DNI3Result[0].Shape, DNI3Result[0].BatchCount)); Layer4.Update(); DNI3.Backward(true, DNI3Result); DNI3.Update(); DNI3totalLoss += DNI3loss; DNI3totalLossCount++; RILogManager.Default?.SendDebug("batch count " + i + "/" + TRAIN_DATA_COUNT); RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount); RILogManager.Default?.SendDebug("local loss " + sumLoss); RILogManager.Default?.SendDebug("DNI1 total loss " + DNI1totalLoss / DNI1totalLossCount); RILogManager.Default?.SendDebug("DNI2 total loss " + DNI2totalLoss / DNI2totalLossCount); RILogManager.Default?.SendDebug("DNI3 total loss " + DNI3totalLoss / DNI3totalLossCount); RILogManager.Default?.SendDebug("DNI1 local loss " + DNI1loss); RILogManager.Default?.SendDebug("DNI2 local loss " + DNI2loss); RILogManager.Default?.SendDebug("DNI3 local loss " + DNI3loss); // Test the accuracy every 20 batches if (i % 20 == 0) { RILogManager.Default?.SendDebug("Testing..."); // Randomly sample from the test data TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28); // Run the test Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label); RILogManager.Default?.SendDebug("accuracy " + accuracy); } } } }
static void Main(string[] args) { Console.WriteLine("XOR Test"); int seed; using (var rng = new RNGCryptoServiceProvider()) { var buffer = new byte[sizeof(int)]; rng.GetBytes(buffer); seed = BitConverter.ToInt32(buffer, 0); } RandomProvider.SetSeed(seed); var filename = "XOR.xml"; var serializer = new DataContractSerializer(typeof(IEnumerable <Layer>), new Type[] { typeof(FullyConnected), typeof(Activation), typeof(Sigmoid) }); var patternList = new List <ValueTuple <double[], double[]> >(); var accuracyList = new List <double>(); var lossList = new List <double>(); Model model; patternList.Add(ValueTuple.Create <double[], double[]>(new double[] { 0, 0 }, new double[] { 0 })); patternList.Add(ValueTuple.Create <double[], double[]>(new double[] { 0, 1 }, new double[] { 1 })); patternList.Add(ValueTuple.Create <double[], double[]>(new double[] { 1, 0 }, new double[] { 1 })); patternList.Add(ValueTuple.Create <double[], double[]>(new double[] { 1, 1 }, new double[] { 0 })); if (File.Exists(filename)) { using (XmlReader xmlReader = XmlReader.Create(filename)) { model = new Model((IEnumerable <Layer>)serializer.ReadObject(xmlReader)); } } else { int epochs = 10000; int iterations = 1; ILossFunction lossFunction = new MeanSquaredError(); model = new Model( new FullyConnected(2, (fanIn, fanOut) => RandomProvider.GetRandom().NextDouble(), new Activation(new Sigmoid(), new FullyConnected(2, 1, (fanIn, fanOut) => RandomProvider.GetRandom().NextDouble())))); model.Stepped += (sender, e) => { double tptn = 0.0; patternList.ForEach(tuple => { if (ArgMax(model.Predict(tuple.Item1)) == ArgMax(tuple.Item2)) { tptn += 1.0; } }); var accuracy = tptn / patternList.Count; var loss = model.GetLoss(patternList, lossFunction); accuracyList.Add(accuracy); lossList.Add(loss); if (iterations % 2500 == 0) { Console.WriteLine("Epoch {0}/{1}", iterations, epochs); Console.WriteLine("Accuracy: {0}, Loss: {1}", accuracy, loss); } iterations++; }; Console.WriteLine("Training..."); var stopwatch = Stopwatch.StartNew(); model.Fit(patternList, epochs, 32, new Momentum(0.5, 0.1), lossFunction); stopwatch.Stop(); Console.WriteLine("Done ({0}).", stopwatch.Elapsed.ToString()); } foreach (var tuple in patternList) { Console.WriteLine("{0}->{1}", String.Join(",", tuple.Item1.Aggregate <double, List <string> >(new List <string>(), (x, y) => { x.Add(y.ToString()); return(x); })), String.Join(",", model.Predict(tuple.Item1).Aggregate <double, List <string> >(new List <string>(), (x, y) => { x.Add(y.ToString()); return(x); }))); } XmlWriterSettings settings = new XmlWriterSettings(); settings.Indent = true; settings.Encoding = new System.Text.UTF8Encoding(false); using (XmlWriter xmlWriter = XmlWriter.Create(filename, settings)) { serializer.WriteObject(xmlWriter, model.Layers); xmlWriter.Flush(); } }
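For reference, the MeanSquaredError passed to Model.Fit above is a standard per-output squared-error loss. A minimal sketch of such an implementation; the member names (Function, Derivative) are assumptions, not taken from the actual ILossFunction interface:

//Hypothetical ILossFunction-style MSE; member names are assumed.
public class MeanSquaredErrorSketch
{
    //L(y, t) = 0.5 * (y - t)^2 per output; the 0.5 cancels the 2 in the derivative
    public double Function(double y, double t)
    {
        return 0.5 * (y - t) * (y - t);
    }

    //dL/dy = y - t, the error signal backpropagated into the output layer
    public double Derivative(double y, double t)
    {
        return y - t;
    }
}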