Example #1
        public static void Run()
        {
            //Number of training iterations
            const int learningCount = 10000;

            //Training data
            Real[][] trainData =
            {
                new Real[] { 0, 0 },
                new Real[] { 1, 0 },
                new Real[] { 0, 1 },
                new Real[] { 1, 1 }
            };

            //Training data labels
            Real[][] trainLabel =
            {
                new Real[] { 0 },
                new Real[] { 1 },
                new Real[] { 1 },
                new Real[] { 0 }
            };

            //Describe the network configuration in a FunctionStack
            FunctionStack <Real> nn = new FunctionStack <Real>(
                new Linear <Real>(2, 2, name: "l1 Linear"),
                new Sigmoid <Real>(name: "l1 Sigmoid"),
                new Linear <Real>(2, 1, name: "l2 Linear")
                );

            //Declare the optimizer (Adam this time)
            Adam <Real> adam = new Adam <Real>();

            adam.SetUp(nn);

            //Training loop
            Console.WriteLine("Training...");
            for (int i = 0; i < learningCount; i++)
            {
                //Use MeanSquaredError as the loss function this time
                Trainer.Train(nn, trainData[0], trainLabel[0], new MeanSquaredError <Real>());
                Trainer.Train(nn, trainData[1], trainLabel[1], new MeanSquaredError <Real>());
                Trainer.Train(nn, trainData[2], trainLabel[2], new MeanSquaredError <Real>());
                Trainer.Train(nn, trainData[3], trainLabel[3], new MeanSquaredError <Real>());

                //By not running an update after each Train call, the samples are accumulated and updated as one mini-batch
                adam.Update();
            }

            //Display the training results
            Console.WriteLine("Test Start...");
            foreach (Real[] val in trainData)
            {
                NdArray <Real> result = nn.Predict(val)[0];
                Console.WriteLine(val[0] + " xor " + val[1] + " = " + (result.Data[0] > 0.5 ? 1 : 0) + " " + result);
            }
        }
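
In this sample the four Trainer.Train calls only accumulate gradients; the single adam.Update() then applies them once per loop, so the four XOR patterns are effectively one mini-batch. A minimal sketch of the alternative, assuming the same API as above: calling Update after every sample gives per-sample updates instead.

        //Minimal sketch (same API as above): update per sample instead of per mini-batch
        for (int i = 0; i < learningCount; i++)
        {
            for (int j = 0; j < trainData.Length; j++)
            {
                Trainer.Train(nn, trainData[j], trainLabel[j], new MeanSquaredError <Real>());
                adam.Update(); //apply the gradient immediately after each sample
            }
        }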
Example #2
        public static void Run()
        {
            DataMaker      dataMaker = new DataMaker(STEPS_PER_CYCLE, NUMBER_OF_CYCLES);
            NdArray <Real> trainData = dataMaker.Make();

            //Describe the network configuration in a FunctionStack
            FunctionStack <Real> model = new FunctionStack <Real>(
                new Linear <Real>(1, 5, name: "Linear l1"),
                new LSTM <Real>(5, 5, name: "LSTM l2"),
                new Linear <Real>(5, 1, name: "Linear l3")
                );

            //Declare the optimizer
            Adam <Real> adam = new Adam <Real>();

            adam.SetUp(model);

            //Training loop
            Console.WriteLine("Training...");
            for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++)
            {
                NdArray <Real>[] sequences = dataMaker.MakeMiniBatch(trainData, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

                Real loss = ComputeLoss(model, sequences);

                adam.Update();

                model.ResetState();

                if (epoch != 0 && epoch % DISPLAY_EPOCH == 0)
                {
                    Console.WriteLine("[{0}]training loss:\t{1}", epoch, loss);
                }
            }

            Console.WriteLine("Testing...");
            NdArray <Real>[] testSequences = dataMaker.MakeMiniBatch(trainData, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

            int sample_index = 45;

            predict(testSequences[sample_index], model, PREDICTION_LENGTH);
        }
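
The ComputeLoss helper is referenced above but not shown. A hypothetical sketch follows, reusing the sample's own constants (MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE) and assuming sequences[j] holds one time series per mini-batch sample: the model predicts each next value, the squared errors are accumulated, and Backward is unwound through the stored outputs (truncated BPTT, with the caller responsible for ResetState).

        //Hypothetical sketch of ComputeLoss; the layout of sequences is an assumption
        static Real ComputeLoss(FunctionStack <Real> model, NdArray <Real>[] sequences)
        {
            Real totalLoss = 0;
            MeanSquaredError <Real> mse = new MeanSquaredError <Real>();
            Stack <NdArray <Real> > results = new Stack <NdArray <Real> >();

            for (int i = 0; i < LENGTH_OF_SEQUENCE - 1; i++)
            {
                NdArray <Real> x = new NdArray <Real>(new[] { 1 }, MINI_BATCH_SIZE);
                NdArray <Real> t = new NdArray <Real>(new[] { 1 }, MINI_BATCH_SIZE);

                for (int j = 0; j < MINI_BATCH_SIZE; j++)
                {
                    x.Data[j] = sequences[j].Data[i];     //current value
                    t.Data[j] = sequences[j].Data[i + 1]; //next value is the target
                }

                NdArray <Real> result = model.Forward(x)[0];
                totalLoss += mse.Evaluate(result, t);
                results.Push(result); //keep outputs so the backward pass can unwind them
            }

            //backpropagate through time in reverse order
            while (results.Count > 0)
            {
                model.Backward(results.Pop());
            }

            return totalLoss / (LENGTH_OF_SEQUENCE - 1);
        }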
Example #3
        public static void Run()
        {
            //Prepare the MNIST data
            Console.WriteLine("MNIST Data Loading...");
            MnistData <Real> mnistData = new MnistData <Real>();


            Console.WriteLine("Training Start...");

            //Describe the network configuration in a FunctionStack
            FunctionStack <Real> Layer1 = new FunctionStack <Real>(
                new Linear <Real>(28 * 28, 256, name: "l1 Linear"),
                new BatchNormalization <Real>(256, name: "l1 Norm"),
                new ReLU <Real>(name: "l1 ReLU")
                );

            FunctionStack <Real> Layer2 = new FunctionStack <Real>(
                new Linear <Real>(256, 256, name: "l2 Linear"),
                new BatchNormalization <Real>(256, name: "l2 Norm"),
                new ReLU <Real>(name: "l2 ReLU")
                );

            FunctionStack <Real> Layer3 = new FunctionStack <Real>(
                new Linear <Real>(256, 256, name: "l3 Linear"),
                new BatchNormalization <Real>(256, name: "l3 Norm"),
                new ReLU <Real>(name: "l3 ReLU")
                );

            FunctionStack <Real> Layer4 = new FunctionStack <Real>(
                new Linear <Real>(256, 10, name: "l4 Linear")
                );

            //A FunctionStack can itself be stacked as a Function
            FunctionStack <Real> nn = new FunctionStack <Real>(
                Layer1,
                Layer2,
                Layer3,
                Layer4
                );

            FunctionStack <Real> DNI1 = new FunctionStack <Real>(
                new Linear <Real>(256, 1024, name: "DNI1 Linear1"),
                new BatchNormalization <Real>(1024, name: "DNI1 Norm1"),
                new ReLU <Real>(name: "DNI1 ReLU1"),
                new Linear <Real>(1024, 1024, name: "DNI1 Linear2"),
                new BatchNormalization <Real>(1024, name: "DNI1 Norm2"),
                new ReLU <Real>(name: "DNI1 ReLU2"),
                new Linear <Real>(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
                );

            FunctionStack <Real> DNI2 = new FunctionStack <Real>(
                new Linear <Real>(256, 1024, name: "DNI2 Linear1"),
                new BatchNormalization <Real>(1024, name: "DNI2 Norm1"),
                new ReLU <Real>(name: "DNI2 ReLU1"),
                new Linear <Real>(1024, 1024, name: "DNI2 Linear2"),
                new BatchNormalization <Real>(1024, name: "DNI2 Norm2"),
                new ReLU <Real>(name: "DNI2 ReLU2"),
                new Linear <Real>(1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3")
                );

            FunctionStack <Real> DNI3 = new FunctionStack <Real>(
                new Linear <Real>(256, 1024, name: "DNI3 Linear1"),
                new BatchNormalization <Real>(1024, name: "DNI3 Norm1"),
                new ReLU <Real>(name: "DNI3 ReLU1"),
                new Linear <Real>(1024, 1024, name: "DNI3 Linear2"),
                new BatchNormalization <Real>(1024, name: "DNI3 Norm2"),
                new ReLU <Real>(name: "DNI3 ReLU2"),
                new Linear <Real>(1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3")
                );

            //Declare the optimizers
            Adam <Real> L1adam = new Adam <Real>();
            Adam <Real> L2adam = new Adam <Real>();
            Adam <Real> L3adam = new Adam <Real>();
            Adam <Real> L4adam = new Adam <Real>();

            L1adam.SetUp(Layer1);
            L2adam.SetUp(Layer2);
            L3adam.SetUp(Layer3);
            L4adam.SetUp(Layer4);

            Adam <Real> DNI1adam = new Adam <Real>();
            Adam <Real> DNI2adam = new Adam <Real>();
            Adam <Real> DNI3adam = new Adam <Real>();

            DNI1adam.SetUp(DNI1);
            DNI2adam.SetUp(DNI2);
            DNI3adam.SetUp(DNI3);

            //Three-generation learning
            for (int epoch = 0; epoch < 20; epoch++)
            {
                Console.WriteLine("epoch " + (epoch + 1));

                Real totalLoss     = 0;
                Real DNI1totalLoss = 0;
                Real DNI2totalLoss = 0;
                Real DNI3totalLoss = 0;

                long totalLossCount     = 0;
                long DNI1totalLossCount = 0;
                long DNI2totalLossCount = 0;
                long DNI3totalLossCount = 0;

                //How many batches to run
                for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
                {
                    //Get random data from the training set
                    TestDataSet <Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_DATA_COUNT);

                    //Run the first layer
                    NdArray <Real> layer1ForwardResult = Layer1.Forward(datasetX.Data)[0];

                    //Get the gradient for the first layer
                    NdArray <Real> DNI1Result = DNI1.Forward(layer1ForwardResult)[0];

                    //Apply the gradient to the first layer
                    layer1ForwardResult.Grad = DNI1Result.Data.ToArray();

                    //Update the first layer
                    Layer1.Backward(layer1ForwardResult);
                    layer1ForwardResult.ParentFunc = null; //Backward has run, so cut the computation graph here
                    L1adam.Update();

                    //Run the second layer
                    NdArray <Real> layer2ForwardResult = Layer2.Forward(layer1ForwardResult)[0];

                    //Get the gradient for the second layer
                    NdArray <Real> DNI2Result = DNI2.Forward(layer2ForwardResult)[0];

                    //Apply the gradient to the second layer
                    layer2ForwardResult.Grad = DNI2Result.Data.ToArray();

                    //Update the second layer
                    Layer2.Backward(layer2ForwardResult);
                    layer2ForwardResult.ParentFunc = null;

                    //Train the DNI for the first layer
                    Real DNI1loss = new MeanSquaredError <Real>().Evaluate(DNI1Result, new NdArray <Real>(layer1ForwardResult.Grad, DNI1Result.Shape, DNI1Result.BatchCount));

                    L2adam.Update();

                    DNI1.Backward(DNI1Result);
                    DNI1adam.Update();

                    DNI1totalLoss += DNI1loss;
                    DNI1totalLossCount++;

                    //Run the third layer
                    NdArray <Real> layer3ForwardResult = Layer3.Forward(layer2ForwardResult)[0];

                    //Get the gradient for the third layer
                    NdArray <Real> DNI3Result = DNI3.Forward(layer3ForwardResult)[0];

                    //Apply the gradient to the third layer
                    layer3ForwardResult.Grad = DNI3Result.Data.ToArray();

                    //Update the third layer
                    Layer3.Backward(layer3ForwardResult);
                    layer3ForwardResult.ParentFunc = null;

                    //Train the DNI for the second layer
                    Real DNI2loss = new MeanSquaredError <Real>().Evaluate(DNI2Result, new NdArray <Real>(layer2ForwardResult.Grad, DNI2Result.Shape, DNI2Result.BatchCount));

                    L3adam.Update();

                    DNI2.Backward(DNI2Result);
                    DNI2adam.Update();

                    DNI2totalLoss += DNI2loss;
                    DNI2totalLossCount++;

                    //Run the fourth layer
                    NdArray <Real> layer4ForwardResult = Layer4.Forward(layer3ForwardResult)[0];

                    //Get the gradient for the fourth layer
                    Real sumLoss = new SoftmaxCrossEntropy <Real>().Evaluate(layer4ForwardResult, datasetX.Label);

                    //Update the fourth layer
                    Layer4.Backward(layer4ForwardResult);
                    layer4ForwardResult.ParentFunc = null;

                    totalLoss += sumLoss;
                    totalLossCount++;

                    //Train the DNI for the third layer
                    Real DNI3loss = new MeanSquaredError <Real>().Evaluate(DNI3Result, new NdArray <Real>(layer3ForwardResult.Grad, DNI3Result.Shape, DNI3Result.BatchCount));

                    L4adam.Update();

                    DNI3.Backward(DNI3Result);
                    DNI3adam.Update();

                    DNI3totalLoss += DNI3loss;
                    DNI3totalLossCount++;

                    Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
                    //Output the results
                    Console.WriteLine("total loss " + totalLoss / totalLossCount);
                    Console.WriteLine("local loss " + sumLoss);

                    Console.WriteLine("\nDNI1 total loss " + DNI1totalLoss / DNI1totalLossCount);
                    Console.WriteLine("DNI2 total loss " + DNI2totalLoss / DNI2totalLossCount);
                    Console.WriteLine("DNI3 total loss " + DNI3totalLoss / DNI3totalLossCount);

                    Console.WriteLine("\nDNI1 local loss " + DNI1loss);
                    Console.WriteLine("DNI2 local loss " + DNI2loss);
                    Console.WriteLine("DNI3 local loss " + DNI3loss);

                    //Test the accuracy after every 20 batches
                    if (i % 20 == 0)
                    {
                        Console.WriteLine("\nTesting...");

                        //Get random data from the test set
                        TestDataSet <Real> datasetY = mnistData.Eval.GetRandomDataSet(TEST_DATA_COUNT);

                        //Run the test
                        Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                        Console.WriteLine("accuracy " + accuracy);
                    }
                }
            }
        }
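
The recurring block above is the core of DNI (decoupled neural interfaces): each layer is updated from a synthetic gradient predicted by its DNI network, and each DNI is in turn trained against the true gradient once the layer above has backpropagated it. Distilled to a single layer, using only calls that appear in the sample:

        //The per-layer DNI step, distilled from the sample above
        NdArray <Real> forward   = Layer1.Forward(datasetX.Data)[0];
        NdArray <Real> synthetic = DNI1.Forward(forward)[0];

        forward.Grad = synthetic.Data.ToArray(); //synthetic gradient stands in for the real one
        Layer1.Backward(forward);                //the layer updates without waiting for the true backward pass
        forward.ParentFunc = null;
        L1adam.Update();

        //...later, after the layer above has backpropagated the true gradient into
        //forward.Grad, the DNI itself is trained to match it:
        Real dniLoss = new MeanSquaredError <Real>().Evaluate(
            synthetic, new NdArray <Real>(forward.Grad, synthetic.Shape, synthetic.BatchCount));
        DNI1.Backward(synthetic);
        DNI1adam.Update();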
Example #4
        public void AdamRandomTest()
        {
            Python.Initialize();
            Chainer.Initialize();

            int inputCount  = Mother.Dice.Next(2, 50);
            int outputCount = Mother.Dice.Next(2, 50);
            int batchCount  = Mother.Dice.Next(1, 5);

            Real[,] input   = Initializer.GetRandomValues <Real[, ]>(batchCount, inputCount);
            Real[,] dummyGy = Initializer.GetRandomValues <Real[, ]>(batchCount, outputCount);
            Real[,] w       = Initializer.GetRandomValues <Real[, ]>(outputCount, inputCount);
            Real[] b = Initializer.GetRandomValues <Real[]>(outputCount);


            float alpha = (float)Mother.Dice.NextDouble(); //0.001f
            float beta1 = (float)Mother.Dice.NextDouble(); //0.9f;
            float beta2 = (float)Mother.Dice.NextDouble(); //0.999f;
            float eps   = (float)Mother.Dice.NextDouble(); //1e-08f;
            float eta   = (float)Mother.Dice.NextDouble(); //1.0f;

            //Chainer
            NChainer.Linear <Real> cLinear = new NChainer.Linear <Real>(inputCount, outputCount, false, w, b);
            NChainer.Adam <Real>   cAdam   = new NChainer.Adam <Real>(alpha, beta1, beta2, eps, eta);
            cAdam.Setup(cLinear);

            Variable <Real> cX = new Variable <Real>(input);

            Variable <Real> cY = cLinear.Forward(cX);

            cY.Grad = dummyGy;

            cY.Backward();

            cAdam.Update();

            //KelpNet
            KelpNet.CL.Linear <Real> linear = new KelpNet.CL.Linear <Real>(inputCount, outputCount, false, w, b);
            KelpNet.Adam <Real>      adam   = new Adam <Real>(alpha, beta1, beta2, eps, eta);
            adam.SetUp(linear);

            NdArray <Real> x = new NdArray <Real>(input, asBatch: true);

            NdArray <Real> y = linear.Forward(x)[0];

            y.Grad = dummyGy.Flatten();

            y.Backward();

            adam.Update();


            Real[] cW = ((Real[, ])cLinear.W.Data).Flatten();
            Real[] cb = (Real[])cLinear.b.Data;

            //Set the allowable tolerance
            Real delta = 0.00001f;

            //Compare the updated W
            Assert.AreEqual(cW.Length, linear.Weight.Data.Length);
            for (int i = 0; i < linear.Weight.Data.Length; i++)
            {
                Assert.AreEqual(cW[i], linear.Weight.Data[i], delta);
            }

            //Compare the updated b
            Assert.AreEqual(cb.Length, linear.Bias.Data.Length);
            for (int i = 0; i < linear.Bias.Data.Length; i++)
            {
                Assert.AreEqual(cb[i], linear.Bias.Data[i], delta);
            }
        }
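
The test drives an identical randomized Linear layer through Chainer (via NChainer) and KelpNet, applies one Adam step on each side, and asserts the updated weights and biases agree element-wise within delta. The two assert loops could be factored into a small helper; this one is hypothetical, not part of the test class:

        //Hypothetical helper factoring out the element-wise comparison
        static void AssertArraysEqual(Real[] expected, Real[] actual, Real delta)
        {
            Assert.AreEqual(expected.Length, actual.Length);

            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], delta);
            }
        }

        //usage: AssertArraysEqual(cW, linear.Weight.Data, delta);
        //       AssertArraysEqual(cb, linear.Bias.Data, delta);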
Example #5
        public static void Run()
        {
            Console.WriteLine("Build Vocabulary.");

            Vocabulary vocabulary = new Vocabulary();
            string     trainPath  = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE, TRAIN_FILE_HASH);
            string     testPath   = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE, TEST_FILE_HASH);

            int[] trainData = vocabulary.LoadData(trainPath);
            int[] testData  = vocabulary.LoadData(testPath);

            int nVocab = vocabulary.Length;

            Console.WriteLine("Done.");

            Console.WriteLine("Network Initilizing.");
            FunctionStack <Real> model = new FunctionStack <Real>(
                new EmbedID <Real>(nVocab, N_UNITS, name: "l1 EmbedID"),
                new Linear <Real>(N_UNITS, N_UNITS, name: "l2 Linear"),
                new TanhActivation <Real>("l2 Tanh"),
                new Linear <Real>(N_UNITS, nVocab, name: "l3 Linear"),
                new Softmax <Real>("l3 Softmax")
                );

            Adam <Real> adam = new Adam <Real>();

            adam.SetUp(model);

            List <int> s = new List <int>();

            Console.WriteLine("Train Start.");
            SoftmaxCrossEntropy <Real> softmaxCrossEntropy = new SoftmaxCrossEntropy <Real>();

            for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++)
            {
                for (int pos = 0; pos < trainData.Length; pos++)
                {
                    NdArray <Real> h = new NdArray <Real>(new Real[N_UNITS]);

                    int id = trainData[pos];
                    s.Add(id);

                    if (id == vocabulary.EosID)
                    {
                        Real accumloss = 0;
                        Stack <NdArray <Real> > tmp = new Stack <NdArray <Real> >();

                        for (int i = 0; i < s.Count; i++)
                        {
                            NdArray <int> tx = new NdArray <int>(i == s.Count - 1 ? new[] { vocabulary.EosID } : new[] { s[i + 1] });

                            //l1 EmbedID
                            NdArray <Real> l1 = model.Functions[0].Forward(s[i])[0];

                            //l2 Linear
                            NdArray <Real> l2 = model.Functions[1].Forward(h)[0];

                            //Add
                            NdArray <Real> xK = l1 + l2;

                            //l2 Tanh
                            h = model.Functions[2].Forward(xK)[0];

                            //l3 Linear
                            NdArray <Real> h2 = model.Functions[3].Forward(h)[0];

                            Real loss = softmaxCrossEntropy.Evaluate(h2, tx);
                            tmp.Push(h2);
                            accumloss += loss;
                        }

                        Console.WriteLine(accumloss);

                        for (int i = 0; i < s.Count; i++)
                        {
                            model.Backward(tmp.Pop());
                        }

                        adam.Update();

                        s.Clear();
                    }

                    if (pos % 100 == 0)
                    {
                        Console.WriteLine(pos + "/" + trainData.Length + " finished");
                    }
                }
            }

            Console.WriteLine("Test Start.");

            Real       sum     = 0;
            int        wnum    = 0;
            List <int> ts      = new List <int>();
            bool       unkWord = false;

            for (int pos = 0; pos < 1000; pos++)
            {
                int id = testData[pos];
                ts.Add(id);

                if (id > trainData.Length)
                {
                    unkWord = true;
                }

                if (id == vocabulary.EosID)
                {
                    if (!unkWord)
                    {
                        Console.WriteLine("pos" + pos);
                        Console.WriteLine("tsLen" + ts.Count);
                        Console.WriteLine("sum" + sum);
                        Console.WriteLine("wnum" + wnum);

                        sum  += CalPs(model, ts);
                        wnum += ts.Count - 1;
                    }
                    else
                    {
                        unkWord = false;
                    }

                    ts.Clear();
                }
            }

            Console.WriteLine(Math.Pow(2.0f, sum / wnum));
        }
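
The CalPs helper is referenced above but not shown; the last line turns its accumulated log-probabilities into perplexity via Math.Pow(2.0, sum / wnum). A hypothetical sketch, mirroring the manual unrolling of the training loop (the layer indices and N_UNITS come from the sample; the indexing into the softmax output is an assumption):

        //Hypothetical sketch of CalPs: sum the negative base-2 log-probability
        //the model assigns to each next word of the sentence
        static Real CalPs(FunctionStack <Real> model, List <int> s)
        {
            Real sum = 0;
            NdArray <Real> h = new NdArray <Real>(new Real[N_UNITS]);

            for (int i = 0; i < s.Count - 1; i++)
            {
                NdArray <Real> l1 = model.Functions[0].Forward(s[i])[0]; //l1 EmbedID
                NdArray <Real> l2 = model.Functions[1].Forward(h)[0];    //l2 Linear
                h = model.Functions[2].Forward(l1 + l2)[0];              //l2 Tanh
                NdArray <Real> h2 = model.Functions[3].Forward(h)[0];    //l3 Linear
                NdArray <Real> y  = model.Functions[4].Forward(h2)[0];   //l3 Softmax

                //probability of the actual next word (assumed indexing)
                double p = y.Data[s[i + 1]];
                sum -= (Real)Math.Log(p, 2.0);
            }

            return sum;
        }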
Example #6
        public void AdamRandomTest()
        {
            Python.Initialize();
            Chainer.Initialize();

            int inputCount = Mother.Dice.Next(2, 50);
            int outputCount = Mother.Dice.Next(2, 50);
            int batchCount = Mother.Dice.Next(1, 5);

            Real[,] input = (Real[,])Initializer.GetRealNdArray(new[] { batchCount, inputCount });
            Real[,] dummyGy = (Real[,])Initializer.GetRealNdArray(new[] { batchCount, outputCount });
            Real[,] w = (Real[,])Initializer.GetRealNdArray(new[] { outputCount, inputCount });
            Real[] b = Initializer.GetRealArray(outputCount);


            float alpha = (float)Mother.Dice.NextDouble(); //0.001f
            float beta1 = (float)Mother.Dice.NextDouble(); //0.9f;
            float beta2 = (float)Mother.Dice.NextDouble(); //0.999f;
            float eps = (float)Mother.Dice.NextDouble(); //1e-08f;
            float eta = (float)Mother.Dice.NextDouble(); //1.0f;

            //Chainer
            NChainer.Linear<Real> cLinear = new NChainer.Linear<Real>(inputCount, outputCount, false, Real.ToBaseNdArray(w), Real.ToBaseArray(b));
            NChainer.Adam<Real> cAdam = new NChainer.Adam<Real>(alpha, beta1, beta2, eps, eta);
            cAdam.Setup(cLinear);

            Variable<Real> cX = new Variable<Real>(Real.ToBaseNdArray(input));

            Variable<Real> cY = cLinear.Forward(cX);
            cY.Grad = Real.ToBaseNdArray(dummyGy);

            cY.Backward();

            cAdam.Update();

            //KelpNet
            KelpNet.CL.Linear linear = new KelpNet.CL.Linear(inputCount, outputCount, false, w, b);
            KelpNet.Adam adam = new Adam(alpha, beta1, beta2, eps, eta);
            adam.SetUp(linear);

            NdArray x = new NdArray(Real.ToRealArray(input), new[] { inputCount }, batchCount);

            NdArray y = linear.Forward(x)[0];
            y.Grad = Real.ToRealArray(dummyGy);

            y.Backward();

            adam.Update();


            Real[] cW = Real.ToRealArray((Real[,])cLinear.W.Data);
            Real[] cb = (Real[])cLinear.b.Data;

            //Set the allowable tolerance
            double delta = 0.00001;

            //Compare the updated W
            Assert.AreEqual(cW.Length, linear.Weight.Data.Length);
            for (int i = 0; i < linear.Weight.Data.Length; i++)
            {
                Assert.AreEqual(cW[i], linear.Weight.Data[i], delta);
            }

            //Compare the updated b
            Assert.AreEqual(cb.Length, linear.Bias.Data.Length);
            for (int i = 0; i < linear.Bias.Data.Length; i++)
            {
                Assert.AreEqual(cb[i], linear.Bias.Data[i], delta);
            }
        }