Example 1
        static double Evaluate(FunctionStack model, int[] dataset)
        {
            FunctionStack predictModel = (FunctionStack)model.Clone();

            predictModel.ResetState();

            Real totalLoss      = 0;
            long totalLossCount = 0;

            for (int i = 0; i < dataset.Length - 1; i++)
            {
                NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE);
                NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE);

                for (int j = 0; j < BATCH_SIZE; j++)
                {
                    x.Data[j] = dataset[j + i];
                    t.Data[j] = dataset[j + i + 1];
                }

                Real sumLoss = new SoftmaxCrossEntropy().Evaluate(predictModel.Forward(x), t);
                totalLoss += sumLoss;
                totalLossCount++;
            }

            //calc perplexity: exp of the accumulated cross-entropy loss per step
            return Math.Exp(totalLoss / (totalLossCount - 1));
        }
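
A note on the formula: the helper above reports perplexity, the exponential of the accumulated cross-entropy normalized by the step count. A minimal self-contained sketch of just that computation (the loss values below are dummies, not from the sample):

        using System;
        using System.Linq;

        static class PerplexityDemo
        {
            static void Main()
            {
                // Dummy per-step cross-entropy losses, e.g. collected over a validation run
                double[] losses = { 4.2, 3.9, 4.0, 4.1 };

                // Perplexity = exp(mean cross-entropy); lower is better,
                // and a uniform model over V tokens scores exactly V
                double perplexity = Math.Exp(losses.Average());

                Console.WriteLine($"perplexity: {perplexity:F2}");
            }
        }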
Example 2
        static Real Evaluate(FunctionStack <Real> model, int[] dataset)
        {
            FunctionStack <Real> predictModel = DeepCopyHelper <Real> .DeepCopy(model);

            predictModel.ResetState();

            Real totalLoss      = 0;
            long totalLossCount = 0;

            for (int i = 0; i < dataset.Length - 1; i++)
            {
                NdArray <Real> x = new NdArray <Real>(new[] { 1 }, BATCH_SIZE);
                NdArray <int>  t = new NdArray <int>(new[] { 1 }, BATCH_SIZE);

                for (int j = 0; j < BATCH_SIZE; j++)
                {
                    x.Data[j] = dataset[j + i];
                    t.Data[j] = dataset[j + i + 1];
                }

                NdArray <Real> result  = predictModel.Forward(x)[0];
                Real           sumLoss = new SoftmaxCrossEntropy <Real>().Evaluate(result, t);
                totalLoss += sumLoss;
                totalLossCount++;
            }

            //calc perplexity: exp of the accumulated cross-entropy loss per step
            return Math.Exp(totalLoss / (totalLossCount - 1));
        }
Example 3
        public void SoftmaxCrossEntropy()
        {
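            // Build two named constants and wire them into a SoftmaxCrossEntropy node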
            var softmax = new Const <double>(1.0, "softmax");
            var y       = new Const <double>(1.0, "y");
            var op      = new SoftmaxCrossEntropy <double>(softmax, y);

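            // Round-trip the op through its XML representation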
            var xml          = op.ToXml();
            var deserialized = SerializationExtensions.FromXml <double>(xml) as SoftmaxCrossEntropy <double>;

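            // Both parents and their names should survive the round trip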
            Assert.IsNotNull(deserialized);
            Assert.AreEqual(2, deserialized.Parents.Count);
            Assert.AreEqual("softmax", (deserialized.Parents[0] as Const <double>).Name);
            Assert.AreEqual("y", (deserialized.Parents[1] as Const <double>).Name);
        }
Example 4
        public static void Run()
        {
            Console.WriteLine("Build Vocabulary.");

            Vocabulary vocabulary = new Vocabulary();
            string     trainPath  = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE, TRAIN_FILE_HASH);
            string     testPath   = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE, TEST_FILE_HASH);

            int[] trainData = vocabulary.LoadData(trainPath);
            int[] testData  = vocabulary.LoadData(testPath);

            int nVocab = vocabulary.Length;

            Console.WriteLine("Done.");

            Console.WriteLine("Network Initializing.");
            FunctionStack model = new FunctionStack(
                new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"),
                new Linear(N_UNITS, N_UNITS, name: "l2 Linear"),
                new TanhActivation("l2 Tanh"),
                new Linear(N_UNITS, nVocab, name: "l3 Linear"),
                new Softmax("l3 Softmax")
                );

            model.SetOptimizer(new Adam());

            List <int> s = new List <int>();

            Console.WriteLine("Train Start.");
            SoftmaxCrossEntropy softmaxCrossEntropy = new SoftmaxCrossEntropy();

            for (int epoch = 0; epoch < TRAINING_EPOCHS; epoch++)
            {
                for (int pos = 0; pos < trainData.Length; pos++)
                {
                    NdArray h = new NdArray(new Real[N_UNITS]);

                    int id = trainData[pos];
                    s.Add(id);

                    if (id == vocabulary.EosID)
                    {
                        Real            accumloss = 0;
                        Stack <NdArray> tmp       = new Stack <NdArray>();

                        for (int i = 0; i < s.Count; i++)
                        {
                            int tx = i == s.Count - 1 ? vocabulary.EosID : s[i + 1];

                            //l1 EmbedID
                            NdArray l1 = model.Functions[0].Forward(s[i])[0];

                            //l2 Linear
                            NdArray l2 = model.Functions[1].Forward(h)[0];

                            //Add
                            NdArray xK = l1 + l2;

                            //l2 Tanh
                            h = model.Functions[2].Forward(xK)[0];

                            //l3 Linear
                            NdArray h2 = model.Functions[3].Forward(h)[0];

                            Real loss = softmaxCrossEntropy.Evaluate(h2, tx);
                            tmp.Push(h2);
                            accumloss += loss;
                        }

                        Console.WriteLine(accumloss);

                        for (int i = 0; i < s.Count; i++)
                        {
                            model.Backward(tmp.Pop());
                        }

                        model.Update();
                        s.Clear();
                    }

                    if (pos % 100 == 0)
                    {
                        Console.WriteLine(pos + "/" + trainData.Length + " finished");
                    }
                }
            }

            Console.WriteLine("Test Start.");

            Real       sum     = 0;
            int        wnum    = 0;
            List <int> ts      = new List <int>();
            bool       unkWord = false;

            for (int pos = 0; pos < 1000; pos++)
            {
                int id = testData[pos];
                ts.Add(id);

                if (id > trainData.Length)
                {
                    unkWord = true;
                }

                if (id == vocabulary.EosID)
                {
                    if (!unkWord)
                    {
                        Console.WriteLine("pos " + pos);
                        Console.WriteLine("tsLen " + ts.Count);
                        Console.WriteLine("sum " + sum);
                        Console.WriteLine("wnum " + wnum);

                        sum  += CalPs(model, ts);
                        wnum += ts.Count - 1;
                    }
                    else
                    {
                        unkWord = false;
                    }

                    ts.Clear();
                }
            }

            Console.WriteLine(Math.Pow(2.0, sum / wnum));
        }
Example 5
        /// <summary>
        ///     This sample shows how to serialize and deserialize a ConvNetSharp.Flow network
        ///     1) Graph creation
        ///     2) Dummy Training (only use a single data point)
        ///     3) Serialization
        ///     4) Deserialization
        /// </summary>
        private static void Main()
        {
            var cns = new ConvNetSharp <double>();

            // 1) Graph creation
            var input = cns.PlaceHolder("x"); // input

            var dense1  = cns.Dense(input, 20) + cns.Variable(BuilderInstance <double> .Volume.From(new double[20].Populate(0.1), new Shape(20)), "bias1", true);
            var relu    = cns.Relu(dense1);
            var dense2  = cns.Dense(relu, 10) + cns.Variable(new Shape(10), "bias2", true);
            var softmax = cns.Softmax(dense2); // output

            var output = cns.PlaceHolder("y"); // ground truth
            var cost   = new SoftmaxCrossEntropy <double>(cns, softmax, output);

            var x = BuilderInstance <double> .Volume.From(new[] { 0.3, -0.5 }, new Shape(2));

            var y = BuilderInstance <double> .Volume.From(new[] { 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, new Shape(10));

            var dico = new Dictionary <string, Volume <double> > {
                { "x", x }, { "y", y }
            };

            var count     = 0;
            var optimizer = new GradientDescentOptimizer <double>(cns, 0.01);

            using (var session = new Session <double>())
            {
                session.Differentiate(cost); // computes dCost/dW at every node of the graph

                // 2) Dummy Training (only use a single data point)
                double currentCost;
                do
                {
                    currentCost = Math.Abs(session.Run(cost, dico, false).ToArray().Sum());
                    Console.WriteLine($"cost: {currentCost}");

                    session.Run(optimizer, dico);
                    count++;
                } while (currentCost > 1e-2);

                Console.WriteLine($"{count}");

                // Forward pass with original network
                var result = session.Run(softmax, new Dictionary <string, Volume <double> > {
                    { "x", x }
                });
                Console.WriteLine("probability that x is class 0: " + result.Get(0));
            }

            // 3) Serialization
            softmax.Save("MyNetwork");

            // 4) Deserialization
            var deserialized = SerializationExtensions.Load <double>("MyNetwork", false)[0]; // first element is the model (second element is the cost if it was saved along)

            using (var session = new Session <double>())
            {
                // Forward pass with deserialized network
                var result = session.Run(deserialized, new Dictionary <string, Volume <double> > {
                    { "x", x }
                });
                Console.WriteLine("probability that x is class 0: " + result.Get(0)); // This should give exactly the same result as previous network evaluation
            }

            Console.ReadLine();
        }
Example 6
        public static void Run()
        {
            //Prepare the MNIST data
            Console.WriteLine("MNIST Data Loading...");
            MnistData mnistData = new MnistData();


            Console.WriteLine("Training Start...");

            //Write the network configuration into a FunctionStack
            FunctionStack Layer1 = new FunctionStack(
                new Linear(28 * 28, 256, name: "l1 Linear"),
                new BatchNormalization(256, name: "l1 Norm"),
                new ReLU(name: "l1 ReLU")
                );

            FunctionStack Layer2 = new FunctionStack(
                new Linear(256, 256, name: "l2 Linear"),
                new BatchNormalization(256, name: "l2 Norm"),
                new ReLU(name: "l2 ReLU")
                );

            FunctionStack Layer3 = new FunctionStack(
                new Linear(256, 256, name: "l3 Linear"),
                new BatchNormalization(256, name: "l3 Norm"),
                new ReLU(name: "l3 ReLU")
                );

            FunctionStack Layer4 = new FunctionStack(
                new Linear(256, 10, name: "l4 Linear")
                );

            //A FunctionStack can itself be stacked as a Function
            FunctionStack nn = new FunctionStack
                               (
                Layer1,
                Layer2,
                Layer3,
                Layer4
                               );

            FunctionStack DNI1 = new FunctionStack(
                new Linear(256, 1024, name: "DNI1 Linear1"),
                new BatchNormalization(1024, name: "DNI1 Norm1"),
                new ReLU(name: "DNI1 ReLU1"),
                new Linear(1024, 1024, name: "DNI1 Linear2"),
                new BatchNormalization(1024, name: "DNI1 Norm2"),
                new ReLU(name: "DNI1 ReLU2"),
                new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
                );

            FunctionStack DNI2 = new FunctionStack(
                new Linear(256, 1024, name: "DNI2 Linear1"),
                new BatchNormalization(1024, name: "DNI2 Norm1"),
                new ReLU(name: "DNI2 ReLU1"),
                new Linear(1024, 1024, name: "DNI2 Linear2"),
                new BatchNormalization(1024, name: "DNI2 Norm2"),
                new ReLU(name: "DNI2 ReLU2"),
                new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3")
                );

            FunctionStack DNI3 = new FunctionStack(
                new Linear(256, 1024, name: "DNI3 Linear1"),
                new BatchNormalization(1024, name: "DNI3 Norm1"),
                new ReLU(name: "DNI3 ReLU1"),
                new Linear(1024, 1024, name: "DNI3 Linear2"),
                new BatchNormalization(1024, name: "DNI3 Norm2"),
                new ReLU(name: "DNI3 ReLU2"),
                new Linear(1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3")
                );

            //Declare the optimizers
            Layer1.SetOptimizer(new Adam());
            Layer2.SetOptimizer(new Adam());
            Layer3.SetOptimizer(new Adam());
            Layer4.SetOptimizer(new Adam());

            DNI1.SetOptimizer(new Adam());
            DNI2.SetOptimizer(new Adam());
            DNI3.SetOptimizer(new Adam());

            //Three-generation learning
            for (int epoch = 0; epoch < 20; epoch++)
            {
                Console.WriteLine("epoch " + (epoch + 1));

                Real totalLoss     = 0;
                Real DNI1totalLoss = 0;
                Real DNI2totalLoss = 0;
                Real DNI3totalLoss = 0;

                long totalLossCount     = 0;
                long DNI1totalLossCount = 0;
                long DNI2totalLossCount = 0;
                long DNI3totalLossCount = 0;

                //How many times to run the batch
                for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
                {
                    //Get data randomly from the training data
                    TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT);

                    //Run the first layer
                    NdArray[] layer1ForwardResult = Layer1.Forward(datasetX.Data);

                    //Get the gradient of the first layer
                    NdArray[] DNI1Result = DNI1.Forward(layer1ForwardResult);

                    //Apply the gradient of the first layer
                    layer1ForwardResult[0].Grad = DNI1Result[0].Data.ToArray();

                    //Update the first layer
                    Layer1.Backward(layer1ForwardResult);
                    layer1ForwardResult[0].ParentFunc = null; //Backward has run, so cut the computation graph here
                    Layer1.Update();

                    //Run the second layer
                    NdArray[] layer2ForwardResult = Layer2.Forward(layer1ForwardResult);

                    //Get the gradient of the second layer
                    NdArray[] DNI2Result = DNI2.Forward(layer2ForwardResult);

                    //Apply the gradient of the second layer
                    layer2ForwardResult[0].Grad = DNI2Result[0].Data.ToArray();

                    //Update the second layer
                    Layer2.Backward(layer2ForwardResult);
                    layer2ForwardResult[0].ParentFunc = null;

                    //Train the DNI for the first layer
                    Real DNI1loss = new MeanSquaredError().Evaluate(DNI1Result, new NdArray(layer1ForwardResult[0].Grad, DNI1Result[0].Shape, DNI1Result[0].BatchCount));

                    Layer2.Update();

                    DNI1.Backward(DNI1Result);
                    DNI1.Update();

                    DNI1totalLoss += DNI1loss;
                    DNI1totalLossCount++;

                    //Run the third layer
                    NdArray[] layer3ForwardResult = Layer3.Forward(layer2ForwardResult);

                    //Get the gradient of the third layer
                    NdArray[] DNI3Result = DNI3.Forward(layer3ForwardResult);

                    //Apply the gradient of the third layer
                    layer3ForwardResult[0].Grad = DNI3Result[0].Data.ToArray();

                    //Update the third layer
                    Layer3.Backward(layer3ForwardResult);
                    layer3ForwardResult[0].ParentFunc = null;

                    //Train the DNI for the second layer
                    Real DNI2loss = new MeanSquaredError().Evaluate(DNI2Result, new NdArray(layer2ForwardResult[0].Grad, DNI2Result[0].Shape, DNI2Result[0].BatchCount));

                    Layer3.Update();

                    DNI2.Backward(DNI2Result);
                    DNI2.Update();

                    DNI2totalLoss += DNI2loss;
                    DNI2totalLossCount++;

                    //Run the fourth layer
                    NdArray[] layer4ForwardResult = Layer4.Forward(layer3ForwardResult);

                    //Get the gradient of the fourth layer
                    Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, datasetX.Label);

                    //Update the fourth layer
                    Layer4.Backward(layer4ForwardResult);
                    layer4ForwardResult[0].ParentFunc = null;

                    totalLoss += sumLoss;
                    totalLossCount++;

                    //Train the DNI for the third layer
                    Real DNI3loss = new MeanSquaredError().Evaluate(DNI3Result, new NdArray(layer3ForwardResult[0].Grad, DNI3Result[0].Shape, DNI3Result[0].BatchCount));

                    Layer4.Update();

                    DNI3.Backward(DNI3Result);
                    DNI3.Update();

                    DNI3totalLoss += DNI3loss;
                    DNI3totalLossCount++;

                    Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
                    //Output the results
                    Console.WriteLine("total loss " + totalLoss / totalLossCount);
                    Console.WriteLine("local loss " + sumLoss);

                    Console.WriteLine("\nDNI1 total loss " + DNI1totalLoss / DNI1totalLossCount);
                    Console.WriteLine("DNI2 total loss " + DNI2totalLoss / DNI2totalLossCount);
                    Console.WriteLine("DNI3 total loss " + DNI3totalLoss / DNI3totalLossCount);

                    Console.WriteLine("\nDNI1 local loss " + DNI1loss);
                    Console.WriteLine("DNI2 local loss " + DNI2loss);
                    Console.WriteLine("DNI3 local loss " + DNI3loss);

                    //Test accuracy every 20 batches
                    if (i % 20 == 0)
                    {
                        Console.WriteLine("\nTesting...");

                        //Get data randomly from the test data
                        TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT);

                        //Run the test
                        Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                        Console.WriteLine("accuracy " + accuracy);
                    }
                }
            }
        }
Example 7
        public static void Run()
        {
            Console.WriteLine("Build Vocabulary.");

            Vocabulary vocabulary = new Vocabulary();

            string trainPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE);
            string validPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + VALID_FILE, VALID_FILE);
            string testPath  = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE);

            int[] trainData = vocabulary.LoadData(trainPath);
            int[] validData = vocabulary.LoadData(validPath);
            int[] testData  = vocabulary.LoadData(testPath);

            int nVocab = vocabulary.Length;

            Console.WriteLine("Network Initializing.");
            FunctionStack model = new FunctionStack(
                new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"),
                new Dropout(),
                new LSTM(N_UNITS, N_UNITS, name: "l2 LSTM"),
                new Dropout(),
                new LSTM(N_UNITS, N_UNITS, name: "l3 LSTM"),
                new Dropout(),
                new Linear(N_UNITS, nVocab, name: "l4 Linear")
                );

            //Rather than simply clipping at the given threshold, derive a correction rate from the L2 norm of all parameters
            GradientClipping gradientClipping = new GradientClipping(threshold: GRAD_CLIP);
            SGD sgd = new SGD(learningRate: 1);

            model.SetOptimizer(gradientClipping, sgd);

            Real wholeLen = trainData.Length;
            int  jump     = (int)Math.Floor(wholeLen / BATCH_SIZE);
            int  epoch    = 0;

            Stack <NdArray[]> backNdArrays = new Stack <NdArray[]>();

            Console.WriteLine("Train Start.");

            for (int i = 0; i < jump * N_EPOCH; i++)
            {
                NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE);
                NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE);

                for (int j = 0; j < BATCH_SIZE; j++)
                {
                    x.Data[j] = trainData[(int)((jump * j + i) % wholeLen)];
                    t.Data[j] = trainData[(int)((jump * j + i + 1) % wholeLen)];
                }

                NdArray[] result  = model.Forward(x);
                Real      sumLoss = new SoftmaxCrossEntropy().Evaluate(result, t);
                backNdArrays.Push(result);
                Console.WriteLine("[{0}/{1}] Loss: {2}", i + 1, jump, sumLoss);

                //Run truncated BPTT
                if ((i + 1) % BPROP_LEN == 0)
                {
                    for (int j = 0; backNdArrays.Count > 0; j++)
                    {
                        Console.WriteLine("backward" + backNdArrays.Count);
                        model.Backward(backNdArrays.Pop());
                    }

                    model.Update();
                    model.ResetState();
                }

                if ((i + 1) % jump == 0)
                {
                    epoch++;
                    Console.WriteLine("evaluate");
                    Console.WriteLine("validation perplexity: {0}", Evaluate(model, validData));

                    if (epoch >= 6)
                    {
                        sgd.LearningRate /= 1.2;
                        Console.WriteLine("learning rate =" + sgd.LearningRate);
                    }
                }
            }

            Console.WriteLine("test start");
            Console.WriteLine("test perplexity:" + Evaluate(model, testData));
        }
Example 8
        private static void FacePresence()
        {
            var batchSize = 1000;
            int width     = 32;
            int height    = 32;

            BuilderInstance <float> .Volume = new VolumeBuilder(); // For GPU

            var imageLoader       = new ImageLoader();
            var randomImageLoader = new ImageLoader(true, 2);

            // Load Dataset - Faces
            var faces1 = LfwCropLoader.LoadDataset(@"..\..\..\Dataset\lfwcrop_grey", width, height);
            var faces2 = imageLoader.LoadDataset(@"..\..\..\Dataset\custom\faces", width, height); // dump your own face images here

            // Load Dataset - Non-faces
            var nonFaces1 = randomImageLoader.LoadDataset(@"..\..\..\Dataset\scene_categories", width, height);
            var nonFaces2 = randomImageLoader.LoadDataset(@"..\..\..\Dataset\TextureDatabase", width, height);
            var nonFaces3 = randomImageLoader.LoadDataset(@"..\..\..\Dataset\cars_brad_bg", width, height);
            var nonFaces4 = randomImageLoader.LoadDataset(@"..\..\..\Dataset\houses", width, height);
            var nonFaces5 = imageLoader.LoadDataset(@"..\..\..\Dataset\custom\non_faces", width, height); // dump your own non-face images here

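            // Merge face and non-face samples into a single training set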
            var facesDataset = new FaceDetectionDataset(width, height);

            facesDataset.TrainSet.AddRange(faces1);
            facesDataset.TrainSet.AddRange(faces2);
            facesDataset.TrainSet.AddRange(nonFaces1);
            facesDataset.TrainSet.AddRange(nonFaces2);
            facesDataset.TrainSet.AddRange(nonFaces3);
            facesDataset.TrainSet.AddRange(nonFaces4);
            facesDataset.TrainSet.AddRange(nonFaces5);

            Console.WriteLine(" Done.");
            ConvNetSharp <float> cns;

            // Model
            Op <float> softmax = null;

            if (File.Exists("FaceDetection.json"))
            {
                Console.WriteLine("Loading model from disk...");
                softmax = SerializationExtensions.Load <float>("FaceDetection", false)[0]; // first element is the model (second element is the cost if it was saved along)
                cns     = softmax.Graph;                                                   // Deserialization creates its own graph that we have to use. TODO: make it simpler in ConvNetSharp
            }
            else
            {
                cns = new ConvNetSharp <float>();
            }

            var x        = cns.PlaceHolder("x");
            var dropProb = cns.PlaceHolder("dropProb");

            if (softmax == null)
            {
                // Inspired by https://github.com/PCJohn/FaceDetect
                var layer1 = cns.Relu(cns.Conv(x, 5, 5, 4, 2) + cns.Variable(new Shape(1, 1, 4, 1), "bias1", true));
                var layer2 = cns.Relu(cns.Conv(layer1, 3, 3, 16, 2) + cns.Variable(new Shape(1, 1, 16, 1), "bias2", true));
                var layer3 = cns.Relu(cns.Conv(layer2, 3, 3, 32) + cns.Variable(new Shape(1, 1, 32, 1), "bias3", true));

                var flatten = cns.Flatten(layer3);
                var dense1  = cns.Dropout(cns.Relu(cns.Dense(flatten, 600)) + cns.Variable(new Shape(1, 1, 600, 1), "bias4", true), dropProb);
                var dense2  = cns.Dense(dense1, 2) + cns.Variable(new Shape(1, 1, 2, 1), "bias5", true);
                softmax = cns.Softmax(dense2);
            }

            var y = cns.PlaceHolder("y");

            // Cost
            var cost = new SoftmaxCrossEntropy <float>(cns, softmax, y);

            // Optimizer
            var optimizer = new AdamOptimizer <float>(cns, 1e-4f, 0.9f, 0.999f, 1e-16f);

            //if (File.Exists("loss.csv"))
            //{
            //    File.Delete("loss.csv");
            //}

            Volume <float> trainingProb = 0.5f;
            Volume <float> testingProb  = 0.0f;

            // Training
            using (var session = new Session <float>())
            {
                session.Differentiate(cost); // computes dCost/dW at every node of the graph

                var    iteration = 0;
                double currentCost;
                do
                {
                    var batch  = facesDataset.GetBatch(batchSize);
                    var input  = batch.Item1;
                    var output = batch.Item2;

                    var dico = new Dictionary <string, Volume <float> > {
                        { "x", input }, { "y", output }, { "dropProb", trainingProb }
                    };


                    var stopwatch = Stopwatch.StartNew();
                    // session.Run(softmax, dico);
                    Debug.WriteLine(stopwatch.ElapsedMilliseconds);

                    currentCost = session.Run(cost, dico);
                    Console.WriteLine($"cost: {currentCost}");
                    File.AppendAllLines("loss.csv", new[] { currentCost.ToString(CultureInfo.InvariantCulture) });

                    session.Run(optimizer, dico);

                    if (iteration++ % 100 == 0)
                    {
                        // Test on 100 random pictures
                        var test = facesDataset.GetBatch(100);
                        dico = new Dictionary <string, Volume <float> > {
                            { "x", test.Item1 }, { "dropProb", testingProb }
                        };
                        var result = session.Run(softmax, dico);

                        int correct = 0;
                        for (int i = 0; i < 100; i++)
                        {
                            var class0Prob = result.Get(0, 0, 0, i);
                            var class1Prob = result.Get(0, 0, 1, i);

                            if ((test.Item3[i].IsFace && class1Prob > class0Prob) || (!test.Item3[i].IsFace && class0Prob > class1Prob))
                            {
                                correct++;
                            }
                        }

                        Console.WriteLine($"Test: {correct}%");
                        File.AppendAllLines("accuracy.csv", new[] { correct.ToString() });
                        var filename = test.Item3[0].Filename;

                        softmax.Save("FaceDetection");
                    }
                } while (currentCost > 1e-5 && !Console.KeyAvailable);

                softmax.Save("FaceDetection");
            }
        }
Example 9
        public static void Run()
        {
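            // Open the histogram log and stamp it with the start time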
            _outputStream = File.Create(LogPath);

            _logWriter = new HistogramLogWriter(_outputStream);
            _logWriter.Write(DateTime.Now);

            var recorder = HistogramFactory
                           .With64BitBucketSize()
                           ?.WithValuesFrom(1)
                           ?.WithValuesUpTo(2345678912345)
                           ?.WithPrecisionOf(3)
                           ?.WithThreadSafeWrites()
                           ?.WithThreadSafeReads()
                           ?.Create();

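            // Accumulating histogram that aggregates every recorded value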
            var accumulatingHistogram = new LongHistogram(2345678912345, 3);

            var size = accumulatingHistogram.GetEstimatedFootprintInBytes();

            RILogManager.Default?.SendDebug("Histogram size = {0} bytes ({1:F2} MB)", size, size / 1024.0 / 1024.0);


            RILogManager.Default?.SendDebug("Recorded latencies [in system clock ticks]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.None, useCsvFormat: true);
            Console.WriteLine();

            RILogManager.Default?.SendDebug("Recorded latencies [in usec]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMicroseconds, useCsvFormat: true);
            Console.WriteLine();

            RILogManager.Default?.SendDebug("Recorded latencies [in msec]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);
            Console.WriteLine();

            RILogManager.Default?.SendDebug("Recorded latencies [in sec]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToSeconds, useCsvFormat: true);

            DocumentResults(accumulatingHistogram, recorder);

            RILogManager.Default?.SendDebug("Build Vocabulary.");

            DocumentResults(accumulatingHistogram, recorder);

            Vocabulary vocabulary = new Vocabulary();

            DocumentResults(accumulatingHistogram, recorder);

            string trainPath = InternetFileDownloader.Download(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE);

            DocumentResults(accumulatingHistogram, recorder);

            string validPath = InternetFileDownloader.Download(DOWNLOAD_URL + VALID_FILE, VALID_FILE);

            DocumentResults(accumulatingHistogram, recorder);

            string testPath = InternetFileDownloader.Download(DOWNLOAD_URL + TEST_FILE, TEST_FILE);

            DocumentResults(accumulatingHistogram, recorder);


            int[] trainData = vocabulary.LoadData(trainPath);
            DocumentResults(accumulatingHistogram, recorder);

            int[] validData = vocabulary.LoadData(validPath);
            DocumentResults(accumulatingHistogram, recorder);

            int[] testData = vocabulary.LoadData(testPath);
            DocumentResults(accumulatingHistogram, recorder);

            int nVocab = vocabulary.Length;

            RILogManager.Default?.SendDebug("Network Initializing.");
            FunctionStack model = new FunctionStack("Test10",
                                                    new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"),
                                                    new Dropout(),
                                                    new LSTM(true, N_UNITS, N_UNITS, name: "l2 LSTM"),
                                                    new Dropout(),
                                                    new LSTM(true, N_UNITS, N_UNITS, name: "l3 LSTM"),
                                                    new Dropout(),
                                                    new Linear(true, N_UNITS, nVocab, name: "l4 Linear")
                                                    );

            DocumentResults(accumulatingHistogram, recorder);

            // Do not cease at the given threshold, correct the rate by taking the rate from L2Norm of all parameters
            GradientClipping gradientClipping = new GradientClipping(threshold: GRAD_CLIP);
            SGD sgd = new SGD(learningRate: 1);

            model.SetOptimizer(gradientClipping, sgd);
            DocumentResults(accumulatingHistogram, recorder);

            Real wholeLen = trainData.Length;
            int  jump     = (int)Math.Floor(wholeLen / BATCH_SIZE);
            int  epoch    = 0;

            Stack <NdArray[]> backNdArrays = new Stack <NdArray[]>();

            RILogManager.Default?.SendDebug("Train Start.");
            double  dVal;
            NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);
            NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);

            for (int i = 0; i < jump * N_EPOCH; i++)
            {
                for (int j = 0; j < BATCH_SIZE; j++)
                {
                    x.Data[j] = trainData[(int)((jump * j + i) % wholeLen)];
                    t.Data[j] = trainData[(int)((jump * j + i + 1) % wholeLen)];
                }

                NdArray[] result  = model.Forward(true, x);
                Real      sumLoss = new SoftmaxCrossEntropy().Evaluate(result, t);
                backNdArrays.Push(result);
                RILogManager.Default?.SendDebug("[{0}/{1}] Loss: {2}", i + 1, jump, sumLoss);

                //Run truncated BPTT
                if ((i + 1) % BPROP_LEN == 0)
                {
                    for (int j = 0; backNdArrays.Count > 0; j++)
                    {
                        RILogManager.Default?.SendDebug("backward" + backNdArrays.Count);
                        model.Backward(true, backNdArrays.Pop());
                    }

                    model.Update();
                    model.ResetState();
                }

                if ((i + 1) % jump == 0)
                {
                    epoch++;
                    RILogManager.Default?.SendDebug("evaluate");
                    dVal = Evaluate(model, validData);
                    RILogManager.Default?.SendDebug($"validation perplexity: {dVal}");

                    if (epoch >= 6)
                    {
                        sgd.LearningRate /= 1.2;
                        RILogManager.Default?.SendDebug("learning rate =" + sgd.LearningRate);
                    }
                }
                DocumentResults(accumulatingHistogram, recorder);
            }

            RILogManager.Default?.SendDebug("test start");
            dVal = Evaluate(model, testData);
            RILogManager.Default?.SendDebug("test perplexity:" + dVal);
            DocumentResults(accumulatingHistogram, recorder);

            _logWriter.Dispose();
            _outputStream.Dispose();


            RILogManager.Default?.SendDebug("Log contents");
            RILogManager.Default?.SendDebug(File.ReadAllText(LogPath));
            Console.WriteLine();
            RILogManager.Default?.SendDebug("Percentile distribution (values reported in milliseconds)");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);

            RILogManager.Default?.SendDebug("Mean: " + BytesToString(accumulatingHistogram.GetMean()) + ", StdDev: " +
                                            BytesToString(accumulatingHistogram.GetStdDeviation()));
        }
Example 10
        const Real L2_SCALE = 1e-4f;             //l2 loss scale

        public static void Run()
        {
            //Prepare the MNIST data
            Console.WriteLine("MNIST data loading...");
            MnistData <Real> mnistData = new MnistData <Real>();

            //Get the entire test data set
            TestDataSet <Real> datasetY = mnistData.Eval.GetAllDataSet();

            Console.WriteLine("\nNetwork initializing...");

            int numBatches    = mnistData.Train.Length / BATCH_SIZE; // 600 = 60000 / 100
            int batchPerEpoch = mnistData.Train.Length / BATCH_SIZE;

            int[] boundaries = { LR_DROP_EPOCH * batchPerEpoch, (LR_DROP_EPOCH + 20) * batchPerEpoch };

            Dictionary <string, Real> customSparsities = new Dictionary <string, Real>
            {
                { "layer2", END_SPARSITY * SPARSITY_SCALE },
                { "layer3", END_SPARSITY * 0 }
            };

            MaskedLinear <Real> layer1 = new MaskedLinear <Real>(28 * 28, 300, name: "layer1", gpuEnable: true);
            MaskedLinear <Real> layer2 = new MaskedLinear <Real>(300, 100, name: "layer2", gpuEnable: true);
            MaskedLinear <Real> layer3 = new MaskedLinear <Real>(100, 10, name: "layer3", gpuEnable: true);

            //Write the network configuration into a FunctionStack
            FunctionStack <Real> nn = new FunctionStack <Real>(
                layer1,
                new ReLU <Real>(name: "l1 ReLU"),
                layer2,
                new ReLU <Real>(name: "l2 ReLU"),
                layer3
                );

            SoftmaxCrossEntropy <Real> sce = new SoftmaxCrossEntropy <Real>();

            WeightDecay <Real> weightDecay = new WeightDecay <Real>(L2_SCALE);

            weightDecay.AddParameters(layer1.Weight, layer2.Weight, layer3.Weight);

            MomentumSGD <Real> mSGD = new MomentumSGD <Real>(LEARNING_RATE);

            mSGD.SetUp(nn);

            var opt = new SparseRigLOptimizer(mSGD, MASKUPDATE_BEGIN_STEP, MASKUPDATE_END_STEP, MASKUPDATE_FREQUENCY, DROP_FRACTION, "cosine", "zeros", RIGL_ACC_SCALE);

            NdArray <Real>[] allMasks =
            {
                layer1.Mask,
                layer2.Mask,
                layer3.Mask,
            };

            string[] LayerNames =
            {
                layer1.Name,
                layer2.Name,
                layer3.Name,
            };

            NdArray <Real>[] allWeights =
            {
                layer1.Weight,
                layer2.Weight,
                layer3.Weight,
            };

            //Initialize the masks
            SparseUtils.MaskInit(allMasks, LayerNames, "erdos_renyi", END_SPARSITY, customSparsities);

            Console.WriteLine("[Global sparsity] " + SparseUtils.CalculateSparsity(allMasks));
            var weightSparsity = GetWeightSparsity(allMasks);

            Console.WriteLine("[Sparsity] Layer0, Layer1 : " + weightSparsity[0] + ", " + weightSparsity[1]);

            Console.WriteLine("\nTraining Start...");

            //Start training
            for (int i = 0; i < NUM_EPOCHS * numBatches; i++)
            {
                //Get data randomly from the training data
                TestDataSet <Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_SIZE);

                //Run batch training
                NdArray <Real> y    = nn.Forward(datasetX.Data)[0];
                Real           loss = sce.Evaluate(y, datasetX.Label);
                nn.Backward(y);

                weightDecay.Update();
                opt._optimizer.LearningRate = PiecewiseConstant(opt._optimizer.UpdateCount, boundaries, LEARNING_RATE);

                opt.condMaskUpdate(allMasks, allWeights);

                ////Output the results every 10 iterations
                //if (i % 10 + 1 == 10)
                //{
                //    Console.WriteLine("\nbatch count:" + (i + 1) + " (lr:" + opt._optimizer.LearningRate + ")");
                //    Console.WriteLine("loss " + loss);
                //}

                //Test accuracy
                if (i % numBatches + 1 == numBatches)
                {
                    Console.WriteLine("\nEpoch:" + Math.Floor((i + 1) / (Real)numBatches) + " Iteration:" + (i + 1) + " Testing... ");

                    //Run the test
                    Real accuracy = Trainer.Accuracy(nn, datasetY, new SoftmaxCrossEntropy <Real>(), out loss);

                    Console.WriteLine("loss: " + loss);
                    Console.WriteLine("accuracy: " + accuracy);
                }
            }
        }
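
The PiecewiseConstant learning-rate helper referenced above is not part of this listing. A plausible sketch of such a step-decay schedule, assuming the common convention of dividing the rate at each boundary passed (the decay factor of 10 is an assumption, not taken from the source):

        using System;

        static class LrSchedule
        {
            // Hypothetical reconstruction: keep baseRate until the first boundary,
            // then divide by an assumed factor of 10 at each boundary passed
            public static double PiecewiseConstant(long step, int[] boundaries, double baseRate)
            {
                double rate = baseRate;

                foreach (int boundary in boundaries)
                {
                    if (step >= boundary)
                    {
                        rate /= 10.0; // assumed decay factor
                    }
                }

                return rate;
            }
        }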
Example 11
        static void Main(string[] args)
        {
            Console.WriteLine("MNIST Test");

            int seed;

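            // Generate a seed from a cryptographically strong RNG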
            using (var rng = new RNGCryptoServiceProvider())
            {
                var buffer = new byte[sizeof(int)];

                rng.GetBytes(buffer);
                seed = BitConverter.ToInt32(buffer, 0);
            }

            RandomProvider.SetSeed(seed);

            var   assembly            = Assembly.GetExecutingAssembly();
            var   filename            = "CNN.xml";
            var   serializer          = new DataContractSerializer(typeof(IEnumerable <Layer>), new Type[] { typeof(Convolution), typeof(BatchNormalization), typeof(Activation), typeof(ReLU), typeof(MaxPooling), typeof(FullyConnected), typeof(Dropout), typeof(Softmax) });
            var   trainingList        = new List <ValueTuple <double[], double[]> >();
            var   testList            = new List <ValueTuple <double[], double[]> >();
            var   accuracyList        = new List <double>();
            var   lossList            = new List <double>();
            var   logPath             = "Log.csv";
            var   channels            = 1;
            var   imageWidth          = 28;
            var   imageHeight         = 28;
            var   filters             = 30;
            var   filterWidth         = 5;
            var   filterHeight        = 5;
            var   poolWidth           = 2;
            var   poolHeight          = 2;
            var   activationMapWidth  = Convolution.GetActivationMapLength(imageWidth, filterWidth);
            var   activationMapHeight = Convolution.GetActivationMapLength(imageHeight, filterHeight);
            var   outputWidth         = MaxPooling.GetOutputLength(activationMapWidth, poolWidth);
            var   outputHeight        = MaxPooling.GetOutputLength(activationMapHeight, poolHeight);
            Model model;

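            // Load the first 1,000 training images and one-hot encode their labels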
            using (Stream
                   imagesStream = assembly.GetManifestResourceStream("MNISTTest.train-images.idx3-ubyte"),
                   labelsStream = assembly.GetManifestResourceStream("MNISTTest.train-labels.idx1-ubyte"))
            {
                foreach (var image in MnistImage.Load(imagesStream, labelsStream).Take(1000))
                {
                    var t = new double[10];

                    for (int i = 0; i < 10; i++)
                    {
                        if (i == image.Label)
                        {
                            t[i] = 1.0;
                        }
                        else
                        {
                            t[i] = 0.0;
                        }
                    }

                    trainingList.Add(ValueTuple.Create <double[], double[]>(image.Normalize(), t));
                }
            }

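            // Load the first 1,000 test images the same way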
            using (Stream
                   imagesStream = assembly.GetManifestResourceStream("MNISTTest.t10k-images.idx3-ubyte"),
                   labelsStream = assembly.GetManifestResourceStream("MNISTTest.t10k-labels.idx1-ubyte"))
            {
                foreach (var image in MnistImage.Load(imagesStream, labelsStream).Take(1000))
                {
                    var t = new double[10];

                    for (int i = 0; i < 10; i++)
                    {
                        if (i == image.Label)
                        {
                            t[i] = 1.0;
                        }
                        else
                        {
                            t[i] = 0.0;
                        }
                    }

                    testList.Add(ValueTuple.Create <double[], double[]>(image.Normalize(), t));
                }
            }

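            // Reuse a previously serialized model if one exists; otherwise train from scratch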
            if (File.Exists(filename))
            {
                using (XmlReader xmlReader = XmlReader.Create(filename))
                {
                    model = new Model((IEnumerable <Layer>)serializer.ReadObject(xmlReader));
                }
            }
            else
            {
                int           epochs       = 50;
                int           iterations   = 1;
                ILossFunction lossFunction = new SoftmaxCrossEntropy();

                model = new Model(
                    new Convolution(channels, imageWidth, imageHeight, filters, filterWidth, filterHeight, (fanIn, fanOut) => Initializers.HeNormal(fanIn),
                                    new Activation(new ReLU(),
                                                   new MaxPooling(filters, activationMapWidth, activationMapHeight, poolWidth, poolHeight,
                                                                  new FullyConnected(filters * outputWidth * outputHeight, (fanIn, fanOut) => Initializers.HeNormal(fanIn),
                                                                                     new Activation(new ReLU(),
                                                                                                    new Dropout(0.5,
                                                                                                                new FullyConnected(100, (fanIn, fanOut) => Initializers.GlorotNormal(fanIn, fanOut),
                                                                                                                                   new Dropout(10, 0.5)))))))));
                //model.WeightDecayRate = 0.1;
                model.Stepped += (sender, e) =>
                {
                    double tptn = 0.0;

                    trainingList.ForEach(x =>
                    {
                        if (ArgMax(model.Predict(x.Item1)) == ArgMax(x.Item2))
                        {
                            tptn += 1.0;
                        }
                    });

                    var accuracy = tptn / trainingList.Count;
                    var loss     = model.GetLoss(trainingList, lossFunction);

                    accuracyList.Add(accuracy);
                    lossList.Add(loss);

                    Console.WriteLine("Epoch {0}/{1}", iterations, epochs);
                    Console.WriteLine("Accuracy: {0}, Loss: {1}", accuracy, loss);

                    iterations++;
                };

                Console.WriteLine("Training...");

                var stopwatch = Stopwatch.StartNew();

                model.Fit(trainingList, epochs, 100, new Adam(), lossFunction);

                stopwatch.Stop();

                Console.WriteLine("Done ({0}).", stopwatch.Elapsed.ToString());
            }

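            // Measure accuracy on the held-out test set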
            double testTptn = 0.0;

            testList.ForEach(x =>
            {
                var vector = model.Predict(x.Item1);
                var i      = ArgMax(vector);
                var j      = ArgMax(x.Item2);

                if (i == j && Math.Round(vector[i]) == x.Item2[j])
                {
                    testTptn += 1.0;
                }
            });

            Console.WriteLine("Accuracy: {0}", testTptn / testList.Count);

            if (accuracyList.Count > 0)
            {
                var logDictionary = new Dictionary <string, IEnumerable <double> >();

                logDictionary.Add("Accuracy", accuracyList);
                logDictionary.Add("Loss", lossList);

                ToCsv(logPath, logDictionary);

                Console.WriteLine("Saved log to {0}...", logPath);
            }

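            // Serialize the trained layers to XML for later reuse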
            XmlWriterSettings settings = new XmlWriterSettings();

            settings.Indent   = true;
            settings.Encoding = new System.Text.UTF8Encoding(false);

            using (XmlWriter xmlWriter = XmlWriter.Create(filename, settings))
            {
                serializer.WriteObject(xmlWriter, model.Layers);
                xmlWriter.Flush();
            }
        }
Example 12
        public static void Run()
        {
            //Prepare MNIST data
            Console.WriteLine("MNIST Data Loading...");
            MnistData mnistData = new MnistData();

            Console.WriteLine("Training Start...");

            //Writing the network configuration in FunctionStack
            FunctionStack Layer1 = new FunctionStack(
                new Linear(28 * 28, 256, name: "l1 Linear"),
                new BatchNormalization(256, name: "l1 Norm"),
                new ReLU(name: "l1 ReLU")
                );

            FunctionStack Layer2 = new FunctionStack(
                new Linear(256, 256, name: "l2 Linear"),
                new BatchNormalization(256, name: "l2 Norm"),
                new ReLU(name: "l2 ReLU")
                );

            FunctionStack Layer3 = new FunctionStack(
                new Linear(256, 256, name: "l3 Linear"),
                new BatchNormalization(256, name: "l3 Norm"),
                new ReLU(name: "l3 ReLU")
                );

            FunctionStack Layer4 = new FunctionStack(
                new Linear(256, 10, name: "l4 Linear")
                );

            //FunctionStack itself is also stacked as Function
            FunctionStack nn = new FunctionStack
                               (
                Layer1,
                Layer2,
                Layer3,
                Layer4
                               );

            FunctionStack cDNI1 = new FunctionStack(
                new Linear(256 + 10, 1024, name: "cDNI1 Linear1"),
                new BatchNormalization(1024, name: "cDNI1 Norm1"),
                new ReLU(name: "cDNI1 ReLU1"),
                new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI1 Linear3")
                );

            FunctionStack cDNI2 = new FunctionStack(
                new Linear(256 + 10, 1024, name: "cDNI2 Linear1"),
                new BatchNormalization(1024, name: "cDNI2 Norm1"),
                new ReLU(name: "cDNI2 ReLU1"),
                new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
                );

            FunctionStack cDNI3 = new FunctionStack(
                new Linear(256 + 10, 1024, name: "cDNI3 Linear1"),
                new BatchNormalization(1024, name: "cDNI3 Norm1"),
                new ReLU(name: "cDNI3 ReLU1"),
                new Linear(1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
                );

            //Declare optimizer
            Layer1.SetOptimizer(new Adam(0.00003f));
            Layer2.SetOptimizer(new Adam(0.00003f));
            Layer3.SetOptimizer(new Adam(0.00003f));
            Layer4.SetOptimizer(new Adam(0.00003f));

            cDNI1.SetOptimizer(new Adam(0.00003f));
            cDNI2.SetOptimizer(new Adam(0.00003f));
            cDNI3.SetOptimizer(new Adam(0.00003f));

            for (int epoch = 0; epoch < 10; epoch++)
            {
                Console.WriteLine("epoch " + (epoch + 1));

                //Running loss totals for the epoch
                Real totalLoss      = 0;
                Real cDNI1totalLoss = 0;
                Real cDNI2totalLoss = 0;
                Real cDNI3totalLoss = 0;

                long totalLossCount      = 0;
                long cDNI1totalLossCount = 0;
                long cDNI2totalLossCount = 0;
                long cDNI3totalLossCount = 0;


                //How many times to run the batch
                for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
                {
                    //Get data randomly from training data
                    TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT);

                    //Run the first layer
                    NdArray[]     layer1ForwardResult = Layer1.Forward(datasetX.Data);
                    ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label);

                    //Get the gradient of the first layer
                    NdArray[] cDNI1Result = cDNI1.Forward(layer1ResultDataSet.GetTrainData());

                    //Apply the gradient of the first layer
                    layer1ForwardResult[0].Grad = cDNI1Result[0].Data.ToArray();

                    //Update first layer
                    Layer1.Backward(layer1ForwardResult);
                    layer1ForwardResult[0].ParentFunc = null;
                    Layer1.Update();

                    //Run Layer 2
                    NdArray[]     layer2ForwardResult = Layer2.Forward(layer1ResultDataSet.Result);
                    ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label);

                    //Get the gradient of the second layer
                    NdArray[] cDNI2Result = cDNI2.Forward(layer2ResultDataSet.GetTrainData());

                    //Apply the gradient of the second layer
                    layer2ForwardResult[0].Grad = cDNI2Result[0].Data.ToArray();

                    //Update the second layer
                    Layer2.Backward(layer2ForwardResult);
                    layer2ForwardResult[0].ParentFunc = null;


                    //Train the cDNI for the first layer
                    Real cDNI1loss = new MeanSquaredError().Evaluate(cDNI1Result, new NdArray(layer1ResultDataSet.Result[0].Grad, cDNI1Result[0].Shape, cDNI1Result[0].BatchCount));

                    Layer2.Update();

                    cDNI1.Backward(cDNI1Result);
                    cDNI1.Update();

                    cDNI1totalLoss += cDNI1loss;
                    cDNI1totalLossCount++;

                    //Run the third layer
                    NdArray[]     layer3ForwardResult = Layer3.Forward(layer2ResultDataSet.Result);
                    ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label);

                    //Get the gradient of the third layer
                    NdArray[] cDNI3Result = cDNI3.Forward(layer3ResultDataSet.GetTrainData());

                    //Apply the gradient of the third layer
                    layer3ForwardResult[0].Grad = cDNI3Result[0].Data.ToArray();

                    //Update third layer
                    Layer3.Backward(layer3ForwardResult);
                    layer3ForwardResult[0].ParentFunc = null;

                    //Train the cDNI for the second layer
                    Real cDNI2loss = new MeanSquaredError().Evaluate(cDNI2Result, new NdArray(layer2ResultDataSet.Result[0].Grad, cDNI2Result[0].Shape, cDNI2Result[0].BatchCount));

                    Layer3.Update();

                    cDNI2.Backward(cDNI2Result);
                    cDNI2.Update();

                    cDNI2totalLoss += cDNI2loss;
                    cDNI2totalLossCount++;

                    //Run Layer 4
                    NdArray[] layer4ForwardResult = Layer4.Forward(layer3ResultDataSet.Result);

                    //Get the gradient of the fourth layer
                    Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label);

                    //Update fourth layer
                    Layer4.Backward(layer4ForwardResult);
                    layer4ForwardResult[0].ParentFunc = null;

                    totalLoss += sumLoss;
                    totalLossCount++;

                    //Train the cDNI for the third layer
                    Real cDNI3loss = new MeanSquaredError().Evaluate(cDNI3Result, new NdArray(layer3ResultDataSet.Result[0].Grad, cDNI3Result[0].Shape, cDNI3Result[0].BatchCount));

                    Layer4.Update();

                    cDNI3.Backward(cDNI3Result);
                    cDNI3.Update();

                    cDNI3totalLoss += cDNI3loss;
                    cDNI3totalLossCount++;

                    Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
                    //Output the results
                    Console.WriteLine("total loss " + totalLoss / totalLossCount);
                    Console.WriteLine("local loss " + sumLoss);

                    Console.WriteLine("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount);
                    Console.WriteLine("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount);
                    Console.WriteLine("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount);

                    Console.WriteLine("\ncDNI1 local loss " + cDNI1loss);
                    Console.WriteLine("cDNI2 local loss " + cDNI2loss);
                    Console.WriteLine("cDNI3 local loss " + cDNI3loss);

                    //Test accuracy every 20 batches
                    if (i % 20 == 0)
                    {
                        Console.WriteLine("\nTesting...");

                        //Get data randomly from test data
                        TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT);

                        //Run test
                        Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                        Console.WriteLine("accuracy " + accuracy);
                    }
                }
            }
        }
Example n. 13
        public static void Run()
        {
            // Prepare MNIST data
            RILogManager.Default?.SendDebug("MNIST Data Loading...");
            MnistData mnistData = new MnistData(28);

            RILogManager.Default?.SendDebug("Training Start...");

            // Write the network configuration in FunctionStack
            FunctionStack Layer1 = new FunctionStack("Test12 Layer 1",
                                                     new Linear(true, 28 * 28, 256, name: "l1 Linear"),
                                                     new BatchNormalization(true, 256, name: "l1 Norm"),
                                                     new ReLU(name: "l1 ReLU")
                                                     );

            FunctionStack Layer2 = new FunctionStack("Test12 Layer 2",
                                                     new Linear(true, 256, 256, name: "l2 Linear"),
                                                     new BatchNormalization(true, 256, name: "l2 Norm"),
                                                     new ReLU(name: "l2 ReLU")
                                                     );

            FunctionStack Layer3 = new FunctionStack("Test12 Layer 3",
                                                     new Linear(true, 256, 256, name: "l3 Linear"),
                                                     new BatchNormalization(true, 256, name: "l3 Norm"),
                                                     new ReLU(name: "l3 ReLU")
                                                     );

            FunctionStack Layer4 = new FunctionStack("Test12 Layer 4",
                                                     new Linear(true, 256, 10, name: "l4 Linear")
                                                     );

            // Function stack itself is also stacked as Function
            FunctionStack nn = new FunctionStack
                                   ("Test12",
                                   Layer1,
                                   Layer2,
                                   Layer3,
                                   Layer4
                                   );
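
            // nn shares the same four layer stacks, so it is used only for the periodic
            // accuracy test; training backpropagates through each stack individually.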

            FunctionStack cDNI1 = new FunctionStack("Test12 DNI 1",
                                                    new Linear(true, 256 + 10, 1024, name: "cDNI1 Linear1"),
                                                    new BatchNormalization(true, 1024, name: "cDNI1 Norm1"),
                                                    new ReLU(name: "cDNI1 ReLU1"),
                                                    new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI1 Linear3")
                                                    );

            FunctionStack cDNI2 = new FunctionStack("Test12 DNI 2",
                                                    new Linear(true, 256 + 10, 1024, name: "cDNI2 Linear1"),
                                                    new BatchNormalization(true, 1024, name: "cDNI2 Norm1"),
                                                    new ReLU(name: "cDNI2 ReLU1"),
                                                    new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
                                                    );

            FunctionStack cDNI3 = new FunctionStack("Test12 DNI 3",
                                                    new Linear(true, 256 + 10, 1024, name: "cDNI3 Linear1"),
                                                    new BatchNormalization(true, 1024, name: "cDNI3 Norm1"),
                                                    new ReLU(name: "cDNI3 ReLU1"),
                                                    new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
                                                    );
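
            // Two details worth noting (my reading of the code, not an original comment):
            // the 256 + 10 input width is the layer activation concatenated with the
            // one-hot label supplied via ResultDataSet.GetTrainData(), the label
            // conditioning that makes these stacks "cDNI" rather than plain DNI, and
            // initialW: new Real[1024, 256] passes an all-zero matrix, so every
            // predicted gradient starts out at zero.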

            Layer1.SetOptimizer(new Adam("Adam", 0.00003f));
            Layer2.SetOptimizer(new Adam("Adam", 0.00003f));
            Layer3.SetOptimizer(new Adam("Adam", 0.00003f));
            Layer4.SetOptimizer(new Adam("Adam", 0.00003f));

            cDNI1.SetOptimizer(new Adam("Adam", 0.00003f));
            cDNI2.SetOptimizer(new Adam("Adam", 0.00003f));
            cDNI3.SetOptimizer(new Adam("Adam", 0.00003f));

            // Describe each function stack
            RILogManager.Default?.SendDebug(Layer1.Describe());
            RILogManager.Default?.SendDebug(Layer2.Describe());
            RILogManager.Default?.SendDebug(Layer3.Describe());
            RILogManager.Default?.SendDebug(Layer4.Describe());

            RILogManager.Default?.SendDebug(cDNI1.Describe());
            RILogManager.Default?.SendDebug(cDNI2.Describe());
            RILogManager.Default?.SendDebug(cDNI3.Describe());

            for (int epoch = 0; epoch < 10; epoch++)
            {
                // Total error in the whole
                Real totalLoss      = 0;
                Real cDNI1totalLoss = 0;
                Real cDNI2totalLoss = 0;
                Real cDNI3totalLoss = 0;

                long totalLossCount      = 0;
                long cDNI1totalLossCount = 0;
                long cDNI2totalLossCount = 0;
                long cDNI3totalLossCount = 0;


                // Number of batch iterations to run
                for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
                {
                    RILogManager.Default?.SendDebug("epoch: " + (epoch + 1) + " of 10, batch iteration: " + i + " of " + TRAIN_DATA_COUNT);
                    RILogManager.Default?.ViewerSendWatch("Epoch", epoch + 1);
                    RILogManager.Default?.ViewerSendWatch("Batch Iteration", i);

                    // Get data randomly from the training data
                    TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);

                    // Run the first layer
                    NdArray[]     layer1ForwardResult = Layer1.Forward(true, datasetX.Data);
                    ResultDataSet layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label);

                    // Get the predicted gradient for the first layer
                    NdArray[] cDNI1Result = cDNI1.Forward(true, layer1ResultDataSet.GetTrainData());

                    // Apply the predicted gradient to the first layer's output
                    layer1ForwardResult[0].Grad = cDNI1Result[0].Data.ToArray();

                    //Update first layer
                    Layer1.Backward(true, layer1ForwardResult);
                    layer1ForwardResult[0].ParentFunc = null;
                    Layer1.Update();

                    // Run Layer 2
                    NdArray[]     layer2ForwardResult = Layer2.Forward(true, layer1ResultDataSet.Result);
                    ResultDataSet layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label);

                    // Get the predicted gradient for the second layer
                    NdArray[] cDNI2Result = cDNI2.Forward(true, layer2ResultDataSet.GetTrainData());

                    // Apply the predicted gradient to the second layer's output
                    layer2ForwardResult[0].Grad = cDNI2Result[0].Data.ToArray();

                    //Update layer 2
                    Layer2.Backward(true, layer2ForwardResult);
                    layer2ForwardResult[0].ParentFunc = null;


                    //Train cDNI1 against the first layer's true gradient
                    Real cDNI1loss = new MeanSquaredError().Evaluate(cDNI1Result, new NdArray(layer1ResultDataSet.Result[0].Grad, cDNI1Result[0].Shape, cDNI1Result[0].BatchCount));

                    Layer2.Update();

                    cDNI1.Backward(true, cDNI1Result);
                    cDNI1.Update();

                    cDNI1totalLoss += cDNI1loss;
                    cDNI1totalLossCount++;

                    //Run the third layer
                    NdArray[]     layer3ForwardResult = Layer3.Forward(true, layer2ResultDataSet.Result);
                    ResultDataSet layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label);

                    //Get the predicted gradient for the third layer
                    NdArray[] cDNI3Result = cDNI3.Forward(true, layer3ResultDataSet.GetTrainData());

                    //Apply the predicted gradient to the third layer's output
                    layer3ForwardResult[0].Grad = cDNI3Result[0].Data.ToArray();

                    //Update third layer
                    Layer3.Backward(true, layer3ForwardResult);
                    layer3ForwardResult[0].ParentFunc = null;

                    //Train cDNI2 against the second layer's true gradient
                    Real cDNI2loss = new MeanSquaredError().Evaluate(cDNI2Result, new NdArray(layer2ResultDataSet.Result[0].Grad, cDNI2Result[0].Shape, cDNI2Result[0].BatchCount));

                    Layer3.Update();

                    cDNI2.Backward(true, cDNI2Result);
                    cDNI2.Update();

                    cDNI2totalLoss += cDNI2loss;
                    cDNI2totalLossCount++;

                    //Run the fourth layer and compute its loss and gradient
                    NdArray[] layer4ForwardResult = Layer4.Forward(true, layer3ResultDataSet.Result);
                    Real      sumLoss             = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label);
                    //Update the fourth layer
                    Layer4.Backward(true, layer4ForwardResult);
                    layer4ForwardResult[0].ParentFunc = null;

                    totalLoss += sumLoss;
                    totalLossCount++;

                    //Train cDNI3 against the third layer's true gradient
                    Real cDNI3loss = new MeanSquaredError().Evaluate(cDNI3Result, new NdArray(layer3ResultDataSet.Result[0].Grad, cDNI3Result[0].Shape, cDNI3Result[0].BatchCount));

                    Layer4.Update();

                    cDNI3.Backward(true, cDNI3Result);
                    cDNI3.Update();

                    cDNI3totalLoss += cDNI3loss;
                    cDNI3totalLossCount++;

                    RILogManager.Default?.SendDebug("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
                    RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount);
                    RILogManager.Default?.SendDebug("local loss " + sumLoss);

                    RILogManager.Default?.SendDebug("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount);
                    RILogManager.Default?.SendDebug("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount);
                    RILogManager.Default?.SendDebug("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount);

                    RILogManager.Default?.SendDebug("\ncDNI1 local loss " + cDNI1loss);
                    RILogManager.Default?.SendDebug("cDNI2 local loss " + cDNI2loss);
                    RILogManager.Default?.SendDebug("cDNI3 local loss " + cDNI3loss);

                    if (i % 20 == 0)
                    {
                        RILogManager.Default?.SendDebug("\nTesting...");
                        TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);
                        Real        accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                        RILogManager.Default?.SendDebug("accuracy " + accuracy);
                    }
                }
            }
        }
Example n. 14
        public static void Run()
        {
            Console.WriteLine("Build Vocabulary.");

            Vocabulary vocabulary = new Vocabulary();

            string trainPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE, TRAIN_FILE_HASH);
            string validPath = InternetFileDownloader.Donwload(DOWNLOAD_URL + VALID_FILE, VALID_FILE, VALID_FILE_HASH);
            string testPath  = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE, TEST_FILE_HASH);

            int[] trainData = vocabulary.LoadData(trainPath);
            int[] validData = vocabulary.LoadData(validPath);
            int[] testData  = vocabulary.LoadData(testPath);

            int nVocab = vocabulary.Length;

            Console.WriteLine("Network Initilizing.");
            FunctionStack <Real> model = new FunctionStack <Real>(
                new EmbedID <Real>(nVocab, N_UNITS, name: "l1 EmbedID"),
                new Dropout <Real>(),
                new LSTM <Real>(N_UNITS, N_UNITS, name: "l2 LSTM"),
                new Dropout <Real>(),
                new LSTM <Real>(N_UNITS, N_UNITS, name: "l3 LSTM"),
                new Dropout <Real>(),
                new Linear <Real>(N_UNITS, nVocab, name: "l4 Linear")
                );

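            // Initialize every weight uniformly in (-0.1, 0.1)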
            for (int i = 0; i < model.Functions.Length; i++)
            {
                for (int j = 0; j < model.Functions[i].Parameters.Length; j++)
                {
                    for (int k = 0; k < model.Functions[i].Parameters[j].Data.Length; k++)
                    {
                        model.Functions[i].Parameters[j].Data[k] = ((Real)Mother.Dice.NextDouble() * 2.0f - 1.0f) / 10.0f;
                    }
                }
            }

            //Instead of hard-capping at the given threshold, take a rate from the L2 norm over all parameters and rescale the gradients
            GradientClipping <Real> gradientClipping = new GradientClipping <Real>(threshold: GRAD_CLIP);
            SGD <Real> sgd = new SGD <Real>(learningRate: 0.1f);

            gradientClipping.SetUp(model);
            sgd.SetUp(model);
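
            // A minimal sketch of the clipping rule described above (an assumption about
            // the convention, not a copy of the library implementation):
            //     norm = sqrt(sum of g * g over every parameter gradient g)
            //     rate = threshold / norm
            //     if (rate < 1) scale every gradient by rate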

            Real wholeLen = trainData.Length;
            int  jump     = (int)Math.Floor(wholeLen / BATCH_SIZE);
            int  epoch    = 0;

            Console.WriteLine("Train Start.");

            for (int i = 0; i < jump * N_EPOCH; i++)
            {
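                // Each batch row j walks its own stripe of the corpus, jump words apart;
                // x holds the current word id and t the word that follows it.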
                NdArray <Real> x = new NdArray <Real>(new[] { 1 }, BATCH_SIZE);
                NdArray <int>  t = new NdArray <int>(new[] { 1 }, BATCH_SIZE);

                for (int j = 0; j < BATCH_SIZE; j++)
                {
                    x.Data[j] = trainData[(int)((jump * j + i) % wholeLen)];
                    t.Data[j] = trainData[(int)((jump * j + i + 1) % wholeLen)];
                }

                NdArray <Real> result  = model.Forward(x)[0];
                Real           sumLoss = new SoftmaxCrossEntropy <Real>().Evaluate(result, t);
                Console.WriteLine("[{0}/{1}] Loss: {2}", i + 1, jump, sumLoss);
                model.Backward(result);

                //Truncated BPTT: every BPROP_LEN steps, apply one clipped SGD update and cut the stored history
                if ((i + 1) % BPROP_LEN == 0)
                {
                    gradientClipping.Update();
                    sgd.Update();
                    model.ResetState();
                }

                if ((i + 1) % jump == 0)
                {
                    epoch++;
                    Console.WriteLine("evaluate");
                    Console.WriteLine("validation perplexity: {0}", Evaluate(model, validData));

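                    //After the sixth epoch, anneal the learning rate by a factor of 1.2 per epoch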
                    if (epoch >= 6)
                    {
                        sgd.LearningRate /= 1.2f;
                        Console.WriteLine("learning rate =" + sgd.LearningRate);
                    }
                }
            }

            Console.WriteLine("test start");
            Console.WriteLine("test perplexity:" + Evaluate(model, testData));
        }
Example n. 15
        public static void Run()
        {
            //Prepare the MNIST data
            Console.WriteLine("MNIST Data Loading...");
            MnistData <Real> mnistData = new MnistData <Real>();

            Console.WriteLine("Training Start...");

            //Write the network configuration into FunctionStacks
            FunctionStack <Real> Layer1 = new FunctionStack <Real>(
                new Linear <Real>(28 * 28, 256, name: "l1 Linear"),
                new BatchNormalization <Real>(256, name: "l1 Norm"),
                new ReLU <Real>(name: "l1 ReLU")
                );

            FunctionStack <Real> Layer2 = new FunctionStack <Real>(
                new Linear <Real>(256, 256, name: "l2 Linear"),
                new BatchNormalization <Real>(256, name: "l2 Norm"),
                new ReLU <Real>(name: "l2 ReLU")
                );

            FunctionStack <Real> Layer3 = new FunctionStack <Real>(
                new Linear <Real>(256, 256, name: "l3 Linear"),
                new BatchNormalization <Real>(256, name: "l3 Norm"),
                new ReLU <Real>(name: "l3 ReLU")
                );

            FunctionStack <Real> Layer4 = new FunctionStack <Real>(
                new Linear <Real>(256, 10, name: "l4 Linear")
                );

            //A FunctionStack can itself be stacked as a Function
            FunctionStack <Real> nn = new FunctionStack <Real>
                                      (
                Layer1,
                Layer2,
                Layer3,
                Layer4
                                      );

            FunctionStack <Real> cDNI1 = new FunctionStack <Real>(
                new Linear <Real>(256 + 10, 1024, name: "cDNI1 Linear1"),
                new BatchNormalization <Real>(1024, name: "cDNI1 Nrom1"),
                new ReLU <Real>(name: "cDNI1 ReLU1"),
                new Linear <Real>(1024, 256, initialW: new Real[1024, 256], name: "cDNI1 Linear3")
                );

            FunctionStack <Real> cDNI2 = new FunctionStack <Real>(
                new Linear <Real>(256 + 10, 1024, name: "cDNI2 Linear1"),
                new BatchNormalization <Real>(1024, name: "cDNI2 Nrom1"),
                new ReLU <Real>(name: "cDNI2 ReLU1"),
                new Linear <Real>(1024, 256, initialW: new Real[1024, 256], name: "cDNI2 Linear3")
                );

            FunctionStack <Real> cDNI3 = new FunctionStack <Real>(
                new Linear <Real>(256 + 10, 1024, name: "cDNI3 Linear1"),
                new BatchNormalization <Real>(1024, name: "cDNI3 Nrom1"),
                new ReLU <Real>(name: "cDNI3 ReLU1"),
                new Linear <Real>(1024, 256, initialW: new Real[1024, 256], name: "cDNI3 Linear3")
                );

            //Declare the optimizers
            Adam <Real> L1adam = new Adam <Real>(0.00003f);
            Adam <Real> L2adam = new Adam <Real>(0.00003f);
            Adam <Real> L3adam = new Adam <Real>(0.00003f);
            Adam <Real> L4adam = new Adam <Real>(0.00003f);

            L1adam.SetUp(Layer1);
            L2adam.SetUp(Layer2);
            L3adam.SetUp(Layer3);
            L4adam.SetUp(Layer4);

            Adam <Real> cDNI1adam = new Adam <Real>(0.00003f);
            Adam <Real> cDNI2adam = new Adam <Real>(0.00003f);
            Adam <Real> cDNI3adam = new Adam <Real>(0.00003f);

            cDNI1adam.SetUp(cDNI1);
            cDNI2adam.SetUp(cDNI2);
            cDNI3adam.SetUp(cDNI3);

            for (int epoch = 0; epoch < 10; epoch++)
            {
                Console.WriteLine("epoch " + (epoch + 1));

                //Accumulate the overall loss
                Real totalLoss      = 0;
                Real cDNI1totalLoss = 0;
                Real cDNI2totalLoss = 0;
                Real cDNI3totalLoss = 0;

                long totalLossCount      = 0;
                long cDNI1totalLossCount = 0;
                long cDNI2totalLossCount = 0;
                long cDNI3totalLossCount = 0;


                //Number of batch iterations to run
                for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
                {
                    //Get a random batch from the training data
                    TestDataSet <Real> datasetX = mnistData.Train.GetRandomDataSet(BATCH_DATA_COUNT);

                    //Run the first layer
                    NdArray <Real> layer1ForwardResult = Layer1.Forward(datasetX.Data)[0];
                    ResultDataSet  layer1ResultDataSet = new ResultDataSet(layer1ForwardResult, datasetX.Label);

                    //Get the predicted gradient for the first layer
                    NdArray <Real> cDNI1Result = cDNI1.Forward(layer1ResultDataSet.GetTrainData())[0];

                    //Apply the predicted gradient to the first layer's output
                    layer1ForwardResult.Grad = cDNI1Result.Data.ToArray();

                    //Update the first layer
                    Layer1.Backward(layer1ForwardResult);
                    layer1ForwardResult.ParentFunc = null;
                    L1adam.Update();

                    //Run the second layer
                    NdArray <Real> layer2ForwardResult = Layer2.Forward(layer1ResultDataSet.Result)[0];
                    ResultDataSet  layer2ResultDataSet = new ResultDataSet(layer2ForwardResult, layer1ResultDataSet.Label);

                    //Get the predicted gradient for the second layer
                    NdArray <Real> cDNI2Result = cDNI2.Forward(layer2ResultDataSet.GetTrainData())[0];

                    //Apply the predicted gradient to the second layer's output
                    layer2ForwardResult.Grad = cDNI2Result.Data.ToArray();

                    //Update the second layer
                    Layer2.Backward(layer2ForwardResult);
                    layer2ForwardResult.ParentFunc = null;


                    //Train cDNI1 against the first layer's true gradient
                    Real cDNI1loss = new MeanSquaredError <Real>().Evaluate(cDNI1Result, new NdArray <Real>(layer1ResultDataSet.Result.Grad, cDNI1Result.Shape, cDNI1Result.BatchCount));

                    L2adam.Update();

                    cDNI1.Backward(cDNI1Result);
                    cDNI1adam.Update();

                    cDNI1totalLoss += cDNI1loss;
                    cDNI1totalLossCount++;

                    //Run the third layer
                    NdArray <Real> layer3ForwardResult = Layer3.Forward(layer2ResultDataSet.Result)[0];
                    ResultDataSet  layer3ResultDataSet = new ResultDataSet(layer3ForwardResult, layer2ResultDataSet.Label);

                    //Get the predicted gradient for the third layer
                    NdArray <Real> cDNI3Result = cDNI3.Forward(layer3ResultDataSet.GetTrainData())[0];

                    //Apply the predicted gradient to the third layer's output
                    layer3ForwardResult.Grad = cDNI3Result.Data.ToArray();

                    //Update the third layer
                    Layer3.Backward(layer3ForwardResult);
                    layer3ForwardResult.ParentFunc = null;

                    //Train cDNI2 against the second layer's true gradient
                    Real cDNI2loss = new MeanSquaredError <Real>().Evaluate(cDNI2Result, new NdArray <Real>(layer2ResultDataSet.Result.Grad, cDNI2Result.Shape, cDNI2Result.BatchCount));

                    L3adam.Update();

                    cDNI2.Backward(cDNI2Result);
                    cDNI2adam.Update();

                    cDNI2totalLoss += cDNI2loss;
                    cDNI2totalLossCount++;

                    //Run the fourth layer
                    NdArray <Real> layer4ForwardResult = Layer4.Forward(layer3ResultDataSet.Result)[0];

                    //Compute the fourth layer's loss and gradient
                    Real sumLoss = new SoftmaxCrossEntropy <Real>().Evaluate(layer4ForwardResult, layer3ResultDataSet.Label);

                    //Update the fourth layer
                    Layer4.Backward(layer4ForwardResult);
                    layer4ForwardResult.ParentFunc = null;

                    totalLoss += sumLoss;
                    totalLossCount++;

                    //Train cDNI3 against the third layer's true gradient
                    Real cDNI3loss = new MeanSquaredError <Real>().Evaluate(cDNI3Result, new NdArray <Real>(layer3ResultDataSet.Result.Grad, cDNI3Result.Shape, cDNI3Result.BatchCount));

                    L4adam.Update();

                    cDNI3.Backward(cDNI3Result);
                    cDNI3adam.Update();

                    cDNI3totalLoss += cDNI3loss;
                    cDNI3totalLossCount++;

                    Console.WriteLine("\nbatch count " + i + "/" + TRAIN_DATA_COUNT);
                    //Output the results
                    Console.WriteLine("total loss " + totalLoss / totalLossCount);
                    Console.WriteLine("local loss " + sumLoss);

                    Console.WriteLine("\ncDNI1 total loss " + cDNI1totalLoss / cDNI1totalLossCount);
                    Console.WriteLine("cDNI2 total loss " + cDNI2totalLoss / cDNI2totalLossCount);
                    Console.WriteLine("cDNI3 total loss " + cDNI3totalLoss / cDNI3totalLossCount);

                    Console.WriteLine("\ncDNI1 local loss " + cDNI1loss);
                    Console.WriteLine("cDNI2 local loss " + cDNI2loss);
                    Console.WriteLine("cDNI3 local loss " + cDNI3loss);

                    //Test accuracy every 20 batches
                    if (i % 20 == 0)
                    {
                        Console.WriteLine("\nTesting...");

                        //Get a random batch from the test data
                        TestDataSet <Real> datasetY = mnistData.Eval.GetRandomDataSet(TEST_DATA_COUNT);

                        //Run the test
                        Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                        Console.WriteLine("accuracy " + accuracy);
                    }
                }
            }
        }
Example n. 16
        public static void Run()
        {
            // Prepare MNIST data
            RILogManager.Default?.SendDebug("MNIST Data Loading...");
            MnistData mnistData = new MnistData(28);

            RILogManager.Default?.SendDebug("Training Start...");

            // Write the network configuration in FunctionStack
            FunctionStack Layer1 = new FunctionStack("Test11 Layer 1",
                                                     new Linear(true, 28 * 28, 256, name: "l1 Linear"),
                                                     new BatchNormalization(true, 256, name: "l1 Norm"),
                                                     new ReLU(name: "l1 ReLU")
                                                     );

            FunctionStack Layer2 = new FunctionStack("Test11 Layer 2",
                                                     new Linear(true, 256, 256, name: "l2 Linear"),
                                                     new BatchNormalization(true, 256, name: "l2 Norm"),
                                                     new ReLU(name: "l2 ReLU")
                                                     );

            FunctionStack Layer3 = new FunctionStack("Test11 Layer 3",
                                                     new Linear(true, 256, 256, name: "l3 Linear"),
                                                     new BatchNormalization(true, 256, name: "l3 Norm"),
                                                     new ReLU(name: "l3 ReLU")
                                                     );

            FunctionStack Layer4 = new FunctionStack("Test11 Layer 4",
                                                     new Linear(true, 256, 10, name: "l4 Linear")
                                                     );

            // Function stack itself is also stacked as Function
            FunctionStack nn = new FunctionStack
                                   ("Test11",
                                   Layer1,
                                   Layer2,
                                   Layer3,
                                   Layer4
                                   );

            FunctionStack DNI1 = new FunctionStack("Test11 DNI1",
                                                   new Linear(true, 256, 1024, name: "DNI1 Linear1"),
                                                   new BatchNormalization(true, 1024, name: "DNI1 Norm1"),
                                                   new ReLU(name: "DNI1 ReLU1"),
                                                   new Linear(true, 1024, 1024, name: "DNI1 Linear2"),
                                                   new BatchNormalization(true, 1024, name: "DNI1 Norm2"),
                                                   new ReLU(name: "DNI1 ReLU2"),
                                                   new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI1 Linear3")
                                                   );

            FunctionStack DNI2 = new FunctionStack("Test11 DNI2",
                                                   new Linear(true, 256, 1024, name: "DNI2 Linear1"),
                                                   new BatchNormalization(true, 1024, name: "DNI2 Norm1"),
                                                   new ReLU(name: "DNI2 ReLU1"),
                                                   new Linear(true, 1024, 1024, name: "DNI2 Linear2"),
                                                   new BatchNormalization(true, 1024, name: "DNI2 Norm2"),
                                                   new ReLU(name: "DNI2 ReLU2"),
                                                   new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI2 Linear3")
                                                   );

            FunctionStack DNI3 = new FunctionStack("Test11 DNI3",
                                                   new Linear(true, 256, 1024, name: "DNI3 Linear1"),
                                                   new BatchNormalization(true, 1024, name: "DNI3 Norm1"),
                                                   new ReLU(name: "DNI3 ReLU1"),
                                                   new Linear(true, 1024, 1024, name: "DNI3 Linear2"),
                                                   new BatchNormalization(true, 1024, name: "DNI3 Norm2"),
                                                   new ReLU(name: "DNI3 ReLU2"),
                                                   new Linear(true, 1024, 256, initialW: new Real[1024, 256], name: "DNI3 Linear3")
                                                   );
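
            // Unlike the cDNI stacks in the previous examples, these DNI stacks see only
            // the 256-wide layer activation (no label input), i.e. unconditioned DNI.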

            //optimizer
            Layer1.SetOptimizer(new Adam());
            Layer2.SetOptimizer(new Adam());
            Layer3.SetOptimizer(new Adam());
            Layer4.SetOptimizer(new Adam());
            DNI1.SetOptimizer(new Adam());
            DNI2.SetOptimizer(new Adam());
            DNI3.SetOptimizer(new Adam());

            // Train for 20 epochs
            for (int epoch = 0; epoch < 20; epoch++)
            {
                RILogManager.Default?.SendDebug("epoch " + (epoch + 1));

                Real totalLoss     = 0;
                Real DNI1totalLoss = 0;
                Real DNI2totalLoss = 0;
                Real DNI3totalLoss = 0;

                long totalLossCount     = 0;
                long DNI1totalLossCount = 0;
                long DNI2totalLossCount = 0;
                long DNI3totalLossCount = 0;

                // how many times to run the batch
                for (int i = 1; i < TRAIN_DATA_COUNT + 1; i++)
                {
                    // Get data randomly from the training data
                    TestDataSet datasetX = mnistData.GetRandomXSet(BATCH_DATA_COUNT, 28, 28);

                    // Run the first layer
                    NdArray[] layer1ForwardResult = Layer1.Forward(true, datasetX.Data);

                    // Get the predicted gradient for the first layer
                    NdArray[] DNI1Result = DNI1.Forward(true, layer1ForwardResult);

                    // Apply the predicted gradient to the first layer's output
                    layer1ForwardResult[0].Grad = DNI1Result[0].Data.ToArray();

                    // Update first layer
                    Layer1.Backward(true, layer1ForwardResult);
                    layer1ForwardResult[0].ParentFunc = null; // Backward has run, so cut off the computation graph here
                    Layer1.Update();

                    // Run Layer 2
                    NdArray[] layer2ForwardResult = Layer2.Forward(true, layer1ForwardResult);

                    // Get the predicted gradient for the second layer
                    NdArray[] DNI2Result = DNI2.Forward(true, layer2ForwardResult);

                    // Apply the predicted gradient to the second layer's output
                    layer2ForwardResult[0].Grad = DNI2Result[0].Data.ToArray();

                    // Update layer 2
                    Layer2.Backward(true, layer2ForwardResult);
                    layer2ForwardResult[0].ParentFunc = null;

                    // Train DNI1 against the first layer's true gradient
                    Real DNI1loss = new MeanSquaredError().Evaluate(DNI1Result, new NdArray(layer1ForwardResult[0].Grad, DNI1Result[0].Shape, DNI1Result[0].BatchCount));

                    Layer2.Update();

                    DNI1.Backward(true, DNI1Result);
                    DNI1.Update();

                    DNI1totalLoss += DNI1loss;
                    DNI1totalLossCount++;

                    // Run the third layer
                    NdArray[] layer3ForwardResult = Layer3.Forward(true, layer2ForwardResult);

                    // Get the predicted gradient for the third layer
                    NdArray[] DNI3Result = DNI3.Forward(true, layer3ForwardResult);

                    // Apply the predicted gradient to the third layer's output
                    layer3ForwardResult[0].Grad = DNI3Result[0].Data.ToArray();

                    // Update layer 3
                    Layer3.Backward(true, layer3ForwardResult);
                    layer3ForwardResult[0].ParentFunc = null;

                    // Train DNI2 against the second layer's true gradient
                    Real DNI2loss = new MeanSquaredError().Evaluate(DNI2Result, new NdArray(layer2ForwardResult[0].Grad, DNI2Result[0].Shape, DNI2Result[0].BatchCount));

                    Layer3.Update();

                    DNI2.Backward(true, DNI2Result);
                    DNI2.Update();

                    DNI2totalLoss += DNI2loss;
                    DNI2totalLossCount++;

                    // Run the fourth layer
                    NdArray[] layer4ForwardResult = Layer4.Forward(true, layer3ForwardResult);

                    // Compute the fourth layer's loss and gradient
                    Real sumLoss = new SoftmaxCrossEntropy().Evaluate(layer4ForwardResult, datasetX.Label);

                    // Update fourth layer
                    Layer4.Backward(true, layer4ForwardResult);
                    layer4ForwardResult[0].ParentFunc = null;

                    totalLoss += sumLoss;
                    totalLossCount++;

                    // Train DNI3 against the third layer's true gradient
                    Real DNI3loss = new MeanSquaredError().Evaluate(DNI3Result, new NdArray(layer3ForwardResult[0].Grad, DNI3Result[0].Shape, DNI3Result[0].BatchCount));

                    Layer4.Update();

                    DNI3.Backward(true, DNI3Result);
                    DNI3.Update();

                    DNI3totalLoss += DNI3loss;
                    DNI3totalLossCount++;

                    RILogManager.Default?.SendDebug("batch count " + i + "/" + TRAIN_DATA_COUNT);
                    RILogManager.Default?.SendDebug("total loss " + totalLoss / totalLossCount);
                    RILogManager.Default?.SendDebug("local loss " + sumLoss);

                    RILogManager.Default?.SendDebug("DNI1 total loss " + DNI1totalLoss / DNI1totalLossCount);
                    RILogManager.Default?.SendDebug("DNI2 total loss " + DNI2totalLoss / DNI2totalLossCount);
                    RILogManager.Default?.SendDebug("DNI3 total loss " + DNI3totalLoss / DNI3totalLossCount);

                    RILogManager.Default?.SendDebug("DNI1 local loss " + DNI1loss);
                    RILogManager.Default?.SendDebug("DNI2 local loss " + DNI2loss);
                    RILogManager.Default?.SendDebug("DNI3 local loss " + DNI3loss);

                    // Test accuracy every 20 batches
                    if (i % 20 == 0)
                    {
                        RILogManager.Default?.SendDebug("Testing...");

                        // Get data randomly from test data
                        TestDataSet datasetY = mnistData.GetRandomYSet(TEST_DATA_COUNT, 28);

                        // Run test
                        Real accuracy = Trainer.Accuracy(nn, datasetY.Data, datasetY.Label);
                        RILogManager.Default?.SendDebug("accuracy " + accuracy);
                    }
                }
            }
        }