Example 1
        static void Main(string[] args)
        {
            DynetParams.FromArgs(args).Initialize();
            // Alternatively, you can initialize it directly, e.g.:
            // DynetParams dp = new DynetParams();
            // dp.AutoBatch = true;
            // dp.MemDescriptor = "768";
            // dp.Initialize();

            const string  EOS        = "<EOS>";
            List <string> characters = "abcdefghijklmnopqrstuvwxyz ".Select(c => c.ToString()).ToList();

            characters.Add(EOS);

            // Lookup dictionary: character -> index
            Dictionary <string, int> c2i = Enumerable.Range(0, characters.Count).ToDictionary(i => characters[i], i => i);

            // Define the variables
            VOCAB_SIZE         = characters.Count;
            LSTM_NUM_OF_LAYERS = 2;
            EMBEDDINGS_SIZE    = 32;
            STATE_SIZE         = 32;
            ATTENTION_SIZE     = 32;

            // ParameterCollection (all the model parameters).
            ParameterCollection m = new ParameterCollection();
            // A locally defined class that bundles all the parameters, so they can be
            // passed between functions without globals (a sketch follows this example)
            ParameterGroup pg = new ParameterGroup();

            pg.c2i = c2i;
            pg.i2c = characters;
            pg.EOS = EOS;

            // LSTMs
            pg.enc_fwd_lstm = new LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, m);
            pg.enc_bwd_lstm = new LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, m);

            pg.dec_lstm = new LSTMBuilder(LSTM_NUM_OF_LAYERS, STATE_SIZE * 2 + EMBEDDINGS_SIZE, STATE_SIZE, m);

            // Create the parameters
            pg.input_lookup  = m.AddLookupParameters(VOCAB_SIZE, new[] { EMBEDDINGS_SIZE });
            pg.attention_w1  = m.AddParameters(new[] { ATTENTION_SIZE, STATE_SIZE * 2 });
            pg.attention_w2  = m.AddParameters(new[] { ATTENTION_SIZE, STATE_SIZE * 2 * LSTM_NUM_OF_LAYERS });
            pg.attention_v   = m.AddParameters(new[] { 1, ATTENTION_SIZE });
            pg.decoder_W     = m.AddParameters(new[] { VOCAB_SIZE, STATE_SIZE });
            pg.decoder_b     = m.AddParameters(new[] { VOCAB_SIZE });
            pg.output_lookup = m.AddLookupParameters(VOCAB_SIZE, new[] { EMBEDDINGS_SIZE });

            Trainer trainer = new SimpleSGDTrainer(m);

            // For good practice, renew the computation graph
            dy.RenewCG();

            // Train
            string trainSentence = "it is working";

            // Run 600 epochs
            for (int iEpoch = 0; iEpoch < 600; iEpoch++)
            {
                // Loss
                Expression loss = CalculateLoss(trainSentence, trainSentence, pg);
                // Forward, backward, update
                float lossValue = loss.ScalarValue();
                loss.Backward();
                trainer.Update();
                if (iEpoch % 20 == 0)
                {
                    Console.WriteLine(lossValue);
                    Console.WriteLine(GenerateSentence(trainSentence, pg));
                }
            }// next epoch
        }
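
The ParameterGroup class referenced above is not included in this listing. Below is a minimal sketch reconstructed purely from how pg is used in Main: the field names come from the assignments above, while the exact types (in particular, that AddLookupParameters returns a LookupParameter) are assumptions, not the original definition.

        // Hypothetical reconstruction of ParameterGroup, inferred from usage above.
        class ParameterGroup
        {
            public Dictionary<string, int> c2i;          // character -> index
            public List<string>            i2c;          // index -> character
            public string                  EOS;          // end-of-sequence marker
            public LSTMBuilder             enc_fwd_lstm; // forward encoder
            public LSTMBuilder             enc_bwd_lstm; // backward encoder
            public LSTMBuilder             dec_lstm;     // attention decoder
            public LookupParameter         input_lookup; // assumed return type of AddLookupParameters
            public Parameter               attention_w1;
            public Parameter               attention_w2;
            public Parameter               attention_v;
            public Parameter               decoder_W;
            public Parameter               decoder_b;
            public LookupParameter         output_lookup;
        }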
Example 2
        static void Main(string[] args)
        {
            DynetParams.FromArgs(args).Initialize();
            // Alternatively, you can initialize it directly, e.g.:
            // DynetParams dp = new DynetParams();
            // dp.AutoBatch = true;
            // dp.MemDescriptor = "768";
            // dp.Initialize();

            const int ITERATIONS  = 30;
            const int HIDDEN_SIZE = 8;

            // ParameterCollection (all the model parameters).
            ParameterCollection m       = new ParameterCollection();
            Trainer             trainer = new SimpleSGDTrainer(m);

            // Create the parameters
            Parameter p_W = m.AddParameters(new[] { HIDDEN_SIZE, 2 });
            Parameter p_b = m.AddParameters(new[] { HIDDEN_SIZE });
            Parameter p_V = m.AddParameters(new[] { 1, HIDDEN_SIZE });
            Parameter p_a = m.AddParameters(new[] { 1 });

            // Load the model?
            string modelFname = args.FirstOrDefault(arg => arg.StartsWith("-model="));

            if (modelFname != null)
            {
                modelFname = modelFname.Substring("-model=".Length);
                m.Load(modelFname);
            }

            // For good practice, renew the computation graph
            dy.RenewCG();

            // Build the graph
            Expression W = dy.parameter(p_W); // Can also do: p_W.ToExpression();
            Expression b = dy.parameter(p_b);
            Expression V = dy.parameter(p_V);
            Expression a = dy.parameter(p_a);

            // Call x.SetValue(...) to change the inputs to the network.
            Expression x = dy.input(new[] { 0f, 0f });
            // Call y.SetValue(...) to change the target output.
            Expression y = dy.input(0f);

            Expression h         = dy.tanh(W * x + b);
            Expression y_pred    = V * h + a;
            Expression loss_expr = dy.squared_distance(y_pred, y);

            // Show the computation graph, just for fun.
            dy.PrintCGGraphViz();

            // Train the parameters.
            for (int iIter = 0; iIter < ITERATIONS; iIter++)
            {
                double loss = 0;
                for (int mi = 0; mi < 4; mi++)
                {
                    float x1     = (mi % 2) != 0 ? 1 : -1;
                    float x2     = ((mi / 2) % 2) != 0 ? 1 : -1;
                    float yValue = (x1 != x2) ? 1 : -1;
                    // Set the values
                    x.SetValue(new[] { x1, x2 });
                    y.SetValue(yValue);
                    // Forward & backward
                    loss += loss_expr.ScalarValue(fRecalculate: true);
                    loss_expr.Backward();
                    // Update
                    trainer.Update();
                }
                loss /= 4;
                Console.WriteLine("E = " + loss);
            }// next iteration

            // Print the four options
            x.SetValue(new[] { 1f, -1f });
            Console.WriteLine("[ 1,-1] = " + y_pred.ScalarValue(fRecalculate: true));
            x.SetValue(new[] { -1f, 1f });
            Console.WriteLine("[-1, 1] = " + y_pred.ScalarValue(fRecalculate: true));
            x.SetValue(new[] { 1f, 1f });
            Console.WriteLine("[ 1, 1] = " + y_pred.ScalarValue(fRecalculate: true));
            x.SetValue(new[] { -1f, -1f });
            Console.WriteLine("[-1,-1] = " + y_pred.ScalarValue(fRecalculate: true));

            // Output the model & parameter objects to a file
            m.Save("xor.model");
        }
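
Since the example ends by saving its parameters to xor.model, a separate run can reload and query them, as the -model= branch above already hints. A minimal sketch of such an evaluation run, using only API calls that appear in the example itself; the key assumption is that the parameters must be re-created with the same shapes and in the same order as when the collection was saved.

        // Sketch (not part of the original program): reload "xor.model" and query the net.
        static void Evaluate()
        {
            new DynetParams().Initialize();

            const int HIDDEN_SIZE = 8;
            ParameterCollection m = new ParameterCollection();
            // Same shapes and creation order as in the training run above.
            Parameter p_W = m.AddParameters(new[] { HIDDEN_SIZE, 2 });
            Parameter p_b = m.AddParameters(new[] { HIDDEN_SIZE });
            Parameter p_V = m.AddParameters(new[] { 1, HIDDEN_SIZE });
            Parameter p_a = m.AddParameters(new[] { 1 });
            m.Load("xor.model");

            dy.RenewCG();
            Expression x      = dy.input(new[] { 0f, 0f });
            Expression h      = dy.tanh(dy.parameter(p_W) * x + dy.parameter(p_b));
            Expression y_pred = dy.parameter(p_V) * h + dy.parameter(p_a);

            x.SetValue(new[] { 1f, -1f });
            Console.WriteLine("[ 1,-1] = " + y_pred.ScalarValue(fRecalculate: true));
        }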
Example 3
        static void Main(string[] args)
        {
            DynetParams.FromArgs(args).Initialize();
            // Alternatively, you can initialize it directly, e.g.:
            // DynetParams dp = new DynetParams();
            // dp.AutoBatch = true;
            // dp.MemDescriptor = "768";
            // dp.Initialize();

            // Expects all the parameters to be in the format -X=Y
            Dictionary <string, string> argParams = new Dictionary <string, string>();

            foreach (string arg in args)
            {
                string[] parts = arg.TrimStart('-').Split(new[] { '=' }, 2);
                argParams[parts[0]] = parts[1];
            }

            int   LAYERS     = 2;
            int   INPUT_DIM  = 50;
            int   HIDDEN_DIM = 100;
            float DROPOUT    = 0.0f;

            Dictionary <string, int> d = new Dictionary <string, int>();

            d.Add("<UNK>", 0); d.Add("<s>", 1); d.Add("</s>", 2);

            // Data:
            List <List <int> > trainData = null;
            List <List <int> > devData   = null;
            List <List <int> > testData  = null;

            // Read the data
            if (argParams.ContainsKey("train_file"))
            {
                string filename = argParams["train_file"];
                Console.WriteLine("Reading training data from " + filename);
                trainData = ReadData(filename, d);
                Console.WriteLine(trainData.Count + " lines, " + trainData.Sum(l => l.Count) + " tokens, " + d.Count + " types");

                // Dev data is assumed to accompany the training data
                filename = argParams["dev_file"];
                Console.WriteLine("Reading dev data from " + filename);
                devData = ReadData(filename, d, true);
                Console.WriteLine(devData.Count + " lines, " + devData.Sum(l => l.Count) + " tokens, " + d.Count + " types");
            }

            // Test data?
            if (argParams.ContainsKey("test_file"))
            {
                string filename = argParams["test_file"];
                Console.WriteLine("Reading test data from " + filename);
                testData = ReadData(filename, d, fTest: true);
                Console.WriteLine(testData.Count + " lines, " + testData.Sum(l => l.Count) + " tokens, " + d.Count + " types");
            }

            // Build the model
            ParameterCollection model   = new ParameterCollection();
            Trainer             trainer = new SimpleSGDTrainer(model);
            // Create the language model
            LSTMBuilder      lstm = new LSTMBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, model);
            RNNLanguageModel lm   = new RNNLanguageModel(lstm, model, d, INPUT_DIM, HIDDEN_DIM, DROPOUT);

            // Load a model?
            if (argParams.ContainsKey("model_file"))
            {
                string fname = argParams["model_file"];
                Console.WriteLine("Reading parameters from " + fname + "...");
                model.Load(fname);
            }

            // Train?
            if (trainData != null)
            {
                string modelFname = "lm_" + DROPOUT + "_" + LAYERS + "_" + INPUT_DIM + "_" + HIDDEN_DIM + ".params";
                Console.WriteLine("Parameters will be written to: " + modelFname);

                double best             = double.MaxValue;
                int    reportEveryI     = Math.Min(100, trainData.Count);
                int    devEveryIReports = 25;

                Random r       = new Random();
                int    reports = 0;
                for (int iEpoch = 0; iEpoch < 100; iEpoch++)
                {
                    Stopwatch sw = Stopwatch.StartNew();
                    // Shuffle the train data (Fisher-Yates)
                    for (int i = trainData.Count - 1; i > 0; i--)
                    {
                        int       j   = r.Next(i + 1);
                        List<int> tmp = trainData[i];
                        trainData[i] = trainData[j];
                        trainData[j] = tmp;
                    }

                    // New iteration
                    double loss      = 0;
                    int    itemsSeen = 0;
                    int    charsSeen = 0;
                    // Go through entire train data
                    foreach (List <int> l in trainData)
                    {
                        // Build the LM graph
                        Expression loss_expr = lm.BuildLMGraph(l, DROPOUT > 0f);
                        loss      += loss_expr.ScalarValue();
                        charsSeen += l.Count;
                        // Backward & update
                        loss_expr.Backward();
                        trainer.Update();
                        // Report?
                        if (++itemsSeen % reportEveryI == 0)
                        {
                            reports++;
                            Console.WriteLine("#" + reports + " [epoch=" + (iEpoch + ((double)itemsSeen / trainData.Count)) + " lr=" + trainer.LearningRate + "] E = " + (loss / charsSeen) + " ppl=" + Math.Exp(loss / charsSeen) + " ");

                            // Run dev?
                            if (reports % devEveryIReports == 0)
                            {
                                double dloss  = 0;
                                int    dchars = 0;
                                foreach (var dl in devData)
                                {
                                    loss_expr = lm.BuildLMGraph(dl, false);
                                    dloss    += loss_expr.ScalarValue();
                                    dchars   += dl.Count;
                                }// next dev line
                                // New best?
                                if (dloss < best)
                                {
                                    model.Save(modelFname);
                                    best = dloss;
                                }
                                Console.WriteLine("\n***DEV [epoch=" + (iEpoch + ((double)itemsSeen / trainData.Count)) + "] E = " + (dloss / dchars) + " ppl=" + Math.Exp(dloss / dchars) + " ");
                            } // end of dev
                        }     //end of report
                    }         // next l
                }             // next epoch
            }                 // end of train
            // Test?
            if (testData != null)
            {
                Console.WriteLine("Evaluating test data...");
                double tloss  = 0;
                double tchars = 0;
                foreach (var l in testData)
                {
                    Expression loss_expr = lm.BuildLMGraph(l, false);
                    tloss  += loss_expr.ScalarValue();
                    tchars += l.Count;
                }// next test item

                Console.WriteLine("TEST                -LLH = " + tloss);
                Console.WriteLine("TEST CROSS ENTOPY (NATS) = " + (tloss / tchars));
                Console.WriteLine("TEST                 PPL = " + Math.Exp(tloss / tchars));
            }// end of test
        }
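
The ReadData helper is also not part of this listing. Below is a hypothetical sketch inferred only from its call sites: it returns List<List<int>>, shares the vocabulary dictionary d, and takes an fTest flag which, judging by the "types" counts printed above, should stop the vocabulary from growing on held-out data by falling back to <UNK> (id 0). The tokenization details are an assumption; it requires System.IO.

        // Hypothetical sketch of ReadData, reconstructed from its call sites above.
        static List<List<int>> ReadData(string filename, Dictionary<string, int> d, bool fTest = false)
        {
            List<List<int>> data = new List<List<int>>();
            foreach (string line in File.ReadLines(filename))
            {
                List<int> ids = new List<int>();
                foreach (string tok in line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries))
                {
                    int id;
                    if (!d.TryGetValue(tok, out id))
                    {
                        if (fTest)
                        {
                            id = d["<UNK>"]; // don't grow the vocabulary on held-out data
                        }
                        else
                        {
                            id = d.Count;
                            d.Add(tok, id);
                        }
                    }
                    ids.Add(id);
                }
                data.Add(ids);
            }
            return data;
        }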