/// <summary>
/// Entry point: initializes DyNet from the command line, creates the LSTM
/// builders and parameters stored in a <c>ParameterGroup</c>, and then trains
/// on a single sentence for 600 epochs, printing the loss and the output of
/// <c>GenerateSentence</c> every 20 epochs.
/// </summary>
/// <param name="args">Command-line arguments, forwarded to <c>DynetParams.FromArgs</c>.</param>
static void Main(string[] args)
{
    // DyNet is configured from the command line here. The settings can also be
    // applied in code: create a DynetParams instance, set properties such as
    // AutoBatch or MemDescriptor on it, and call Initialize() on that instance.
    DynetParams.FromArgs(args).Initialize();

    const string endOfSequence = "<EOS>";

    // Symbol table: every lowercase letter, the space, and finally the EOS marker.
    List<string> symbols = new List<string>();
    foreach (char letter in "abcdefghijklmnopqrstuvwxyz ")
    {
        symbols.Add(letter.ToString());
    }
    symbols.Add(endOfSequence);

    // Reverse lookup: symbol -> position in the symbol table.
    Dictionary<string, int> symbolToIndex = new Dictionary<string, int>();
    for (int index = 0; index < symbols.Count; index++)
    {
        symbolToIndex.Add(symbols[index], index);
    }

    // Sizes shared with the other methods of this class.
    VOCAB_SIZE = symbols.Count;
    LSTM_NUM_OF_LAYERS = 2;
    EMBEDDINGS_SIZE = 32;
    STATE_SIZE = 32;
    ATTENTION_SIZE = 32;

    // Owns every trainable parameter created below.
    ParameterCollection model = new ParameterCollection();

    // The ParameterGroup carries everything the other functions need, so that no
    // global variables are required. Members are initialized in the order
    // written, which keeps the order in which parameters are registered in the
    // collection unchanged.
    ParameterGroup pg = new ParameterGroup
    {
        c2i = symbolToIndex,
        i2c = symbols,
        EOS = endOfSequence,

        // LSTM builders
        enc_fwd_lstm = new LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model),
        enc_bwd_lstm = new LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model),
        dec_lstm = new LSTMBuilder(LSTM_NUM_OF_LAYERS, STATE_SIZE * 2 + EMBEDDINGS_SIZE, STATE_SIZE, model),

        // Lookup tables and weight parameters
        input_lookup = model.AddLookupParameters(VOCAB_SIZE, new[] { EMBEDDINGS_SIZE }),
        attention_w1 = model.AddParameters(new[] { ATTENTION_SIZE, STATE_SIZE * 2 }),
        attention_w2 = model.AddParameters(new[] { ATTENTION_SIZE, STATE_SIZE * 2 * LSTM_NUM_OF_LAYERS }),
        attention_v = model.AddParameters(new[] { 1, ATTENTION_SIZE }),
        decoder_W = model.AddParameters(new[] { VOCAB_SIZE, STATE_SIZE }),
        decoder_b = model.AddParameters(new[] { VOCAB_SIZE }),
        output_lookup = model.AddLookupParameters(VOCAB_SIZE, new[] { EMBEDDINGS_SIZE })
    };

    Trainer trainer = new SimpleSGDTrainer(model);

    // Start from a fresh computation graph before training.
    dy.RenewCG();

    const int epochCount = 600;
    const int reportInterval = 20;
    string sentence = "it is working";

    for (int epoch = 0; epoch < epochCount; epoch++)
    {
        // The same sentence is used as both input and expected output.
        Expression loss = CalculateLoss(sentence, sentence, pg);

        // Forward pass, backward pass, parameter update.
        float lossValue = loss.ScalarValue();
        loss.Backward();
        trainer.Update();

        if (epoch % reportInterval != 0)
        {
            continue;
        }

        // Periodic progress report.
        Console.WriteLine(lossValue);
        Console.WriteLine(GenerateSentence(sentence, pg));
    }
}
/// <summary>
/// Entry point of the XOR example: builds a network with one tanh hidden
/// layer, trains it with squared-distance loss on the four (+/-1, +/-1) input
/// pairs, prints the prediction for each pair and saves the parameters to
/// "xor.model".
/// </summary>
/// <param name="args">
/// Command-line arguments, forwarded to <c>DynetParams.FromArgs</c>. An argument
/// of the form <c>-model=PATH</c> makes the program load parameters from PATH
/// before training.
/// </param>
static void Main(string[] args)
{
    // DyNet is configured from the command line here. The settings can also be
    // applied in code: create a DynetParams instance, set properties such as
    // AutoBatch or MemDescriptor on it, and call Initialize() on that instance.
    DynetParams.FromArgs(args).Initialize();

    const int epochCount = 30;
    const int hiddenSize = 8;
    const int sampleCount = 4;
    const string modelFlag = "-model=";

    // Owns every trainable parameter created below.
    ParameterCollection model = new ParameterCollection();
    Trainer trainer = new SimpleSGDTrainer(model);

    // Trainable parameters: hidden layer (weights, bias) and output layer (weights, bias).
    Parameter hiddenWeights = model.AddParameters(new[] { hiddenSize, 2 });
    Parameter hiddenBias = model.AddParameters(new[] { hiddenSize });
    Parameter outputWeights = model.AddParameters(new[] { 1, hiddenSize });
    Parameter outputBias = model.AddParameters(new[] { 1 });

    // Optionally restore previously saved parameters.
    string modelArgument = Array.Find(args, candidate => candidate.StartsWith(modelFlag));
    if (modelArgument != null)
    {
        model.Load(modelArgument.Substring(modelFlag.Length));
    }

    // Start from a fresh computation graph.
    dy.RenewCG();

    // Bring the parameters into the graph (p.ToExpression() is an alternative).
    Expression W = dy.parameter(hiddenWeights);
    Expression b = dy.parameter(hiddenBias);
    Expression V = dy.parameter(outputWeights);
    Expression a = dy.parameter(outputBias);

    // Input and target placeholders; their values are replaced via SetValue below.
    Expression input = dy.input(new[] { 0f, 0f });
    Expression target = dy.input(0f);

    Expression hidden = dy.tanh(W * input + b);
    Expression prediction = V * hidden + a;
    Expression lossExpression = dy.squared_distance(prediction, target);

    // Dump the computation graph, just for illustration.
    dy.PrintCGGraphViz();

    // Training: every epoch visits the four input combinations once.
    for (int epoch = 0; epoch < epochCount; epoch++)
    {
        double lossSum = 0;
        for (int sample = 0; sample < sampleCount; sample++)
        {
            // Bit 0 of the sample index selects x1, bit 1 selects x2.
            float x1 = (sample & 1) != 0 ? 1f : -1f;
            float x2 = (sample & 2) != 0 ? 1f : -1f;
            // XOR target: +1 when the inputs differ, -1 when they agree.
            float expected = x1 == x2 ? -1f : 1f;

            input.SetValue(new[] { x1, x2 });
            target.SetValue(expected);

            // Forward (recomputed because the inputs changed), backward, update.
            lossSum += lossExpression.ScalarValue(fRecalculate: true);
            lossExpression.Backward();
            trainer.Update();
        }

        // Mean loss over the four samples.
        Console.WriteLine("E = " + (lossSum / sampleCount));
    }

    // Show the prediction for each of the four input combinations.
    float[][] probes =
    {
        new[] { 1f, -1f },
        new[] { -1f, 1f },
        new[] { 1f, 1f },
        new[] { -1f, -1f }
    };
    string[] labels = { "[ 1,-1] = ", "[-1, 1] = ", "[ 1, 1] = ", "[-1,-1] = " };
    for (int probe = 0; probe < probes.Length; probe++)
    {
        input.SetValue(probes[probe]);
        Console.WriteLine(labels[probe] + prediction.ScalarValue(fRecalculate: true));
    }

    // Persist the trained parameters.
    model.Save("xor.model");
}
/// <summary>
/// Entry point of the RNN language-model example. Depending on the arguments it
/// reads training/dev/test data, optionally loads existing parameters, trains
/// for 100 epochs (saving the parameters whenever the dev loss improves) and
/// finally evaluates on the test data.
/// </summary>
/// <param name="args">
/// Command-line arguments. They are first handed to <c>DynetParams.FromArgs</c>.
/// Arguments of the form <c>-name=value</c> are then interpreted by this method;
/// the recognized names are <c>train_file</c>, <c>dev_file</c> (required
/// whenever <c>train_file</c> is given), <c>test_file</c> and <c>model_file</c>.
/// Arguments that contain no '=' are ignored by this method.
/// </param>
/// <exception cref="ArgumentException">
/// <c>train_file</c> was supplied without <c>dev_file</c>.
/// </exception>
static void Main(string[] args)
{
    // DyNet is configured from the command line here. The settings can also be
    // applied in code: create a DynetParams instance, set properties such as
    // AutoBatch or MemDescriptor on it, and call Initialize() on that instance.
    DynetParams.FromArgs(args).Initialize();

    // Collect the -X=Y style arguments. The split is limited to two parts so a
    // value may itself contain '=' (e.g. in a path), and arguments without '='
    // are skipped instead of failing with an IndexOutOfRangeException.
    Dictionary<string, string> argParams = new Dictionary<string, string>();
    foreach (string arg in args)
    {
        string[] parts = arg.TrimStart('-').Split(new[] { '=' }, 2);
        if (parts.Length < 2)
        {
            continue;
        }
        argParams[parts[0]] = parts[1];
    }

    int LAYERS = 2;
    int INPUT_DIM = 50;
    int HIDDEN_DIM = 100;
    float DROPOUT = 0.0f;

    // Vocabulary, pre-populated with the special tokens.
    Dictionary<string, int> d = new Dictionary<string, int>();
    d.Add("<UNK>", 0);
    d.Add("<s>", 1);
    d.Add("</s>", 2);

    // Data sets; each stays null when the matching argument is absent.
    List<List<int>> trainData = null;
    List<List<int>> devData = null;
    List<List<int>> testData = null;

    // Training data, which always comes together with dev data.
    string trainFile;
    if (argParams.TryGetValue("train_file", out trainFile))
    {
        // Validate up front: dev data drives model selection during training, so
        // fail with a clear message before spending time reading the training set.
        string devFile;
        if (!argParams.TryGetValue("dev_file", out devFile))
        {
            throw new ArgumentException("dev_file must be supplied together with train_file (expected -dev_file=PATH)");
        }

        Console.WriteLine("Reading training data from " + trainFile);
        trainData = ReadData(trainFile, d);
        Console.WriteLine(trainData.Count + " lines, " + trainData.Sum(l => l.Count) + " tokens, " + d.Count + " types");

        Console.WriteLine("Reading dev data from " + devFile);
        devData = ReadData(devFile, d, fTest: true);
        Console.WriteLine(devData.Count + " lines, " + devData.Sum(l => l.Count) + " tokens, " + d.Count + " types");
    }

    // Test data?
    string testFile;
    if (argParams.TryGetValue("test_file", out testFile))
    {
        Console.WriteLine("Reading test data from " + testFile);
        testData = ReadData(testFile, d, fTest: true);
        Console.WriteLine(testData.Count + " lines, " + testData.Sum(l => l.Count) + " tokens, " + d.Count + " types");
    }

    // Build the model
    ParameterCollection model = new ParameterCollection();
    Trainer trainer = new SimpleSGDTrainer(model);

    // Create the language model
    LSTMBuilder lstm = new LSTMBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, model);
    RNNLanguageModel lm = new RNNLanguageModel(lstm, model, d, INPUT_DIM, HIDDEN_DIM, DROPOUT);

    // Load a model?
    string paramsFile;
    if (argParams.TryGetValue("model_file", out paramsFile))
    {
        Console.WriteLine("Reading parameters from " + paramsFile + "...");
        model.Load(paramsFile);
    }

    // Train?
    if (trainData != null)
    {
        string modelFname = "lm_" + DROPOUT + "_" + LAYERS + "_" + INPUT_DIM + "_" + HIDDEN_DIM + ".params";
        Console.WriteLine("Parameters will be written to: " + modelFname);

        double best = double.MaxValue;
        int reportEveryI = Math.Min(100, trainData.Count);
        int devEveryIReports = 25;
        Random r = new Random();
        int reports = 0;

        for (int iEpoch = 0; iEpoch < 100; iEpoch++)
        {
            // Shuffle the train data with a Fisher-Yates pass. Sorting with a
            // random comparer is not a valid shuffle: the comparer is
            // inconsistent, which List<T>.Sort may reject with an
            // InvalidOperationException, and the resulting order is biased.
            for (int i = trainData.Count - 1; i > 0; i--)
            {
                int j = r.Next(i + 1);
                List<int> swapped = trainData[i];
                trainData[i] = trainData[j];
                trainData[j] = swapped;
            }

            // Per-epoch running totals
            double loss = 0;
            int itemsSeen = 0;
            int charsSeen = 0;

            // Go through the entire train data
            foreach (List<int> l in trainData)
            {
                // Build the LM graph (dropout only when it is configured)
                Expression loss_expr = lm.BuildLMGraph(l, DROPOUT > 0f);
                loss += loss_expr.ScalarValue();
                charsSeen += l.Count;

                // Backward & update
                loss_expr.Backward();
                trainer.Update();

                // Report?
                if (++itemsSeen % reportEveryI != 0)
                {
                    continue;
                }

                reports++;
                Console.WriteLine("#" + reports + " [epoch=" + (iEpoch + ((double)itemsSeen / trainData.Count)) + " lr=" + trainer.LearningRate + "] E = " + (loss / charsSeen) + " ppl=" + Math.Exp(loss / charsSeen) + " ");

                // Run dev?
                if (reports % devEveryIReports != 0)
                {
                    continue;
                }

                double dloss = 0;
                int dchars = 0;
                foreach (var dl in devData)
                {
                    Expression devLossExpr = lm.BuildLMGraph(dl, false);
                    dloss += devLossExpr.ScalarValue();
                    dchars += dl.Count;
                } // next dev line

                // New best? Keep the parameters with the lowest total dev loss.
                if (dloss < best)
                {
                    model.Save(modelFname);
                    best = dloss;
                }

                Console.WriteLine("\n***DEV [epoch=" + (iEpoch + ((double)itemsSeen / trainData.Count)) + "] E = " + (dloss / dchars) + " ppl=" + Math.Exp(dloss / dchars) + " ");
            } // next l
        } // next epoch
    } // end of train

    // Test?
    if (testData != null)
    {
        Console.WriteLine("Evaluating test data...");
        double tloss = 0;
        double tchars = 0;
        foreach (var l in testData)
        {
            Expression loss_expr = lm.BuildLMGraph(l, false);
            tloss += loss_expr.ScalarValue();
            tchars += l.Count;
        } // next test item

        Console.WriteLine("TEST -LLH = " + tloss);
        Console.WriteLine("TEST CROSS ENTOPY (NATS) = " + (tloss / tchars));
        Console.WriteLine("TEST PPL = " + Math.Exp(tloss / tchars));
    } // end of test
}