public QAgent21(DeepQLearn brain) { this.Brain = brain; }
static void Main(string[] args)
{
    Console.ForegroundColor = ConsoleColor.DarkMagenta;

    if (File.Exists(qAgentBrainPath))
    {
        using (FileStream fstream = new FileStream(qAgentBrainPath, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            qAgent = new BinaryFormatter().Deserialize(fstream) as QAgent;
            qAgent.Reinitialize();
        }
        Console.WriteLine("QAgent loaded");
    }
    else
    {
        var num_inputs = 6;      // size of the state vector the agent observes each tick
        var num_actions = 3;     // number of discrete actions available to the agent
        var temporal_window = 1; // amount of temporal memory. 0 = agent lives in-the-moment :)
        var network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

        // config brain
        var layer_defs = new List<LayerDefinition>();

        // The value function network computes a value of taking any of the possible actions
        // given an input state. Here we specify one explicitly the hard way,
        // but the user could equivalently use opt.hidden_layer_sizes = [20,20]
        // to just insert simple relu hidden layers.
        layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = network_size });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "regression", num_neurons = num_actions });

        // Options for the Temporal Difference learner that trains the above net
        // by backpropping the temporal difference learning rule.
        //var opt = new Options { method = "sgd", learning_rate = 0.01, l2_decay = 0.001, momentum = 0.9, batch_size = 10, l1_decay = 0.001 };
        Options opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };

        TrainingOptions tdtrainer_options = new TrainingOptions();
        tdtrainer_options.temporal_window = temporal_window;
        tdtrainer_options.experience_size = 30000;
        tdtrainer_options.start_learn_threshold = 1000;
        tdtrainer_options.gamma = 0.7;
        tdtrainer_options.learning_steps_total = 200000;
        tdtrainer_options.learning_steps_burnin = 3000;
        tdtrainer_options.epsilon_min = 0.05;
        tdtrainer_options.epsilon_test_time = 0.00;
        tdtrainer_options.layer_defs = layer_defs;
        tdtrainer_options.options = opt;

        DeepQLearn brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
        qAgent = new QAgent(brain);
    }

    qAgent.startlearn();

    new Thread(() =>
    {
        while (true)
        {
            // Periodically checkpoint the agent to disk (only when the current
            // millisecond count happens to be divisible by 31).
            if (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond % 31 /*arbitrary*/ == 0)
            {
                using (FileStream fstream = new FileStream(qAgentBrainPath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite))
                {
                    new BinaryFormatter().Serialize(fstream, qAgent);
                }
            }
            qAgent.tick();
        }
    }).Start();
}
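The millisecond-modulo check in the save thread above fires erratically: it saves only when the current millisecond count happens to be divisible by 31, and can fire several times within the same millisecond. A minimal sketch of an interval-based alternative, assuming the same static qAgent and qAgentBrainPath fields used in Main; the 30-second interval is an arbitrary choice:

new Thread(() =>
{
    // Sketch: checkpoint the agent on a fixed wall-clock interval instead of
    // relying on a millisecond modulo.
    var saveTimer = System.Diagnostics.Stopwatch.StartNew();
    while (true)
    {
        if (saveTimer.Elapsed >= TimeSpan.FromSeconds(30))
        {
            using (var fstream = new FileStream(qAgentBrainPath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite))
            {
                new BinaryFormatter().Serialize(fstream, qAgent);
            }
            saveTimer.Restart();
        }
        qAgent.tick();
    }
}).Start();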
private static QAgent21 TrainAgent(GameEngine engine21)
{
    var num_inputs = 1;      // current score
    var num_actions = 2;     // take a card or finish game
    var temporal_window = 0; // amount of temporal memory. 0 = agent lives in-the-moment :)
    var network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

    var layer_defs = new List<LayerDefinition>();

    // The value function network computes a value of taking any of the possible actions
    // given an input state. Here we specify one explicitly the hard way,
    // but the user could equivalently use opt.hidden_layer_sizes = [20,20]
    // to just insert simple relu hidden layers.
    layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = network_size });
    //layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 21, activation = "relu" });
    //layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
    //layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
    layer_defs.Add(new LayerDefinition { type = "regression", num_neurons = num_actions });

    // Options for the Temporal Difference learner that trains the above net
    // by backpropping the temporal difference learning rule.
    //var opt = new Options { method = "sgd", learning_rate = 0.01, l2_decay = 0.001, momentum = 0.9, batch_size = 10, l1_decay = 0.001 };
    var opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };

    var tdtrainer_options = new TrainingOptions();
    tdtrainer_options.temporal_window = temporal_window;
    tdtrainer_options.experience_size = 3000;       // size of experience replay memory
    tdtrainer_options.start_learn_threshold = 1000; // number of examples in experience replay memory before we begin learning
    tdtrainer_options.gamma = 1.0;                  // gamma is a crucial parameter that controls how much plan-ahead the agent does. In [0,1]
    tdtrainer_options.learning_steps_total = 15000; // number of steps we will learn for
    tdtrainer_options.learning_steps_burnin = 1000; // how many steps of the above to perform only random actions (in the beginning)?
    tdtrainer_options.epsilon_min = 0.01;           // what epsilon value do we bottom out on? 0.0 => purely deterministic policy at end
    tdtrainer_options.epsilon_test_time = 0.00;     // what epsilon to use at test time? (i.e. when learning is disabled)
    tdtrainer_options.layer_defs = layer_defs;
    tdtrainer_options.options = opt;

    var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
    var agent = new QAgent21(brain);

    int accumulatedScore = 0;
    int accumulatedGameLength = 0;
    int gamesInAccumulatedScore = 0;
    int batchSize = 5000;
    int total = 0;
    Stream bestAgentSerialized = new MemoryStream();
    double bestBatchScore = double.MinValue;

    while (total < 50000)
    {
        GameState state = new GameState();
        while (!state.IsFinished)
        {
            TurnOptions action = agent.Forward(state);
            //if (action == TurnOptions.FinishGame)
            //{
            //    Console.WriteLine($"finish at {state.Score}");
            //}
            GameState newState = engine21.ApplyTurn(action, state);
            agent.Backward(newState);
            state = newState;
            accumulatedGameLength++;
        }

        accumulatedScore += state.Score;
        gamesInAccumulatedScore++;
        total++;

        if (gamesInAccumulatedScore == batchSize)
        {
            double batchScore = accumulatedScore / (double)gamesInAccumulatedScore;
            Console.WriteLine($"{total} iterations. Error: {brain.visSelf()}. Length: {accumulatedGameLength / (double)gamesInAccumulatedScore} Average score: {batchScore}");
            accumulatedScore = 0;
            gamesInAccumulatedScore = 0;
            accumulatedGameLength = 0;

            // If the agent is good, save it.
            if (batchScore > bestBatchScore)
            {
                bestBatchScore = batchScore;
                IFormatter formatter = new BinaryFormatter();
                if (bestAgentSerialized != null)
                {
                    bestAgentSerialized.Close();
                    bestAgentSerialized.Dispose();
                }
                bestAgentSerialized = new MemoryStream();
                formatter.Serialize(bestAgentSerialized, agent);
            }
        }
    }

    Console.WriteLine($"Best score: {bestBatchScore}");
    Console.WriteLine("End");
    //File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + "DNQ.trr", agent.AgentToJson());

    // Restore the best agent seen during training and disable learning.
    IFormatter readFormatter = new BinaryFormatter();
    bestAgentSerialized.Seek(0, SeekOrigin.Begin);
    var agentToReturn = (QAgent21)readFormatter.Deserialize(bestAgentSerialized);
    agentToReturn.Brain.learning = false;
    brain.learning = false;
    return agentToReturn;
}
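A hypothetical caller for TrainAgent, using the same GameEngine, GameState and TurnOptions types the method itself relies on. The returned agent already has learning disabled, so with epsilon_test_time = 0.00 it should act greedily:

var engine21 = new GameEngine();
QAgent21 trained = TrainAgent(engine21);

// Let the trained, non-learning agent play a single game.
var state = new GameState();
while (!state.IsFinished)
{
    TurnOptions action = trained.Forward(state); // greedy: epsilon_test_time = 0.00
    state = engine21.ApplyTurn(action, state);
}
Console.WriteLine($"Greedy game finished with score {state.Score}");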
private void startLearning_Click(object sender, EventArgs e)
{
    if (qAgent == null)
    {
        var num_inputs = 27;     // 9 eyes, each sees 3 numbers (wall, green, red thing proximity)
        var num_actions = 5;     // 5 possible angles agent can turn
        var temporal_window = 4; // amount of temporal memory. 0 = agent lives in-the-moment :)
        var network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

        var layer_defs = new List<LayerDefinition>();

        // The value function network computes a value of taking any of the possible actions
        // given an input state. Here we specify one explicitly the hard way,
        // but the user could equivalently use opt.hidden_layer_sizes = [20,20]
        // to just insert simple relu hidden layers.
        layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = network_size });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "regression", num_neurons = num_actions });

        // Options for the Temporal Difference learner that trains the above net
        // by backpropping the temporal difference learning rule.
        //var opt = new Options { method = "sgd", learning_rate = 0.01, l2_decay = 0.001, momentum = 0.9, batch_size = 10, l1_decay = 0.001 };
        var opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };

        var tdtrainer_options = new TrainingOptions();
        tdtrainer_options.temporal_window = temporal_window;
        tdtrainer_options.experience_size = 30000;
        tdtrainer_options.start_learn_threshold = 1000;
        tdtrainer_options.gamma = 0.7;
        tdtrainer_options.learning_steps_total = 200000;
        tdtrainer_options.learning_steps_burnin = 3000;
        tdtrainer_options.epsilon_min = 0.05;
        tdtrainer_options.epsilon_test_time = 0.00;
        tdtrainer_options.layer_defs = layer_defs;
        tdtrainer_options.options = opt;

        var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
        qAgent = new QAgent(brain, canvas.Width, canvas.Height);
    }
    else
    {
        qAgent.startlearn();
    }

    if (workerThread == null)
    {
        workerThread = new Thread(new ThreadStart(BackgroundThread));
        workerThread.Start();
    }
}
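The network_size expression packs the previous temporal_window observations and actions plus the current observation into a single input vector. A quick worked check with the values used in this handler (illustrative only, not part of the original code):

int num_inputs = 27, num_actions = 5, temporal_window = 4;
int network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;
Console.WriteLine(network_size); // 27*4 + 5*4 + 27 = 108 + 20 + 27 = 155, matching out_depth above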
private void startLearn(bool delay)
{
    if (qAgent == null)
    {
        var num_inputs = 27;     // 9 eyes, each sees 3 numbers (wall, green, red thing proximity)
        var num_actions = 5;     // 5 possible angles agent can turn
        var temporal_window = 4; // amount of temporal memory. 0 = agent lives in-the-moment :)
        var network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

        var layer_defs = new List<LayerDefinition>();

        // The value function network computes a value of taking any of the possible actions
        // given an input state. Here we specify one explicitly the hard way,
        // but the user could equivalently use opt.hidden_layer_sizes = [20,20]
        // to just insert simple relu hidden layers.
        layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = network_size });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "regression", num_neurons = num_actions });

        // Options for the Temporal Difference learner that trains the above net
        // by backpropping the temporal difference learning rule.
        //var opt = new Options { method = "sgd", learning_rate = 0.01, l2_decay = 0.001, momentum = 0.9, batch_size = 10, l1_decay = 0.001 };
        //var opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };
        var opt = new Options
        {
            method = cboLearningMethod.Text,
            learning_rate = Double.Parse(txtLearningRate.Text),
            momentum = Double.Parse(txtLearningMomentum.Text),
            l1_decay = Double.Parse(txtLearningL1Decay.Text),
            l2_decay = Double.Parse(txtLearningL2Decay.Text),
            batch_size = Int32.Parse(txtLearningBatch.Text)
        };

        var tdtrainer_options = new TrainingOptions();
        tdtrainer_options.temporal_window = temporal_window;
        //tdtrainer_options.experience_size = 30000;
        tdtrainer_options.experience_size = experiencesize;
        //tdtrainer_options.start_learn_threshold = 1000;
        tdtrainer_options.start_learn_threshold = learnthreshold;
        tdtrainer_options.gamma = 0.7;
        //tdtrainer_options.learning_steps_total = 200000;
        tdtrainer_options.learning_steps_total = Int32.Parse(txtLearnTotal.Text);
        //tdtrainer_options.learning_steps_burnin = 3000;
        tdtrainer_options.learning_steps_burnin = Int32.Parse(txtLearnBurn.Text);
        tdtrainer_options.epsilon_min = 0.05;
        tdtrainer_options.epsilon_test_time = 0.00;
        tdtrainer_options.layer_defs = layer_defs;
        tdtrainer_options.options = opt;

        // Shared experience backed by a static store
        if (chkSharedExperience.Checked && staticExperience)
        {
            var brain = new DeepQLearnShared(num_inputs, num_actions, tdtrainer_options);
            brain.instance = this.instanceNumber;
            qAgent = new QAgent(brain, canvas.Width, canvas.Height, Int32.Parse(txtNumberItems.Text), chkRandomItems.Checked, chkObstructItems.Checked, infiniteItems);
        }
        // Shared experience backed by a singleton
        else if (chkSharedExperience.Checked && !staticExperience)
        {
            var brain = new DeepQLearnSharedSingleton(num_inputs, num_actions, tdtrainer_options);
            brain.instance = this.instanceNumber;
            qAgent = new QAgent(brain, canvas.Width, canvas.Height, Int32.Parse(txtNumberItems.Text), chkRandomItems.Checked, chkObstructItems.Checked, infiniteItems);
        }
        // Non-shared experience
        else
        {
            var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
            qAgent = new QAgent(brain, canvas.Width, canvas.Height, Int32.Parse(txtNumberItems.Text), chkRandomItems.Checked, chkObstructItems.Checked, infiniteItems);
        }
    }
    else
    {
        qAgent.startlearn();
    }

    if (!delay)
    {
        qAgent.goveryfast();
        interval = 0;
    }

    if (workerThread == null)
    {
        workerThread = new Thread(new ThreadStart(BackgroundThread));
        workerThread.Start();
    }
}
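One caveat in the form-driven path above: Double.Parse and Int32.Parse throw FormatException as soon as a textbox holds unparsable text. A defensive sketch using TryParse; the ParseOrDefault helpers are hypothetical additions, not part of the original form:

// Hypothetical helpers: fall back to a default instead of throwing.
private static double ParseOrDefault(string text, double fallback)
{
    return double.TryParse(text, out var value) ? value : fallback;
}

private static int ParseOrDefault(string text, int fallback)
{
    return int.TryParse(text, out var value) ? value : fallback;
}

// Possible usage inside startLearn:
// var opt = new Options
// {
//     method = cboLearningMethod.Text,
//     learning_rate = ParseOrDefault(txtLearningRate.Text, 0.01),
//     momentum = ParseOrDefault(txtLearningMomentum.Text, 0.9),
//     l1_decay = ParseOrDefault(txtLearningL1Decay.Text, 0.001),
//     l2_decay = ParseOrDefault(txtLearningL2Decay.Text, 0.001),
//     batch_size = ParseOrDefault(txtLearningBatch.Text, 10)
// };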