private void goVeryFast_Click(object sender, EventArgs e)
{
    qAgent.goveryfast();
    interval = 0;
}
private void startLearn(bool delay)
{
    if (qAgent == null)
    {
        var num_inputs = 27;      // 9 eyes, each sees 3 numbers (wall, green thing, red thing proximity)
        var num_actions = 5;      // 5 possible angles the agent can turn
        var temporal_window = 4;  // amount of temporal memory. 0 = agent lives in-the-moment :)
        var network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

        // The value function network computes a value for taking any of the possible actions
        // given an input state. Here we specify one explicitly the hard way, but the user
        // could equivalently use opt.hidden_layer_sizes = [20, 20] to insert simple relu hidden layers.
        var layer_defs = new List<LayerDefinition>();
        layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = network_size });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "regression", num_neurons = num_actions });

        // Options for the Temporal Difference learner that trains the above net
        // by backpropping the temporal difference learning rule.
        //var opt = new Options { method = "sgd", learning_rate = 0.01, l2_decay = 0.001, momentum = 0.9, batch_size = 10, l1_decay = 0.001 };
        //var opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };
        var opt = new Options
        {
            method = cboLearningMethod.Text,
            learning_rate = Double.Parse(txtLearningRate.Text),
            momentum = Double.Parse(txtLearningMomentum.Text),
            l1_decay = Double.Parse(txtLearningL1Decay.Text),
            l2_decay = Double.Parse(txtLearningL2Decay.Text),
            batch_size = Int32.Parse(txtLearningBatch.Text)
        };

        var tdtrainer_options = new TrainingOptions();
        tdtrainer_options.temporal_window = temporal_window;
        //tdtrainer_options.experience_size = 30000;
        tdtrainer_options.experience_size = experiencesize;
        //tdtrainer_options.start_learn_threshold = 1000;
        tdtrainer_options.start_learn_threshold = learnthreshold;
        tdtrainer_options.gamma = 0.7;
        //tdtrainer_options.learning_steps_total = 200000;
        tdtrainer_options.learning_steps_total = Int32.Parse(txtLearnTotal.Text);
        //tdtrainer_options.learning_steps_burnin = 3000;
        tdtrainer_options.learning_steps_burnin = Int32.Parse(txtLearnBurn.Text);
        tdtrainer_options.epsilon_min = 0.05;
        tdtrainer_options.epsilon_test_time = 0.00;
        tdtrainer_options.layer_defs = layer_defs;
        tdtrainer_options.options = opt;

        if (chkSharedExperience.Checked && staticExperience)
        {
            // Shared experience via a static replay store
            var brain = new DeepQLearnShared(num_inputs, num_actions, tdtrainer_options);
            brain.instance = this.instanceNumber;
            qAgent = new QAgent(brain, canvas.Width, canvas.Height,
                Int32.Parse(txtNumberItems.Text), chkRandomItems.Checked, chkObstructItems.Checked, infiniteItems);
        }
        else if (chkSharedExperience.Checked && !staticExperience)
        {
            // Shared experience via a singleton replay store
            var brain = new DeepQLearnSharedSingleton(num_inputs, num_actions, tdtrainer_options);
            brain.instance = this.instanceNumber;
            qAgent = new QAgent(brain, canvas.Width, canvas.Height,
                Int32.Parse(txtNumberItems.Text), chkRandomItems.Checked, chkObstructItems.Checked, infiniteItems);
        }
        else
        {
            // Non-shared experience: this agent keeps its own replay memory
            var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
            qAgent = new QAgent(brain, canvas.Width, canvas.Height,
                Int32.Parse(txtNumberItems.Text), chkRandomItems.Checked, chkObstructItems.Checked, infiniteItems);
        }
    }
    else
    {
        qAgent.startlearn();
    }

    if (!delay)
    {
        qAgent.goveryfast();
        interval = 0;
    }

    if (workerThread == null)
    {
        workerThread = new Thread(new ThreadStart(BackgroundThread));
        workerThread.Start();
    }
}
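// A minimal sketch of the shorthand mentioned in the layer_defs comment above.
// This assumes the Options class in this port mirrors the ConvNetJS demo and
// exposes a hidden_layer_sizes field; that field is NOT verified against this
// project's API, so treat it as an illustration only:
//
//     var opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };
//     opt.hidden_layer_sizes = new[] { 96, 96, 96 }; // hypothetical field; would replace the three explicit "fc" relu layers
//
// If such a field exists, the explicit "fc" LayerDefinition entries could be
// dropped and the input/regression layers inferred from num_inputs/num_actions.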