/// <summary>
/// Generate an instance of <see cref="IImagesUploader"/> based on <paramref name="options"/>.
/// </summary>
/// <returns>The requested implementation of <see cref="IImagesUploader"/>.</returns>
public static IImagesUploader GenerateImagesUploader(TrainingOptions options, ITrainingApi trainApi)
{
    switch (options.ImageSource)
    {
        case "url":
            return new UrlImagesUploader(trainApi, options.UploadBatchSize);
        case "local":
            return new LocalImagesUploader(trainApi, options.UploadBatchSize);
        default:
            throw new InvalidOperationException($"Unsupported image source: {options.ImageSource}");
    }
}
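A minimal call-site sketch for the factory above. The `trainingApi` instance and the object-initializer syntax for `TrainingOptions` are assumptions, not confirmed API:

// Pick the uploader implementation from the configured image source.
// Assumption: ImageSource and UploadBatchSize are settable members of TrainingOptions.
var uploadOptions = new TrainingOptions { ImageSource = "local", UploadBatchSize = 64 };
IImagesUploader uploader = GenerateImagesUploader(uploadOptions, trainingApi);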
/// <summary>
/// Initialize or completely reset the agent.
/// </summary>
/// <param name="numberOfStates">Number of states in the environment.</param>
/// <param name="numberOfActions">Number of actions available to the agent.</param>
/// <param name="gamma">Discount factor in [0,1]; defaults to 0.75.</param>
public void Reset(int numberOfStates, int numberOfActions, double gamma = 0.75)
{
    Options = new TrainingOptions(gamma);
    NumberOfStates = numberOfStates;
    NumberOfActions = numberOfActions;

    // Start from a uniform policy over the allowed actions in each state.
    Policy = new double[numberOfActions * numberOfStates];
    for (int s = 0; s < NumberOfStates; s++)
    {
        var poss = GetAllowedActions(s);
        for (int i = 0, n = poss.Length; i < n; i++)
        {
            Policy[poss[i] * NumberOfStates + s] = 1.0 / poss.Length;
        }
    }
}
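A short call sketch; the state and action counts here are illustrative only:

// Re-initialize the agent for a hypothetical 12-state, 4-action environment,
// keeping the default discount factor of 0.75.
agent.Reset(numberOfStates: 12, numberOfActions: 4);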
public DQNAgent(TrainingOptions options, int states, int actions)
{
    Net = new Network(4, "DQNAgent");
    NumberOfStates = states;
    NumberOfActions = actions;
    Options = options;

    // Single-hidden-layer Q-network: weights, then biases.
    Net.Matrices[0] = new Matrix(Options.HiddenUnits, NumberOfStates);  // input-to-hidden weights
    Net.Matrices[1] = new Matrix(NumberOfActions, options.HiddenUnits); // hidden-to-output weights
    Net.Matrices[2] = new Matrix(options.HiddenUnits, 1);               // hidden bias
    Net.Matrices[3] = new Matrix(NumberOfActions, 1);                   // output bias

    // Small random Gaussian initialization (mean 0, stddev 0.01).
    Util.FillMatrixWithRandomGaussianNumbers(Net.Matrices[0], 0, 0.01);
    Util.FillMatrixWithRandomGaussianNumbers(Net.Matrices[1], 0, 0.01);
    Util.FillMatrixWithRandomGaussianNumbers(Net.Matrices[2], 0, 0.01);
    Util.FillMatrixWithRandomGaussianNumbers(Net.Matrices[3], 0, 0.01);

    Experience = new List<Experience>();
}
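The four matrices form a single-hidden-layer Q-network. A shape sketch under assumed sizes (matching the console demo further below; it assumes the remaining TrainingOptions fields have usable defaults):

// With HiddenUnits = 5, states = 4, actions = 2:
//   Matrices[0]: 5 x 4  input-to-hidden weights
//   Matrices[1]: 2 x 5  hidden-to-output weights
//   Matrices[2]: 5 x 1  hidden bias
//   Matrices[3]: 2 x 1  output bias
var agent = new DQNAgent(new TrainingOptions { HiddenUnits = 5 }, states: 4, actions: 2);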
protected void Page_Load(object sender, EventArgs e)
{
    if (!IsPostBack)
    {
        DataTable dropDownOptions = new DataTable();
        dropDownOptions.Columns.Add("id");

        DataRow row = dropDownOptions.NewRow();
        row["id"] = 1;
        dropDownOptions.Rows.Add(row);

        row = dropDownOptions.NewRow();
        row["id"] = 2;
        dropDownOptions.Rows.Add(row);

        dropDownOptions.AcceptChanges();

        TrainingOptions.DataSource = dropDownOptions;
        TrainingOptions.Text = "Please choose";
        TrainingOptions.TextField = "id"; // must match the DataTable column name
        TrainingOptions.DataBindItems();
    }
}
public DeepQLearn(int num_states, int num_actions, TrainingOptions opt)
{
    this.util = new Util();
    this.opt = opt;

    // In number of time steps of temporal memory.
    // The ACTUAL input to the net will be (x,a) temporal_window times, followed by the current x,
    // so to have no information from previous time steps going into the value function, set to 0.
    this.temporal_window = opt.temporal_window != int.MinValue ? opt.temporal_window : 1;

    // Size of experience replay memory.
    this.experience_size = opt.experience_size != int.MinValue ? opt.experience_size : 30000;

    // Number of examples in experience replay memory before we begin learning.
    this.start_learn_threshold = opt.start_learn_threshold != double.MinValue
        ? opt.start_learn_threshold
        : Math.Floor(Math.Min(this.experience_size * 0.1, 1000));

    // Gamma is a crucial parameter that controls how much plan-ahead the agent does. In [0,1].
    this.gamma = opt.gamma != double.MinValue ? opt.gamma : 0.8;

    // Number of steps we will learn for.
    this.learning_steps_total = opt.learning_steps_total != int.MinValue ? opt.learning_steps_total : 100000;

    // How many of those steps consist of only random actions (in the beginning)?
    this.learning_steps_burnin = opt.learning_steps_burnin != int.MinValue ? opt.learning_steps_burnin : 3000;

    // What epsilon value do we bottom out on? 0.0 => purely deterministic policy at the end.
    this.epsilon_min = opt.epsilon_min != double.MinValue ? opt.epsilon_min : 0.05;

    // What epsilon to use at test time (i.e. when learning is disabled)?
    this.epsilon_test_time = opt.epsilon_test_time != double.MinValue ? opt.epsilon_test_time : 0.00;

    // Advanced feature: sometimes a random action should be biased towards some values.
    // For example, in Flappy Bird we may want to choose not to flap more often.
    if (opt.random_action_distribution != null)
    {
        // This should sum to 1 and be of length num_actions.
        this.random_action_distribution = opt.random_action_distribution;
        if (this.random_action_distribution.Count != num_actions)
        {
            Console.WriteLine("TROUBLE. random_action_distribution should be same length as num_actions.");
        }
        var sum_of_dist = this.random_action_distribution.Sum();
        if (Math.Abs(sum_of_dist - 1.0) > 0.0001)
        {
            Console.WriteLine("TROUBLE. random_action_distribution should sum to 1!");
        }
    }
    else
    {
        this.random_action_distribution = new List<double>();
    }

    // States that go into the neural net to predict the optimal action look like
    // x0,a0,x1,a1,x2,a2,...,xt; temporal_window controls the size of that window.
    // Actions are encoded as 1-of-k hot vectors.
    this.net_inputs = num_states * this.temporal_window + num_actions * this.temporal_window + num_states;
    this.num_states = num_states;
    this.num_actions = num_actions;
    this.window_size = Math.Max(this.temporal_window, 2); // must be at least 2, but can be larger for more context
    this.state_window = new List<Volume>();
    this.action_window = new List<int>();
    this.reward_window = new List<double>();
    this.net_window = new List<double[]>();

    // Init with dummy data.
    for (int i = 0; i < window_size; i++) { this.state_window.Add(new Volume(1, 1, 1)); }
    for (int i = 0; i < window_size; i++) { this.action_window.Add(0); }
    for (int i = 0; i < window_size; i++) { this.reward_window.Add(0.0); }
    for (int i = 0; i < window_size; i++) { this.net_window.Add(new double[] { 0.0 }); }

    // Create [state -> value of all possible actions] modeling net for the value function.
    var layer_defs = new List<LayerDefinition>();
    if (opt.layer_defs != null)
    {
        // This is an advanced usage feature, because the size of the input to the network and the
        // number of actions must check out. This is not very pretty object-oriented programming,
        // but I can't see a way out of it :(
        layer_defs = opt.layer_defs;
        if (layer_defs.Count < 2) { Console.WriteLine("TROUBLE! must have at least 2 layers"); }
        if (layer_defs[0].type != "input") { Console.WriteLine("TROUBLE! first layer must be input layer!"); }
        if (layer_defs[layer_defs.Count - 1].type != "regression") { Console.WriteLine("TROUBLE! last layer must be regression layer!"); }
        if (layer_defs[0].out_depth * layer_defs[0].out_sx * layer_defs[0].out_sy != this.net_inputs)
        {
            Console.WriteLine("TROUBLE! Number of inputs must be num_states * temporal_window + num_actions * temporal_window + num_states!");
        }
        if (layer_defs[layer_defs.Count - 1].num_neurons != this.num_actions)
        {
            Console.WriteLine("TROUBLE! Number of regression neurons should be num_actions!");
        }
    }
    else
    {
        // Create a very simple neural net by default.
        layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = this.net_inputs });
        if (opt.hidden_layer_sizes != null)
        {
            // Allow the user to specify hidden layers via the option, for convenience.
            var hl = opt.hidden_layer_sizes;
            for (var k = 0; k < hl.Length; k++)
            {
                layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = hl[k], activation = "relu" }); // relu by default
            }
        }
        // Output layer: one regression neuron per action (the last layer must be a regression layer).
        layer_defs.Add(new LayerDefinition { type = "regression", num_neurons = num_actions });
    }

    // Create the network.
    this.value_net = new Net();
    this.value_net.makeLayers(layer_defs);

    // And finally we need a Temporal Difference Learning trainer!
    var options = new Options { learning_rate = 0.01, momentum = 0.0, batch_size = 64, l2_decay = 0.01 };
    if (opt.options != null)
    {
        options = opt.options; // allow the user to overwrite this
    }
    this.tdtrainer = new Trainer(this.value_net, options);

    // Experience replay.
    this.experience = new List<Experience>();
    //DeepQLearn.experienceShared = new List<ExperienceShared>(); // static list not threadsafe
    DeepQLearn.experienceShared = new ConcurrentDictionary<int, ExperienceShared>();

    // Various housekeeping variables.
    this.age = 0;            // incremented every backward()
    this.forward_passes = 0; // incremented every forward()
    this.epsilon = 1.0;      // controls exploration/exploitation tradeoff; should be annealed over time
    this.latest_reward = 0;
    //this.last_input = [];
    this.average_reward_window = new TrainingWindow(1000, 10);
    this.average_loss_window = new TrainingWindow(1000, 10);
    this.learning = true;
}
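A minimal construction sketch. It assumes `TrainingOptions` initializes its numeric fields to `int.MinValue`/`double.MinValue` as "unset" sentinels, which is what the fallback checks above test for:

// Unset fields fall back to the defaults documented above
// (temporal_window = 1, experience_size = 30000, gamma = 0.8, ...).
var opt = new TrainingOptions();
opt.gamma = 0.9; // override only the discount factor
var brain = new DeepQLearn(8, 4, opt); // 8 states, 4 actions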
/// <summary>
/// Read and upload images to <paramref name="project"/> based on <paramref name="options"/>.
/// </summary>
public static async Task<CreateImageSummaryModel> ReadAndUploadImagesAsync(this ITrainingApi trainingApi, Project project, TrainingOptions options, ICollection<string> allowedTagNames)
{
    var images = ImagesLoaderGenerator.GenerateImagesLoader(options, allowedTagNames).LoadImages();
    return await ImagesUploaderGenerator.GenerateImagesUploader(options, trainingApi).UploadImagesAsync(images, project.Id);
}
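A hedged call-site sketch for this extension method, inside an async method; `project` and `options` are assumed to exist in the surrounding code, and the tag names are illustrative:

// Loads images from the configured source (url/local) and uploads them in batches.
ICollection<string> allowedTagNames = new List<string> { "cat", "dog" };
CreateImageSummaryModel summary = await trainingApi.ReadAndUploadImagesAsync(project, options, allowedTagNames);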
public TrainingOptionsWindow(TrainingOptions options)
{
    this.options = options;
    InitializeComponent();
}
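A hypothetical way to show the dialog, assuming `TrainingOptionsWindow` is a standard WinForms/WPF window:

// Open the options dialog modally; the constructor above keeps a reference
// to the TrainingOptions instance so the window can edit it in place.
var optionsWindow = new TrainingOptionsWindow(options);
optionsWindow.ShowDialog();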
//---------------------------------------------
#region Worker Thread
private void backgroundWorker_DoWork(object sender, DoWorkEventArgs e)
{
    if (!(e.Argument is TrainingOptions))
    {
        backgroundWorker.ReportProgress(0, "Bad thread argument!");
        e.Cancel = true;
        return;
    }

    TrainingOptions options = (TrainingOptions)e.Argument;

    // Create teacher
    BackPropagationLearning networkTeacher = new BackPropagationLearning(m_networkContainer.ActivationNetwork);
    networkTeacher.LearningRate = options.firstLearningRate;
    networkTeacher.Momentum = options.momentum;

    // Start training
    bool stop = false;
    int lastStatusEpoch = 0;
    int lastGraphEpoch = 0;
    int lastSaveEpoch = 0;

    backgroundWorker.ReportProgress(0, "Training...");

    while (!stop)
    {
        #region Training Epoch
        this.m_networkState.ErrorTraining = networkTeacher.RunEpoch(options.TrainingVectors.Input, options.TrainingVectors.Output) /* / options.TrainingVectors.Input.Length */;
        this.m_networkState.ErrorValidation = networkTeacher.MeasureEpochError(options.ValidationVectors.Input, options.ValidationVectors.Output) /* / options.ValidationVectors.Input.Length */;

        // Adjust training rate
        if (options.secondLearningRate.HasValue)
        {
            networkTeacher.LearningRate = options.secondLearningRate.Value;
        }
        #endregion

        #region Mark Network Savepoint
        if (Properties.Settings.Default.training_Autosave == true &&
            m_networkState.Epoch >= lastSaveEpoch + Properties.Settings.Default.training_AutosaveEpochs)
        {
            backgroundWorker.ReportProgress(0, UpdateType.NetworkSave);
            lastSaveEpoch = m_networkState.Epoch;
        }
        #endregion

        #region Graph Update
        if (Properties.Settings.Default.graph_Disable == false &&
            m_networkState.Epoch >= lastGraphEpoch + Properties.Settings.Default.graph_UpdateRate)
        {
            backgroundWorker.ReportProgress(0, UpdateType.Graph);
            lastGraphEpoch = m_networkState.Epoch;
        }
        #endregion

        #region Statusbar Update
        if (Properties.Settings.Default.display_UpdateByTime == false &&
            m_networkState.Epoch >= lastStatusEpoch + Properties.Settings.Default.display_UpdateRate)
        {
            if (options.TrainingType == TrainingType.ByError)
            {
                if (m_networkState.ErrorTraining != 0)
                {
                    m_networkState.Progress = Math.Max(Math.Min((int)((options.limError * 100) / m_networkState.ErrorTraining), 100), 0);
                }
            }
            else
            {
                if (m_networkState.Epoch != 0)
                {
                    m_networkState.Progress = Math.Max(Math.Min((int)((m_networkState.Epoch * 100) / options.limEpoch), 100), 0);
                }
            }
            backgroundWorker.ReportProgress(0, UpdateType.Statusbar);
            lastStatusEpoch = m_networkState.Epoch;
        }
        #endregion

        ++m_networkState.Epoch;

        // Sleep thread according to specified delay
        System.Threading.Thread.Sleep(Properties.Settings.Default.training_delay);

        #region Stop Conditions
        if (options.TrainingType == TrainingType.ByError)
        {
            if (m_networkState.ErrorTraining <= options.limError)
            {
                stop = true;
            }
        }
        else if (options.TrainingType == TrainingType.ByEpoch)
        {
            if (m_networkState.Epoch >= options.limEpoch)
            {
                stop = true;
            }
        }

        if (backgroundWorker.CancellationPending)
        {
            e.Cancel = true;
            stop = true;
        }
        #endregion
    }

    backgroundWorker.ReportProgress(0);
}
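A minimal wiring sketch, assuming `backgroundWorker` is a standard `System.ComponentModel.BackgroundWorker` created elsewhere (e.g. in the designer):

// Hook up the handler and start a training run on the worker thread;
// "options" is a populated TrainingOptions instance, as in the Start() method below.
backgroundWorker.WorkerReportsProgress = true;
backgroundWorker.WorkerSupportsCancellation = true;
backgroundWorker.DoWork += backgroundWorker_DoWork;
backgroundWorker.RunWorkerAsync(options);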
public void Start()
{
    if (this.backgroundWorker.IsBusy)
    {
        HistoryListener.Write("Trainer thread is busy!");
    }
    else
    {
        HistoryListener.Write("Gathering information...");

        TrainingOptions options = new TrainingOptions();
        options.momentum = (double)numMomentum.Value;
        options.firstLearningRate = (double)numLearningRate.Value;
        options.limError = (double)numErrorLimit.Value;
        options.limEpoch = (int)numEpochLimit.Value;
        options.validateNetwork = cbValidate.Checked;
        options.secondLearningRate = cbChangeRate.Checked ? (double?)numChangeRate.Value : null;

        if (cbTrainingLayer.SelectedIndex == 0)
        {
            options.TrainingVectors = this.NetworkDatabase.CreateVectors(NetworkSet.Training);
        }
        else
        {
            options.TrainingVectors = this.NetworkDatabase.CreateVectors(NetworkSet.Training, (ushort)cbTrainingLayer.SelectedIndex);
        }
        options.ValidationVectors = this.NetworkDatabase.CreateVectors(NetworkSet.Validation);

        if (rbEpochLimit.Checked)
        {
            options.TrainingType = TrainingType.ByEpoch;
        }
        else if (rbErrorLimit.Checked)
        {
            options.TrainingType = TrainingType.ByError;
        }
        else if (rbManual.Checked)
        {
            options.TrainingType = TrainingType.Manual;
        }

        /*
        //foreach (Double[] inputs in options.TrainingVectors.Input)
        //{
            String str = String.Empty;
            foreach (Double input in options.TrainingVectors.Input[0])
            {
                str += input + " ";
            }
            MessageBox.Show(str);
            str = String.Empty;
            foreach (Double input in options.TrainingVectors.Output[0])
            {
                str += input + " ";
            }
            MessageBox.Show(str);
        //}
        */

        if (this.m_trainingPaused)
        {
            // Network is paused
            this.m_trainingPaused = false;
        }
        else
        {
            // Network is stopped
            // this.m_graphControl.ClearGraph();
        }

        this.m_graphControl.TrimGraph(m_networkState.Epoch);
        if (this.cbSwitchGraph.Checked)
        {
            this.m_graphControl.ShowTab();
        }

        if (this.TrainingStarted != null)
        {
            this.TrainingStarted.Invoke(this, EventArgs.Empty);
        }

        // Start timer
        this.timer.Start();

        HistoryListener.Write("Starting thread");
        this.backgroundWorker.RunWorkerAsync(options);
    }
}
static void Main(string[] args) // broken
{
    Console.ForegroundColor = ConsoleColor.DarkMagenta;

    if (File.Exists(qAgentBrainPath))
    {
        using (FileStream fstream = new FileStream(qAgentBrainPath, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            qAgent = new BinaryFormatter().Deserialize(fstream) as QAgent;
            qAgent.Reinitialize();
        }
        Console.WriteLine("QAgent loaded");
    }
    else
    {
        var num_inputs = 6;      // size of the state vector fed to the network
        var num_actions = 3;     // number of discrete actions the agent can choose from
        var temporal_window = 1; // amount of temporal memory. 0 = agent lives in-the-moment :)
        var network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

        // config brain
        var layer_defs = new List<LayerDefinition>();

        // The value function network computes a value of taking any of the possible actions
        // given an input state. Here we specify one explicitly the hard way, but the user could
        // also equivalently instead use opt.hidden_layer_sizes = [20,20]
        // to just insert simple relu hidden layers.
        layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = network_size });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "regression", num_neurons = num_actions });

        // Options for the Temporal Difference learner that trains the above net
        // by backpropping the temporal difference learning rule.
        //var opt = new Options { method="sgd", learning_rate=0.01, l2_decay=0.001, momentum=0.9, batch_size=10, l1_decay=0.001 };
        Options opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };

        TrainingOptions tdtrainer_options = new TrainingOptions();
        tdtrainer_options.temporal_window = temporal_window;
        tdtrainer_options.experience_size = 30000;
        tdtrainer_options.start_learn_threshold = 1000;
        tdtrainer_options.gamma = 0.7;
        tdtrainer_options.learning_steps_total = 200000;
        tdtrainer_options.learning_steps_burnin = 3000;
        tdtrainer_options.epsilon_min = 0.05;
        tdtrainer_options.epsilon_test_time = 0.00;
        tdtrainer_options.layer_defs = layer_defs;
        tdtrainer_options.options = opt;

        DeepQLearn brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
        qAgent = new QAgent(brain);
    }

    qAgent.startlearn();

    new Thread(() =>
    {
        while (true)
        {
            // Periodically snapshot the agent to disk (the modulus check is an arbitrary throttle).
            if (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond % 31 /*arbitrary*/ == 0)
            {
                using (FileStream fstream = new FileStream(qAgentBrainPath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite))
                {
                    new BinaryFormatter().Serialize(fstream, qAgent);
                }
            }
            qAgent.tick();
        }
    }).Start();
}
static void Main(string[] args)
{
    var rnd = new Random();
    int max = 10;
    int min = 1;
    int nextPrint = 0, act1 = 0, act0 = 0;
    double total = 0, correct = 0;

    var state = new[] { rnd.Next(min, max), rnd.Next(min, max), rnd.Next(min, max), rnd.Next(min, max) };
    var opt = new TrainingOptions
    {
        Alpha = 0.001,
        Epsilon = 0,
        ErrorClamp = 0.002,
        ExperienceAddEvery = 10,
        ExperienceSize = 1000,
        ExperienceStart = 0,
        HiddenUnits = 5,
        LearningSteps = 400
    };

    // We take 4 states, i.e. random numbers between 1 and 10.
    // We have 2 actions: 1 if the average of the set is > 5 and 0 otherwise.
    // We reward the agent with 1 for every correct answer and -1 otherwise.
    var agent = new DQNAgent(opt, state.Length, 2);

    // How to properly use the DPAgent:
    //var agent2 = new MyDPAgent();
    //agent2.Reset(state.Length, 2);

    while (total < 50000)
    {
        state = new[] { rnd.Next(min, max), rnd.Next(min, max), rnd.Next(min, max), rnd.Next(min, max) };
        var action = agent.Act(state);

        if (action == 1) { act1++; } else { act0++; }

        if (state.Average() > 5 && action == 1)
        {
            agent.Learn(1);
            correct++;
        }
        else if (state.Average() <= 5 && action == 0)
        {
            agent.Learn(1);
            correct++;
        }
        else
        {
            agent.Learn(-1);
        }
        total++;

        //nextPrint++;
        if (total >= nextPrint)
        {
            Console.WriteLine("Score: " + (correct / total).ToString("P") + " Epoch: " + nextPrint);
            Console.WriteLine("Action 1: " + act1 + " Action 0: " + act0);
            nextPrint += 1000;
        }
    }

    // Console.WriteLine("Score: " + (correct / total).ToString("P"));
    Console.WriteLine("End");
    File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + "DNQ.trr", agent.AgentToJson());
    Console.ReadKey();
}
private void startLearning_Click(object sender, EventArgs e)
{
    if (qAgent == null)
    {
        var num_inputs = 27;     // 9 eyes, each sees 3 numbers (wall, green, red thing proximity)
        var num_actions = 5;     // 5 possible angles agent can turn
        var temporal_window = 4; // amount of temporal memory. 0 = agent lives in-the-moment :)
        var network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

        var layer_defs = new List<LayerDefinition>();

        // The value function network computes a value of taking any of the possible actions
        // given an input state. Here we specify one explicitly the hard way, but the user could
        // also equivalently instead use opt.hidden_layer_sizes = [20,20]
        // to just insert simple relu hidden layers.
        layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = network_size });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "regression", num_neurons = num_actions });

        // Options for the Temporal Difference learner that trains the above net
        // by backpropping the temporal difference learning rule.
        //var opt = new Options { method="sgd", learning_rate=0.01, l2_decay=0.001, momentum=0.9, batch_size=10, l1_decay=0.001 };
        var opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };

        var tdtrainer_options = new TrainingOptions();
        tdtrainer_options.temporal_window = temporal_window;
        tdtrainer_options.experience_size = 30000;
        tdtrainer_options.start_learn_threshold = 1000;
        tdtrainer_options.gamma = 0.7;
        tdtrainer_options.learning_steps_total = 200000;
        tdtrainer_options.learning_steps_burnin = 3000;
        tdtrainer_options.epsilon_min = 0.05;
        tdtrainer_options.epsilon_test_time = 0.00;
        tdtrainer_options.layer_defs = layer_defs;
        tdtrainer_options.options = opt;

        var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
        qAgent = new QAgent(brain, canvas.Width, canvas.Height);
    }
    else
    {
        qAgent.startlearn();
    }

    if (workerThread == null)
    {
        workerThread = new Thread(new ThreadStart(BackgroundThread));
        workerThread.Start();
    }
}
public DeepQLearnShared(int num_states, int num_actions, TrainingOptions opt)
    : base(num_states, num_actions, opt)
{
}
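The subclass adds no state of its own; it relies on the static `DeepQLearn.experienceShared` dictionary initialized in the base constructor so that multiple agents can pool replay memory. A usage sketch based on the startLearn example further below:

// Each agent gets its own brain but shares replay experience through the static store.
var sharedBrain = new DeepQLearnShared(num_inputs, num_actions, tdtrainer_options);
sharedBrain.instance = this.instanceNumber; // identifies this agent in the shared store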
private static QAgent21 TrainAgent(GameEngine engine21)
{
    var num_inputs = 1;      // current score
    var num_actions = 2;     // take a card or finish the game
    var temporal_window = 0; // amount of temporal memory. 0 = agent lives in-the-moment :)
    var network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

    var layer_defs = new List<LayerDefinition>();

    // The value function network computes a value of taking any of the possible actions
    // given an input state. Here we specify one explicitly the hard way, but the user could
    // also equivalently instead use opt.hidden_layer_sizes = [20,20]
    // to just insert simple relu hidden layers.
    layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = network_size });
    //layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 21, activation = "relu" });
    //layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
    //layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
    layer_defs.Add(new LayerDefinition { type = "regression", num_neurons = num_actions });

    // Options for the Temporal Difference learner that trains the above net
    // by backpropping the temporal difference learning rule.
    //var opt = new Options { method="sgd", learning_rate=0.01, l2_decay=0.001, momentum=0.9, batch_size=10, l1_decay=0.001 };
    var opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };

    var tdtrainer_options = new TrainingOptions();
    tdtrainer_options.temporal_window = temporal_window;
    tdtrainer_options.experience_size = 3000;       // size of experience replay memory
    tdtrainer_options.start_learn_threshold = 1000; // number of examples in replay memory before we begin learning
    tdtrainer_options.gamma = 1.0;                  // discount factor; controls how much plan-ahead the agent does. In [0,1]
    tdtrainer_options.learning_steps_total = 15000; // number of steps we will learn for
    tdtrainer_options.learning_steps_burnin = 1000; // how many of those steps are purely random actions (in the beginning)
    tdtrainer_options.epsilon_min = 0.01;           // epsilon we bottom out on; 0.0 => purely deterministic policy at the end
    tdtrainer_options.epsilon_test_time = 0.00;     // epsilon at test time (i.e. when learning is disabled)
    tdtrainer_options.layer_defs = layer_defs;
    tdtrainer_options.options = opt;

    var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
    var agent = new QAgent21(brain);

    int accumulatedScore = 0;
    int accumulatedGameLength = 0;
    int gamesInAccumulatedScore = 0;
    int batchSize = 5000;
    int total = 0;
    Stream bestAgentSerialized = new MemoryStream();
    double bestBatchScore = double.MinValue;

    while (total < 50000)
    {
        GameState state = new GameState();
        while (!state.IsFinished)
        {
            TurnOptions action = agent.Forward(state);
            //if (action == TurnOptions.FinishGame)
            //{
            //    Console.WriteLine($"finish at {state.Score}");
            //}
            GameState newState = engine21.ApplyTurn(action, state);
            agent.Backward(newState);
            state = newState;
            accumulatedGameLength++;
        }

        accumulatedScore += state.Score;
        gamesInAccumulatedScore++;
        total++;

        if (gamesInAccumulatedScore == batchSize)
        {
            double batchScore = accumulatedScore / (double)gamesInAccumulatedScore;
            Console.WriteLine($"{total} iterations. Error: {brain.visSelf()}. Length: {accumulatedGameLength / (double)gamesInAccumulatedScore} Average score: {batchScore}");
            accumulatedScore = 0;
            gamesInAccumulatedScore = 0;
            accumulatedGameLength = 0;

            // If the agent is good, save it.
            if (batchScore > bestBatchScore)
            {
                bestBatchScore = batchScore;
                IFormatter formatter = new BinaryFormatter();
                if (bestAgentSerialized != null)
                {
                    bestAgentSerialized.Close();
                    bestAgentSerialized.Dispose();
                }
                bestAgentSerialized = new MemoryStream();
                formatter.Serialize(bestAgentSerialized, agent);
            }
        }
    }

    Console.WriteLine($"Best score: {bestBatchScore}");
    Console.WriteLine("End");
    //File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + "DNQ.trr", agent.AgentToJson());

    IFormatter readFormatter = new BinaryFormatter();
    bestAgentSerialized.Seek(0, SeekOrigin.Begin);
    var agentToReturn = (QAgent21)readFormatter.Deserialize(bestAgentSerialized);
    agentToReturn.Brain.learning = false;
    brain.learning = false;
    return agentToReturn;
}
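A hedged call-site sketch; the `GameEngine` default constructor is an assumption:

// Train against the engine, then use the returned agent greedily (learning is disabled).
var engine21 = new GameEngine(); // hypothetical default constructor
QAgent21 trainedAgent = TrainAgent(engine21);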
private void startLearn(bool delay)
{
    if (qAgent == null)
    {
        var num_inputs = 27;     // 9 eyes, each sees 3 numbers (wall, green, red thing proximity)
        var num_actions = 5;     // 5 possible angles agent can turn
        var temporal_window = 4; // amount of temporal memory. 0 = agent lives in-the-moment :)
        var network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

        var layer_defs = new List<LayerDefinition>();

        // The value function network computes a value of taking any of the possible actions
        // given an input state. Here we specify one explicitly the hard way, but the user could
        // also equivalently instead use opt.hidden_layer_sizes = [20,20]
        // to just insert simple relu hidden layers.
        layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = network_size });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
        layer_defs.Add(new LayerDefinition { type = "regression", num_neurons = num_actions });

        // Options for the Temporal Difference learner, read from the UI instead of hard-coded defaults.
        //var opt = new Options { method="sgd", learning_rate=0.01, l2_decay=0.001, momentum=0.9, batch_size=10, l1_decay=0.001 };
        //var opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };
        var opt = new Options
        {
            method = cboLearningMethod.Text,
            learning_rate = Double.Parse(txtLearningRate.Text),
            momentum = Double.Parse(txtLearningMomentum.Text),
            l1_decay = Double.Parse(txtLearningL1Decay.Text),
            l2_decay = Double.Parse(txtLearningL2Decay.Text),
            batch_size = Int32.Parse(txtLearningBatch.Text)
        };

        var tdtrainer_options = new TrainingOptions();
        tdtrainer_options.temporal_window = temporal_window;
        tdtrainer_options.experience_size = experiencesize;       // was 30000
        tdtrainer_options.start_learn_threshold = learnthreshold; // was 1000
        tdtrainer_options.gamma = 0.7;
        tdtrainer_options.learning_steps_total = Int32.Parse(txtLearnTotal.Text); // was 200000
        tdtrainer_options.learning_steps_burnin = Int32.Parse(txtLearnBurn.Text); // was 3000
        tdtrainer_options.epsilon_min = 0.05;
        tdtrainer_options.epsilon_test_time = 0.00;
        tdtrainer_options.layer_defs = layer_defs;
        tdtrainer_options.options = opt;

        if (chkSharedExperience.Checked && staticExperience)
        {
            // Shared experience via the static store.
            var brain = new DeepQLearnShared(num_inputs, num_actions, tdtrainer_options);
            brain.instance = this.instanceNumber;
            qAgent = new QAgent(brain, canvas.Width, canvas.Height, Int32.Parse(txtNumberItems.Text), chkRandomItems.Checked, chkObstructItems.Checked, infiniteItems);
        }
        else if (chkSharedExperience.Checked && !staticExperience)
        {
            // Shared experience via the singleton store.
            var brain = new DeepQLearnSharedSingleton(num_inputs, num_actions, tdtrainer_options);
            brain.instance = this.instanceNumber;
            qAgent = new QAgent(brain, canvas.Width, canvas.Height, Int32.Parse(txtNumberItems.Text), chkRandomItems.Checked, chkObstructItems.Checked, infiniteItems);
        }
        else
        {
            // Non-shared experience.
            var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
            qAgent = new QAgent(brain, canvas.Width, canvas.Height, Int32.Parse(txtNumberItems.Text), chkRandomItems.Checked, chkObstructItems.Checked, infiniteItems);
        }
    }
    else
    {
        qAgent.startlearn();
    }

    if (!delay)
    {
        qAgent.goveryfast();
        interval = 0;
    }

    if (workerThread == null)
    {
        workerThread = new Thread(new ThreadStart(BackgroundThread));
        workerThread.Start();
    }
}