Example #1
        /// <summary>
        /// Generate an instance of <see cref="IImagesUploader"/> based on <paramref name="options"/>.
        /// </summary>
        /// <returns>Requested instance of the desired implementation of <see cref="IImagesUploader"/></returns>
        public static IImagesUploader GenerateImagesUploader(TrainingOptions options, ITrainingApi trainApi)
        {
            switch (options.ImageSource)
            {
                case "url":
                    return new UrlImagesUploader(trainApi, options.UploadBatchSize);

                case "local":
                    return new LocalImagesUploader(trainApi, options.UploadBatchSize);

                default:
                    throw new InvalidOperationException($"Unsupported image source: {options.ImageSource}");
            }
        }
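A minimal usage sketch for this factory, assuming it runs inside an async method and that `options`, `images`, and `project` come from the surrounding setup (the UploadImagesAsync call mirrors the ReadAndUploadImagesAsync extension method further below):

        // Hedged sketch -- not part of the sample; `images` and `project` are placeholders.
        IImagesUploader uploader = ImagesUploaderGenerator.GenerateImagesUploader(options, trainApi);
        CreateImageSummaryModel summary = await uploader.UploadImagesAsync(images, project.Id);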
Example #2
 /// <summary>
 /// Initialize or completely reset your agent
 /// </summary>
 /// <param name="numberOfStates"></param>
 /// <param name="numberOfActions"></param>
 /// <param name="gamma"></param>
 public void Reset(int numberOfStates, int numberOfActions, double gamma = 0.75)
 {
     Options         = new TrainingOptions(gamma);
     NumberOfStates  = numberOfStates;
     NumberOfActions = numberOfActions;
     Policy          = new double[numberOfActions * numberOfStates];
     for (int s = 0; s < NumberOfStates; s++)
     {
         var poss = GetAllowedActions(s);
         for (int i = 0, n = poss.Length; i < n; i++)
         {
             Policy[poss[i] * NumberOfStates + s] = 1.0 / poss.Length;
         }
     }
 }
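Since the policy is stored flattened as Policy[action * NumberOfStates + state], a small hypothetical accessor (using the same fields as above) makes that layout explicit:

 // Hypothetical helper, not part of the sample: probability of `action` in `state`.
 public double PolicyFor(int state, int action)
 {
     return Policy[action * NumberOfStates + state];
 }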
Example #3
 public DQNAgent(TrainingOptions options, int states, int actions)
 {
     Net             = new Network(4, "DQNAgent");
     NumberOfStates  = states;
     NumberOfActions = actions;
     Options         = options;
     Net.Matrices[0] = new Matrix(Options.HiddenUnits, NumberOfStates);  // input-to-hidden weights
     Net.Matrices[1] = new Matrix(NumberOfActions, options.HiddenUnits); // hidden-to-output weights
     Net.Matrices[2] = new Matrix(options.HiddenUnits, 1);               // hidden-layer bias
     Net.Matrices[3] = new Matrix(NumberOfActions, 1);                   // output-layer bias
     Util.FillMatrixWithRandomGaussianNumbers(Net.Matrices[0], 0, 0.01);
     Util.FillMatrixWithRandomGaussianNumbers(Net.Matrices[1], 0, 0.01);
     Util.FillMatrixWithRandomGaussianNumbers(Net.Matrices[2], 0, 0.01);
     Util.FillMatrixWithRandomGaussianNumbers(Net.Matrices[3], 0, 0.01);
     Experience = new List <Experience>();
 }
Example #4
 protected void Page_Load(object sender, EventArgs e)
 {
     if (!IsPostBack)
     {
         DataTable dropDownOptions = new DataTable();
         dropDownOptions.Columns.Add("id");
         DataRow row = dropDownOptions.NewRow();
         row["id"] = 1;
         dropDownOptions.Rows.Add(row);
         row       = dropDownOptions.NewRow();
         row["id"] = 2;
         dropDownOptions.Rows.Add(row);
         dropDownOptions.AcceptChanges();
         TrainingOptions.DataSource = dropDownOptions;
         TrainingOptions.Text       = "Please choose";
         TrainingOptions.TextField  = "ID";
         TrainingOptions.DataBindItems();
     }
 }
Example #5
        public DeepQLearn(int num_states, int num_actions, TrainingOptions opt)
        {
            this.util = new Util();
            this.opt  = opt;

            // in number of time steps, of temporal memory
            // the ACTUAL input to the net will be (x,a) temporal_window times, and followed by current x
            // so to have no information from previous time step going into value function, set to 0.
            this.temporal_window = opt.temporal_window != int.MinValue ? opt.temporal_window : 1;
            // size of experience replay memory
            this.experience_size = opt.experience_size != int.MinValue ? opt.experience_size : 30000;
            // number of examples in experience replay memory before we begin learning
            this.start_learn_threshold = opt.start_learn_threshold != double.MinValue ? opt.start_learn_threshold : Math.Floor(Math.Min(this.experience_size * 0.1, 1000));
            // gamma is a crucial parameter that controls how much plan-ahead the agent does. In [0,1]
            this.gamma = opt.gamma != double.MinValue ? opt.gamma : 0.8;

            // number of steps we will learn for
            this.learning_steps_total = opt.learning_steps_total != int.MinValue ? opt.learning_steps_total : 100000;
            // how many steps of the above to perform only random actions (in the beginning)?
            this.learning_steps_burnin = opt.learning_steps_burnin != int.MinValue ? opt.learning_steps_burnin : 3000;
            // what epsilon value do we bottom out on? 0.0 => purely deterministic policy at end
            this.epsilon_min = opt.epsilon_min != double.MinValue ? opt.epsilon_min : 0.05;
            // what epsilon to use at test time? (i.e. when learning is disabled)
            this.epsilon_test_time = opt.epsilon_test_time != double.MinValue ? opt.epsilon_test_time : 0.00;

            // advanced feature. Sometimes a random action should be biased towards some values
            // for example in flappy bird, we may want to choose to not flap more often
            if (opt.random_action_distribution != null)
            {
                // this better sum to 1 by the way, and be of length this.num_actions
                this.random_action_distribution = opt.random_action_distribution;
                if (this.random_action_distribution.Count != num_actions)
                {
                    Console.WriteLine("TROUBLE. random_action_distribution should be same length as num_actions.");
                }

                var sum_of_dist = this.random_action_distribution.Sum();
                if (Math.Abs(sum_of_dist - 1.0) > 0.0001)
                {
                    Console.WriteLine("TROUBLE. random_action_distribution should sum to 1!");
                }
            }
            else
            {
                this.random_action_distribution = new List <double>();
            }

            // states that go into neural net to predict optimal action look as
            // x0,a0,x1,a1,x2,a2,...xt
            // this variable controls the size of that temporal window. Actions are
            // encoded as 1-of-k hot vectors
            this.net_inputs    = num_states * this.temporal_window + num_actions * this.temporal_window + num_states;
            this.num_states    = num_states;
            this.num_actions   = num_actions;
            this.window_size   = Math.Max(this.temporal_window, 2); // must be at least 2; larger if we want more temporal context
            this.state_window  = new List <Volume>();
            this.action_window = new List <int>();
            this.reward_window = new List <double>();
            this.net_window    = new List <double[]>();

            // Init with dummy data
            for (int i = 0; i < window_size; i++)
            {
                this.state_window.Add(new Volume(1, 1, 1));
            }
            for (int i = 0; i < window_size; i++)
            {
                this.action_window.Add(0);
            }
            for (int i = 0; i < window_size; i++)
            {
                this.reward_window.Add(0.0);
            }
            for (int i = 0; i < window_size; i++)
            {
                this.net_window.Add(new double[] { 0.0 });
            }

            // create [state -> value of all possible actions] modeling net for the value function
            var layer_defs = new List <LayerDefinition>();

            if (opt.layer_defs != null)
            {
                // this is an advanced usage feature, because size of the input to the network, and number of
                // actions must check out. This is not very pretty Object Oriented programming but I can't see
                // a way out of it :(
                layer_defs = opt.layer_defs;
                if (layer_defs.Count < 2)
                {
                    Console.WriteLine("TROUBLE! must have at least 2 layers");
                }
                if (layer_defs[0].type != "input")
                {
                    Console.WriteLine("TROUBLE! first layer must be input layer!");
                }
                if (layer_defs[layer_defs.Count - 1].type != "regression")
                {
                    Console.WriteLine("TROUBLE! last layer must be input regression!");
                }
                if (layer_defs[0].out_depth * layer_defs[0].out_sx * layer_defs[0].out_sy != this.net_inputs)
                {
                    Console.WriteLine("TROUBLE! Number of inputs must be num_states * temporal_window + num_actions * temporal_window + num_states!");
                }
                if (layer_defs[layer_defs.Count - 1].num_neurons != this.num_actions)
                {
                    Console.WriteLine("TROUBLE! Number of regression neurons should be num_actions!");
                }
            }
            else
            {
                // create a very simple neural net by default
                layer_defs.Add(new LayerDefinition {
                    type = "input", out_sx = 1, out_sy = 1, out_depth = this.net_inputs
                });
                if (opt.hidden_layer_sizes != null)
                {
                    // allow user to specify this via the option, for convenience
                    var hl = opt.hidden_layer_sizes;
                    for (var k = 0; k < hl.Length; k++)
                    {
                        layer_defs.Add(new LayerDefinition {
                            type = "fc", num_neurons = hl[k], activation = "relu"
                        });                                                                                            // relu by default
                    }
                }
            }

            // Create the network
            this.value_net = new Net();
            this.value_net.makeLayers(layer_defs);

            // and finally we need a Temporal Difference Learning trainer!
            var options = new Options {
                learning_rate = 0.01, momentum = 0.0, batch_size = 64, l2_decay = 0.01
            };

            if (opt.options != null)
            {
                options = opt.options; // allow user to overwrite this
            }

            this.tdtrainer = new Trainer(this.value_net, options);

            // experience replay
            this.experience = new List <Experience>();
            //DeepQLearn.experienceShared = new List<ExperienceShared>(); // static list not threadsafe
            DeepQLearn.experienceShared = new ConcurrentDictionary <int, ExperienceShared>();

            // various housekeeping variables
            this.age            = 0;   // incremented every backward()
            this.forward_passes = 0;   // incremented every forward()
            this.epsilon        = 1.0; // controls exploration exploitation tradeoff. Should be annealed over time
            this.latest_reward  = 0;
            //this.last_input = [];
            this.average_reward_window = new TrainingWindow(1000, 10);
            this.average_loss_window   = new TrainingWindow(1000, 10);
            this.learning = true;
        }
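A minimal construction sketch for this learner. It assumes this TrainingOptions variant leaves unset numeric fields at the int.MinValue / double.MinValue sentinels checked above (so the documented defaults apply) and that hidden_layer_sizes is an int array, as the hl.Length loop suggests:

            // Hedged sketch: defaults via sentinels, plus the simple default net with two relu hidden layers.
            var opt = new TrainingOptions();
            opt.hidden_layer_sizes = new int[] { 20, 20 };
            var brain = new DeepQLearn(8, 4, opt); // 8 states, 4 actions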
Example #6
        /// <summary>
        /// Read and upload images to <paramref name="project"/> based on <paramref name="options"/>
        /// </summary>
        public static async Task <CreateImageSummaryModel> ReadAndUploadImagesAsync(this ITrainingApi trainingApi,
                                                                                    Project project, TrainingOptions options, ICollection <string> allowedTagNames)
        {
            var images = ImagesLoaderGenerator.GenerateImagesLoader(options, allowedTagNames).LoadImages();

            return await ImagesUploaderGenerator.GenerateImagesUploader(options, trainingApi).UploadImagesAsync(images, project.Id);
        }
Example #7
        public TrainingOptionsWindow(TrainingOptions options)
        {
            this.options = options;

            InitializeComponent();
        }
Example #8
        //---------------------------------------------


        #region Worker Thread
        private void backgroundWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            if (!(e.Argument is TrainingOptions))
            {
                backgroundWorker.ReportProgress(0, "Bad thread argument!");
                e.Cancel = true;
                return;
            }

            TrainingOptions options = (TrainingOptions)e.Argument;


            //Create Teacher
            BackPropagationLearning networkTeacher = new BackPropagationLearning(m_networkContainer.ActivationNetwork);

            networkTeacher.LearningRate = options.firstLearningRate;
            networkTeacher.Momentum     = options.momentum;

            //Start Training
            bool stop            = false;
            int  lastStatusEpoch = 0;
            int  lastGraphEpoch  = 0;
            int  lastSaveEpoch   = 0;

            backgroundWorker.ReportProgress(0, "Training...");


            while (!stop)
            {
                #region Training Epoch
                this.m_networkState.ErrorTraining   = networkTeacher.RunEpoch(options.TrainingVectors.Input, options.TrainingVectors.Output) /* / options.TrainingVectors.Input.Length */;
                this.m_networkState.ErrorValidation = networkTeacher.MeasureEpochError(options.ValidationVectors.Input, options.ValidationVectors.Output) /* / options.ValidationVectors.Input.Length */;

                // Adjust Training Rate
                if (options.secondLearningRate.HasValue)
                {
                    networkTeacher.LearningRate = options.secondLearningRate.Value;
                }
                #endregion


                #region Mark Network Savepoint
                if (Properties.Settings.Default.training_Autosave == true &&
                    m_networkState.Epoch >= lastSaveEpoch + Properties.Settings.Default.training_AutosaveEpochs)
                {
                    backgroundWorker.ReportProgress(0, UpdateType.NetworkSave);
                    lastSaveEpoch = m_networkState.Epoch;
                }
                #endregion

                #region Graph Update
                if (Properties.Settings.Default.graph_Disable == false &&
                    m_networkState.Epoch >= lastGraphEpoch + Properties.Settings.Default.graph_UpdateRate)
                {
                    backgroundWorker.ReportProgress(0, UpdateType.Graph);
                    lastGraphEpoch = m_networkState.Epoch;
                }
                #endregion

                #region Statusbar Update
                if (Properties.Settings.Default.display_UpdateByTime == false &&
                    m_networkState.Epoch >= lastStatusEpoch + Properties.Settings.Default.display_UpdateRate)
                {
                    if (options.TrainingType == TrainingType.ByError)
                    {
                        if (m_networkState.ErrorTraining != 0)
                        {
                            m_networkState.Progress = Math.Max(Math.Min((int)((options.limError * 100) / m_networkState.ErrorTraining), 100), 0);
                        }
                    }
                    else
                    {
                        if (m_networkState.Epoch != 0)
                        {
                            m_networkState.Progress = Math.Max(Math.Min((int)((m_networkState.Epoch * 100) / options.limEpoch), 100), 0);
                        }
                    }

                    backgroundWorker.ReportProgress(0, UpdateType.Statusbar);
                    lastStatusEpoch = m_networkState.Epoch;
                }
                #endregion


                ++m_networkState.Epoch;

                // Sleep thread according to specified delay
                System.Threading.Thread.Sleep(Properties.Settings.Default.training_delay);

                #region Stop Conditions
                if (options.TrainingType == TrainingType.ByError)
                {
                    if (m_networkState.ErrorTraining <= options.limError)
                    {
                        stop = true;
                    }
                }
                else if (options.TrainingType == TrainingType.ByEpoch)
                {
                    if (m_networkState.Epoch >= options.limEpoch)
                    {
                        stop = true;
                    }
                }


                if (backgroundWorker.CancellationPending)
                {
                    e.Cancel = true;
                    stop     = true;
                }
                #endregion
            }

            backgroundWorker.ReportProgress(0);
        }
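For context, a hedged sketch of the standard System.ComponentModel.BackgroundWorker wiring this handler relies on (the handler name and options object match the code above; the wiring itself is not shown in the sample):

            backgroundWorker.DoWork += backgroundWorker_DoWork;
            backgroundWorker.WorkerReportsProgress      = true;  // required for the ReportProgress calls above
            backgroundWorker.WorkerSupportsCancellation = true;  // required for the CancellationPending check above
            backgroundWorker.RunWorkerAsync(options);            // options arrives as e.Argument in DoWork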
Example #9
        public void Start()
        {
            if (this.backgroundWorker.IsBusy)
            {
                HistoryListener.Write("Trainer thread is busy!");
            }
            else
            {
                HistoryListener.Write("Gathering information...");

                TrainingOptions options = new TrainingOptions();
                options.momentum           = (double)numMomentum.Value;
                options.firstLearningRate  = (double)numLearningRate.Value;
                options.limError           = (double)numErrorLimit.Value;
                options.limEpoch           = (int)numEpochLimit.Value;
                options.validateNetwork    = cbValidate.Checked;
                options.secondLearningRate = cbChangeRate.Checked ? (double?)numChangeRate.Value : null;

                if (cbTrainingLayer.SelectedIndex == 0)
                {
                    options.TrainingVectors = this.NetworkDatabase.CreateVectors(NetworkSet.Training);
                }
                else
                {
                    options.TrainingVectors = this.NetworkDatabase.CreateVectors(NetworkSet.Training, (ushort)cbTrainingLayer.SelectedIndex);
                }


                options.ValidationVectors = this.NetworkDatabase.CreateVectors(NetworkSet.Validation);

                if (rbEpochLimit.Checked)
                {
                    options.TrainingType = TrainingType.ByEpoch;
                }
                else if (rbErrorLimit.Checked)
                {
                    options.TrainingType = TrainingType.ByError;
                }
                else if (rbManual.Checked)
                {
                    options.TrainingType = TrainingType.Manual;
                }

                /* //foreach (Double[] inputs in options.TrainingVectors.Input)
                 * //{
                 *   String str = String.Empty;
                 *   foreach (Double input in options.TrainingVectors.Input[0])
                 *   {
                 *       str += input + " ";
                 *   }
                 *   MessageBox.Show(str);
                 *   str = String.Empty;
                 *   foreach (Double input in options.TrainingVectors.Output[0])
                 *   {
                 *       str += input + " ";
                 *   }
                 *   MessageBox.Show(str);
                 * //}
                 */
                if (this.m_trainingPaused)
                {   // Network is paused, then
                    this.m_trainingPaused = false;
                }
                else
                {   // Network is stopped, then
                    //         this.m_graphControl.ClearGraph();
                }

                this.m_graphControl.TrimGraph(m_networkState.Epoch);

                if (this.cbSwitchGraph.Checked)
                {
                    this.m_graphControl.ShowTab();
                }

                if (this.TrainingStarted != null)
                {
                    this.TrainingStarted.Invoke(this, EventArgs.Empty);
                }

                // Start timer
                this.timer.Start();

                HistoryListener.Write("Starting thread");
                this.backgroundWorker.RunWorkerAsync(options);
            }
        }
Example #10
        static void Main(string[] args)  // b r o k e n
        {
            Console.ForegroundColor = ConsoleColor.DarkMagenta;
            if (File.Exists(qAgentBrainPath))
            {
                using (FileStream fstream = new FileStream(qAgentBrainPath, FileMode.Open, FileAccess.Read, FileShare.Read)) {
                    qAgent = new BinaryFormatter().Deserialize(fstream) as QAgent;
                    qAgent.Reinitialize();
                }
                Console.WriteLine("QAgent loaded");
            }
            else
            {
                var num_inputs      = 6; // size of the state vector the agent observes
                var num_actions     = 3; // number of discrete actions the agent can choose from
                var temporal_window = 1; // amount of temporal memory. 0 = agent lives in-the-moment :)
                var network_size    = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;
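                // with these values: 6*1 + 3*1 + 6 = 15 inputs to the value network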


                // config brain
                var layer_defs = new List <LayerDefinition>();

                // the value function network computes a value of taking any of the possible actions
                // given an input state. Here we specify one explicitly the hard way
                // but user could also equivalently instead use opt.hidden_layer_sizes = [20,20]
                // to just insert simple relu hidden layers.
                layer_defs.Add(new LayerDefinition {
                    type = "input", out_sx = 1, out_sy = 1, out_depth = network_size
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "regression", num_neurons = num_actions
                });

                // options for the Temporal Difference learner that trains the above net
                // by backpropping the temporal difference learning rule.
                //var opt = new Options { method="sgd", learning_rate=0.01, l2_decay=0.001, momentum=0.9, batch_size=10, l1_decay=0.001 };
                Options opt = new Options {
                    method = "adadelta", l2_decay = 0.001, batch_size = 10
                };

                TrainingOptions tdtrainer_options = new TrainingOptions();
                tdtrainer_options.temporal_window       = temporal_window;
                tdtrainer_options.experience_size       = 30000;
                tdtrainer_options.start_learn_threshold = 1000;
                tdtrainer_options.gamma = 0.7;
                tdtrainer_options.learning_steps_total  = 200000;
                tdtrainer_options.learning_steps_burnin = 3000;
                tdtrainer_options.epsilon_min           = 0.05;
                tdtrainer_options.epsilon_test_time     = 0.00;
                tdtrainer_options.layer_defs            = layer_defs;
                tdtrainer_options.options = opt;

                DeepQLearn brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
                qAgent = new QAgent(brain);
            }
            qAgent.startlearn();
            new Thread(() => {
                while (true)
                {
                    if (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond % 31 /*arbitrary*/ == 0)
                    {
                        using (FileStream fstream = new FileStream(qAgentBrainPath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite)) {
                            new BinaryFormatter().Serialize(fstream, qAgent);
                        }
                    }
                    qAgent.tick();
                }
            }).Start();
        }
Example #11
        static void Main(string[] args)
        {
            var    rnd = new Random();
            int    max = 10;
            int    min = 1;
            int    nextPrint = 0, act1 = 0, act0 = 0;
            double total = 0, correct = 0;
            var    state = new[] { rnd.Next(min, max), rnd.Next(min, max), rnd.Next(min, max), rnd.Next(min, max) };
            var    opt   = new TrainingOptions
            {
                Alpha              = 0.001,
                Epsilon            = 0,
                ErrorClamp         = 0.002,
                ExperienceAddEvery = 10,
                ExperienceSize     = 1000,
                ExperienceStart    = 0,
                HiddenUnits        = 5,
                LearningSteps      = 400
            };
            //we take 4 state values, i.e. random integers from 1 to 9 (Next's upper bound is exclusive)
            //we have 2 actions 1 if average of set is >5 and 0 if otherwise
            //we reward agent with 1 for every correct and -1 otherwise
            var agent = new DQNAgent(opt, state.Length, 2);

            //how to properly use the DPAgent
            //var agent2= new MyDPAgent();
            //agent2.Reset(state.Length,2);

            while (total < 50000)
            {
                state = new[] { rnd.Next(min, max), rnd.Next(min, max), rnd.Next(min, max), rnd.Next(min, max) };
                var action = agent.Act(state);
                if (action == 1)
                {
                    act1++;
                }
                else
                {
                    act0++;
                }
                if (state.Average() > 5 && action == 1)
                {
                    agent.Learn(1);
                    correct++;
                }
                else if (state.Average() <= 5 && action == 0)
                {
                    agent.Learn(1);
                    correct++;
                }
                else
                {
                    agent.Learn(-1);
                }
                total++;
                //nextPrint++;
                if (total >= nextPrint)
                {
                    Console.WriteLine("Score: " + (correct / total).ToString("P") + "Epoch: " + nextPrint);
                    Console.WriteLine("Action 1: " + act1 + " Action 0: " + act0);
                    nextPrint += 1000;
                }
            }
            // Console.WriteLine("Score: " + (correct / total).ToString("P"));
            Console.WriteLine("End");
            File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + "DNQ.trr", agent.AgentToJson());
            Console.ReadKey();
        }
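The reward logic in the loop above fits in one expression; a hypothetical helper (assumes System.Linq is imported for Average()):

        // +1 when the action agrees with the "average > 5" rule, -1 otherwise.
        static int Reward(int[] state, int action)
        {
            return (state.Average() > 5) == (action == 1) ? 1 : -1;
        }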
Example #12
        private void startLearning_Click(object sender, EventArgs e)
        {
            if (qAgent == null)
            {
                var num_inputs      = 27; // 9 eyes, each sees 3 numbers (wall, green, red thing proximity)
                var num_actions     = 5;  // 5 possible angles agent can turn
                var temporal_window = 4;  // amount of temporal memory. 0 = agent lives in-the-moment :)
                var network_size    = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

                var layer_defs = new List <LayerDefinition>();

                // the value function network computes a value of taking any of the possible actions
                // given an input state. Here we specify one explicitly the hard way
                // but user could also equivalently instead use opt.hidden_layer_sizes = [20,20]
                // to just insert simple relu hidden layers.
                layer_defs.Add(new LayerDefinition {
                    type = "input", out_sx = 1, out_sy = 1, out_depth = network_size
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "regression", num_neurons = num_actions
                });

                // options for the Temporal Difference learner that trains the above net
                // by backpropping the temporal difference learning rule.
                //var opt = new Options { method="sgd", learning_rate=0.01, l2_decay=0.001, momentum=0.9, batch_size=10, l1_decay=0.001 };
                var opt = new Options {
                    method = "adadelta", l2_decay = 0.001, batch_size = 10
                };

                var tdtrainer_options = new TrainingOptions();
                tdtrainer_options.temporal_window       = temporal_window;
                tdtrainer_options.experience_size       = 30000;
                tdtrainer_options.start_learn_threshold = 1000;
                tdtrainer_options.gamma = 0.7;
                tdtrainer_options.learning_steps_total  = 200000;
                tdtrainer_options.learning_steps_burnin = 3000;
                tdtrainer_options.epsilon_min           = 0.05;
                tdtrainer_options.epsilon_test_time     = 0.00;
                tdtrainer_options.layer_defs            = layer_defs;
                tdtrainer_options.options = opt;

                var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
                qAgent = new QAgent(brain, canvas.Width, canvas.Height);
            }
            else
            {
                qAgent.startlearn();
            }

            if (workerThread == null)
            {
                workerThread = new Thread(new ThreadStart(BackgroundThread));
                workerThread.Start();
            }
        }
Example #13
 public DeepQLearnShared(int num_states, int num_actions, TrainingOptions opt) : base(num_states, num_actions, opt)
 {
 }
Example #14
        private static QAgent21 TrainAgent(GameEngine engine21)
        {
            var num_inputs      = 1; // current score
            var num_actions     = 2; // take a card or finish game
            var temporal_window = 0; // amount of temporal memory. 0 = agent lives in-the-moment :)
            var network_size    = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;
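            // with temporal_window = 0 this collapses to just num_inputs: 1*0 + 2*0 + 1 = 1 network input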

            var layer_defs = new List <LayerDefinition>();

            // the value function network computes a value of taking any of the possible actions
            // given an input state. Here we specify one explicitly the hard way
            // but user could also equivalently instead use opt.hidden_layer_sizes = [20,20]
            // to just insert simple relu hidden layers.
            layer_defs.Add(new LayerDefinition {
                type = "input", out_sx = 1, out_sy = 1, out_depth = network_size
            });
            //layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 21, activation = "relu" });
            //layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
            //layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
            layer_defs.Add(new LayerDefinition {
                type = "regression", num_neurons = num_actions
            });

            // options for the Temporal Difference learner that trains the above net
            // by backpropping the temporal difference learning rule.
            //var opt = new Options { method="sgd", learning_rate=0.01, l2_decay=0.001, momentum=0.9, batch_size=10, l1_decay=0.001 };
            var opt = new Options {
                method = "adadelta", l2_decay = 0.001, batch_size = 10
            };

            var tdtrainer_options = new TrainingOptions();

            tdtrainer_options.temporal_window       = temporal_window;
            tdtrainer_options.experience_size       = 3000;  // size of experience replay memory
            tdtrainer_options.start_learn_threshold = 1000;  // number of examples in experience replay memory before we begin learning
            tdtrainer_options.gamma = 1.0;                   // gamma is a crucial parameter that controls how much plan-ahead the agent does. In [0,1]
            tdtrainer_options.learning_steps_total  = 15000; // number of steps we will learn for
            tdtrainer_options.learning_steps_burnin = 1000;  // how many steps of the above to perform only random actions (in the beginning)?
            tdtrainer_options.epsilon_min           = 0.01;  // what epsilon value do we bottom out on? 0.0 => purely deterministic policy at end
            tdtrainer_options.epsilon_test_time     = 0.00;  // what epsilon to use at test time? (i.e. when learning is disabled)
            tdtrainer_options.layer_defs            = layer_defs;
            tdtrainer_options.options = opt;

            var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
            var agent = new QAgent21(brain);

            int    accumulatedScore        = 0;
            int    accumulatedGameLength   = 0;
            int    gamesInAccumulatedScore = 0;
            int    batchSize           = 5000;
            int    total               = 0;
            Stream bestAgentSerialized = new MemoryStream();
            double bestBatchScore      = double.MinValue;

            while (total < 50000)
            {
                GameState state = new GameState();

                while (!state.IsFinished)
                {
                    TurnOptions action = agent.Forward(state);
                    //if (action == TurnOptions.FinishGame)
                    //{
                    //    Console.WriteLine($"finish at {state.Score}");
                    //}
                    GameState newState = engine21.ApplyTurn(action, state);

                    agent.Backward(newState);
                    state = newState;

                    accumulatedGameLength++;
                }

                accumulatedScore += state.Score;
                gamesInAccumulatedScore++;

                total++;
                if (gamesInAccumulatedScore == batchSize)
                {
                    double batchScore = accumulatedScore / (double)gamesInAccumulatedScore;
                    Console.WriteLine($"{total} iterations. Error: {brain.visSelf()}. Length: {accumulatedGameLenght/(double)gamesInAccumulatedScore} Average score: {batchScore}");
                    accumulatedScore        = 0;
                    gamesInAccumulatedScore = 0;
                    accumulatedGameLength   = 0;

                    //if agent is good - save it
                    if (batchScore > bestBatchScore)
                    {
                        bestBatchScore = batchScore;
                        IFormatter formatter = new BinaryFormatter();
                        if (bestAgentSerialized != null)
                        {
                            bestAgentSerialized.Close();
                            bestAgentSerialized.Dispose();
                        }
                        bestAgentSerialized = new MemoryStream();
                        formatter.Serialize(bestAgentSerialized, agent);
                    }
                }
            }
            Console.WriteLine($"Best score: {bestBatchScore}");
            Console.WriteLine("End");
            //File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + "DNQ.trr", agent.AgentToJson());

            IFormatter readFormatter = new BinaryFormatter();

            bestAgentSerialized.Seek(0, SeekOrigin.Begin);
            var agentToReturn = (QAgent21)readFormatter.Deserialize(bestAgentSerialized);

            agentToReturn.Brain.learning = false;

            brain.learning = false;
            return agentToReturn;
        }
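The best agent above lives only in a MemoryStream; a hedged follow-up sketch for persisting that snapshot with standard System.IO calls (the path is a placeholder):

        static void SaveSnapshot(Stream bestAgentSerialized, string path /* e.g. "bestAgent21.bin" */)
        {
            bestAgentSerialized.Seek(0, SeekOrigin.Begin);
            using (var file = File.Create(path))
            {
                bestAgentSerialized.CopyTo(file);
            }
        }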
Example #15
        public void Start()
        {
            if (this.backgroundWorker.IsBusy)
            {
                HistoryListener.Write("Trainer thread is busy!");
            }
            else
            {
                HistoryListener.Write("Gathering information...");

                TrainingOptions options = new TrainingOptions();
                options.momentum = (double)numMomentum.Value;
                options.firstLearningRate = (double)numLearningRate.Value;
                options.limError = (double)numErrorLimit.Value;
                options.limEpoch = (int)numEpochLimit.Value;
                options.validateNetwork = cbValidate.Checked;
                options.secondLearningRate = cbChangeRate.Checked ? (double?)numChangeRate.Value : null;

                if (cbTrainingLayer.SelectedIndex == 0)
                    options.TrainingVectors = this.NetworkDatabase.CreateVectors(NetworkSet.Training);
                else
                    options.TrainingVectors = this.NetworkDatabase.CreateVectors(NetworkSet.Training, (ushort)cbTrainingLayer.SelectedIndex);

                options.ValidationVectors = this.NetworkDatabase.CreateVectors(NetworkSet.Validation);

                if (rbEpochLimit.Checked)
                    options.TrainingType = TrainingType.ByEpoch;
                else if (rbErrorLimit.Checked)
                    options.TrainingType = TrainingType.ByError;
                else if (rbManual.Checked)
                    options.TrainingType = TrainingType.Manual;

               /* //foreach (Double[] inputs in options.TrainingVectors.Input)
                //{
                    String str = String.Empty;
                    foreach (Double input in options.TrainingVectors.Input[0])
                    {
                        str += input + " ";
                    }
                    MessageBox.Show(str);
                    str = String.Empty;
                    foreach (Double input in options.TrainingVectors.Output[0])
                    {
                        str += input + " ";
                    }
                    MessageBox.Show(str);
                //}
              */
                if (this.m_trainingPaused)
                {   // Network is paused, then
                    this.m_trainingPaused = false;
                }
                else
                {   // Network is stopped, then
               //         this.m_graphControl.ClearGraph();
                }

                this.m_graphControl.TrimGraph(m_networkState.Epoch);

                if (this.cbSwitchGraph.Checked)
                    this.m_graphControl.ShowTab();

                if (this.TrainingStarted != null)
                    this.TrainingStarted.Invoke(this, EventArgs.Empty);

                // Start timer
                this.timer.Start();

                HistoryListener.Write("Starting thread");
                this.backgroundWorker.RunWorkerAsync(options);
            }
        }
Example #16
        private void startLearn(bool delay)
        {
            if (qAgent == null)
            {
                var num_inputs      = 27; // 9 eyes, each sees 3 numbers (wall, green, red thing proximity)
                var num_actions     = 5;  // 5 possible angles agent can turn
                var temporal_window = 4;  // amount of temporal memory. 0 = agent lives in-the-moment :)
                var network_size    = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

                var layer_defs = new List <LayerDefinition>();

                // the value function network computes a value of taking any of the possible actions
                // given an input state. Here we specify one explicitly the hard way
                // but user could also equivalently instead use opt.hidden_layer_sizes = [20,20]
                // to just insert simple relu hidden layers.
                layer_defs.Add(new LayerDefinition {
                    type = "input", out_sx = 1, out_sy = 1, out_depth = network_size
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "regression", num_neurons = num_actions
                });

                // options for the Temporal Difference learner that trains the above net
                // by backpropping the temporal difference learning rule.
                //var opt = new Options { method="sgd", learning_rate=0.01, l2_decay=0.001, momentum=0.9, batch_size=10, l1_decay=0.001 };
                //var opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };
                var opt = new Options {
                    method        = cboLearningMethod.Text,
                    learning_rate = Double.Parse(txtLearningRate.Text),
                    momentum      = Double.Parse(txtLearningMomentum.Text),
                    l1_decay      = Double.Parse(txtLearningL1Decay.Text),
                    l2_decay      = Double.Parse(txtLearningL2Decay.Text),
                    batch_size    = Int32.Parse(txtLearningBatch.Text)
                };

                var tdtrainer_options = new TrainingOptions();
                tdtrainer_options.temporal_window = temporal_window;
                //tdtrainer_options.experience_size = 30000;
                tdtrainer_options.experience_size = experiencesize;
                //tdtrainer_options.start_learn_threshold = 1000;
                tdtrainer_options.start_learn_threshold = learnthreshold;
                tdtrainer_options.gamma = 0.7;
                //tdtrainer_options.learning_steps_total = 200000;
                tdtrainer_options.learning_steps_total = Int32.Parse(txtLearnTotal.Text);
                //tdtrainer_options.learning_steps_burnin = 3000;
                tdtrainer_options.learning_steps_burnin = Int32.Parse(txtLearnBurn.Text);
                tdtrainer_options.epsilon_min           = 0.05;
                tdtrainer_options.epsilon_test_time     = 0.00;
                tdtrainer_options.layer_defs            = layer_defs;
                tdtrainer_options.options = opt;

                // determine when to use shared experience using static
                if (chkSharedExperience.Checked && staticExperience)
                {
                    var brain = new DeepQLearnShared(num_inputs, num_actions, tdtrainer_options);
                    brain.instance = this.instanceNumber;
                    qAgent         = new QAgent(brain, canvas.Width, canvas.Height, Int32.Parse(txtNumberItems.Text), chkRandomItems.Checked, chkObstructItems.Checked, infiniteItems);
                }
                // determine when to use shared experience using singleton
                else if (chkSharedExperience.Checked && !staticExperience)
                {
                    var brain = new DeepQLearnSharedSingleton(num_inputs, num_actions, tdtrainer_options);
                    brain.instance = this.instanceNumber;
                    qAgent         = new QAgent(brain, canvas.Width, canvas.Height, Int32.Parse(txtNumberItems.Text), chkRandomItems.Checked, chkObstructItems.Checked, infiniteItems);
                }
                // determine when to use nonshared experience
                else
                {
                    var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
                    qAgent = new QAgent(brain, canvas.Width, canvas.Height, Int32.Parse(txtNumberItems.Text), chkRandomItems.Checked, chkObstructItems.Checked, infiniteItems);
                }
            }
            else
            {
                qAgent.startlearn();
            }

            if (!delay)
            {
                qAgent.goveryfast();
                interval = 0;
            }

            if (workerThread == null)
            {
                workerThread = new Thread(new ThreadStart(BackgroundThread));
                workerThread.Start();
            }
        }