예제 #1
0
        /// <summary>
        /// Trains a tabular Q-learning agent on a <c>Game</c> of size 3 and prints
        /// the reward summary followed by a single Q-table entry.
        /// </summary>
        /// <remarks>
        /// The Q-table is created with <c>gameSize * gameSize</c> by <c>4</c> dimensions and is
        /// indexed as <c>[state][action]</c>; presumably one row per board cell and one
        /// column per move direction (confirm against <c>Game.Step</c>).
        /// </remarks>
        public void Main()
        {
            const int gameSize = 3;

            // Training schedule.
            const int episodeCount       = 10000;
            const int maxStepsPerEpisode = 100;

            // Q-update hyperparameters.
            const double learningRate = 0.1;
            const double discountRate = 0.99;

            // Exploration schedule; the current rate starts at 1 and is
            // recomputed after every episode.
            const double maxExplorationRate   = 1;
            const double minExplorationRate   = 0.01;
            const double explorationDecayRate = 0.01;
            double       explorationRate      = 1;

            var game           = new Game(gameSize);
            var qTable         = Matematica.ArrayJagged_Ini <double>(gameSize * gameSize, 4);
            var episodeRewards = new List <int>();

            for (int episode = 0; episode < episodeCount; episode++)
            {
                var  state         = game.GameReset();
                int  episodeReward = 0;
                bool finished      = false;

                // Stop either when the step budget is spent or the game reports it is done.
                for (int step = 0; step < maxStepsPerEpisode && !finished; step++)
                {
                    // Exploration-exploitation trade-off: draw the sample first, then
                    // take the greedy action and let Explore_action replace it whenever
                    // the sample does not exceed the current exploration rate.
                    double sample = Matematica.rng.NextDouble();
                    int    greedy = Matematica.ArgMax(qTable[state]);
                    int    action = sample <= explorationRate ? Explore_action(greedy) : greedy;

                    // Counterpart of the Gym-style call: (new_state, reward, done, info) = env.step(action)
                    var (nextState, reward, done) = game.Step(action);

                    qTable[state][action] = Matematica.Q_Table(qTable[state][action], qTable[nextState], learningRate, reward, discountRate);

                    state          = nextState;
                    episodeReward += reward;
                    finished       = done;
                }

                // The new exploration rate is derived from the episode index.
                explorationRate = Matematica.Exploration(minExplorationRate, maxExplorationRate, explorationDecayRate, episode);

                episodeRewards.Add(episodeReward);
            }

            Average_Reward_Print(episodeRewards, episodeCount);
            Console.WriteLine(qTable[2][1]);
        }
예제 #2
0
        /// <summary>
        /// Applies <paramref name="action"/> through <c>MoveByIndex</c> and reports the outcome.
        /// </summary>
        /// <param name="action">Action index forwarded to <c>MoveByIndex</c>.</param>
        /// <returns>
        /// The flat index of the position after the move, the reward read at that position,
        /// and whether the accumulated <c>totalReward</c> has reached <c>rewards_to_win</c>.
        /// </returns>
        public (int new_state, int reward, bool done) Step(int action)
        {
            MoveByIndex(action);

            int stateIndex = Matematica.Array2D_GetFlatIndex(game_matriz, pos_y, pos_x);

            // NOTE(review): rewards is indexed [pos_x, pos_y] while the flat index above is
            // computed from (pos_y, pos_x) - confirm both arrays use the intended axis order.
            var stepReward = rewards[pos_x, pos_y];
            totalReward += stepReward;

            // The episode is over once the running total reaches the winning threshold.
            bool finished = rewards_to_win <= totalReward;

            return (stateIndex, stepReward, finished);
        }
예제 #3
0
 /// <summary>
 /// Resets the game by calling <c>ReSet(border)</c> and returns the flat index of the
 /// current position (<c>pos_y</c>, <c>pos_x</c>) within <c>game_matriz</c>.
 /// </summary>
 /// <returns>Flat index of the position after the reset.</returns>
 public int GameReset()
 {
     ReSet(border);

     int startIndex = Matematica.Array2D_GetFlatIndex(game_matriz, pos_y, pos_x);
     return startIndex;
 }