Code Example #1
File: CartPole.cs Project: uzbekdev1/DeepRL
        static void Main(string[] args)
        {
            Env env = new CartPoleEnv();
            // Hyperparameters follow this reference implementation:
            // https://github.com/rlcode/reinforcement-learning/blob/master/2-cartpole/1-dqn/cartpole_dqn.py

            // Value function: MLP with two 24-unit hidden layers; the remaining positional
            // arguments appear to be the learning rate (0.001), discount factor (0.99) and
            // batch size (32), backed by a 2000-transition experience replay buffer.
            var valueFunc = new DQN(env.ObservationSpace.Shape, env.ActionSpace.NumberOfValues(), new[] { 24, 24 }, 0.001f, 0.99f, 32, new ExperienceReplay(2000))
            {
                TargetModelUpdateInterval = 1 // sync the target network after every training step
            };

            Agent agent = new AgentQL("dqn_cartpole", env, valueFunc)
            {
                Verbose          = true,
                RewardOnDone     = -100,  // extra penalty when the pole falls before the step limit
                EpsilonDecayMode = EEpsilonDecayMode.EveryStep,
                EpsilonDecay     = 0.999f,
                WarmupSteps      = 1000   // steps collected before learning begins
            };

            agent.Train(300, 500);  // presumably (episodes, max steps per episode)
            Console.WriteLine($"Average reward {agent.Test(50, 300, 0)}");

            // Manual interaction with the raw environment API, kept for reference:
            //while (!env.Step((int)env.ActionSpace.Sample()[0], out var nextState, out var reward))
            //{
            //    env.Render();
            //    Thread.Sleep(50);
            //}
        }
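
With EpsilonDecayMode.EveryStep and EpsilonDecay = 0.999f, exploration decays multiplicatively on every environment step, so epsilon after n steps is roughly 0.999^n of its starting value. A self-contained sketch of how long that takes to reach the 0.01 floor used by the later examples (the multiplicative rule is an assumption about how the library applies EpsilonDecay; the arithmetic itself is exact):

    using System;

    class EpsilonSchedule
    {
        static void Main()
        {
            const float decay = 0.999f, minEps = 0.01f;
            float eps = 1.0f;   // MaxEpsilon
            int steps = 0;
            while (eps > minEps)
            {
                eps *= decay;
                steps++;
            }
            // Analytically: ln(0.01) / ln(0.999) ≈ 4603 steps.
            Console.WriteLine($"epsilon reaches {minEps} after {steps} steps");
        }
    }
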
Code Example #2
File: LunarLander.cs Project: uzbekdev1/DeepRL
        static void Main(string[] args)
        {
            Env env = new LunarLanderEnv();

            var memory = new PriorityExperienceReplay(100000); // prioritized replay buffer, capacity 100000 transitions

            // Q-function: MLP with 256- and 128-unit hidden layers; learning rate 0.0001,
            // discount factor 0.999, batch size 32 (same positional layout as the CartPole example).
            var qFunc = new DQN(env.ObservationSpace.Shape, env.ActionSpace.NumberOfValues(), new[] { 256, 128 }, 0.0001f, 0.999f, 32, memory)
            {
                MemoryInterval            = 1,
                EnableDoubleDQN           = true, // Double DQN target, sketched below
                TargetModelUpdateInterval = 4000, // sync the target network every 4000 steps
                TrainingEpochs            = 1
            };

            Agent agent = new AgentQL("dqn_lunarlander", env, qFunc)
            {
                WarmupSteps         = 5000,
                MaxEpsilon          = 1.0f,
                MinEpsilon          = 0.01f,
                EpsilonDecay        = 0.995f,
                TrainInterval       = 1,
                RewardClipping      = false,
                TrainRenderInterval = 10,
                Verbose             = true,
                RenderFreq          = 80,
            };

            agent.Train(1500, 1500);
            //agent.Load($"{agent.Name}_1500"); // alternatively, restore a saved checkpoint and skip training
            agent.Test(100, 400, 2);
        }
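
EnableDoubleDQN = true switches the temporal-difference target to the Double DQN rule (van Hasselt et al., 2015): the online network chooses the next action and the separate target network scores it, which reduces the overestimation bias of the plain max-based DQN target. A minimal sketch of that target computation, independent of the library:

    static class DoubleDqn
    {
        // y = r                                                     if the episode ended
        // y = r + gamma * Q_target(s', argmax_a Q_online(s', a))    otherwise
        public static float Target(float reward, bool done, float gamma,
                                   float[] qOnlineNext, float[] qTargetNext)
        {
            if (done)
                return reward;
            int best = 0; // action selected by the online network...
            for (int a = 1; a < qOnlineNext.Length; a++)
                if (qOnlineNext[a] > qOnlineNext[best])
                    best = a;
            return reward + gamma * qTargetNext[best]; // ...but valued by the target network
        }
    }
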
Code Example #3
        static void Main(string[] args)
        {
            Env env = new AtariEnv("../../../roms/pong.bin", true);

            var memory = new PriorityExperienceReplay(100000);

            // Convolutional Q-function; new [] { 64, 64 } is presumably the input frame size,
            // with learning rate 0.00025, discount factor 0.999 and batch size 32.
            var qFunc = new DQNConv(new [] { 64, 64 }, env.ActionSpace.NumberOfValues(), 0.00025f, 0.999f, 32, memory)
            {
                MemoryInterval            = 1,
                EnableDoubleDQN           = true,
                TargetModelUpdateInterval = 4000,
                TrainingEpochs            = 1
            };

            Agent agent = new AgentQL("dqnconv_atari", env, qFunc)
            {
                WarmupSteps         = 1000,
                MaxEpsilon          = 1.0f,
                MinEpsilon          = 0.01f,
                EpsilonDecay        = 0.995f,
                TrainInterval       = 4,  // presumably one training update per 4 environment steps
                RewardClipping      = false,
                TrainRenderInterval = 1,
                Verbose             = true,
                RenderFreq          = 60,
            };

            agent.Train(1500, 1000);
        }
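
This example and the previous one both draw minibatches from a PriorityExperienceReplay rather than a uniform buffer. In the standard proportional scheme (Schaul et al., 2015), a transition with priority p_i is sampled with probability p_i^alpha / sum_j p_j^alpha, so high-error transitions are replayed more often. A toy sketch of that sampling rule; the library's internals (typically a sum-tree for O(log n) draws) may differ:

    using System;

    static class ProportionalSampling
    {
        // Draw one index with probability proportional to priority^alpha.
        public static int Sample(float[] priorities, float alpha, Random rng)
        {
            var scaled = new float[priorities.Length];
            float total = 0f;
            for (int i = 0; i < priorities.Length; i++)
                total += scaled[i] = MathF.Pow(priorities[i], alpha);

            float r = (float)rng.NextDouble() * total;
            for (int i = 0; i < scaled.Length; i++)
                if ((r -= scaled[i]) <= 0f)
                    return i;
            return scaled.Length - 1; // guard against floating-point rounding
        }
    }
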
Code Example #4
        static void Main(string[] args)
        {
            Env env = new TaxiEnv();

            // Tabular Q-learning; the two floats are presumably the learning rate (0.7)
            // and discount factor (0.618).
            Agent agent = new AgentQL("qtable_taxi", env, new QTable(env.ObservationSpace.NumberOfValues(), env.ActionSpace.NumberOfValues(), 0.7f, 0.618f))
            {
                Verbose = true
            };

            agent.Train(50000, 100);
            Console.WriteLine($"Average reward {agent.Test(100, 100, 1)}");

            // Manual interaction with the raw environment API, kept for reference:
            //while (!env.Step((int)env.ActionSpace.Sample()[0], out var nextState, out var reward))
            //    env.Render();
        }
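
Under the reading above, QTable implements the classic one-step Q-learning backup, Q[s,a] += alpha * (r + gamma * max_a' Q[s',a'] - Q[s,a]). A minimal standalone sketch with the same constants (a hypothetical helper, not the library's code):

    using System;

    static class TabularQ
    {
        // One Q-learning backup for the transition (s, a, r, s2).
        public static void Update(float[,] q, int s, int a, float r, int s2, bool done,
                                  float alpha = 0.7f, float gamma = 0.618f)
        {
            float bootstrap = 0f; // terminal states contribute no future value
            if (!done)
            {
                bootstrap = q[s2, 0];
                for (int a2 = 1; a2 < q.GetLength(1); a2++)
                    bootstrap = Math.Max(bootstrap, q[s2, a2]);
            }
            q[s, a] += alpha * (r + gamma * bootstrap - q[s, a]);
        }
    }
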