Example #1
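Trains a double DQN with prioritized experience replay on the LunarLander environment, then evaluates the trained agent.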
        static void Main(string[] args)
        {
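            // Create the LunarLander environment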
            Env env = new LunarLanderEnv();

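            // Prioritized experience replay buffer; the constructor argument is presumably its capacity (100,000 transitions)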
            var memory = new PriorityExperienceReplay(100000);

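            // Q-network with two hidden layers (256 and 128 units); the remaining positional
            // arguments are presumably the learning rate, discount factor and batch size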
            var qFunc = new DQN(env.ObservationSpace.Shape, env.ActionSpace.NumberOfValues(), new[] { 256, 128 }, 0.0001f, 0.999f, 32, memory)
            {
                MemoryInterval            = 1,
                EnableDoubleDQN           = true,
                TargetModelUpdateInterval = 4000,
                TrainingEpochs            = 1
            };

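            // Epsilon-greedy Q-learning agent: epsilon decays from 1.0 down to 0.01, and the
            // 5000 warm-up steps are presumably collected before training begins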
            Agent agent = new AgentQL("dqn_lunarlander", env, qFunc)
            {
                WarmupSteps         = 5000,
                MaxEpsilon          = 1.0f,
                MinEpsilon          = 0.01f,
                EpsilonDecay        = 0.995f,
                TrainInterval       = 1,
                RewardClipping      = false,
                TrainRenderInterval = 10,
                Verbose             = true,
                RenderFreq          = 80,
            };

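            // Train, then evaluate; the numeric arguments are presumably episode counts and
            // per-episode step limits. The commented-out Load call can presumably be used in
            // place of Train to restore a previously saved agent before testing.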
            agent.Train(1500, 1500);
            //agent.Load($"{agent.Name}_1500");
            agent.Test(100, 400, 2);
        }
Example #2
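The same pipeline with a convolutional Q-network trained on Atari Pong screen frames loaded from a ROM file.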
        static void Main(string[] args)
        {
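            // Atari environment created from the Pong ROM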
            Env env = new AtariEnv("../../../roms/pong.bin", true);

            var memory = new PriorityExperienceReplay(100000);

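            // Convolutional Q-network over 64x64 input frames; the remaining positional arguments
            // presumably mirror the dense DQN (learning rate, discount factor, batch size)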
            var qFunc = new DQNConv(new[] { 64, 64 }, env.ActionSpace.NumberOfValues(), 0.00025f, 0.999f, 32, memory)
            {
                MemoryInterval            = 1,
                EnableDoubleDQN           = true,
                TargetModelUpdateInterval = 4000,
                TrainingEpochs            = 1
            };

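            // Same epsilon-greedy agent setup; TrainInterval = 4 presumably triggers a training
            // update every fourth environment step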
            Agent agent = new AgentQL("dqnconv_atari", env, qFunc)
            {
                WarmupSteps         = 1000,
                MaxEpsilon          = 1.0f,
                MinEpsilon          = 0.01f,
                EpsilonDecay        = 0.995f,
                TrainInterval       = 4,
                RewardClipping      = false,
                TrainRenderInterval = 1,
                Verbose             = true,
                RenderFreq          = 60,
            };

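            // Train only (this example does not call Test); the arguments are presumably the
            // episode count and per-episode step limit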
            agent.Train(1500, 1000);

        }