static void Main(string[] args)
{
    // DQN on CartPole, after
    // https://github.com/rlcode/reinforcement-learning/blob/master/2-cartpole/1-dqn/cartpole_dqn.py
    Env env = new CartPoleEnv();

    var valueFunc = new DQN(env.ObservationSpace.Shape, env.ActionSpace.NumberOfValues(),
                            new[] { 24, 24 }, 0.001f, 0.99f, 32, new ExperienceReplay(2000))
    {
        TargetModelUpdateInterval = 1
    };

    Agent agent = new AgentQL("dqn_cartpole", env, valueFunc)
    {
        Verbose = true,
        RewardOnDone = -100,
        EpsilonDecayMode = EEpsilonDecayMode.EveryStep,
        EpsilonDecay = 0.999f,
        WarmupSteps = 1000
    };

    agent.Train(300, 500);
    Console.WriteLine($"Average reward {agent.Test(50, 300, 0)}");

    // Alternative: step the environment with sampled actions and render each frame:
    //while (!env.Step((int)env.ActionSpace.Sample()[0], out var nextState, out var reward))
    //{
    //    env.Render();
    //    Thread.Sleep(50);
    //}
}
static void Main(string[] args)
{
    // Double DQN with prioritized experience replay on LunarLander.
    Env env = new LunarLanderEnv();
    var memory = new PriorityExperienceReplay(100000);

    var qFunc = new DQN(env.ObservationSpace.Shape, env.ActionSpace.NumberOfValues(),
                        new[] { 256, 128 }, 0.0001f, 0.999f, 32, memory)
    {
        MemoryInterval = 1,
        EnableDoubleDQN = true,
        TargetModelUpdateInterval = 4000,
        TrainingEpochs = 1
    };

    Agent agent = new AgentQL("dqn_lunarlander", env, qFunc)
    {
        WarmupSteps = 5000,
        MaxEpsilon = 1.0f,
        MinEpsilon = 0.01f,
        EpsilonDecay = 0.995f,
        TrainInterval = 1,
        RewardClipping = false,
        TrainRenderInterval = 10,
        Verbose = true,
        RenderFreq = 80,
    };

    agent.Train(1500, 1500);
    //agent.Load($"{agent.Name}_1500"); // load a previously saved model instead of training
    agent.Test(100, 400, 2);
}
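// Evaluation-only variant: a minimal sketch, assuming a model was saved under the
// "{agent.Name}_1500" name suggested by the commented-out Load call above. Only calls
// already used in these examples (Load, Test) are relied on here.
static void Main(string[] args)
{
    Env env = new LunarLanderEnv();
    var qFunc = new DQN(env.ObservationSpace.Shape, env.ActionSpace.NumberOfValues(),
                        new[] { 256, 128 }, 0.0001f, 0.999f, 32, new PriorityExperienceReplay(100000))
    {
        EnableDoubleDQN = true
    };

    Agent agent = new AgentQL("dqn_lunarlander", env, qFunc);
    agent.Load($"{agent.Name}_1500"); // restore previously trained weights instead of calling Train()
    agent.Test(100, 400, 2);
}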
static void Main(string[] args)
{
    // Convolutional double DQN with prioritized experience replay on Atari Pong.
    Env env = new AtariEnv("../../../roms/pong.bin", true);
    var memory = new PriorityExperienceReplay(100000);

    var qFunc = new DQNConv(new[] { 64, 64 }, env.ActionSpace.NumberOfValues(),
                            0.00025f, 0.999f, 32, memory)
    {
        MemoryInterval = 1,
        EnableDoubleDQN = true,
        TargetModelUpdateInterval = 4000,
        TrainingEpochs = 1
    };

    Agent agent = new AgentQL("dqnconv_atari", env, qFunc)
    {
        WarmupSteps = 1000,
        MaxEpsilon = 1.0f,
        MinEpsilon = 0.01f,
        EpsilonDecay = 0.995f,
        TrainInterval = 4,
        RewardClipping = false,
        TrainRenderInterval = 1,
        Verbose = true,
        RenderFreq = 60,
    };

    agent.Train(1500, 1000);
}
static void Main(string[] args)
{
    // Tabular Q-learning on the Taxi environment.
    Env env = new TaxiEnv();

    Agent agent = new AgentQL("qtable_taxi", env,
        new QTable(env.ObservationSpace.NumberOfValues(), env.ActionSpace.NumberOfValues(), 0.7f, 0.618f))
    {
        Verbose = true
    };

    agent.Train(50000, 100);
    Console.WriteLine($"Average reward {agent.Test(100, 100, 1)}");

    // Alternative: step the environment with sampled actions and render each step:
    //while (!env.Step((int)env.ActionSpace.Sample()[0], out var nextState, out var reward))
    //    env.Render();
}
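// Standalone random-policy rollout, lifted from the commented-out loops above: a quick
// environment sanity check with no agent at all. This sketch assumes Step() returns true
// once the episode is done, as the original loop condition implies.
static void Main(string[] args)
{
    Env env = new TaxiEnv();
    while (!env.Step((int)env.ActionSpace.Sample()[0], out var nextState, out var reward))
        env.Render();
}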