/// <summary>
/// Verifies that EGreedy applies a constant decay to epsilon on each update:
/// epsilon starts at 0.5, decreases by 0.1 per step for steps 1 through 5,
/// and stays at 0.0 for later steps.
/// </summary>
public void TestDecay()
{
    var random = new Random();
    var epsilon = 0.5;
    var eGreedy = new EGreedy(epsilon, random, DecayHelpers.ConstantDecay(1, 5, 0.5, 0.0));
    var valueEpsilon = 0.00000000001; // absolute tolerance for double comparison

    // Initial epsilon before any update.
    Assert.AreEqual(0.5, eGreedy.Epsilon, valueEpsilon);

    // Expected epsilon after update steps 1..7 (decay stops at step 5).
    var expected = new[] { 0.4, 0.3, 0.2, 0.1, 0.0, 0.0, 0.0 };
    for (int step = 1; step <= expected.Length; step++)
    {
        eGreedy.Update(step);
        Assert.AreEqual(expected[step - 1], eGreedy.Epsilon, valueEpsilon);
    }
}
/// <summary>
/// Creates the learner: a Q-learning algorithm with an epsilon-greedy
/// selection policy whose epsilon decays linearly from 0.4 to 0 over the
/// first 90% of the configured training episodes.
/// </summary>
protected override ILearningAlgorithm<MouseState> CreateLearner()
{
    double alpha = 1;    // learning rate
    double gamma = 0.1;  // discount factor

    // Decay epsilon from its initial value down to 0 over the first 90% of episodes.
    int stopDecayAt = (int)(0.9 * this.Environment.Config.MaxEpisodes);
    double epsilon = 0.4;
    var selectionPolicy = new EGreedy(
        epsilon,
        this.Environment.Config.Random,
        DecayHelpers.ConstantDecay(0, stopDecayAt, epsilon, 0));

    // NOTE: Sarsa<MouseState>.New(...) with the same arguments can be swapped
    // in here to compare on-policy learning against Q-learning.
    return QLearning<MouseState>.New(
        Enum.GetValues(typeof(MouseAction)).Length,
        selectionPolicy,
        alpha,
        gamma,
        this.Environment.Config.Random);
}
/// <summary>
/// Verifies DecayHelpers.ConstantDecay(1, 5, 0.5, 0.0): the value is
/// unchanged at step 0, drops by 0.1 per step for steps 1 through 5,
/// and remains 0.0 for later steps.
/// </summary>
public void TestConstantDecay()
{
    var decay = DecayHelpers.ConstantDecay(1, 5, 0.5, 0.0);
    var tolerance = 0.00000000001; // absolute tolerance for double comparison

    // Expected value after applying the decay at each step 0..7.
    var expectedByStep = new[] { 0.5, 0.4, 0.3, 0.2, 0.1, 0.0, 0.0, 0.0 };
    double current = 0.5;
    for (int step = 0; step < expectedByStep.Length; step++)
    {
        current = decay(current, step);
        Assert.AreEqual(expectedByStep[step], current, tolerance);
    }
}
/// <summary>
/// Creates the learner: a Sarsa algorithm with an epsilon-greedy selection
/// policy whose epsilon decays linearly from 0.4 to 0 over the first 40% of
/// the configured training episodes.
/// </summary>
protected override ILearningAlgorithm<BoardState> CreateLearner()
{
    double alpha = 0.05; // learning rate
    double gamma = 0.1;  // discount factor

    // Decay epsilon from its initial value down to 0 over the first 40% of episodes.
    int stopDecayAt = (int)(0.4 * this.Environment.Config.MaxEpisodes);
    double epsilon = 0.4;
    var selectionPolicy = new EGreedy(
        epsilon,
        this.Environment.Config.Random,
        DecayHelpers.ConstantDecay(0, stopDecayAt, epsilon, 0));

    // NOTE: alternatives previously tried here — a Softmax selection policy
    // (tau = 200 with the same constant decay) and QLearning<BoardState> —
    // can be swapped in with the same arguments to compare learners.
    return Sarsa<BoardState>.New(
        this.boardSize * this.boardSize,
        selectionPolicy,
        alpha,
        gamma,
        this.Environment.Config.Random);
}
/// <summary>
/// Runs the multi-armed bandit example: trains a stateless Q-learning agent
/// against six slot machines for a fixed number of episodes and prints the
/// total accumulated reward.
/// </summary>
public static void Run()
{
    // NOTE(review): SlotMachine(a, b) argument semantics are not visible here
    // (presumably win chance and payout) — values taken from the original example.
    var slotMachines = new List<SlotMachine>
    {
        new SlotMachine(20, 120),
        new SlotMachine(5, 100),
        new SlotMachine(40, 150),
        new SlotMachine(25, 130),
        new SlotMachine(25, 120),
        new SlotMachine(60, 120),
    };

    var random = new Random(1337); // fixed seed for reproducible runs
    int trainingEpisodes = 10000;
    double decayRatio = 0.4;       // fraction of episodes over which epsilon decays

    var environment = new MultiArmedBanditEnvironment(
        new Configuration(trainingEpisodes, random),
        slotMachines);

    var agent = new StatelessAgent<MultiArmedBanditEnvironment>(env =>
    {
        double alpha = 0.05; // learning rate
        double gamma = 0.1;  // discount factor
        int stopDecayAt = (int)(decayRatio * env.Config.MaxEpisodes);
        double epsilon = 0.1;
        var selectionPolicy = new EGreedy(
            epsilon,
            env.Config.Random,
            DecayHelpers.ConstantDecay(0, stopDecayAt, epsilon, 0));

        return QLearning<EmptyState>.New(
            slotMachines.Count,
            selectionPolicy,
            alpha,
            gamma,
            env.Config.Random);
    });

    environment.AddAgent(agent);
    environment.Initialize();

    for (int episode = 0; episode < environment.Config.MaxEpisodes; episode++)
    {
        environment.Reset(episode);
        environment.Update(episode);
    }

    Console.WriteLine($"Total reward: {environment.TotalReward}");
    Console.ReadLine();
}