/// <summary>
/// Verifies that EGreedy selects the best action with probability (1 - epsilon) + epsilon / |A|
/// </summary>
public void TestRandomProbability()
{
    var random = new Random(1337);
    var epsilon = 0.4;
    var eGreedy = new EGreedy(epsilon, random);
    var qValue = new QValue(new double[] { 121, 231, 425, 676, 812, 1012, 1231, 1301, 1412, 1541, 1701, 2015 });
    var bestAction = PolicyHelpers.SelectMax(qValue, random);

    int numBestSelected = 0;
    int numTests = 3000;

    for (int i = 0; i < numTests; i++)
    {
        int action = eGreedy.Select(qValue);

        if (action == bestAction)
        {
            numBestSelected++;
        }
    }

    Assert.AreEqual(
        (1 - epsilon) + epsilon * (1.0 / qValue.Count),
        numBestSelected / (double)numTests,
        0.05);
}
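
// With ε = 0.4 and 12 actions, the expected fraction of greedy picks is
// (1 - ε) + ε / |A| = 0.6 + 0.4 / 12 ≈ 0.6333, which the assert above
// checks against the observed frequency within a tolerance of 0.05.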
/// <summary>
/// Verifies that the epsilon value decays as expected after each update
/// </summary>
public void TestDecay()
{
    var random = new Random();
    var epsilon = 0.5;
    var eGreedy = new EGreedy(epsilon, random, DecayHelpers.ConstantDecay(1, 5, 0.5, 0.0));
    var qValue = new QValue(new double[] { 121, 231, 425, 676, 812, 1012, 1231, 1301, 1412, 1541, 1701, 2015 });
    var valueEpsilon = 0.00000000001;

    Assert.AreEqual(0.5, eGreedy.Epsilon, valueEpsilon);
    eGreedy.Update(1);
    Assert.AreEqual(0.4, eGreedy.Epsilon, valueEpsilon);
    eGreedy.Update(2);
    Assert.AreEqual(0.3, eGreedy.Epsilon, valueEpsilon);
    eGreedy.Update(3);
    Assert.AreEqual(0.2, eGreedy.Epsilon, valueEpsilon);
    eGreedy.Update(4);
    Assert.AreEqual(0.1, eGreedy.Epsilon, valueEpsilon);
    eGreedy.Update(5);
    Assert.AreEqual(0.0, eGreedy.Epsilon, valueEpsilon);

    // Past the stop step, epsilon stays clamped at its end value
    eGreedy.Update(6);
    Assert.AreEqual(0.0, eGreedy.Epsilon, valueEpsilon);
    eGreedy.Update(7);
    Assert.AreEqual(0.0, eGreedy.Epsilon, valueEpsilon);
}
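
// For reference, a minimal sketch of the linear schedule that
// DecayHelpers.ConstantDecay(1, 5, 0.5, 0.0) appears to implement, inferred
// purely from the assertions above; the name and signature below are
// illustrative, not the library's actual API:
public static Func<int, double, double> LinearDecaySketch(int startStep, int stopStep, double startValue, double endValue)
{
    // Fixed step so the value falls from startValue to endValue over the
    // updates startStep..stopStep (0.1 per update in the test above)
    double step = (startValue - endValue) / (stopStep - startStep + 1);
    return (time, current) =>
        time < startStep ? current                  // before the window: unchanged
        : time > stopStep ? endValue                // after the window: clamped at the end value
        : Math.Max(endValue, current - step);       // inside the window: decay linearly
}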
/// <summary>
/// Creates the learner
/// </summary>
protected override ILearningAlgorithm<BoardState> CreateLearner()
{
    double alpha = 0.05;
    double gamma = 0.1;

    int stopDecayAt = (int)(0.4 * this.Environment.Config.MaxEpisodes);
    double epsilon = 0.4;
    var selectionPolicy = new EGreedy(
        epsilon,
        this.Environment.Config.Random,
        DecayHelpers.ConstantDecay(0, stopDecayAt, epsilon, 0));

    //double tau = 200;
    //var selectionPolicy = new Softmax(
    //    tau,
    //    this.Environment.Config.Random,
    //    DecayHelpers.ConstantDecay(0, stopDecayAt, tau, 0));

    //return QLearning<BoardState>.New(
    //    this.boardSize * this.boardSize,
    //    selectionPolicy,
    //    alpha,
    //    gamma,
    //    this.Environment.Config.Random);

    return Sarsa<BoardState>.New(
        this.boardSize * this.boardSize,
        selectionPolicy,
        alpha,
        gamma,
        this.Environment.Config.Random);
}
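
// Note: Sarsa is the on-policy variant (it bootstraps from the action the
// ε-greedy policy actually takes next), while the commented-out QLearning
// alternative is off-policy (it bootstraps from the greedy maximum).
// Swapping between them only changes the New(...) call, as the
// commented-out block above shows.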
/// <summary>
/// Creates the learner
/// </summary>
protected override ILearningAlgorithm<MouseState> CreateLearner()
{
    double alpha = 1;
    double gamma = 0.1;

    int stopDecayAt = (int)(0.9 * this.Environment.Config.MaxEpisodes);
    double epsilon = 0.4;
    var selectionPolicy = new EGreedy(
        epsilon,
        this.Environment.Config.Random,
        DecayHelpers.ConstantDecay(0, stopDecayAt, epsilon, 0));

    return QLearning<MouseState>.New(
        Enum.GetValues(typeof(MouseAction)).Length,
        selectionPolicy,
        alpha,
        gamma,
        this.Environment.Config.Random);

    //return Sarsa<MouseState>.New(
    //    Enum.GetValues(typeof(MouseAction)).Length,
    //    selectionPolicy,
    //    alpha,
    //    gamma,
    //    this.Environment.Config.Random);
}
/// <summary>
/// Runs the example
/// </summary>
public static void Run()
{
    var slotMachines = new List<SlotMachine>();
    slotMachines.Add(new SlotMachine(20, 120));
    slotMachines.Add(new SlotMachine(5, 100));
    slotMachines.Add(new SlotMachine(40, 150));
    slotMachines.Add(new SlotMachine(25, 130));
    slotMachines.Add(new SlotMachine(25, 120));
    slotMachines.Add(new SlotMachine(60, 120));

    var random = new Random(1337);
    int trainingEpisodes = 10000;
    double decayRatio = 0.4;

    var environment = new MultiArmedBanditEnvironment(new Configuration(trainingEpisodes, random), slotMachines);
    var agent = new StatelessAgent<MultiArmedBanditEnvironment>(env =>
    {
        double alpha = 0.05;
        double gamma = 0.1;

        int stopDecayAt = (int)(decayRatio * env.Config.MaxEpisodes);
        double epsilon = 0.1;
        var selectionPolicy = new EGreedy(
            epsilon,
            env.Config.Random,
            DecayHelpers.ConstantDecay(0, stopDecayAt, epsilon, 0));

        return QLearning<EmptyState>.New(
            slotMachines.Count,
            selectionPolicy,
            alpha,
            gamma,
            env.Config.Random);
    });

    environment.AddAgent(agent);
    environment.Initialize();

    for (int episode = 0; episode < environment.Config.MaxEpisodes; episode++)
    {
        environment.Reset(episode);
        environment.Update(episode);
    }

    Console.WriteLine("Total reward: {0}", environment.TotalReward);
    Console.ReadLine();
}
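
// For reference, the standard tabular Q-learning update the agent above
// presumably performs on each step (the library's internals may differ);
// QUpdate here is an illustrative helper, not part of the library:
//
//     Q(s, a) <- Q(s, a) + α · (r + γ · max_a' Q(s', a') - Q(s, a))
//
static void QUpdate(double[] qCurrent, int action, double reward, double alpha, double gamma, double[] qNext)
{
    // requires: using System.Linq; for Max()
    qCurrent[action] += alpha * (reward + gamma * qNext.Max() - qCurrent[action]);
}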