Exemple #1
1
        /// <summary>
        /// Samples an <c>EGreedy</c> policy (epsilon = 0.4) many times and checks that the
        /// fraction of selections equal to the max-valued action is close to
        /// (1 - epsilon) + epsilon / actionCount.
        /// </summary>
        public void TestRandomProbability()
        {
            const int sampleCount = 3000;

            var rng         = new Random(1337);
            var epsilon     = 0.4;
            var policy      = new EGreedy(epsilon, rng);
            var actionValue = new QValue(new double[]
            {
                121, 231, 425, 676, 812, 1012, 1231, 1301, 1412, 1541, 1701, 2015
            });

            // Reference action, determined before any sampling takes place.
            var greedyAction = PolicyHelpers.SelectMax(actionValue, rng);

            int greedyHits = 0;
            int drawn      = 0;

            while (drawn < sampleCount)
            {
                if (policy.Select(actionValue) == greedyAction)
                {
                    greedyHits++;
                }

                drawn++;
            }

            // The best action is picked when exploiting, and also by chance while exploring.
            double expectedRatio = (1 - epsilon) + epsilon * (1.0 / actionValue.Count);
            double observedRatio = greedyHits / (double)sampleCount;

            Assert.AreEqual(expectedRatio, observedRatio, 0.05);
        }
Exemple #2
0
        /// <summary>
        /// Checks the epsilon schedule of an <c>EGreedy</c> policy created with
        /// <c>DecayHelpers.ConstantDecay(1, 5, 0.5, 0.0)</c>: epsilon starts at 0.5, is expected
        /// to drop by 0.1 after each of <c>Update(1)</c>..<c>Update(5)</c>, and to stay at 0.0
        /// after <c>Update(6)</c> and <c>Update(7)</c>.
        /// </summary>
        public void TestDecay()
        {
            // Fixed seed so that the test is reproducible from run to run.
            var random  = new Random(1337);
            var epsilon = 0.5;
            var eGreedy = new EGreedy(epsilon, random, DecayHelpers.ConstantDecay(1, 5, 0.5, 0.0));

            var valueEpsilon = 0.00000000001;

            // expectedEpsilons[k] is the epsilon expected after Update(k);
            // index 0 holds the initial value, before any update.
            var expectedEpsilons = new double[] { 0.5, 0.4, 0.3, 0.2, 0.1, 0.0, 0.0, 0.0 };

            Assert.AreEqual(expectedEpsilons[0], eGreedy.Epsilon, valueEpsilon, "before any update");

            for (int episode = 1; episode < expectedEpsilons.Length; episode++)
            {
                eGreedy.Update(episode);

                Assert.AreEqual(
                    expectedEpsilons[episode],
                    eGreedy.Epsilon,
                    valueEpsilon,
                    "after Update(" + episode + ")");
            }
        }
Exemple #3
0
        /// <summary>
        /// Builds the learning algorithm for this agent: a <c>Sarsa</c> learner over
        /// <c>boardSize * boardSize</c> actions, driven by an <c>EGreedy</c> selection policy.
        /// </summary>
        /// <returns>The configured learner.</returns>
        protected override ILearningAlgorithm<BoardState> CreateLearner()
        {
            const double learningRate   = 0.05;
            const double discountFactor = 0.1;
            const double initialEpsilon = 0.4;

            // The decay schedule is given an end point at 40% of the configured episodes.
            int decayEndEpisode = (int)(0.4 * this.Environment.Config.MaxEpisodes);

            // Alternatives tried previously: a Softmax policy (tau = 200, same decay schedule)
            // and QLearning<BoardState> in place of Sarsa<BoardState>.
            var policy = new EGreedy(
                initialEpsilon,
                this.Environment.Config.Random,
                DecayHelpers.ConstantDecay(0, decayEndEpisode, initialEpsilon, 0));

            int actionCount = this.boardSize * this.boardSize;

            return Sarsa<BoardState>.New(
                actionCount,
                policy,
                learningRate,
                discountFactor,
                this.Environment.Config.Random);
        }
Exemple #4
0
        /// <summary>
        /// Builds the learning algorithm for this agent: a <c>QLearning</c> learner with one
        /// action per <c>MouseAction</c> enum value, driven by an <c>EGreedy</c> selection policy.
        /// </summary>
        /// <returns>The configured learner.</returns>
        protected override ILearningAlgorithm<MouseState> CreateLearner()
        {
            const double learningRate   = 1;
            const double discountFactor = 0.1;
            const double initialEpsilon = 0.4;

            // The decay schedule is given an end point at 90% of the configured episodes.
            int decayEndEpisode = (int)(0.9 * this.Environment.Config.MaxEpisodes);

            var policy = new EGreedy(
                initialEpsilon,
                this.Environment.Config.Random,
                DecayHelpers.ConstantDecay(0, decayEndEpisode, initialEpsilon, 0));

            int actionCount = Enum.GetValues(typeof(MouseAction)).Length;

            // Alternative tried previously: Sarsa<MouseState>.New with the same arguments.
            return QLearning<MouseState>.New(
                actionCount,
                policy,
                learningRate,
                discountFactor,
                this.Environment.Config.Random);
        }
Exemple #5
0
        /// <summary>
        /// Runs the multi-armed bandit example: creates six slot machines, trains a stateless
        /// Q-learning agent with an epsilon-greedy policy for 10000 episodes, prints the total
        /// reward and then waits for a line of console input.
        /// </summary>
        public static void Run()
        {
            var bandits = new List<SlotMachine>
            {
                new SlotMachine(20, 120),
                new SlotMachine(5, 100),
                new SlotMachine(40, 150),
                new SlotMachine(25, 130),
                new SlotMachine(25, 120),
                new SlotMachine(60, 120)
            };

            var rng = new Random(1337);
            const int    episodeCount  = 10000;
            const double decayFraction = 0.4;

            var config = new Configuration(episodeCount, rng);
            var world  = new MultiArmedBanditEnvironment(config, bandits);

            // The learner is built from the environment handed to the factory callback.
            var player = new StatelessAgent<MultiArmedBanditEnvironment>(env =>
            {
                const double learningRate   = 0.05;
                const double discountFactor = 0.1;
                const double initialEpsilon = 0.1;

                int decayEndEpisode = (int)(decayFraction * env.Config.MaxEpisodes);

                var policy = new EGreedy(
                    initialEpsilon,
                    env.Config.Random,
                    DecayHelpers.ConstantDecay(0, decayEndEpisode, initialEpsilon, 0));

                return QLearning<EmptyState>.New(
                    bandits.Count,
                    policy,
                    learningRate,
                    discountFactor,
                    env.Config.Random);
            });

            world.AddAgent(player);
            world.Initialize();

            // One reset followed by one update per episode.
            int currentEpisode = 0;
            while (currentEpisode < world.Config.MaxEpisodes)
            {
                world.Reset(currentEpisode);
                world.Update(currentEpisode);
                currentEpisode++;
            }

            string summary = string.Format("Total reward: {0}", world.TotalReward);
            Console.WriteLine(summary);
            Console.ReadLine();
        }