예제 #1
0
        /// <summary>
        /// Builds the learning algorithm for this agent: a <c>QLearning</c> instance over
        /// <c>MouseState</c> whose actions are chosen by an <c>EGreedy</c> selection policy.
        /// </summary>
        /// <returns>The learner produced by <c>QLearning&lt;MouseState&gt;.New</c>.</returns>
        protected override ILearningAlgorithm <MouseState> CreateLearner()
        {
            // Numeric settings: epsilon goes to EGreedy and the decay helper,
            // alpha and gamma go to QLearning.New.
            double epsilon = 0.4;
            double alpha   = 1;
            double gamma   = 0.1;

            // Episode index passed to the decay helper: 90% of the configured maximum, truncated.
            // NOTE(review): presumably the episode at which epsilon stops decaying - confirm in DecayHelpers.
            int decayCutoff = (int)(0.9 * this.Environment.Config.MaxEpisodes);

            var policy = new EGreedy(
                epsilon,
                this.Environment.Config.Random,
                DecayHelpers.ConstantDecay(0, decayCutoff, epsilon, 0));

            // One action per member of the MouseAction enum.
            int actionCount = Enum.GetValues(typeof(MouseAction)).Length;

            // NOTE: Sarsa<MouseState>.New with the same five arguments was the
            // disabled (commented-out) alternative to Q-learning at this point.
            var learner = QLearning<MouseState>.New(
                actionCount,
                policy,
                alpha,
                gamma,
                this.Environment.Config.Random);

            return learner;
        }
예제 #2
0
        /// <summary>
        /// Runs the multi-armed bandit example: builds six slot machines, attaches a single
        /// stateless Q-learning agent to a <c>MultiArmedBanditEnvironment</c>, steps the
        /// environment through every configured episode, and prints the total reward.
        /// </summary>
        public static void Run()
        {
            // The six arms of the bandit; constructor arguments are kept exactly as in the example.
            var machines = new List<SlotMachine>
            {
                new SlotMachine(20, 120),
                new SlotMachine(5, 100),
                new SlotMachine(40, 150),
                new SlotMachine(25, 130),
                new SlotMachine(25, 120),
                new SlotMachine(60, 120),
            };

            // Fixed seed (1337) for the random source shared through the configuration.
            var    rng          = new Random(1337);
            int    episodeCount = 10000;
            double decayRatio   = 0.4;

            var configuration = new Configuration(episodeCount, rng);
            var environment   = new MultiArmedBanditEnvironment(configuration, machines);

            // The agent receives a factory that builds its learner from the environment it is given.
            var agent = new StatelessAgent<MultiArmedBanditEnvironment>(banditEnv =>
            {
                double alpha   = 0.05;
                double gamma   = 0.1;
                double epsilon = 0.1;

                // Episode index handed to the decay helper: decayRatio of the configured maximum, truncated.
                int decayCutoff = (int)(decayRatio * banditEnv.Config.MaxEpisodes);

                var policy = new EGreedy(
                    epsilon,
                    banditEnv.Config.Random,
                    DecayHelpers.ConstantDecay(0, decayCutoff, epsilon, 0));

                // One action per slot machine.
                var learner = QLearning<EmptyState>.New(
                    machines.Count,
                    policy,
                    alpha,
                    gamma,
                    banditEnv.Config.Random);

                return learner;
            });

            environment.AddAgent(agent);
            environment.Initialize();

            // Each episode is a Reset followed by an Update; the bound is re-read every iteration.
            int episode = 0;
            while (episode < environment.Config.MaxEpisodes)
            {
                environment.Reset(episode);
                environment.Update(episode);
                episode++;
            }

            string summary = string.Format("Total reward: {0}", environment.TotalReward);
            Console.WriteLine(summary);

            // Wait for a line of input before returning.
            Console.ReadLine();
        }