/// <summary>
        /// Carries out the selected action on behalf of the agent: the reward for the
        /// action is computed, added to the environment's running total and then
        /// reported back to the agent.
        /// </summary>
        /// <param name="agent">The agent that receives the reward</param>
        /// <param name="actionNumber">The index of the action, used to calculate the reward</param>
        /// <param name="episode">The current episode number, forwarded to the agent</param>
        public override void Execute(StatelessAgent<MultiArmedBanditEnvironment> agent, int actionNumber, int episode)
        {
            double payout = CalculateReward(actionNumber);

            // Update the environment-wide tally before notifying the agent.
            TotalReward += payout;

            // NOTE(review): the 'false' argument is passed through unchanged; its meaning is
            // defined by StatelessAgent.Reward, which is not visible here.
            agent.Reward(payout, false, episode);
        }
Example #2
0
        /// <summary>
        /// Runs the multi-armed bandit example: builds six slot machines, trains a single
        /// stateless Q-learning agent with an epsilon-greedy selection policy for a fixed
        /// number of episodes, then prints the accumulated reward and waits for a line of
        /// console input before returning.
        /// </summary>
        public static void Run()
        {
            // The bandit arms. The lambda below captures this list to size the learner.
            var slotMachines = new List<SlotMachine>
            {
                new SlotMachine(20, 120),
                new SlotMachine(5, 100),
                new SlotMachine(40, 150),
                new SlotMachine(25, 130),
                new SlotMachine(25, 120),
                new SlotMachine(60, 120),
            };

            // Fixed seed so that repeated runs use the same random sequence.
            var    random           = new Random(1337);
            int    trainingEpisodes = 10000;
            double decayRatio       = 0.4;

            var configuration = new Configuration(trainingEpisodes, random);
            var environment   = new MultiArmedBanditEnvironment(configuration, slotMachines);

            var agent = new StatelessAgent<MultiArmedBanditEnvironment>(env =>
            {
                double learningRate    = 0.05;
                double discountFactor  = 0.1;
                double explorationRate = 0.1;

                // Episode index handed to the decay helper as its second argument
                // (a fraction of the configured maximum number of episodes).
                int decayCutoffEpisode = (int)(decayRatio * env.Config.MaxEpisodes);

                // NOTE(review): presumably decays epsilon from its initial value to 0 between
                // episode 0 and the cutoff - confirm against DecayHelpers.ConstantDecay.
                var selectionPolicy = new EGreedy(
                    explorationRate,
                    env.Config.Random,
                    DecayHelpers.ConstantDecay(0, decayCutoffEpisode, explorationRate, 0));

                return QLearning<EmptyState>.New(
                    slotMachines.Count,
                    selectionPolicy,
                    learningRate,
                    discountFactor,
                    env.Config.Random);
            });

            environment.AddAgent(agent);
            environment.Initialize();

            // One Reset/Update pair per episode; the bound is re-read from the configuration each pass.
            int currentEpisode = 0;
            while (currentEpisode < environment.Config.MaxEpisodes)
            {
                environment.Reset(currentEpisode);
                environment.Update(currentEpisode);
                currentEpisode++;
            }

            string summary = string.Format("Total reward: {0}", environment.TotalReward);
            Console.WriteLine(summary);

            // Block until a line is read from the console before returning.
            Console.ReadLine();
        }
 /// <summary>
 /// Not used; this override always throws.
 /// </summary>
 /// <param name="agent">The agent (ignored)</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public override EmptyState GetDefaultState(StatelessAgent<MultiArmedBanditEnvironment> agent)
     => throw new NotImplementedException();