Example #1
        public MC_AIXI_CTW(AIXIEnvironment env, Dictionary<string, string> options)
            : base(env, options)
        {
            Int32.TryParse(options["ct-depth"], out this.Depth);

            // Pick which CTW context-tree implementation to use.
            if (options.ContainsKey("ctw-model") && options["ctw-model"] == "ctf")
            {
                this.ContextTree = new CTWContextTreeFast(this.Depth);
            }
            else if (!options.ContainsKey("ctw-model") || options["ctw-model"] == "ct")
            {
                this.ContextTree = new CTWContextTree(this.Depth);
            }
            else
            {
                throw new ArgumentException("unknown ctw-model in options");
            }

            Int32.TryParse(options["agent-horizon"], out this.Horizon);

            Int32.TryParse(options["mc-simulations"], out this.McSimulations);

            this.Reset();
        }
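
For reference, a minimal construction sketch. The option keys are exactly the ones read by the constructor above; the CoinFlip class is a hypothetical stand-in for any concrete AIXIEnvironment subclass.

        // Minimal usage sketch (CoinFlip is a hypothetical AIXIEnvironment).
        var options = new Dictionary<string, string>
        {
            { "ct-depth", "16" },          // context-tree depth
            { "ctw-model", "ct" },         // "ct" (default) or "ctf" for the fast tree
            { "agent-horizon", "6" },      // planning horizon
            { "mc-simulations", "300" }    // Monte Carlo simulations per search
        };
        AIXIEnvironment env = new CoinFlip(options);  // hypothetical environment
        var agent = new MC_AIXI_CTW(env, options);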
Example #2
        public Agent(AIXIEnvironment env, Dictionary<string, string> options)
        {
            this.Environment = env;
            this.Options = options;
            this.LastUpdate = ActionUpdate;

            if (options.ContainsKey("learning-period"))
            {
                Int32.TryParse(options["learning-period"], out this.LearningPeriod);
            }
            else
            {
                this.LearningPeriod = 0;
            }
        }
Example #3
 public RandomAgent(AIXIEnvironment env, Dictionary <string, string> options)
     : base(env, options)
 {
     // A random agent does no planning, so the horizon is zero.
     this.Horizon = 0;
 }
Example #4
        // Interaction loop between the agent and the environment.
        // (In the BrainSimulator version, this part is handled there.)
        // Each round the environment produces an observation and a reward and hands
        // them to the agent; the agent then generates an action, and the cycle repeats.
        public static void InteractionLoop(Agent agent, AIXIEnvironment env, Dictionary<string, string> options)
        {
            Random rnd;
            if (options.ContainsKey("random-seed"))
            {
                int seed;
                int.TryParse(options["random-seed"], out seed); // seed stays 0 if the value is malformed
                rnd = new Random(seed);
            }
            else
            {
                rnd = new Random();
            }

            // Exploration: occasionally take a random action instead of searching.
            // The probability decays exponentially: exploreRate * exploreDecay^cycle.
            var exploreRate = 0.0;
            if (options.ContainsKey("exploration"))
            {
                exploreRate = Utils.MyToDouble(options["exploration"]);
            }
            var explore = exploreRate > 0;

            var exploreDecay = 0.0;
            if (options.ContainsKey("explore-decay"))
            {
                exploreDecay = Utils.MyToDouble(options["explore-decay"]);
            }

            Debug.Assert(0.0 <= exploreRate);
            Debug.Assert(0.0 <= exploreDecay && exploreDecay <= 1.0);

            // Automatic halting after a fixed number of rounds.
            var terminateAge = 0;
            if (options.ContainsKey("terminate-age"))
            {
                terminateAge = Convert.ToInt32(options["terminate-age"]);
            }
            var terminateCheck = terminateAge > 0;
            Debug.Assert(0 <= terminateAge);

            // Once the learning period has passed, the agent stops updating its model and just exploits it.
            var learningPeriod = 0;
            if (options.ContainsKey("learning-period"))
            {
                learningPeriod = Convert.ToInt32(options["learning-period"]);
            }
            Debug.Assert(0 <= learningPeriod);

            var cycle = 0;
            while (!env.IsFinished)
            {
                if (terminateCheck && agent.Age > terminateAge)
                {
                    break;
                }
                var cycleStartTime = DateTime.Now;
                var observation = env.Observation;
                var reward = env.Reward;

                if (learningPeriod > 0 && cycle > learningPeriod)
                {
                    explore = false;
                }

                // Give the observation and reward to the agent.
                agent.ModelUpdatePercept(observation, reward);

                var explored = false;
                int action;

                if (explore && rnd.NextDouble() < exploreRate)
                {
                    explored = true;
                    action = agent.GenerateRandomAction();
                }
                else
                {
                    // Get the agent's response to the observation and reward.
                    action = agent.Search();
                }

                // Pass the agent's action to the environment.
                env.PerformAction(action);
                agent.ModelUpdateAction(action);

                var timeTaken = DateTime.Now - cycleStartTime;

                // Log one line per cycle: observation/reward/action, exploration state,
                // cumulative and average reward, cycle time and model size.
                Console.WriteLine("{0}:\t{1},{2},{3}\t{4},{5}  \t{6},{7}\t>{8},{9}",
                    cycle, observation, reward, action,
                    explored, exploreRate,
                    agent.TotalReward, agent.AverageReward(),
                    timeTaken, agent.ModelSize()
                    );

                if (explore)
                {
                    exploreRate *= exploreDecay;
                }
                cycle += 1;
            }
        }
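
Putting it together: a minimal driver sketch that reuses the agent and options from the Example #1 sketch and adds the loop-specific keys parsed above. As before, the environment class is assumed, not taken from the source.

        // Hypothetical driver: extend the Example #1 options with the
        // loop-specific keys read above, then run the interaction loop.
        options["exploration"] = "0.1";     // initial random-action probability
        options["explore-decay"] = "0.99";  // per-cycle exponential decay factor
        options["terminate-age"] = "1000";  // stop after this many cycles
        InteractionLoop(agent, env, options);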