ModelUpdateAction() public abstract method

public abstract ModelUpdateAction ( int action ) : void
action int
return void
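
A minimal sketch of what a concrete implementation of this override might look like. The Agent base class is simplified here only so the snippet compiles, and the MyAgent subclass and its actionHistory field are illustrative assumptions, not taken from the project:

        using System.Collections.Generic;

        public abstract class Agent
        {
            // Signature as documented above.
            public abstract void ModelUpdateAction(int action);
        }

        // Hypothetical subclass, shown only to illustrate the shape of the override.
        public class MyAgent : Agent
        {
            private readonly List<int> actionHistory = new List<int>();

            public override void ModelUpdateAction(int action)
            {
                // Record the action so the model's context reflects it
                // before the next percept arrives.
                actionHistory.Add(action);
            }
        }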
Code Example #1
        public double Sample(Agent agent, int horizon)
        {
            double reward = 0.0;

            if (horizon == 0)
            {
                return reward;
            }
            else if (this.Type == ChanceNode)
            {
                var percept      = agent.GeneratePerceptAndUpdate();
                int observation  = percept.Item1;
                int randomReward = percept.Item2;

                if (!this.Children.ContainsKey(observation))  // new observation -> add it as a decision node
                {
                    this.Children[observation] = new MonteCarloSearchNode(DecisionNode);
                }
                MonteCarloSearchNode observationChild = this.Children[observation];

                reward = randomReward + observationChild.Sample(agent, horizon - 1);
            }
            else if (this.Visits == 0) //unvisited decision node or we have exceeded maximum tree depth
            {
                reward = agent.Playout(horizon);
//                Console.WriteLine("from playout: reward ="+reward);
            }
            else   // Previously visited decision node
            {
                int action = this.SelectAction(agent);

                agent.ModelUpdateAction(action);

                if (!this.Children.ContainsKey(action))     //this action is new chance child
                {
                    this.Children[action] = new MonteCarloSearchNode(ChanceNode);
                }
                MonteCarloSearchNode actionChild = this.Children[action];

                reward = actionChild.Sample(agent, horizon);   // unclear whether this should be horizon - 1 (see pyaixi)
            }

            double visitsDouble = this.Visits;

            //Console.WriteLine("> {3} - {0}, {1}, {2}", this.mean, reward, (reward + (visitsDouble * this.mean) / (visitsDouble + 1.0)), visitsDouble);
            this.Mean   = (reward + (visitsDouble * this.Mean)) / (1.0 + visitsDouble);
            this.Visits = this.Visits + 1;

            return(reward);
        }
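
The value estimate maintained at the end of Sample is an incremental (running) mean over the node's visits. A standalone sketch of that update rule, with names chosen here only for illustration:

        // After n visits with mean m, folding in a new sample r gives
        // (r + n * m) / (n + 1), which equals m + (r - m) / (n + 1).
        static double UpdateMean(double mean, double visits, double reward)
        {
            return (reward + visits * mean) / (1.0 + visits);
        }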
Code Example #2
File: Program.cs Project: GoodAI/SummerCamp
        // Interaction loop between the agent and the environment.
        // (In the other version this part is handled by BrainSimulator.)
        // The environment generates an observation and a reward and passes them to the agent;
        // the agent then generates an action, and the cycle repeats.
        public static void InteractionLoop(Agent agent, AIXIEnvironment env, Dictionary<string, string> options)
        {
            Random rnd;
            if (options.ContainsKey("random-seed"))
            {
                int seed;
                int.TryParse(options["random-seed"], out seed);   // seed stays 0 if the value cannot be parsed
                rnd = new Random(seed);
            }
            else
            {
                rnd = new Random();
            }

            // Exploration = try a random action.
            // The probability decays exponentially as exploreRate * exploreDecay^cycle.
            var exploreRate = 0.0;
            if (options.ContainsKey("exploration"))
            {
                exploreRate = Utils.MyToDouble(options["exploration"]);
            }
            var explore = exploreRate > 0;

            var exploreDecay = 0.0;
            if (options.ContainsKey("explore-decay"))
            {
                exploreDecay = Utils.MyToDouble(options["explore-decay"]);
            }

            Debug.Assert(0.0 <= exploreRate);
            Debug.Assert(0.0 <= exploreDecay && exploreDecay <= 1.0);

            // automatic halting after a certain number of rounds
            var terminateAge = 0;
            if (options.ContainsKey("terminate-age"))
            {
                terminateAge = Convert.ToInt32(options["terminate-age"]);
            }
            var terminateCheck = terminateAge > 0;
            Debug.Assert(0 <= terminateAge);

            // once the learning period has passed, the agent stops updating its model and just uses it.
            var learningPeriod = 0;
            if (options.ContainsKey("learning-period"))
            {
                learningPeriod = Convert.ToInt32(options["learning-period"]);
            }
            Debug.Assert(0 <= learningPeriod);

            var cycle = 0;
            while (!env.IsFinished)
            {

                if (terminateCheck && agent.Age > terminateAge)
                {
                    break;
                }
                var cycleStartTime = DateTime.Now;
                var observation = env.Observation;
                var reward = env.Reward;

                if (learningPeriod > 0 && cycle > learningPeriod)
                {
                    explore = false;
                }

                //give observation and reward to agent.
                agent.ModelUpdatePercept(observation, reward);

                var explored = false;
                int action;

                if (explore && rnd.NextDouble() < exploreRate)
                {
                    explored = true;
                    action = agent.GenerateRandomAction();
                }
                else
                {
                    // get the agent's response to the observation and reward
                    action = agent.Search();
                }

                //pass agent's action to environment
                env.PerformAction(action);
                agent.ModelUpdateAction(action);

                var timeTaken = DateTime.Now - cycleStartTime;

                Console.WriteLine("{0}:\t{1},{2},{3}\t{4},{5}  \t{6},{7}\t>{8},{9}",
                    cycle, observation, reward, action,
                    explored, exploreRate,
                    agent.TotalReward, agent.AverageReward(),
                    timeTaken, agent.ModelSize()
                    );

                if (explore)
                {
                    exploreRate *= exploreDecay;
                }
                cycle += 1;
            }
        }
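
In the loop above the exploration probability is decayed multiplicatively once per cycle (exploreRate *= exploreDecay), which matches the closed form exploreRate * exploreDecay^cycle noted in the comment. A tiny standalone sketch of that schedule, with parameter values chosen only for illustration:

        using System;

        class ExploreDecayDemo
        {
            static void Main()
            {
                double exploreRate = 0.9;    // illustrative initial exploration probability
                double exploreDecay = 0.99;  // illustrative per-cycle decay factor

                for (int cycle = 0; cycle < 5; cycle++)
                {
                    // Closed form of the per-cycle multiplicative decay used in InteractionLoop.
                    double p = exploreRate * Math.Pow(exploreDecay, cycle);
                    Console.WriteLine($"cycle {cycle}: explore with probability {p:F4}");
                }
            }
        }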