public MC_AIXI_CTW(AIXIEnvironment env, Dictionary<string, string> options)
    : base(env, options)
{
    Int32.TryParse(options["ct-depth"], out this.Depth);

    // Pick which implementation of the CTW context tree to use.
    if (options.ContainsKey("ctw-model") && options["ctw-model"] == "ctf")
    {
        this.ContextTree = new CTWContextTreeFast(this.Depth);
    }
    else if (!options.ContainsKey("ctw-model") || options["ctw-model"] == "ct")
    {
        this.ContextTree = new CTWContextTree(this.Depth);
    }
    else
    {
        throw new ArgumentException("unknown ctw-model in options");
    }

    Int32.TryParse(options["agent-horizon"], out this.Horizon);
    Int32.TryParse(options["mc-simulations"], out this.McSimulations);

    this.Reset();
}
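For illustration, a minimal sketch of the options dictionary this constructor expects; the keys match the ones parsed above, while the concrete values are assumptions chosen for the example.

// Hypothetical options for the MC_AIXI_CTW constructor above.
var options = new Dictionary<string, string>
{
    { "ct-depth", "8" },          // depth of the CTW context tree
    { "agent-horizon", "6" },     // planning horizon of the agent
    { "mc-simulations", "200" },  // Monte Carlo simulations per search
    { "ctw-model", "ct" }         // "ct" = CTWContextTree, "ctf" = CTWContextTreeFast; anything else throws
};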
public Agent(AIXIEnvironment env, Dictionary<string, string> options)
{
    this.Environment = env;
    this.Options = options;
    this.LastUpdate = ActionUpdate;

    if (options.ContainsKey("learning-period"))
    {
        Int32.TryParse(options["learning-period"], out this.LearningPeriod);
    }
    else
    {
        this.LearningPeriod = 0;
    }
}
public RandomAgent(AIXIEnvironment env, Dictionary<string, string> options)
    : base(env, options)
{
    this.Horizon = 0;
}
// Interaction loop between the agent and the environment.
// (In the other version, this part is handled by BrainSimulator.)
// Each round the environment produces an observation and a reward and passes them to the agent;
// the agent then generates an action and the cycle repeats.
public static void InteractionLoop(Agent agent, AIXIEnvironment env, Dictionary<string, string> options)
{
    Random rnd;
    if (options.ContainsKey("random-seed"))
    {
        int seed;
        int.TryParse(options["random-seed"], out seed);
        rnd = new Random(seed);
    }
    else
    {
        rnd = new Random();
    }

    // Exploration = try a random action.
    // The probability decays exponentially as exploreRate * exploreDecay ^ cycle.
    var exploreRate = 0.0;
    if (options.ContainsKey("exploration"))
    {
        exploreRate = Utils.MyToDouble(options["exploration"]);
    }
    var explore = exploreRate > 0;

    var exploreDecay = 0.0;
    if (options.ContainsKey("explore-decay"))
    {
        exploreDecay = Utils.MyToDouble(options["explore-decay"]);
    }

    Debug.Assert(0.0 <= exploreRate);
    Debug.Assert(0.0 <= exploreDecay && exploreDecay <= 1.0);

    // Automatic halting after a certain number of rounds.
    var terminateAge = 0;
    if (options.ContainsKey("terminate-age"))
    {
        terminateAge = Convert.ToInt32(options["terminate-age"]);
    }
    var terminateCheck = terminateAge > 0;
    Debug.Assert(0 <= terminateAge);

    // Once the learning period has passed, the agent stops changing/improving its model and just uses it.
    var learningPeriod = 0;
    if (options.ContainsKey("learning-period"))
    {
        learningPeriod = Convert.ToInt32(options["learning-period"]);
    }
    Debug.Assert(0 <= learningPeriod);

    var cycle = 0;
    while (!env.IsFinished)
    {
        if (terminateCheck && agent.Age > terminateAge)
        {
            break;
        }
        var cycleStartTime = DateTime.Now;

        var observation = env.Observation;
        var reward = env.Reward;

        if (learningPeriod > 0 && cycle > learningPeriod)
        {
            explore = false;
        }

        // Give the observation and reward to the agent.
        agent.ModelUpdatePercept(observation, reward);

        var explored = false;
        int action;
        if (explore && rnd.NextDouble() < exploreRate)
        {
            explored = true;
            action = agent.GenerateRandomAction();
        }
        else
        {
            // Get the agent's response to the observation and reward.
            action = agent.Search();
        }

        // Pass the agent's action to the environment.
        env.PerformAction(action);
        agent.ModelUpdateAction(action);

        var timeTaken = DateTime.Now - cycleStartTime;

        Console.WriteLine("{0}:\t{1},{2},{3}\t{4},{5} \t{6},{7}\t>{8},{9}",
            cycle, observation, reward, action,
            explored, exploreRate,
            agent.TotalReward, agent.AverageReward(),
            timeTaken, agent.ModelSize());

        if (explore)
        {
            exploreRate *= exploreDecay;
        }

        cycle += 1;
    }
}
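To show how the loop above would typically be driven, here is a minimal, hypothetical entry point; the ExampleEnvironment class, the Main method, and the concrete option values are assumptions for illustration, not part of the implementation.

// Hypothetical driver for the interaction loop sketched above.
static void Main(string[] args)
{
    var options = new Dictionary<string, string>
    {
        { "ct-depth", "8" },
        { "agent-horizon", "6" },
        { "mc-simulations", "200" },
        { "random-seed", "42" },      // optional: makes exploration reproducible
        { "exploration", "0.1" },     // initial probability of taking a random action
        { "explore-decay", "0.99" },  // exploreRate is multiplied by this every cycle
        { "terminate-age", "1000" },  // stop after this many interaction cycles
        { "learning-period", "500" }  // exploration is switched off after this many cycles
    };
    AIXIEnvironment env = new ExampleEnvironment(options);  // ExampleEnvironment is an assumed environment implementation
    var agent = new MC_AIXI_CTW(env, options);
    InteractionLoop(agent, env, options);
}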