C# (CSharp) AIXI Agent.ModelUpdatePercept Exemples

Langage de programmation: C# (CSharp)

Espace de nommage/Pack: AIXI

Class/Type: Agent

Méthode/Fonction: ModelUpdatePercept

Exemples sur hotexamples.com : 2

C# (CSharp) AIXI Agent.ModelUpdatePercept - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de AIXI.Agent.ModelUpdatePercept extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

ModelUpdateAction(2)

AverageReward(1)

GeneratePerceptAndUpdate(1)

GenerateRandomAction(1)

MaximumReward(1)

ModelSize(1)

ModelUpdatePercept(1)

Playout(1)

Search(1)

Méthodes fréquemment utilisées

ModelUpdateAction (2)

AverageReward (1)

GeneratePerceptAndUpdate (1)

GenerateRandomAction (1)

MaximumReward (1)

ModelSize (1)

ModelUpdatePercept (1)

Playout (1)

Search (1)

ModelUpdatePercept() méthode publique abstraite

public abstract ModelUpdatePercept ( int observation, int reward ) : void

observation int

reward int

Résultat void

Agent Class Documentation

Exemple #1

0

Afficher le fichier

Fichier : Program.cs Projet : GoodAI/SummerCamp

/// <summary>
/// Interaction loop between an agent and its environment.
/// Each cycle hands the environment's current observation and reward to the agent,
/// obtains an action (random exploration or <c>agent.Search()</c>), applies it to the
/// environment, and prints one status line to the console.
/// </summary>
/// <remarks>
/// This part is done in BrainSimulator in the other version.
/// Recognised option keys: "random-seed", "exploration", "explore-decay",
/// "terminate-age" and "learning-period". Every key is optional.
/// </remarks>
/// <param name="agent">Agent that receives percepts and produces actions.</param>
/// <param name="env">Environment that supplies observations/rewards and executes actions.</param>
/// <param name="options">String-valued run options keyed by the names listed above.</param>
public static void InteractionLoop(Agent agent, AIXIEnvironment env, Dictionary<string, string> options)
{
    // Scratch variable for option look-ups; TryGetValue avoids the double
    // hash look-up of ContainsKey followed by the indexer.
    string rawValue;

    Random rnd;
    if (options.TryGetValue("random-seed", out rawValue))
    {
        // int.TryParse writes 0 to its out argument when parsing fails, so an
        // unparsable seed still yields a deterministic generator seeded with 0.
        int seed;
        int.TryParse(rawValue, out seed);
        rnd = new Random(seed);
    }
    else
    {
        rnd = new Random();
    }

    // Exploration = try a random action.
    // The probability decays exponentially as exploreRate * exploreDecay ** round_number.
    var exploreRate = 0.0;
    if (options.TryGetValue("exploration", out rawValue))
    {
        exploreRate = Utils.MyToDouble(rawValue);
    }
    var explore = exploreRate > 0;

    var exploreDecay = 0.0;
    if (options.TryGetValue("explore-decay", out rawValue))
    {
        exploreDecay = Utils.MyToDouble(rawValue);
    }

    Debug.Assert(0.0 <= exploreRate);
    Debug.Assert(0.0 <= exploreDecay && exploreDecay <= 1.0);

    // Automatic halting once the agent's age exceeds this value (0 disables the check).
    var terminateAge = 0;
    if (options.TryGetValue("terminate-age", out rawValue))
    {
        terminateAge = Convert.ToInt32(rawValue);
    }
    var terminateCheck = terminateAge > 0;
    Debug.Assert(0 <= terminateAge);

    // Once the cycle counter passes the learning period, exploration is switched off
    // for the rest of the run (0 means exploration is never switched off this way).
    var learningPeriod = 0;
    if (options.TryGetValue("learning-period", out rawValue))
    {
        learningPeriod = Convert.ToInt32(rawValue);
    }
    Debug.Assert(0 <= learningPeriod);

    var cycle = 0;
    while (!env.IsFinished)
    {
        if (terminateCheck && agent.Age > terminateAge)
        {
            break;
        }

        // Stopwatch is monotonic and high-resolution; subtracting DateTime.Now values
        // is affected by wall-clock adjustments and has coarse granularity.
        var cycleTimer = Stopwatch.StartNew();

        var observation = env.Observation;
        var reward = env.Reward;

        if (learningPeriod > 0 && cycle > learningPeriod)
        {
            explore = false;
        }

        // Give observation and reward to the agent.
        agent.ModelUpdatePercept(observation, reward);

        var explored = false;
        int action;
        if (explore && rnd.NextDouble() < exploreRate)
        {
            explored = true;
            action = agent.GenerateRandomAction();
        }
        else
        {
            // Get the agent's response to the observation and reward.
            action = agent.Search();
        }

        // Pass the agent's action to the environment, then record it in the agent.
        env.PerformAction(action);
        agent.ModelUpdateAction(action);

        cycleTimer.Stop();
        var timeTaken = cycleTimer.Elapsed;

        Console.WriteLine("{0}:\t{1},{2},{3}\t{4},{5} \t{6},{7}\t>{8},{9}",
            cycle, observation, reward, action,
            explored, exploreRate,
            agent.TotalReward, agent.AverageReward(),
            timeTaken, agent.ModelSize()
        );

        if (explore)
        {
            exploreRate *= exploreDecay;
        }
        cycle += 1;
    }
}

Exemple #2

0

Afficher le fichier

Fichier : Program.cs Projet : ourobouros/SummerCamp

/// <summary>
/// Interaction loop between an agent and its environment.
/// Each cycle hands the environment's current observation and reward to the agent,
/// obtains an action (random exploration or <c>agent.Search()</c>), applies it to the
/// environment, and prints one status line to the console.
/// </summary>
/// <remarks>
/// This part is done in BrainSimulator in the other version.
/// Recognised option keys: "random-seed", "exploration", "explore-decay",
/// "terminate-age" and "learning-period". Every key is optional.
/// </remarks>
/// <param name="agent">Agent that receives percepts and produces actions.</param>
/// <param name="env">Environment that supplies observations/rewards and executes actions.</param>
/// <param name="options">String-valued run options keyed by the names listed above.</param>
public static void InteractionLoop(Agent agent, AIXIEnvironment env, Dictionary<string, string> options)
{
    // Scratch variable for option look-ups; TryGetValue avoids the double
    // hash look-up of ContainsKey followed by the indexer.
    string rawValue;

    Random rnd;
    if (options.TryGetValue("random-seed", out rawValue))
    {
        // int.TryParse writes 0 to its out argument when parsing fails, so an
        // unparsable seed still yields a deterministic generator seeded with 0.
        int seed;
        int.TryParse(rawValue, out seed);
        rnd = new Random(seed);
    }
    else
    {
        rnd = new Random();
    }

    // Exploration = try a random action.
    // The probability decays exponentially as exploreRate * exploreDecay ** round_number.
    var exploreRate = 0.0;
    if (options.TryGetValue("exploration", out rawValue))
    {
        exploreRate = Utils.MyToDouble(rawValue);
    }
    var explore = exploreRate > 0;

    var exploreDecay = 0.0;
    if (options.TryGetValue("explore-decay", out rawValue))
    {
        exploreDecay = Utils.MyToDouble(rawValue);
    }

    Debug.Assert(0.0 <= exploreRate);
    Debug.Assert(0.0 <= exploreDecay && exploreDecay <= 1.0);

    // Automatic halting once the agent's age exceeds this value (0 disables the check).
    var terminateAge = 0;
    if (options.TryGetValue("terminate-age", out rawValue))
    {
        terminateAge = Convert.ToInt32(rawValue);
    }
    var terminateCheck = terminateAge > 0;
    Debug.Assert(0 <= terminateAge);

    // Once the cycle counter passes the learning period, exploration is switched off
    // for the rest of the run (0 means exploration is never switched off this way).
    var learningPeriod = 0;
    if (options.TryGetValue("learning-period", out rawValue))
    {
        learningPeriod = Convert.ToInt32(rawValue);
    }
    Debug.Assert(0 <= learningPeriod);

    var cycle = 0;
    while (!env.IsFinished)
    {
        if (terminateCheck && agent.Age > terminateAge)
        {
            break;
        }

        // Stopwatch is monotonic and high-resolution; subtracting DateTime.Now values
        // is affected by wall-clock adjustments and has coarse granularity.
        var cycleTimer = Stopwatch.StartNew();

        var observation = env.Observation;
        var reward = env.Reward;

        if (learningPeriod > 0 && cycle > learningPeriod)
        {
            explore = false;
        }

        // Give observation and reward to the agent.
        agent.ModelUpdatePercept(observation, reward);

        var explored = false;
        int action;
        if (explore && rnd.NextDouble() < exploreRate)
        {
            explored = true;
            action = agent.GenerateRandomAction();
        }
        else
        {
            // Get the agent's response to the observation and reward.
            action = agent.Search();
        }

        // Pass the agent's action to the environment, then record it in the agent.
        env.PerformAction(action);
        agent.ModelUpdateAction(action);

        cycleTimer.Stop();
        var timeTaken = cycleTimer.Elapsed;

        Console.WriteLine("{0}:\t{1},{2},{3}\t{4},{5} \t{6},{7}\t>{8},{9}",
            cycle, observation, reward, action,
            explored, exploreRate,
            agent.TotalReward, agent.AverageReward(),
            timeTaken, agent.ModelSize()
        );

        if (explore)
        {
            exploreRate *= exploreDecay;
        }
        cycle += 1;
    }
}

public abstract ModelUpdatePercept ( int observation, int reward ) : void
observation	int
reward	int
Résultat	void