public double Sample(Agent agent, int horizon)
{
    double reward = 0.0;

    if (horizon == 0)
    {
        // Reached the planning horizon: no further reward can be accumulated.
        return reward;
    }
    else if (this.Type == ChanceNode)
    {
        // Chance node: sample a percept from the agent's model and descend into
        // the corresponding decision-node child.
        var percept = agent.GeneratePerceptAndUpdate();
        int observation = percept.Item1;
        int randomReward = percept.Item2;

        if (!this.Children.ContainsKey(observation))
        {
            // New observation: add it as a decision-node child.
            this.Children[observation] = new MonteCarloSearchNode(DecisionNode);
        }
        MonteCarloSearchNode observationChild = this.Children[observation];

        reward = randomReward + observationChild.Sample(agent, horizon - 1);
    }
    else if (this.Visits == 0)
    {
        // Unvisited decision node (or maximum tree depth exceeded): estimate the
        // remaining reward with a random playout.
        reward = agent.Playout(horizon);
    }
    else
    {
        // Previously visited decision node: choose an action via UCT and descend
        // into the corresponding chance-node child.
        int action = this.SelectAction(agent);
        agent.ModelUpdateAction(action);

        if (!this.Children.ContainsKey(action))
        {
            // This action is a new chance-node child.
            this.Children[action] = new MonteCarloSearchNode(ChanceNode);
        }
        MonteCarloSearchNode actionChild = this.Children[action];

        reward = actionChild.Sample(agent, horizon); // Note: it is unclear whether this should be horizon - 1 (see pyaixi).
    }

    // Incrementally update the node's mean reward and visit count.
    double visitsDouble = this.Visits;
    this.Mean = (reward + visitsDouble * this.Mean) / (1.0 + visitsDouble);
    this.Visits = this.Visits + 1;

    return reward;
}
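For context, Sample is typically driven from the agent's Search routine: a fresh root decision node is sampled many times over the planning horizon, and the action whose chance-node child accumulated the highest mean reward is returned. The following is a minimal sketch under that assumption; the simulation count mcSimulations and the SaveState/RestoreState helpers used to undo the model updates made during each simulation are hypothetical and not part of the code shown here.

// Sketch of a Search routine built on Sample (assumptions noted above).
public int Search(Agent agent, int mcSimulations)
{
    var root = new MonteCarloSearchNode(DecisionNode);

    for (var i = 0; i < mcSimulations; i++)
    {
        var state = agent.SaveState();      // hypothetical: snapshot the model/history state
        root.Sample(agent, agent.Horizon);  // run one simulation from the current state
        agent.RestoreState(state);          // hypothetical: undo the updates made during the simulation
    }

    // Pick the action whose chance-node child has the highest sampled mean reward.
    var bestAction = -1;
    var bestMean = double.NegativeInfinity;
    foreach (var action in agent.Environment.ValidActions)
    {
        if (root.Children.ContainsKey(action) && root.Children[action].Mean > bestMean)
        {
            bestMean = root.Children[action].Mean;
            bestAction = action;
        }
    }
    return bestAction;
}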
// Interaction loop between the agent and the environment.
// (In the other version this part is handled by BrainSimulator.)
// The interaction begins with the environment generating an observation and reward
// and giving them to the agent; the agent then generates an action and the cycle repeats.
public static void InteractionLoop(Agent agent, AIXIEnvironment env, Dictionary<string, string> options)
{
    Random rnd;
    if (options.ContainsKey("random-seed"))
    {
        int seed;
        int.TryParse(options["random-seed"], out seed);
        rnd = new Random(seed);
    }
    else
    {
        rnd = new Random();
    }

    // Exploration = try a random action.
    // The probability decays exponentially as exploreRate * exploreDecay ^ cycle.
    var exploreRate = 0.0;
    if (options.ContainsKey("exploration"))
    {
        exploreRate = Utils.MyToDouble(options["exploration"]);
    }
    var explore = exploreRate > 0;

    var exploreDecay = 0.0;
    if (options.ContainsKey("explore-decay"))
    {
        exploreDecay = Utils.MyToDouble(options["explore-decay"]);
    }

    Debug.Assert(0.0 <= exploreRate);
    Debug.Assert(0.0 <= exploreDecay && exploreDecay <= 1.0);

    // Automatic halting after a certain number of rounds.
    var terminateAge = 0;
    if (options.ContainsKey("terminate-age"))
    {
        terminateAge = Convert.ToInt32(options["terminate-age"]);
    }
    var terminateCheck = terminateAge > 0;
    Debug.Assert(0 <= terminateAge);

    // Once the learning period has passed, the agent stops improving its model and just uses it.
    var learningPeriod = 0;
    if (options.ContainsKey("learning-period"))
    {
        learningPeriod = Convert.ToInt32(options["learning-period"]);
    }
    Debug.Assert(0 <= learningPeriod);

    var cycle = 0;
    while (!env.IsFinished)
    {
        if (terminateCheck && agent.Age > terminateAge)
        {
            break;
        }
        var cycleStartTime = DateTime.Now;

        var observation = env.Observation;
        var reward = env.Reward;

        if (learningPeriod > 0 && cycle > learningPeriod)
        {
            explore = false;
        }

        // Give the observation and reward to the agent.
        agent.ModelUpdatePercept(observation, reward);

        var explored = false;
        int action;
        if (explore && rnd.NextDouble() < exploreRate)
        {
            explored = true;
            action = agent.GenerateRandomAction();
        }
        else
        {
            // Get the agent's response to the observation and reward.
            action = agent.Search();
        }

        // Pass the agent's action to the environment.
        env.PerformAction(action);
        agent.ModelUpdateAction(action);

        var timeTaken = DateTime.Now - cycleStartTime;
        Console.WriteLine("{0}:\t{1},{2},{3}\t{4},{5} \t{6},{7}\t>{8},{9}",
            cycle, observation, reward, action,
            explored, exploreRate,
            agent.TotalReward, agent.AverageReward(),
            timeTaken, agent.ModelSize());

        if (explore)
        {
            exploreRate *= exploreDecay;
        }

        cycle += 1;
    }
}
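A typical invocation might look like the sketch below: the options dictionary carries the string-valued settings that InteractionLoop parses at the top of the method. The CoinFlip environment and the Agent constructor signature are assumptions for illustration and may differ from the actual classes in this codebase.

// Hypothetical driver showing how InteractionLoop consumes the options dictionary.
var options = new Dictionary<string, string>
{
    { "exploration", "0.1" },       // initial probability of taking a random action
    { "explore-decay", "0.99" },    // per-cycle multiplicative decay of that probability
    { "terminate-age", "10000" },   // stop after this many agent cycles
    { "learning-period", "5000" },  // stop exploring/improving the model after this many cycles
    { "random-seed", "42" }
};

AIXIEnvironment env = new CoinFlip(options);  // assumed environment class
var agent = new Agent(env, options);          // assumed constructor signature
InteractionLoop(agent, env, options);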
public int SelectAction(Agent agent)
{
    Debug.Assert(agent.MaximumReward() != null, "MaximumReward() must not be null in SelectAction");

    // Exploration bias: the largest total reward obtainable over the remaining horizon.
    double exploreBias = (double)agent.Horizon * agent.MaximumReward().Value;
    double explorationNumerator = this.ExplorationConstant * Math.Log(this.Visits);

    int bestAction = -1;
    double bestPriority = double.NegativeInfinity;

    foreach (int action in agent.Environment.ValidActions)
    {
        MonteCarloSearchNode node = null;
        if (this.Children.ContainsKey(action))
        {
            node = this.Children[action];
        }

        double priority;
        if (node == null || node.Visits == 0)
        {
            // Previously unexplored action: use the fixed unexplored bias.
            priority = this.UnexploredBias;
        }
        else
        {
            // UCT priority: estimated mean reward plus an exploration bonus.
            priority = node.Mean + exploreBias * Math.Sqrt(explorationNumerator / node.Visits);
        }

        // The small random term breaks ties between actions of (almost) equal priority.
        if (priority > (bestPriority + Utils.RandomDouble(0, 0.001)))
        {
            bestAction = action;
            bestPriority = priority;
        }
    }
    return bestAction;
}
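The priority computed above follows the usual UCT form: the child's estimated mean reward plus an exploration bonus that grows with the logarithm of the parent's visit count and shrinks with the child's. The helper below merely restates that computation in isolation; its name and parameter names are illustrative, not part of the codebase.

// Illustrative restatement of the UCT priority used in SelectAction.
// explorationConstant corresponds to this.ExplorationConstant; rewardRange corresponds to
// agent.Horizon * agent.MaximumReward().Value in the method above.
private static double UctPriority(double childMean, int childVisits, int parentVisits,
                                  double explorationConstant, double rewardRange)
{
    return childMean + rewardRange * Math.Sqrt(explorationConstant * Math.Log(parentVisits) / childVisits);
}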