// Takes a snapshot of the agent's bookkeeping state so the context tree and
// history can be reverted after a simulated (Monte Carlo) interaction.
public CtwContextTreeUndo(MC_AIXI_CTW agent)
 {
     this.Age         = agent.Age;
     this.TotalReward = agent.TotalReward;
     this.HistorySize = agent.history_size();
     this.LastUpdate  = agent.LastUpdate;
 }
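For context, a minimal, self-contained sketch of the save-and-revert pattern this snapshot enables. The AgentState type and its members below are assumptions made for illustration only; they are not the actual members of MC_AIXI_CTW or CtwContextTreeUndo.

 // Hypothetical illustration of snapshot/revert (names are assumptions, not the real API).
 class AgentState
 {
     public int Age;
     public double TotalReward;
     public int HistorySize;

     // Capture the fields that a simulated interaction will mutate.
     public AgentState Snapshot() =>
         new AgentState { Age = Age, TotalReward = TotalReward, HistorySize = HistorySize };

     // Restore the captured fields, undoing the simulated interaction.
     public void Revert(AgentState undo)
     {
         Age = undo.Age;
         TotalReward = undo.TotalReward;
         HistorySize = undo.HistorySize;
     }
 }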
Example #3
        public AgentTest()
        {
            // Small parameter values keep the test fast: a shallow context tree,
            // a short planning horizon and few Monte Carlo simulations.
            this.Options = new Dictionary<string, string>();

            Options["ct-depth"] = "4";
            Options["agent-horizon"] = "6";
            Options["mc-simulations"] = "200";

            // A simple coin-flip environment is enough to exercise the agent.
            this.Env = new CoinFlip(Options);

            this.Agent = new MC_AIXI_CTW(Env, Options);
        }
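The same three options would appear in every such test; a small helper that builds the dictionary could avoid the repetition if more environments get exercised. The sketch below is only that: the MakeTestOptions name and defaults are hypothetical, not part of the project.

        // Hypothetical helper (name is an assumption) that centralises the test
        // configuration so other environment tests can reuse it.
        private static Dictionary<string, string> MakeTestOptions(
            string depth = "4", string horizon = "6", string simulations = "200")
        {
            return new Dictionary<string, string>
            {
                ["ct-depth"] = depth,
                ["agent-horizon"] = horizon,
                ["mc-simulations"] = simulations,
            };
        }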
Example #4
        private static void Main(string[] args)
        {
            // Launch the interactive console UI and exit; the manual setup that
            // follows is unreachable unless these two lines are removed.
            new ConsoleEnvUI();
            return;

            var options = new Dictionary <string, string>();

            //POSSIBLE OPTIONS:

            //ctw-model: possible values ct/ctf/cuda. Which CTW tree implementation the agent should use.
            options["ctw-model"] = "ctf";

            //exploration: (possible values 0.0-1.0) initial exploration probability,
            //      i.e. the probability of taking a random action.
            options["exploration"] = "0.99";

            //explore-decay: (possible values 0.0-1.0) the exploration probability decays exponentially;
            //      it is multiplied by this factor every cycle.
            options["explore-decay"] = "0.9995";

            //ct-depth: (int, 1+, common values 8-100) depth of the CTW tree,
            //      i.e. how many bits of history it looks back on when predicting.
            options["ct-depth"] = "96";

            //agent-horizon: (int, 1+) how many bits into the future the agent tries to predict;
            //      the predicted futures are scored to decide how good each action is.
            options["agent-horizon"] = "4";

            //mc-simulations: (int, 1+) number of Monte Carlo simulations per decision.
            //      Higher values give better convergence (better data for the decision) but run more slowly.
            options["mc-simulations"] = "200";

            //terminate-age: (int, 0+) how many cycles to run before stopping; 0 = run forever.
            //      One cycle = the environment gives an observation and reward and the agent reacts with an action.
            options["terminate-age"] = "10000";


            var env = new RockPaperScissorsEnvironment(options);

            var agent = new MC_AIXI_CTW(env, options);

            var startingTime = DateTime.Now;

            InteractionLoop(agent, env, options);
            var endingTime = DateTime.Now;

            Console.WriteLine("time: {0}", endingTime - startingTime);

            Console.ReadLine();
        }
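InteractionLoop itself is not shown on this page. The sketch below only illustrates the cycle the comments above describe (observation and reward in, action out, exploration decaying each cycle); every member name used on the agent and environment is an assumption for illustration, not the port's actual API.

        // Hypothetical sketch of the interaction cycle; commented-out calls stand in
        // for agent/environment members whose real names are not shown here.
        private static void InteractionLoopSketch(
            MC_AIXI_CTW agent, RockPaperScissorsEnvironment env, Dictionary<string, string> options)
        {
            var explore = double.Parse(options["exploration"]);
            var decay = double.Parse(options["explore-decay"]);
            var terminateAge = int.Parse(options["terminate-age"]);

            for (var cycle = 0; terminateAge == 0 || cycle < terminateAge; cycle++)
            {
                // The environment gives an observation and a reward...
                // agent.ModelUpdatePercept(env.Observation, env.Reward);

                // ...the agent either explores (with probability 'explore') or plans
                // via Monte Carlo search...
                // var action = rng.NextDouble() < explore ? agent.GenerateRandomAction() : agent.Search();

                // ...and the environment reacts to the chosen action.
                // env.PerformAction(action);
                // agent.ModelUpdateAction(action);

                // The exploration probability decays exponentially each cycle.
                explore *= decay;
            }
        }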