private void loadNet_Click(object sender, EventArgs e)
        {
            // Open the network file
            using (FileStream fstream = new FileStream(netFile, FileMode.Open))
            {
                // Load the network from file
                qAgent = new BinaryFormatter().Deserialize(fstream) as QAgent;

                // REDO: load instance to singleton *** ExperienceShared has to be annotated as [Serializable]
                if (chkSharedExperience.Checked && staticExperience)
                {
                    ((DeepQLearning.DRLAgent.DeepQLearnShared)qAgent.w.agents[0].brain).Init(qAgent.w.agents[0].brain.experience);
                }
                if (chkSharedExperience.Checked && !staticExperience)
                {
                    ((DeepQLearning.DRLAgent.DeepQLearnSharedSingleton)qAgent.w.agents[0].brain).Init(qAgent.w.agents[0].brain.experience);
                }

                qAgent.Reinitialize();
            }

            if (workerThread == null)
            {
                workerThread = new Thread(new ThreadStart(BackgroundThread));
                workerThread.Start();
            }

            displayBox.Text = "QNetwork loaded successfully";
        }
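The REDO note above hinges on BinaryFormatter being able to round-trip the shared experience buffer, which it can only do for types marked [Serializable]. A minimal stand-in sketch of that requirement (SharedExperienceDemo is illustrative, not the library's ExperienceShared type; it assumes System and System.Collections.Generic are imported):

        // Without [Serializable], BinaryFormatter.Serialize throws a SerializationException
        // when the agent graph containing this object is written to disk.
        [Serializable]
        public class SharedExperienceDemo
        {
            // Stored transitions; every type reachable from here must also be serializable.
            public List<double[]> Transitions = new List<double[]>();
        }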
Example #2
        static void comRunQ()
        {
            Console.WriteLine("Q is player..?");
            byte temp3 = byte.Parse(Console.ReadLine());

            Console.WriteLine("How many games?");
            int temp1 = int.Parse(Console.ReadLine());

            Console.WriteLine("Board size?");
            int temp4 = int.Parse(Console.ReadLine());

            Console.WriteLine("Which opponent?");
            string        command = Console.ReadLine();
            StreamReader  reader  = new StreamReader("qdatafiles.txt");
            List <String> files   = new List <string>();

            while (reader.Peek() != -1)
            {
                files.Add(reader.ReadLine());
            }
            reader.Close();
            foreach (String s in files)
            {
                Console.WriteLine("Using file " + s);
                QAgent qagentm = new QAgent(temp3);
                qagentm.Load(s);
                qagentm.TurnOffExploration();
                qagentm.TurnOffLearning();
                RunQ(temp3, qagentm, command, temp1, temp4);
            }
        }
Example #3
        private void Init(QAgent agent, QOption option)
        {
            if (Benchmark)
            {
                BenchmarkSave.CurrentTestID = _instance.BenchmarkID;
                BenchmarkSave.Runs          = _instance.BenchmarkRuns;
            }
            else if (Mode == QAIMode.Testing && BenchmarkID != null && !BenchmarkID.Equals(""))
            {
                BenchmarkSave.ModelPath = _instance.BenchmarkID;
            }
            else
            {
                BenchmarkSave.CurrentTestID = agent.AI_ID().ID;
                BenchmarkSave.Runs          = 1;
            }
            Debug.Log("Running " + BenchmarkSave.ModelPath + " in mode " + Mode);

            _stopwatch = Stopwatch.StartNew();
            if (Tester != null)
            {
                Tester.Init();
            }

            DontDestroyOnLoad(gameObject);
            switch (Mode)
            {
            case QAIMode.Imitating: {
                _imitation = new QImitation();
                break;
            }

            default: {
                var qlCNN = new QLearningCNN(PrioritizedSweeping, option);
                _qlearning = qlCNN;
                _qlearning.Reset(agent);

                if (Remake)
                {
                    _qlearning.RemakeModel(agent.GetState());
                }
                else
                {
                    _qlearning.LoadModel();
                }

                if (VisualizeNetwork)
                {
                    _visualizer = _qlearning.CreateVisualizer();
                }

                qlCNN.CNN.ValuesComputed += (data, isTraining) => { if (NetworkValuesUpdated != null)
                                                                    {
                                                                        NetworkValuesUpdated(data, isTraining);
                                                                    }
                };
                break;
            }
            }
        }
Example #4
        public static SARS MakeSARS(this QAgent agent, QAction move, QState state)
        {
            move.Invoke();
            var s0 = agent.GetState();

            return(new SARS(state, move, s0));
        }
Example #5
 public static QAction ToQAction(this QAgent agent, Action a)
 {
     return(agent.GetType().GetMethods(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance)
            .Where(m => m.Name == a.Method.Name)
            .Where(m => m.GetCustomAttributes(typeof(QBehavior), true).Length > 0)
            .Select(m => ((QBehavior)m.GetCustomAttributes(typeof(QBehavior), true).First()).ObtainAction(agent, m.Name)).First());
 }
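The lookup above relies on a method-group delegate whose Method.Name matches an agent method carrying the [QBehavior] attribute. A self-contained sketch of the same reflection pattern, using stand-in types (DemoBehavior and DemoAgent are illustrative, not part of the QAgent library):

 using System;
 using System.Linq;
 using System.Reflection;

 // Stand-in for the [QBehavior] marker attribute.
 [AttributeUsage(AttributeTargets.Method)]
 public class DemoBehavior : Attribute { }

 public class DemoAgent
 {
     [DemoBehavior]
     public void MoveUp() { Console.WriteLine("MoveUp invoked"); }

     public void NotABehavior() { }
 }

 public static class DemoProgram
 {
     public static void Main()
     {
         var agent = new DemoAgent();
         Action a  = agent.MoveUp;

         // Mirror ToQAction: find the agent method with the delegate's name that carries
         // the marker attribute, then rebind it as an Action on the agent instance.
         MethodInfo m = agent.GetType()
             .GetMethods(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance)
             .Where(x => x.Name == a.Method.Name)
             .First(x => x.GetCustomAttributes(typeof(DemoBehavior), true).Length > 0);

         var rebound = (Action)Delegate.CreateDelegate(typeof(Action), agent, m.Name);
         rebound(); // prints "MoveUp invoked"
     }
 }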
Example #6
        static void TrainMadQ(byte qnumber, QAgent agent, String logname, String savename, int iterations)
        {
            Virus        virus = new Virus(2, 5);
            int          wins = 0, wins2 = 0;
            byte         oppnumber = qnumber == 1 ? (byte)2 : (byte)1;
            Agent        opp       = new MixedAgent(0.1, false, oppnumber);
            StreamWriter writer    = new StreamWriter(logname);

            for (int i = 1; i <= iterations; i++)
            {
                int winner = RunGame(virus, qnumber == 1 ? (Agent)agent : opp, qnumber == 2 ? (Agent)agent : opp);
                wins  += winner == 1 ? 1 : 0;
                wins2 += winner == 1 ? 1 : 0;

                if (i % 100 == 0)
                {
                    writer.WriteLine(wins2);
                    wins2 = 0;
                }
                if (i % 10000 == 0)
                {
                    agent.Save(savename);
                    Console.WriteLine("Iteration: " + i);
                    Console.WriteLine("Wins: " + wins);
                    wins = 0;
                }
                virus = new Virus(2, 5);
            }
            writer.Close();
        }
Example #7
        static void Main(string[] args)
        {
            QAgent <int, int> agent = new QAgent <int, int>();

            int secretNumber = 5;

            //for each state we can add 1 and subtract 1
            agent.RequestActionsForState = (int state) => new List <int>()
            {
                -1, 1
            };


            for (int i = 0; i < 10; i++)
            {
                int guessedNumber = 0;
                Console.WriteLine("--------------------------");
                do
                {
                    //flag if game is finished
                    bool finish    = false;
                    int  prevstate = guessedNumber;
                    var  action    = agent.GetAction(guessedNumber);
                    var  reward    = -0.1;


                    //execute action
                    guessedNumber += action;
                    Console.WriteLine(guessedNumber);



                    //specify the rule that the guessed number can't be less than 0
                    if (guessedNumber < 0)
                    {
                        reward = -1;
                        finish = true;
                    }

                    //rule for victory
                    if (guessedNumber == secretNumber)
                    {
                        reward = 1;
                        finish = true;
                    }

                    //learn what would happen after action is executed
                    agent.UpdateState(prevstate, action, reward, guessedNumber);

                    //finish the game
                    if (finish)
                    {
                        break;
                    }
                } while (true);
            }

            Console.ReadLine();
        }
Example #8
 public override bool SetupNextTest(QAgent agent)
 {
     if (_testStarted)
     {
         return(false);
     }
     _timeStart   = Time.time;
     _testStarted = true;
     return(true);
 }
Example #9
        static void comTrainMadQ()
        {
            Console.WriteLine("Q is player..?");
            byte   temp3  = byte.Parse(Console.ReadLine());
            QAgent qagent = new QAgent(temp3);

            qagent.Load("qmad");
            qagent.MinLearning = 0.05;
            Console.WriteLine("How many iterations?");
            int temp1 = int.Parse(Console.ReadLine());

            TrainMadQ(temp3, qagent, "qmadlog", "qmad", temp1);
        }
Example #10
        static void RunQ(byte qnumber, QAgent agent, String opponent, int iterations, int size)
        {
            Virus virus     = new Virus(2, size);
            int   wins      = 0;
            byte  oppnumber = qnumber == 1 ? (byte)2 : (byte)1;

            Agent opp;

            switch (opponent)
            {
            case "brute":
                opp = new BruteForceAgent(oppnumber);
                break;

            case "minimax4":
                opp = new MinimaxAgent(4, oppnumber);
                break;

            case "minimax3":
                opp = new MinimaxAgent(3, oppnumber);
                break;

            case "minimax2":
                opp = new MinimaxAgent(2, oppnumber);
                break;

            default:
                opp = new BruteForceAgent(oppnumber);
                break;
            }

            int n = 0;

            while (File.Exists("qwinlog" + n))
            {
                n++;
            }
            StreamWriter writer = new StreamWriter("qwinlog" + n);

            for (int i = 1; i <= iterations; i++)
            {
                int winner = RunGame(virus, qnumber == 1 ? (Agent)agent : opp, qnumber == 2 ? (Agent)agent : opp);
                wins += winner == 1 ? 1 : 0;
                virus = new Virus(2, size);
            }
            writer.WriteLine(wins);
            Console.WriteLine("Iteration: " + iterations);
            Console.WriteLine("Wins: " + wins);
            wins = 0;
            writer.Close();
        }
Example #11
        public static void Imitate(QAgent agent, QState state, Action a)
        {
            if (_instance == null || _instance.Mode != QAIMode.Imitating)
            {
                return;
            }
            var terminal = _instance._imitation.Imitate(state, agent.ToQAction(a));

            if (terminal)
            {
//	        _instance._imitation.Save(); // Saving is now done in the Option Window, where the learning is started.
                EditorApplication.isPlaying = false;
            }
        }
Example #12
 public override bool SetupNextTest(QAgent agent)
 {
     Running        = true;
     Time.timeScale = 3;
     _scores.Add(new TestScore());
     _iteration++;
     FindObjectOfType <PongBall>().Reset(new Vector2(-1, y));
     y += 0.2f;
     if (Math.Abs(y) < 0.01) //y == 0
     {
         y += 0.2f;
     }
     return(y <= 1);
 }
Example #13
        private void loadNet_Click(object sender, EventArgs e)
        {
            // Load the network from file
            using (FileStream fstream = new FileStream(netFile, FileMode.Open))
            {
                qAgent = new BinaryFormatter().Deserialize(fstream) as QAgent;
                qAgent.Reinitialize();
            }

            if (workerThread == null)
            {
                workerThread = new Thread(new ThreadStart(BackgroundThread));
                workerThread.Start();
            }
        }
Example #14
    public override void OnActionTaken(QAgent agent, QAction action, QState state)
    {
        var car = (SlotCar)agent;

        if (!car.OnTrack && !_isCrashing)
        {
            _isCrashing = true;
            _crashes++;
        }
        else if (car.OnTrack && _isCrashing)
        {
            _isCrashing = false;
        }
        _maxDist = car.DistanceTravelled - car.StartPosition;
    }
Example #15
 public static void InitAgent(QAgent agent, QOption option = null)
 {
     option = option ?? new QOption();
     if (_instance == null)
     {
         _instance = FindObjectOfType <QAIManager>();
         _instance.Init(agent, option);
     }
     BenchmarkSave.SaveBenchmarks = _instance.Benchmark;
     _instance._sceneIsOver       = false;
     _instance._testIsOver        = false;
     Agent = agent;
     if (_instance.Mode != QAIMode.Imitating)
     {
         _instance._qlearning.Reset(agent);
     }
 }
Example #16
        public override bool SetupNextTest(QAgent agent)
        {
            var visualizer = FindObjectOfType <GridResultsVisualizer>();

            visualizer.enabled = true;
            visualizer.DrawResults(_results);

            if (_positions.Count == 0)
            {
                return(false);
            }
            RunPosistion   = _positions[0];
            RunPosistion.y = 1;
            ((GridWoman)agent).transform.position = RunPosistion;
            _positions.RemoveAt(0);
            return(true);
        }
Example #17
        static void GatherQDataFull(String opponent, int iterations, int saveinterval, byte player, double disc, double lrmod, double lrstrt, double exmod, double exstrt, bool lrsqrt, bool exsqrt, double randomness, double minlearn, double initvalue = 0)
        {
            QAgent agent;
            String name = opponent + disc + "Q_L" + lrmod + (lrsqrt ? "persqrt" : "per1") + "_E" + exmod + (exsqrt ? "persqrt" : "per1") + "r" + randomness + "m" + minlearn + "ls" + lrstrt + "xs" + exstrt + "i" + initvalue;

            Console.WriteLine("Gathering data for Q: " + name + ":");
            for (int i = 0; i < 10; i++)
            {
                if (!File.Exists(name + ".log" + i))
                {
                    agent             = new QAgent(player, disc, lrmod, lrstrt, exmod, exstrt, lrsqrt ? 0.5 : 1, exsqrt ? 0.5 : 1, initvalue);
                    agent.RandomRate  = randomness;
                    agent.MinLearning = minlearn;
                    TrainQ(5, player, agent, opponent, name + ".log" + i, name + ".sav" + i, iterations, saveinterval);
                }
            }
        }
Example #18
 public QAction ObtainAction(QAgent agent, string name)
 {
     if (_conditional)
     {
         var p =
             agent.GetType()
             .GetMethods(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance)
             .FirstOrDefault(m => m.Name == _predicate);
         if (p == default(MethodInfo))
         {
             throw new Exception("Predicate method " + _predicate + " does not exist.");
         }
         if (p.GetCustomAttributes(typeof(QPredicate), true).Length == 0)
         {
             throw new Exception("Predicate method " + _predicate + " is not properly annotated as a QPredicate.");
         }
     }
     return(new QAction(
                name,
                (Action)Delegate.CreateDelegate(typeof(Action), agent, name),
                _conditional ? (QPredicate.Basic)Delegate.CreateDelegate(typeof(QPredicate.Basic), agent, _predicate) : null
                ));
 }
Example #19
        public TTTModule() : base("/ttt")
        {
            QAgent <string, int> agentX = new QAgent <string, int>();

            agentX.RequestActionsForState = SpaceIndexes;

            QAgent <string, int> agentO = new QAgent <string, int>();

            agentO.RequestActionsForState = SpaceIndexes;

            Post("/predict", o =>
            {
                var name   = this.Request.Query["name"].ToString();
                var reader = new StreamReader(Request.Body);
                var state  = reader.ReadToEnd();
                if (File.Exists($"agent{name}.json"))
                {
                    agentX.Deserialize(File.ReadAllText($"agent{name}.json"));
                }

                return(agentX.GetAction(state).ToString());
            });

            Post("/learn", o =>
            {
                var name = this.Request.Query["name"].ToString();
                if (File.Exists($"agent{name}.json"))
                {
                    agentX.Deserialize(File.ReadAllText($"agent{name}.json"));
                }

                agentX.UpdateState(this.Request.Form["prevState"].ToString(), int.Parse(this.Request.Form["action"].ToString()), int.Parse(this.Request.Form["reward"].ToString()), this.Request.Form["newState"].ToString());

                File.WriteAllText($"agent{name}.json", agentX.Serialize());
                return("OK");
            });
        }
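A client-side sketch of how the two routes above could be exercised; the host, port, and board-state encoding are assumptions, while the route paths, the name query parameter, and the form field names come from the module itself:

        using System;
        using System.Collections.Generic;
        using System.Net.Http;
        using System.Threading.Tasks;

        public static class TttClientDemo
        {
            public static async Task Main()
            {
                var http = new HttpClient { BaseAddress = new Uri("http://localhost:8080") };

                // Ask agent "X" for an action; the current board state is sent as the raw request body.
                var board    = "---------"; // assumed 9-cell board encoding
                var response = await http.PostAsync("/ttt/predict?name=X", new StringContent(board));
                var action   = await response.Content.ReadAsStringAsync();

                // Report the observed transition so the agent can update its Q-values.
                var form = new FormUrlEncodedContent(new Dictionary<string, string>
                {
                    ["prevState"] = board,
                    ["action"]    = action,
                    ["reward"]    = "0",
                    ["newState"]  = "----X----" // assumed successor board
                });
                await http.PostAsync("/ttt/learn?name=X", form);
            }
        }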
Example #20
        static void comTrainQ(bool load)
        {
            Console.WriteLine("Q is player..?");
            byte   temp3  = byte.Parse(Console.ReadLine());
            QAgent qagent = new QAgent(temp3);

            if (load)
            {
                qagent.Load("TrainingData");
            }
            Console.WriteLine("How many iterations?");
            int temp1 = int.Parse(Console.ReadLine());

            Console.WriteLine("Save how often?");
            int temp2 = int.Parse(Console.ReadLine());

            Console.WriteLine("Board size?");
            int temp4 = int.Parse(Console.ReadLine());

            Console.WriteLine("Which opponent?");
            string command = Console.ReadLine();

            TrainQ(temp4, temp3, qagent, command, "log", "TrainingData", temp1, temp2);
        }
Example #21
 public abstract bool SetupNextTest(QAgent agent);
Example #22
 public abstract void OnActionTaken(QAgent agent, QAction action, QState state);
Example #23
        static void Main(string[] args)  // b r o k e n
        {
            Console.ForegroundColor = ConsoleColor.DarkMagenta;
            if (File.Exists(qAgentBrainPath))
            {
                using (FileStream fstream = new FileStream(qAgentBrainPath, FileMode.Open, FileAccess.Read, FileShare.Read)) {
                    qAgent = new BinaryFormatter().Deserialize(fstream) as QAgent;
                    qAgent.Reinitialize();
                }
                Console.WriteLine("QAgent loaded");
            }
            else
            {
                var num_inputs      = 6; // number of state inputs fed to the network
                var num_actions     = 3; // number of discrete actions available to the agent
                var temporal_window = 1; // amount of temporal memory. 0 = agent lives in-the-moment :)
                var network_size    = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;


                // config brain
                var layer_defs = new List <LayerDefinition>();

                // the value function network computes a value of taking any of the possible actions
                // given an input state. Here we specify one explicitly the hard way
                // but user could also equivalently instead use opt.hidden_layer_sizes = [20,20]
                // to just insert simple relu hidden layers.
                layer_defs.Add(new LayerDefinition {
                    type = "input", out_sx = 1, out_sy = 1, out_depth = network_size
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "regression", num_neurons = num_actions
                });

                // options for the Temporal Difference learner that trains the above net
                // by backpropping the temporal difference learning rule.
                //var opt = new Options { method="sgd", learning_rate=0.01, l2_decay=0.001, momentum=0.9, batch_size=10, l1_decay=0.001 };
                Options opt = new Options {
                    method = "adadelta", l2_decay = 0.001, batch_size = 10
                };

                TrainingOptions tdtrainer_options = new TrainingOptions();
                tdtrainer_options.temporal_window       = temporal_window;
                tdtrainer_options.experience_size       = 30000;
                tdtrainer_options.start_learn_threshold = 1000;
                tdtrainer_options.gamma = 0.7;
                tdtrainer_options.learning_steps_total  = 200000;
                tdtrainer_options.learning_steps_burnin = 3000;
                tdtrainer_options.epsilon_min           = 0.05;
                tdtrainer_options.epsilon_test_time     = 0.00;
                tdtrainer_options.layer_defs            = layer_defs;
                tdtrainer_options.options = opt;

                DeepQLearn brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
                qAgent = new QAgent(brain);
            }
            qAgent.startlearn();
            new Thread(() => {
                while (true)
                {
                    if (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond % 31 /*arbitrary*/ == 0)
                    {
                        using (FileStream fstream = new FileStream(qAgentBrainPath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite)) {
                            new BinaryFormatter().Serialize(fstream, qAgent);
                        }
                    }
                    qAgent.tick();
                }
            }).Start();
        }
Example #24
 public void Reset(QAgent agent)
 {
     Actions = agent.GetQActions().AsReadOnly();
 }
Example #25
        public LabyrinthModule() : base("/labyrinth")
        {
            QAgent <int, string> agent = new QAgent <int, string>();

            agent.RequestActionsForState = i =>
            {
                var res = new List <string>()
                {
                    "up", "down", "left", "right"
                };
                int y = i / 10;
                int x = i - y * 10;
                if (x == 0)
                {
                    res.Remove("left");
                }
                if (x == 4)
                {
                    res.Remove("right");
                }
                if (y == 4)
                {
                    res.Remove("down");
                }
                if (y == 0)
                {
                    res.Remove("up");
                }


                return(res);
            };


            Post("/predict", o =>
            {
                var name   = this.Request.Query["name"].ToString();
                var reader = new StreamReader(Request.Body);
                var state  = reader.ReadToEnd();
                if (File.Exists($"agent{name}.json"))
                {
                    agent.Deserialize(File.ReadAllText($"agent{name}.json"));
                }

                return(agent.GetAction(int.Parse(state)).ToString());
            });

            Post("/learn", o =>
            {
                var name = this.Request.Query["name"].ToString();
                if (File.Exists($"agent{name}.json"))
                {
                    agent.Deserialize(File.ReadAllText($"agent{name}.json"));
                }

                agent.UpdateState(int.Parse(this.Request.Form["prevState"].ToString()), this.Request.Form["action"].ToString(), int.Parse(this.Request.Form["reward"].ToString()), int.Parse(this.Request.Form["newState"].ToString()));

                File.WriteAllText($"agent{name}.json", agent.Serialize());
                return("OK");
            });
            Post("/overlay", o =>
            {
                var name = this.Request.Query["name"].ToString();
                if (File.Exists($"agent{name}.json"))
                {
                    agent.Deserialize(File.ReadAllText($"agent{name}.json"));
                }

                double[][] res = new double[5][];

                for (int y = 0; y < 5; y++)
                {
                    res[y] = new double[5];
                    for (int x = 0; x < 5; x++)
                    {
                        var state = y * 10 + x;
                        res[y][x] = agent.GetStateMaxScore(state);
                    }
                }

                return(JsonConvert.SerializeObject(res));
            });
        }
Example #26
        public VirusInterface(Virus virus, int tilesize = 20, bool immediateAI = false, params String[] names)
        {
            InitializeComponent();
            this.virus       = virus;
            this.tileSize    = tilesize;
            this.immediateAI = immediateAI;
            this.MouseClick += MouseClickHandler1;
            this.Size        = new Size(
                virus.Size * tileSize + 17,
                virus.Size * tileSize + 55);
            this.names.Add("Player 0");
            this.names.AddRange(names);
            while (this.names.Count < virus.Players + 1)
            {
                this.names.Add("Player " + this.names.Count);
            }
            //Save("Lalalafil");
            agents = new Agent[this.names.Count];
            int n = 1;

            for (byte i = 1; i < this.names.Count; i++)
            {
                String p = this.names[i];
                switch (p)
                {
                case "QAI":
                    agents[i] = new QAgent(i);
                    if (File.Exists("TrainingData.Q") && File.Exists("TrainingData.N"))
                    {
                        ((QAgent)agents[i]).Load("TrainingData");
                        ((QAgent)agents[i]).TurnOffExploration();
                        ((QAgent)agents[i]).TurnOffLearning();
                    }
                    this.names[i] = "AI " + n;
                    n++;
                    break;

                case "AnnAI":
                    agents[i]     = new AnnAgent(false, virus.Size, i);
                    this.names[i] = "AI " + n;
                    n++;
                    break;

                case "MinimaxAI":
                    agents[i]     = new MinimaxAgent(4, i);
                    this.names[i] = "AI " + n;
                    n++;
                    break;

                case "MiniMaxMixAI":
                    if (File.Exists("TrainingData.Q"))
                    {
                        agents[i] = new MiniMaxMixAgent("TrainingData", 2, i);
                    }
                    else
                    {
                        agents[i] = new BruteForceAgent(i);
                    }
                    this.names[i] = "AI " + n;
                    n++;
                    break;

                case "MixedAI":
                    agents[i]     = new MixedAgent(0.5, false, i);
                    this.names[i] = "AI " + n;
                    n++;
                    break;

                case "BruteAI":
                    agents[i]     = new BruteForceAgent(i);
                    this.names[i] = "AI " + n;
                    n++;
                    break;

                case "RandomAI":
                    agents[i]     = new RandomAgent(i);
                    this.names[i] = "AI " + n;
                    n++;
                    break;

                case "SimpleAI":
                    agents[i]     = new SimpleAgent(i);
                    this.names[i] = "AI " + n;
                    n++;
                    break;
                }
            }

            message = this.names[1] + "'s turn";

            colors    = new Color[virus.Players + 1];
            colors[0] = Color.White;
            colors[1] = Color.FromArgb(128, 160, 255);
            colors[2] = Color.FromArgb(96, 255, 96);
            if (virus.Players > 2)
            {
                colors[3] = Color.FromArgb(255, 96, 96);
            }
            if (virus.Players > 3)
            {
                colors[4] = Color.FromArgb(255, 255, 64);
            }
            Random rand = new Random();

            for (int i = 5; i <= virus.Players; i++)
            {
                colors[i] = Color.FromArgb(rand.Next(256), rand.Next(256), rand.Next(256));
            }
        }
Example #27
        public override void OnActionTaken(QAgent agent, QAction action, QState state)
        {
            var distToGoal = (((GridWoman)agent).transform.position - Goal.Position).magnitude;

            _distScores.Add(1 / (distToGoal + 1));
        }
Example #28
 public override void OnActionTaken(QAgent agent, QAction action, QState state)
 {
 }
Example #29
        private void startLearning_Click(object sender, EventArgs e)
        {
            if (qAgent == null)
            {
                var num_inputs      = 27; // 9 eyes, each sees 3 numbers (wall, green, red thing proximity)
                var num_actions     = 5;  // 5 possible angles agent can turn
                var temporal_window = 4;  // amount of temporal memory. 0 = agent lives in-the-moment :)
                var network_size    = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

                var layer_defs = new List <LayerDefinition>();

                // the value function network computes a value of taking any of the possible actions
                // given an input state. Here we specify one explicitly the hard way
                // but user could also equivalently instead use opt.hidden_layer_sizes = [20,20]
                // to just insert simple relu hidden layers.
                layer_defs.Add(new LayerDefinition {
                    type = "input", out_sx = 1, out_sy = 1, out_depth = network_size
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "fc", num_neurons = 96, activation = "relu"
                });
                layer_defs.Add(new LayerDefinition {
                    type = "regression", num_neurons = num_actions
                });

                // options for the Temporal Difference learner that trains the above net
                // by backpropping the temporal difference learning rule.
                //var opt = new Options { method="sgd", learning_rate=0.01, l2_decay=0.001, momentum=0.9, batch_size=10, l1_decay=0.001 };
                var opt = new Options {
                    method = "adadelta", l2_decay = 0.001, batch_size = 10
                };

                var tdtrainer_options = new TrainingOptions();
                tdtrainer_options.temporal_window       = temporal_window;
                tdtrainer_options.experience_size       = 30000;
                tdtrainer_options.start_learn_threshold = 1000;
                tdtrainer_options.gamma = 0.7;
                tdtrainer_options.learning_steps_total  = 200000;
                tdtrainer_options.learning_steps_burnin = 3000;
                tdtrainer_options.epsilon_min           = 0.05;
                tdtrainer_options.epsilon_test_time     = 0.00;
                tdtrainer_options.layer_defs            = layer_defs;
                tdtrainer_options.options = opt;

                var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
                qAgent = new QAgent(brain, canvas.Width, canvas.Height);
            }
            else
            {
                qAgent.startlearn();
            }

            if (workerThread == null)
            {
                workerThread = new Thread(new ThreadStart(BackgroundThread));
                workerThread.Start();
            }
        }
Example #30
        static void TrainQ(int size, byte qnumber, QAgent agent, String opponent, String logname, String savename, int iterations, int saveinterval = 360)
        {
            Virus        virus = new Virus(2, size);
            int          wins = 0, wins2 = 0;
            byte         oppnumber = qnumber == 1 ? (byte)2 : (byte)1;
            Agent        opp       = new BruteForceAgent(oppnumber);
            StreamWriter writer    = new StreamWriter(logname);

            for (int i = 1; i <= iterations; i++)
            {
                switch (opponent)
                {
                case "brute":
                    break;

                case "minimax4":
                    opp = new MinimaxAgent(4, oppnumber);
                    break;

                case "minimax3":
                    opp = new MinimaxAgent(3, oppnumber);
                    break;

                case "minimax2":
                    opp = new MinimaxAgent(2, oppnumber);
                    break;

                default:
                    opp = new BruteForceAgent(oppnumber);
                    break;
                }

                int winner = RunGame(virus, qnumber == 1 ? (Agent)agent : opp, qnumber == 2 ? (Agent)agent : opp);
                wins  += winner == 1 ? 1 : 0;
                wins2 += winner == 1 ? 1 : 0;

                if (i % 100 == 0)
                {
                    if (agent.RandomRate == 0)
                    {
                        writer.WriteLine(wins2);
                    }
                    wins2 = 0;
                }
                if (i % saveinterval == 0)
                {
                    agent.Save(savename);
                    Console.WriteLine("Iteration: " + i);
                    Console.WriteLine("Wins: " + wins);
                    wins = 0;
                    if (agent.RandomRate > 0)
                    {
                        agent.TurnOffExploration();
                        agent.TurnOffLearning();
                        for (int j = 1; j <= 1000; j++)
                        {
                            virus  = new Virus(2, size);
                            winner = RunGame(virus, qnumber == 1 ? (Agent)agent : opp, qnumber == 2 ? (Agent)agent : opp);
                            wins  += winner == 1 ? 1 : 0;
                        }
                        writer.WriteLine(wins);
                        wins = 0;
                        agent.TurnOnExploration();
                        agent.TurnOnLearning();
                    }
                }
                virus = new Virus(2, size);
            }
            writer.Close();
        }