예제 #1
0
        /// <summary>
        /// Reads the ids of the eight cells surrounding the agent's current position.
        /// </summary>
        /// <param name="env">Environment whose <c>prostredie</c> grid is sampled (indexed as [row = y][column = x]).</param>
        /// <returns>
        /// Eight ids ordered clockwise starting from "up":
        /// up, up-right, right, down-right, down, down-left, left, up-left.
        /// A slot keeps the default value 0 when <c>JeMimoAreny</c> rejects the neighbour's coordinates.
        /// </returns>
        private int[] Radar(Prostredie env)
        {
            // Column / row offsets of the neighbours, in the clockwise order described above.
            int[] posunX = {  0,  1, 1, 1, 0, -1, -1, -1 };
            int[] posunY = { -1, -1, 0, 1, 1,  1,  0, -1 };

            var okolie = new int[posunX.Length];

            for (int smer = 0; smer < okolie.Length; smer++)
            {
                var susedX = currentPos.x + posunX[smer];
                var susedY = currentPos.y + posunY[smer];

                // 10, 10 are presumably the arena width/height expected by JeMimoAreny -- TODO confirm.
                if (JeMimoAreny(susedX, susedY, 10, 10))
                {
                    continue;
                }

                okolie[smer] = env.prostredie[susedY][susedX].id;
            }

            return okolie;
        }
예제 #2
0
        /// <summary>
        /// Prepares the agent and the environment for a new episode: removes leftover apples
        /// and mines, places the agent and fresh items, rebuilds the current Q-state and
        /// zeroes the collected apple / triggered mine counters.
        /// </summary>
        /// <param name="env">Environment to repopulate.</param>
        /// <param name="t">Index of the scripted testing scenario; only read when <paramref name="training"/> is false.</param>
        /// <param name="training">
        /// True: random start position (one of the first three training starts) and randomly
        /// generated items. False: start position and item positions come from the
        /// <c>testing_*</c> tables at index <paramref name="t"/>.
        /// </param>
        public void reset(Prostredie env, int t = 0, bool training = true)
        {
            // Remove every apple and mine left over from the previous episode.
            env.NahradObjekty(Jablko.Tag, new Cesta());
            env.NahradObjekty(Mina.Tag, new Cesta());

            if (training == true)
            {
                // Upper bound of Next is exclusive, so idx is 0, 1 or 2 (assuming r is System.Random).
                var idx = r.Next(0, 3);
                this.currentPos = new Vector2(Prostredie.startPositionX_training[idx], Prostredie.startPositionY_training[idx]);

                // 3..5 apples
                apple_count = r.Next(2, 5) + 1;
                for (int i = 0; i < apple_count; i++)
                {
                    env.GenerateItem(new Jablko());
                }

                // 1..3 mines
                mine_count = r.Next(0, 3) + 1;
                for (int i = 0; i < mine_count; i++)
                {
                    env.GenerateItem(new Mina());
                }
            }
            else
            {
                this.currentPos = new Vector2(this.testing_startsPos[t].x, this.testing_startsPos[t].y);

                var applePositions = this.testing_applesPos[t];
                var minePositions  = this.testing_minesPos[t];

                // Keep the item totals in sync with the scripted scenario. Without this the
                // counts stay at whatever the last training episode drew (or at their initial
                // value), so any ratio computed from apples/apple_count or mines/mine_count
                // during testing is wrong or divides by zero.
                apple_count = applePositions.Length;
                mine_count  = minePositions.Length;

                for (int i = 0; i < applePositions.Length; i++)
                {
                    env.prostredie[applePositions[i].y][applePositions[i].x] = new Jablko();
                }

                for (int i = 0; i < minePositions.Length; i++)
                {
                    env.prostredie[minePositions[i].y][minePositions[i].x] = new Mina();
                }
            }

            // Snapshot the starting state only after all items are placed, so the radar sees them.
            stav = new AI.QLearning.QState
            {
                PositionX  = currentPos.x,
                PositionY  = currentPos.y,
                stateRadar = Radar(env)
            };

            this.apples = 0;
            this.mines  = 0;
        }
예제 #3
0
        /// <summary>
        /// Performs one agent step: chooses and executes an action, computes the reward,
        /// optionally updates the Q-table, advances the stored state and collects whatever
        /// item lies on the agent's cell.
        /// </summary>
        /// <param name="env">Environment the agent moves in.</param>
        /// <param name="ucenie">When true, exploration is allowed and the Q-table entry for the taken action is updated.</param>
        /// <param name="eps">Exploration threshold: while learning, an action from <c>sample</c> is used when a random draw falls below it.</param>
        /// <param name="odmena">Reward for the step: <c>env.Hodnotenie</c> at the agent's position for a valid action, otherwise -1.</param>
        /// <returns>The validity flag reported by <c>sample</c> / <c>Pohyb</c> for the executed action.</returns>
        public bool AktualizujAgenta(Prostredie env, bool ucenie, double eps, out float odmena)
        {
            int  zvolenaAkcia;
            bool platnyTah;

            // Best known action for the current state; null means the state is not in the table yet.
            var najlepsiaAkcia = qBrain.NajdiMaxAkciu(stav);

            /* ---------------- Action ---------------- */
            if (najlepsiaAkcia == null)
            {
                // Unknown state: act via sample(), then register the state with zeroed values for 4 actions.
                platnyTah = sample(out zvolenaAkcia);
                qBrain.Qtable.Add(stav, new float[] { 0f, 0f, 0f, 0f });
            }
            else if (ucenie && r.NextDouble() < eps)
            {
                // Explore (the random draw only happens while learning).
                platnyTah = sample(out zvolenaAkcia);
            }
            else
            {
                // Exploit the stored knowledge.
                zvolenaAkcia = najlepsiaAkcia.Value;
                platnyTah    = Pohyb((EAkcie)zvolenaAkcia, 10, 10);
            }

            odmena = platnyTah ? env.Hodnotenie(currentPos.x, currentPos.y) : -1.0f;

            /* ---------------- Feedback ---------------- */
            var nasledujuciStav = new AI.QLearning.QState
            {
                PositionX  = currentPos.x,
                PositionY  = currentPos.y,
                stateRadar = Radar(env)
            };

            if (ucenie)
            {
                // Value of the best action in the successor state; 0 when that state is still unknown.
                var   dalsiaAkcia = qBrain.NajdiMaxAkciu(nasledujuciStav);
                float dalsiaHodnota = dalsiaAkcia != null
                    ? qBrain.Qtable[nasledujuciStav][dalsiaAkcia.Value]
                    : 0f;

                // Update the Q-table entry for [state; action].
                qBrain.Aktualizuj(stav, zvolenaAkcia, odmena, dalsiaHodnota);
            }

            this.stav = nasledujuciStav;

            /* ---------------- Item pickup ---------------- */
            // The agent picked up an apple.
            if (env.prostredie[currentPos.y][currentPos.x].id == Jablko.Tag)
            {
                this.apples += 1;
                env.prostredie[currentPos.y][currentPos.x] = new Cesta();
            }

            // The agent triggered a mine.
            if (env.prostredie[currentPos.y][currentPos.x].id == Mina.Tag)
            {
                this.mines += 1;
                env.prostredie[currentPos.y][currentPos.x] = new Cesta();
            }

            return platnyTah;
        }
예제 #4
0
        /// <summary>
        /// Runs the agent for a number of episodes, either training it or replaying the
        /// scripted testing scenarios with console visualisation and logging.
        /// </summary>
        /// <param name="env">Environment shared by all episodes.</param>
        /// <param name="a">Agent being trained or evaluated.</param>
        /// <param name="episodes">Number of episodes; the episode index is also passed to <c>a.reset</c>.</param>
        /// <param name="steps">Maximum number of agent steps per episode.</param>
        /// <param name="training">
        /// True: epsilon starts at 1 and decays after each episode, nothing is logged.
        /// False: epsilon is 0, every step is drawn to the console with a 200 ms pause and
        /// each episode is written to <c>log_file</c> and added to the returned statistics.
        /// </param>
        /// <returns>Statistics collected from testing episodes (nothing is appended while training).</returns>
        private static Statistics run(Prostredie env, Agent a, int episodes, int steps, bool training = true)
        {
            var   vysledky = new Statistics();
            float epsilon  = training ? 1.0f : 0.0f;

            for (int episode = 0; episode < episodes; episode++)
            {
                var stopky = System.Diagnostics.Stopwatch.StartNew();

                a.reset(env, episode, training);

                int   dosiahnutyCiel = 0;   // 100 when the exit was reached, otherwise 0
                float skore          = 0;
                int   step;                 // declared outside the loop: the final value is logged

                for (step = 0; step < steps; step++)
                {
                    if (!training)
                    {
                        // Visualise the current board before the agent moves.
                        Console.Clear();
                        env.Vypis(a.currentPos.x, a.currentPos.y);
                        Thread.Sleep(200);
                    }

                    bool platnyTah = a.AktualizujAgenta(env, training, epsilon, out float odmena);
                    skore += odmena;

                    // The episode ends when the agent stands on the exit or made an invalid move.
                    bool naVychode = env.prostredie[a.currentPos.y][a.currentPos.x].id == Vychod.Tag;
                    if (naVychode)
                    {
                        dosiahnutyCiel = 100;
                    }

                    if (naVychode || !platnyTah)
                    {
                        break;
                    }
                }

                stopky.Stop();

                // Decay exploration until it drops below 0.01.
                if (epsilon >= 0.01f)
                {
                    epsilon *= epsilon_decay;
                }

                // Only the testing phase is logged.
                if (training)
                {
                    continue;
                }

                var apple = (a.apples / (float)a.apple_count) * 100.0f;
                var mine  = (a.mines / (float)a.mine_count) * 100.0f;

                log_file.WriteLine($"{episode};{skore};{step};{stopky.Elapsed.TotalMilliseconds * 1000};{apple};{mine};{dosiahnutyCiel}");

                vysledky.append(apple, mine, dosiahnutyCiel);
            }

            return vysledky;
        }