private int[] Radar(Prostredie env)
{
    int[] scan = new int[8];

    // Up
    if (!JeMimoAreny(currentPos.x, currentPos.y - 1, 10, 10))
    {
        scan[0] = env.prostredie[currentPos.y - 1][currentPos.x].id;
    }
    // Up-right
    if (!JeMimoAreny(currentPos.x + 1, currentPos.y - 1, 10, 10))
    {
        scan[1] = env.prostredie[currentPos.y - 1][currentPos.x + 1].id;
    }
    // Right
    if (!JeMimoAreny(currentPos.x + 1, currentPos.y, 10, 10))
    {
        scan[2] = env.prostredie[currentPos.y][currentPos.x + 1].id;
    }
    // Down-right
    if (!JeMimoAreny(currentPos.x + 1, currentPos.y + 1, 10, 10))
    {
        scan[3] = env.prostredie[currentPos.y + 1][currentPos.x + 1].id;
    }
    // Down
    if (!JeMimoAreny(currentPos.x, currentPos.y + 1, 10, 10))
    {
        scan[4] = env.prostredie[currentPos.y + 1][currentPos.x].id;
    }
    // Down-left
    if (!JeMimoAreny(currentPos.x - 1, currentPos.y + 1, 10, 10))
    {
        scan[5] = env.prostredie[currentPos.y + 1][currentPos.x - 1].id;
    }
    // Left
    if (!JeMimoAreny(currentPos.x - 1, currentPos.y, 10, 10))
    {
        scan[6] = env.prostredie[currentPos.y][currentPos.x - 1].id;
    }
    // Up-left
    if (!JeMimoAreny(currentPos.x - 1, currentPos.y - 1, 10, 10))
    {
        scan[7] = env.prostredie[currentPos.y - 1][currentPos.x - 1].id;
    }

    return scan;
}
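// A minimal sketch (not part of the original class) of how the same eight-neighbour
// scan could be written as a loop over clockwise direction offsets, starting at "up".
// It assumes the same JeMimoAreny bounds check and prostredie[y][x] layout; the
// "directions" table and the RadarLoop name are introduced here purely for illustration.
private static readonly (int dx, int dy)[] directions =
{
    (0, -1), (1, -1), (1, 0), (1, 1), (0, 1), (-1, 1), (-1, 0), (-1, -1)
};

private int[] RadarLoop(Prostredie env)
{
    int[] scan = new int[8];
    for (int i = 0; i < directions.Length; i++)
    {
        int x = currentPos.x + directions[i].dx;
        int y = currentPos.y + directions[i].dy;
        // Cells outside the 10x10 arena keep the default value 0
        if (!JeMimoAreny(x, y, 10, 10))
        {
            scan[i] = env.prostredie[y][x].id;
        }
    }
    return scan;
}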
public void reset(Prostredie env, int t = 0, bool training = true)
{
    // Remove all apples and mines from the arena
    env.NahradObjekty(Jablko.Tag, new Cesta());
    env.NahradObjekty(Mina.Tag, new Cesta());

    if (training == true)
    {
        // Random training start position and randomly generated items
        var idx = r.Next(0, 3);
        this.currentPos = new Vector2(Prostredie.startPositionX_training[idx], Prostredie.startPositionY_training[idx]);

        apple_count = r.Next(2, 5) + 1;
        for (int i = 0; i < apple_count; i++)
        {
            env.GenerateItem(new Jablko());
        }

        mine_count = r.Next(0, 3) + 1;
        for (int i = 0; i < mine_count; i++)
        {
            env.GenerateItem(new Mina());
        }
    }
    else
    {
        // Fixed testing scenario t: predefined start, apple and mine positions
        this.currentPos = new Vector2(this.testing_startsPos[t].x, this.testing_startsPos[t].y);

        for (int i = 0; i < this.testing_applesPos[t].Length; i++)
        {
            env.prostredie[this.testing_applesPos[t][i].y][this.testing_applesPos[t][i].x] = new Jablko();
        }

        for (int i = 0; i < this.testing_minesPos[t].Length; i++)
        {
            env.prostredie[this.testing_minesPos[t][i].y][this.testing_minesPos[t][i].x] = new Mina();
        }

        // Keep the counts in sync with the testing scenario so the percentages
        // logged in run() are computed against the right totals
        apple_count = this.testing_applesPos[t].Length;
        mine_count = this.testing_minesPos[t].Length;
    }

    stav = new AI.QLearning.QState
    {
        PositionX = currentPos.x,
        PositionY = currentPos.y,
        stateRadar = Radar(env)
    };

    this.apples = 0;
    this.mines = 0;
}
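// The Q-table is keyed by QState (see AktualizujAgenta below), so two states with the
// same position and radar reading must compare equal and hash identically. A minimal
// sketch of what such a key type needs, assuming Qtable is a Dictionary<QState, float[]>;
// the actual AI.QLearning.QState implementation is not shown in this section and may differ.
public class QState
{
    public int PositionX;
    public int PositionY;
    public int[] stateRadar;

    public override bool Equals(object obj)
    {
        if (!(obj is QState other)) return false;
        if (PositionX != other.PositionX || PositionY != other.PositionY) return false;
        for (int i = 0; i < stateRadar.Length; i++)
        {
            if (stateRadar[i] != other.stateRadar[i]) return false;
        }
        return true;
    }

    public override int GetHashCode()
    {
        // Combine the position and every radar cell into one hash value
        int hash = PositionX * 31 + PositionY;
        foreach (var cell in stateRadar)
        {
            hash = hash * 31 + cell;
        }
        return hash;
    }
}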
public bool AktualizujAgenta(Prostredie env, bool ucenie, double eps, out float odmena)
{
    bool isValid;
    int akcia;

    // Look up the best known action for the current state
    var akcia_max = qBrain.NajdiMaxAkciu(stav);

    /****************************************************/
    /* The agent performs an action                     */
    /****************************************************/

    // Knowledge about this state already exists
    if (akcia_max != null)
    {
        // explore
        if (ucenie && r.NextDouble() < eps)
        {
            isValid = sample(out akcia);
        }
        // exploit
        else
        {
            akcia = akcia_max.Value;
            isValid = Pohyb((EAkcie)akcia, 10, 10);
        }
    }
    // No knowledge about this state yet
    else
    {
        isValid = sample(out akcia);
        // ... create a record for the newly discovered state
        qBrain.Qtable.Add(stav, new float[] { 0f, 0f, 0f, 0f });
    }

    if (isValid)
    {
        odmena = env.Hodnotenie(currentPos.x, currentPos.y);
    }
    else
    {
        odmena = -1.0f;
    }

    /****************************************************/
    /* Feedback                                         */
    /****************************************************/
    var novyStav = new AI.QLearning.QState
    {
        PositionX = currentPos.x,
        PositionY = currentPos.y,
        stateRadar = Radar(env)
    };

    if (ucenie)
    {
        var buducaAkcia = qBrain.NajdiMaxAkciu(novyStav);
        var buducaQhodnota = 0f;

        // Update the Q-table value for [s; a]
        if (buducaAkcia != null)
        {
            buducaQhodnota = qBrain.Qtable[novyStav][buducaAkcia.Value];
        }

        qBrain.Aktualizuj(stav, akcia, odmena, buducaQhodnota);
    }

    this.stav = novyStav;

    // The agent picked up an apple
    if (env.prostredie[currentPos.y][currentPos.x].id == Jablko.Tag)
    {
        env.prostredie[currentPos.y][currentPos.x] = new Cesta();
        this.apples += 1;
    }

    // The agent triggered a mine
    if (env.prostredie[currentPos.y][currentPos.x].id == Mina.Tag)
    {
        env.prostredie[currentPos.y][currentPos.x] = new Cesta();
        this.mines += 1;
    }

    return isValid;
}
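// qBrain.Aktualizuj is not shown in this section. A minimal sketch of the standard
// Q-learning update it is expected to perform, assuming a Dictionary<QState, float[]>
// Q-table and hypothetical learning-rate (alpha) and discount (gamma) fields:
//
//     Q[s][a] = Q[s][a] + alpha * (reward + gamma * maxFutureQ - Q[s][a])
//
public void Aktualizuj(QState stav, int akcia, float odmena, float buducaQhodnota)
{
    float[] hodnoty = Qtable[stav];
    hodnoty[akcia] = hodnoty[akcia] + alpha * (odmena + gamma * buducaQhodnota - hodnoty[akcia]);
}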
private static Statistics run(Prostredie env, Agent a, int episodes, int steps, bool training = true)
{
    Statistics stat = new Statistics();
    var epsilon = training == true ? 1.0f : 0.0f;
    float score;
    int is_end;

    // Training / testing loop over episodes
    for (int episode = 0, step; episode < episodes; episode++)
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();

        a.reset(env, episode, training);
        is_end = 0;
        score = 0;

        for (step = 0; step < steps; step++)
        {
            if (training == false)
            {
                Console.Clear();
                env.Vypis(a.currentPos.x, a.currentPos.y);
                Thread.Sleep(200);
            }

            var isValid = a.AktualizujAgenta(env, training, epsilon, out float odmena);
            score += odmena;

            // End the episode if the agent reached the exit
            if (env.prostredie[a.currentPos.y][a.currentPos.x].id == Vychod.Tag)
            {
                is_end = 100;
                break;
            }
            // End the episode if the agent made an invalid move
            else if (isValid == false)
            {
                break;
            }
        }

        watch.Stop();

        if (epsilon >= 0.01f)
        {
            epsilon *= epsilon_decay;
        }

        /*
        if ((episode % 1000) == 0)
        {
            Console.WriteLine($"\nepsilon: {epsilon}, epoch: {episode}/{episodes}");
            Console.WriteLine($"Number of learned states: {a.PocetUlozenychStavov}\n");
            Console.WriteLine($"apples: {a.apples}/{a.apple_count}, mines: {a.mines}/{a.mine_count}");
        }
        */

        // Log only the testing phase
        if (training == false)
        {
            var apple = (a.apples / (float)a.apple_count) * 100.0f;
            var mine = (a.mines / (float)a.mine_count) * 100.0f;

            // Elapsed time is logged in microseconds
            log_file.WriteLine($"{episode};{score};{step};{watch.Elapsed.TotalMilliseconds * 1000};{apple};{mine};{is_end}");
            //Console.WriteLine($"{apple};{mine};{is_end}");

            stat.append(apple, mine, is_end);
        }
    }

    return stat;
}
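// During training epsilon starts at 1.0 and is multiplied by epsilon_decay once per
// episode until it drops below 0.01, so after N episodes epsilon ~ epsilon_decay^N.
// A small sketch of how many episodes a given decay needs to reach a target
// exploration rate; the 0.999 default is only an example, the real epsilon_decay
// constant is defined elsewhere in the program.
private static int EpisodesToReach(double target, double decay = 0.999)
{
    // N = log(target) / log(decay), e.g. ~2995 episodes to reach 0.05 with decay 0.999
    return (int)Math.Ceiling(Math.Log(target) / Math.Log(decay));
}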