/// <summary>
/// Chooses the next move for the given board and records it in <c>TakenMoves</c>.
/// </summary>
/// <remarks>
/// For a board state that is not yet in <c>Policies</c>, the board's available moves are
/// stored as that state's policy and the highest-utility entry is returned (no exploration
/// roll and no spectation output happen on that first visit).
/// For a known state, the highest-utility stored move is chosen unless the exploration roll
/// (<c>Rnd.Range(1, 101) &lt;= ExplorationRate</c>) succeeds, in which case a random available
/// move is taken instead. When <c>Spectation</c> is set, the stored moves and their utilities
/// are written to the console in descending utility order.
/// </remarks>
/// <param name="t">The board to move on.</param>
/// <returns>The move that was selected and appended to <c>TakenMoves</c>.</returns>
public PawnMove TakeTurn(TicTacToeBoard t)
{
    // Compute the state key once; it is used for every policy lookup below.
    var boardState = t.GetBoardState();

    if (!Policies.TryGetValue(boardState, out var knownMoves))
    {
        // First visit: seed the policy for this state with the currently available moves.
        knownMoves = t.GetAvailableMoves();
        Policies.Add(boardState, knownMoves);

        PawnMove firstChoice = knownMoves.OrderByDescending(m => m.Utility).First();
        TakenMoves.Add(firstChoice);
        return firstChoice;
    }

    // Sort once and reuse the ranking for both the greedy pick and the spectation output.
    var ranked = knownMoves.OrderByDescending(m => m.Utility).ToList();
    PawnMove moveToTake = ranked[0];

    if (Rnd.Range(1, 101) <= ExplorationRate)
    {
        // NOTE(review): the exploratory move is drawn from a fresh GetAvailableMoves() call,
        // not from the stored policy list. If that call returns new PawnMove instances,
        // anything later applied to TakenMoves may not reach the entries held in Policies.
        // Confirm against Reward() and GetAvailableMoves().
        var available = t.GetAvailableMoves();
        moveToTake = available[Rnd.Range(0, available.Count)];
    }

    TakenMoves.Add(moveToTake);

    if (Spectation)
    {
        string listing = string.Concat(
            ranked.Select(item => $"[{item.X},{item.Y}]({item.Utility}) \n"));
        Console.WriteLine("My available moves: \n" + listing);
    }

    return moveToTake;
}
/// <summary>
/// Picks one of the board's currently available moves at random.
/// </summary>
/// <param name="t">The board whose available moves are sampled.</param>
/// <returns>The move at an index obtained from <c>Rnd.Range(0, count)</c>.</returns>
public PawnMove TakeTurn(TicTacToeBoard t)
{
    List<PawnMove> candidates = t.GetAvailableMoves();
    int pick = Rnd.Range(0, candidates.Count);
    return candidates[pick];
}
/// <summary>
/// Trains a Q-learning agent (playing as player 2) against a random agent (player 1),
/// prints the overall and per-interval results, then lets a human play a fixed number of
/// games as player 1 against the trained agent with exploration turned off.
/// </summary>
/// <remarks>
/// Board outcome codes as used here: 0 = game still running, 1 = player 1 won,
/// 2 = player 2 won, 3 = nobody won.
/// If console input is closed during the interactive phase, the method returns early.
/// </remarks>
public void PlayTicTacToe()
{
    const int TrainingGames = 1000000;
    const int BenchmarkInterval = 10000;
    const int HumanGames = 10;
    // Denominator used for the "known boardstates" report below.
    const int ReferenceBoardStateCount = 19663;

    RandomAI randy = new RandomAI();
    QLearnAITICTACTOE qubert = new QLearnAITICTACTOE();
    // NOTE(review): this second agent never takes a turn in this method; it only receives
    // rewards and has its exploration rate reset. Kept so those calls still happen.
    QLearnAITICTACTOE qubert1 = new QLearnAITICTACTOE();

    int player1win = 0;
    int player2win = 0;
    int stalemate = 0;

    // Per-interval counters, reset every BenchmarkInterval games.
    int player1winBM = 0;
    int player2winBM = 0;
    int stalemateBM = 0;
    List<WinRatio> winratios = new List<WinRatio>();

    for (int game = 1; game <= TrainingGames; game++)
    {
        TicTacToeBoard board = new TicTacToeBoard();
        while (board.CheckForWinner() == 0)
        {
            PawnMove mov = randy.TakeTurn(board);
            board.TTTBoard[mov.X, mov.Y] = 1;

            if (board.CheckForWinner() == 0)
            {
                mov = qubert.TakeTurn(board);
                board.TTTBoard[mov.X, mov.Y] = 2;
            }
        }

        var outcome = board.CheckForWinner();
        if (outcome == 1)
        {
            player1win++;
            player1winBM++;
            qubert.Reward(-1);
            qubert1.Reward(1);
        }
        else if (outcome == 2)
        {
            player2win++;
            player2winBM++;
            qubert.Reward(1);
            qubert1.Reward(-1);
        }
        else if (outcome == 3)
        {
            stalemate++;
            stalemateBM++;
            qubert.Reward(.1f);
            qubert1.Reward(.1f);
        }

        // Close the interval AFTER the game has been counted, so every recorded entry
        // covers exactly BenchmarkInterval finished games and the last interval is kept.
        if (game % BenchmarkInterval == 0)
        {
            winratios.Add(new WinRatio()
            {
                Player1 = player1winBM,
                Player2 = player2winBM,
                Draw = stalemateBM
            });
            player1winBM = 0;
            player2winBM = 0;
            stalemateBM = 0;
        }
    }

    Console.WriteLine("Hello World!");
    Console.WriteLine("Player 1 victories: " + player1win);
    Console.WriteLine("Player 2 victories: " + player2win);
    Console.WriteLine("Draws " + stalemate);

    string res = "Player 1 stats: ";
    foreach (WinRatio item in winratios)
    {
        res += item.Player1 + "/";
    }
    Console.WriteLine(res);

    res = "Player 2 stats: ";
    foreach (WinRatio item in winratios)
    {
        res += item.Player2 + "/";
    }
    Console.WriteLine(res);

    Console.WriteLine(qubert.Policies.Count + "/" + ReferenceBoardStateCount + " Known boardstates");
    Console.WriteLine("" + ((float)qubert.Policies.Count / ReferenceBoardStateCount) * 100 + "% discovered");

    res = "Draws: ";
    foreach (WinRatio item in winratios)
    {
        res += item.Draw + "/";
    }
    Console.WriteLine(res);

    // Interactive phase: no exploration, and the agent prints its move utilities.
    qubert.ExplorationRate = 0;
    qubert1.ExplorationRate = 0;
    qubert.Spectation = true;

    for (int game = 0; game < HumanGames; game++)
    {
        TicTacToeBoard board = new TicTacToeBoard();
        while (board.CheckForWinner() == 0)
        {
            board.DrawBoard();
            if (!TryReadHumanMove(board, out int playerX, out int playerY))
            {
                // Console input was closed; there is nobody left to play against.
                return;
            }
            board.TTTBoard[playerX, playerY] = 1;

            if (board.CheckForWinner() == 0)
            {
                PawnMove mov = qubert.TakeTurn(board);
                board.TTTBoard[mov.X, mov.Y] = 2;
            }
        }

        var outcome = board.CheckForWinner();
        if (outcome == 1)
        {
            board.DrawBoard();
            Console.WriteLine("Player 1 Win!");
            Console.WriteLine();
            qubert.Reward(-1);
            qubert1.Reward(1);
        }
        else if (outcome == 2)
        {
            board.DrawBoard();
            Console.WriteLine("Player 2 Win!");
            Console.WriteLine();
            qubert.Reward(1);
            qubert1.Reward(-1);
        }
        else if (outcome == 3)
        {
            board.DrawBoard();
            Console.WriteLine("Noone");
            Console.WriteLine();
            // NOTE(review): draws are rewarded with -0.01f here but +.1f during training;
            // kept as found, confirm whether the difference is intended.
            qubert.Reward(-0.01f);
            qubert1.Reward(-0.01f);
        }
    }
}

/// <summary>
/// Prompts on the console until the user enters an X/Y pair that matches one of the
/// board's available moves.
/// </summary>
/// <param name="board">The board whose available moves define the valid choices.</param>
/// <param name="x">Receives the chosen X coordinate on success.</param>
/// <param name="y">Receives the chosen Y coordinate on success.</param>
/// <returns><c>true</c> when a valid move was read; <c>false</c> when console input ended.</returns>
private static bool TryReadHumanMove(TicTacToeBoard board, out int x, out int y)
{
    x = 0;
    y = 0;

    while (true)
    {
        Console.WriteLine("Choose X coordinate");
        string rawX = Console.ReadLine();
        if (rawX == null)
        {
            return false;
        }

        Console.WriteLine("Choose Y coordinate");
        string rawY = Console.ReadLine();
        if (rawY == null)
        {
            return false;
        }

        if (!int.TryParse(rawX, out x) || !int.TryParse(rawY, out y))
        {
            Console.WriteLine("Please enter whole numbers for both coordinates.");
            continue;
        }

        // Only accept squares the board itself reports as playable; this rejects both
        // out-of-range coordinates and squares that are already taken.
        foreach (PawnMove candidate in board.GetAvailableMoves())
        {
            if (candidate.X == x && candidate.Y == y)
            {
                return true;
            }
        }

        Console.WriteLine("That square is not available. Try again.");
    }
}