/// <summary>
/// Handles a mouse release on a board column: plays the current checker in that
/// column and, in human-vs-computer mode, immediately plays the bot's reply.
/// </summary>
/// <param name="sender">The <see cref="Border"/> that raised the event; its Grid.Column value selects the column.</param>
/// <param name="e">Mouse event data (not used).</param>
private void Border_MouseUp(object sender, MouseEventArgs e)
{
    if (CurrentBoard.IsGameOver)
    {
        return;
    }

    // A valid network (re)creates the bot; without one only human-vs-human play can continue.
    Network network = GetValidNetwork();
    if (network != null)
    {
        Bot = new NeuralNetBot(Checker.Green, network, 0);
    }
    else if (Mode != GameMode.HumanVHuman)
    {
        return;
    }

    // Ignore events whose sender is not a Border instead of dereferencing a failed cast.
    Border border = sender as Border;
    if (border == null)
    {
        return;
    }

    int column = (int)border.GetValue(Grid.ColumnProperty);
    if (IsColumnFull(column))
    {
        return;
    }

    if (Mode == GameMode.HumanVComputer)
    {
        CurrentBoard.AddChecker(Checker, column);
        if (CurrentBoard.IsGameOver)
        {
            // The human's move ended the game: show it and skip the bot's reply.
            AddChecker(Checker, column, updateBoard: false);
            GameOverAnimation();
            return;
        }

        int column2;
        double score;
        Bot.SelectMove(CurrentBoard, out column2, out score);
        CurrentBoard.AddChecker(Board.Toggle(Checker), column2);

        // Both moves were already applied to CurrentBoard above, hence updateBoard: false.
        BatchAddCheckers(Checker, new List<int> { column, column2 }, updateBoard: false);
    }
    else
    {
        // Human vs. human: place the checker and hand the turn to the other colour.
        AddChecker(Checker, column);
        Checker = Board.Toggle(Checker);
    }

    if (CurrentBoard.IsGameOver)
    {
        GameOverAnimation();
    }
}
/// <summary>
/// Chooses a column for this bot to play on <paramref name="board"/>.
/// </summary>
/// <remarks>
/// With <c>LambdaType.Threshold</c>, a randomly picked non-full column is returned whenever a
/// random draw is at most <c>Lambda</c>. Otherwise <c>recSelectMove</c> searches for the best move;
/// with <c>LambdaType.ProbabilityDistribution</c> and a positive <c>Lambda</c>, the final column is
/// then sampled with weights that favour columns whose evaluation is close to the best one.
/// </remarks>
/// <param name="board">Board to move on. Checkers are added and removed again while evaluating.</param>
/// <param name="column">Receives the chosen column.</param>
/// <param name="score">Receives the evaluation associated with the chosen column.</param>
/// <param name="depth">Search depth passed to <c>recSelectMove</c>.</param>
public void SelectMove(Board board, out int column, out double score, int depth = 1)
{
    // Lambda percent of the time, select a random move.
    if (LambdaType == LambdaType.Threshold && RANDOM.NextDouble() <= Lambda)
    {
        int[] cols = Enumerable.Range(0, board.Columns).Where(x => !board.IsColumnFull(x)).ToArray();
        column = cols[RANDOM.Next(cols.Length)];

        // Evaluate the position after the random move, then restore the board.
        board.AddChecker(MyColor, column);
        score = EvaluateBoard(board);
        board.RemoveChecker(column);
        return;
    }

    List<double> columnEvaluations;
    recSelectMove(board, depth, true, out column, out score, out columnEvaluations);

    // Pick the move at random, weighting each column so that evaluations closer to the
    // best one are more likely to be selected. Larger values of Lambda make the choices
    // more evenly likely; smaller values concentrate the probability on the best moves.
    // A non-positive Lambda disables sampling (avoiding divide-by-zero issues) and keeps
    // the best move found by the search.
    if (LambdaType == LambdaType.ProbabilityDistribution && Lambda > 0)
    {
        double sum = 0.0;
        double[] weights = new double[columnEvaluations.Count];
        for (int i = 0; i < columnEvaluations.Count; i++)
        {
            // The closer this column's evaluation is to the best, the greater its weight.
            // Columns left at NegativeInfinity by the search get a weight of zero.
            double w = 1 / (Lambda + (score - columnEvaluations[i]));
            weights[i] = w;
            sum += w;
        }

        double r = RANDOM.NextDouble() * sum;

        // Roulette-wheel selection over the positive weights only. Zero-weight columns are
        // skipped so that a draw of exactly 0, or rounding that exhausts the loop, can never
        // land on an unplayable column; the fallback is the last column with a positive
        // weight (or the search result if there is none).
        int chosen = column;
        for (int c = 0; c < weights.Length; c++)
        {
            if (weights[c] <= 0)
            {
                continue;
            }

            chosen = c;
            r -= weights[c];
            if (r <= 0)
            {
                break;
            }
        }

        column = chosen;
        score = columnEvaluations[chosen];
    }
}
/// <summary>
/// Parses the validation set from the connect-4 8-ply database stored in
/// <c>Properties.Resources.connect_4</c>.
/// </summary>
/// <returns>The shared validation set, cleared and refilled by this call.</returns>
public static List<Example> Parse()
{
    // The linear board data lists six cells per column (see the i / RowsPerColumn mapping below).
    const int RowsPerColumn = 6;

    ValidationSet.Clear();
    using (StringReader reader = new StringReader(Properties.Resources.connect_4))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Skip blank lines; otherwise they would be recorded as an empty board labelled as a draw.
            if (line.Trim().Length == 0)
            {
                continue;
            }

            string[] values = line.Split(',');
            Board board = new Board();

            // Every field except the last one is a board cell; the last one is the outcome.
            for (int i = 0; i < values.Length - 1; ++i)
            {
                // Invariant casing keeps the token comparison independent of the current culture.
                string x = values[i].ToLowerInvariant().Trim();
                Checker checker = Checker.Empty;
                switch (x)
                {
                    case "x":
                        checker = Checker.Blue;
                        break;
                    case "o":
                        checker = Checker.Green;
                        break;
                    case "b":
                        checker = Checker.Empty;
                        break;
                }

                // Format of linear board data in connect-4.txt is bottom to top, left to right.
                board.AddChecker(checker, i / RowsPerColumn);
            }

            // In connect-4.txt it is X's turn to move next, which means player O has just
            // moved. Player O == Green, therefore Checker.Green is used here.
            Example example = Transform.ToNormalizedExample(board, Checker.Green);

            string result = values[values.Length - 1].ToLowerInvariant().Trim();

            // The file's outcome is stated for the player who moves next (assuming optimal play).
            // The label must instead describe the player who moved last, so win and loss are
            // swapped; anything else is treated as a draw.
            GameResult gr = result == "win" ? GameResult.Loss
                          : result == "loss" ? GameResult.Win
                          : GameResult.Draw;
            example.Labels.Add(Transform.ToValue(gr));
            ValidationSet.Add(example);
        }
    }

    return ValidationSet;
}
/// <summary>
/// Parses the validation set from the connect-4 8-ply database stored in
/// <c>Properties.Resources.connect_4</c>.
/// </summary>
/// <returns>The shared validation set, cleared and refilled by this call.</returns>
public static List<Example> Parse()
{
    // The linear board data lists six cells per column (see the i / RowsPerColumn mapping below).
    const int RowsPerColumn = 6;

    ValidationSet.Clear();
    using (StringReader reader = new StringReader(Properties.Resources.connect_4))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Skip blank lines; otherwise they would be recorded as an empty board labelled as a draw.
            if (line.Trim().Length == 0)
            {
                continue;
            }

            string[] values = line.Split(',');
            Board board = new Board();

            // Every field except the last one is a board cell; the last one is the outcome.
            for (int i = 0; i < values.Length - 1; ++i)
            {
                // Invariant casing keeps the token comparison independent of the current culture.
                string x = values[i].ToLowerInvariant().Trim();
                Checker checker = Checker.Empty;
                switch (x)
                {
                    case "x":
                        checker = Checker.Blue;
                        break;
                    case "o":
                        checker = Checker.Green;
                        break;
                    case "b":
                        checker = Checker.Empty;
                        break;
                }

                // Format of linear board data in connect-4.txt is bottom to top, left to right.
                board.AddChecker(checker, i / RowsPerColumn);
            }

            // In connect-4.txt it is X's turn to move next, which means player O has just
            // moved. Player O == Green, therefore Checker.Green is used here.
            Example example = Transform.ToNormalizedExample(board, Checker.Green);

            string result = values[values.Length - 1].ToLowerInvariant().Trim();

            // The file's outcome is stated for the player who moves next (assuming optimal play).
            // The label must instead describe the player who moved last, so win and loss are
            // swapped; anything else is treated as a draw.
            GameResult gr = result == "win" ? GameResult.Loss
                          : result == "loss" ? GameResult.Win
                          : GameResult.Draw;
            example.Labels.Add(Transform.ToValue(gr));
            ValidationSet.Add(example);
        }
    }

    return ValidationSet;
}
/// <summary>
/// Recursively searches the non-full columns of <paramref name="board"/> and reports the
/// column with the highest (or lowest) evaluation.
/// </summary>
/// <param name="board">Board to search. Each candidate checker is added and then removed again.</param>
/// <param name="depth">Remaining search depth; at 1 or less the position is evaluated directly.</param>
/// <param name="max">True to pick the highest evaluation at this level, false to pick the lowest.</param>
/// <param name="column">Receives the best column at this level (0 if no column was playable).</param>
/// <param name="score">Receives the evaluation of that column (an infinity if no column was playable).</param>
/// <param name="columnEvaluations">Receives one evaluation per column; full columns stay at negative infinity.</param>
public void recSelectMove(Board board, int depth, bool max, out int column, out double score, out List<double> columnEvaluations)
{
    int bestX = 0;
    columnEvaluations = Enumerable.Repeat(double.NegativeInfinity, board.Columns).ToList();
    double bestV = max ? double.NegativeInfinity : double.PositiveInfinity;

    for (int x = 0; x < board.Columns; x++)
    {
        if (board.IsColumnFull(x))
        {
            continue;
        }

        // NOTE(review): every simulated ply places MyColor, including the minimizing
        // levels - confirm whether the opponent's colour was intended there.
        board.AddChecker(MyColor, x);

        double v;
        if (depth <= 1 || board.IsGameOver)
        {
            v = EvaluateBoard(board);
        }
        else
        {
            // Only the value of the best reply matters here; its column and the
            // per-column evaluations of the deeper level are discarded.
            int replyColumn;
            List<double> replyEvaluations;
            recSelectMove(board, depth - 1, !max, out replyColumn, out v, out replyEvaluations);
        }

        board.RemoveChecker(x);
        columnEvaluations[x] = v;

        if (max ? v > bestV : v < bestV)
        {
            bestV = v;
            // Record the move made at THIS level, not the column chosen deeper in the tree.
            bestX = x;
        }
    }

    column = bestX;
    score = bestV;
}
/// <summary>
/// Recursively searches the non-full columns of <paramref name="board"/> and reports the
/// column with the highest (or lowest) evaluation.
/// </summary>
/// <param name="board">Board to search. Each candidate checker is added and then removed again.</param>
/// <param name="depth">Remaining search depth; at 1 or less the position is evaluated directly.</param>
/// <param name="max">True to pick the highest evaluation at this level, false to pick the lowest.</param>
/// <param name="column">Receives the best column at this level (0 if no column was playable).</param>
/// <param name="score">Receives the evaluation of that column (an infinity if no column was playable).</param>
/// <param name="columnEvaluations">Receives one evaluation per column; full columns stay at negative infinity.</param>
public void recSelectMove(Board board, int depth, bool max, out int column, out double score, out List<double> columnEvaluations)
{
    int bestX = 0;
    columnEvaluations = Enumerable.Repeat(double.NegativeInfinity, board.Columns).ToList();
    double bestV = max ? double.NegativeInfinity : double.PositiveInfinity;

    for (int x = 0; x < board.Columns; x++)
    {
        if (board.IsColumnFull(x))
        {
            continue;
        }

        // NOTE(review): every simulated ply places MyColor, including the minimizing
        // levels - confirm whether the opponent's colour was intended there.
        board.AddChecker(MyColor, x);

        double v;
        if (depth <= 1 || board.IsGameOver)
        {
            v = EvaluateBoard(board);
        }
        else
        {
            // Only the value of the best reply matters here; its column and the
            // per-column evaluations of the deeper level are discarded.
            int replyColumn;
            List<double> replyEvaluations;
            recSelectMove(board, depth - 1, !max, out replyColumn, out v, out replyEvaluations);
        }

        board.RemoveChecker(x);
        columnEvaluations[x] = v;

        if (max ? v > bestV : v < bestV)
        {
            bestV = v;
            // Record the move made at THIS level, not the column chosen deeper in the tree.
            bestX = x;
        }
    }

    column = bestX;
    score = bestV;
}
/// <summary>
/// Plays one game to completion between two neural-net bots that share a network.
/// </summary>
/// <param name="board">Board the bots play on; it does not have to be empty.</param>
/// <param name="network">Neural network both bots use to choose their moves.</param>
/// <returns>
/// The positions reached during the game as Neural Net Examples, after
/// <c>UpdateTraceLabels</c> has been applied separately to each bot's moves.
/// </returns>
public List<Example> Play(Board board, Network network)
{
    Bot allen = new NeuralNetBot(Checker.Blue, network);
    Bot jason = new NeuralNetBot(Checker.Green, network);
    List<Example> moves = new List<Example>();
    Turns = 0;

    // The bot whose colour is next on the board moves first.
    Bot mover;
    if (allen.MyColor == board.NextPlayer)
    {
        mover = allen;
    }
    else
    {
        mover = jason;
    }

    while (!board.IsGameOver)
    {
        int pick;
        double value;
        mover.SelectMove(board, out pick, out value);

        string moverName = mover == allen ? "Allen" : "Jason";
        Log(String.Format("{0} picks column {1} (Score: {2:f2})", moverName, pick, value));

        board.AddChecker(mover.MyColor, pick);

        // Record the position after the move together with the score the mover predicted for it.
        Example snapshot = Transform.ToNormalizedExample(board, mover.MyColor);
        snapshot.Predictions.Add(value);
        moves.Add(snapshot);

        if (mover == allen)
        {
            mover = jason;
        }
        else
        {
            mover = allen;
        }
        ++Turns;
    }

    if (Viewer != null)
    {
        Viewer.BatchAddCheckers(Checker.Blue, board.MoveHistory, completedBoard: board);
    }

    TotalTurns += Turns;

    // With a winner, the final position is a win for the player who made the last move and
    // the position before it a loss for the other player; without one, both are draws.
    Checker winner;
    bool decided = board.TryGetWinner(out winner);
    GameResult lastLabel = decided ? GameResult.Win : GameResult.Draw;
    GameResult priorLabel = decided ? GameResult.Loss : GameResult.Draw;

    int last = moves.Count - 1;
    if (last >= 0)
    {
        moves[last].Predictions[0] = Transform.ToValue(lastLabel);
    }
    if (last >= 1)
    {
        moves[last - 1].Predictions[0] = Transform.ToValue(priorLabel);
    }

    if (!decided)
    {
        Log("TIE");
        ++Ties;
    }
    else if (winner == allen.MyColor)
    {
        Log("WINNER: Allen");
        ++AllenWon;
    }
    else
    {
        Log("WINNER: Jason");
        ++JasonWon;
    }

    // Running statistics over all games played so far.
    ++TotalGames;
    double averageTurns = (double)TotalTurns / TotalGames;
    double allenRate = (double)AllenWon / TotalGames;
    double jasonRate = (double)JasonWon / TotalGames;
    double tieRate = (double)Ties / TotalGames;
    Log(string.Format("Turns: {0} ({1:f2})", Turns, averageTurns));
    Log(string.Format("Allen: {0}({1:f2}) Jason: {2}({3:f2}) Ties {4}({5:f2}) TOTAL: {6}", AllenWon, allenRate, JasonWon, jasonRate, Ties, tieRate, TotalGames));
    Log("");

    // Moves alternate between the bots, so even and odd indices separate the two players.
    List<Example> evenPlies = moves.Where((example, index) => index % 2 == 0).ToList();
    List<Example> oddPlies = moves.Where((example, index) => index % 2 != 0).ToList();

    const double lambda = .7;
    const double alpha = .1;
    const double gamma = .5;
    UpdateTraceLabels(evenPlies, lambda, alpha, gamma);
    UpdateTraceLabels(oddPlies, lambda, alpha, gamma);

    return evenPlies.Union(oddPlies).ToList();
}
/// <summary>
/// Chooses a column for this bot to play on <paramref name="board"/>.
/// </summary>
/// <remarks>
/// With <c>LambdaType.Threshold</c>, a randomly picked non-full column is returned whenever a
/// random draw is at most <c>Lambda</c>. Otherwise <c>recSelectMove</c> searches for the best move;
/// with <c>LambdaType.ProbabilityDistribution</c> and a positive <c>Lambda</c>, the final column is
/// then sampled with weights that favour columns whose evaluation is close to the best one.
/// </remarks>
/// <param name="board">Board to move on. Checkers are added and removed again while evaluating.</param>
/// <param name="column">Receives the chosen column.</param>
/// <param name="score">Receives the evaluation associated with the chosen column.</param>
/// <param name="depth">Search depth passed to <c>recSelectMove</c>.</param>
public void SelectMove(Board board, out int column, out double score, int depth = 1)
{
    // Lambda percent of the time, select a random move.
    if (LambdaType == LambdaType.Threshold && RANDOM.NextDouble() <= Lambda)
    {
        int[] cols = Enumerable.Range(0, board.Columns).Where(x => !board.IsColumnFull(x)).ToArray();
        column = cols[RANDOM.Next(cols.Length)];

        // Evaluate the position after the random move, then restore the board.
        board.AddChecker(MyColor, column);
        score = EvaluateBoard(board);
        board.RemoveChecker(column);
        return;
    }

    List<double> columnEvaluations;
    recSelectMove(board, depth, true, out column, out score, out columnEvaluations);

    // Pick the move at random, weighting each column so that evaluations closer to the
    // best one are more likely to be selected. Larger values of Lambda make the choices
    // more evenly likely; smaller values concentrate the probability on the best moves.
    // A non-positive Lambda disables sampling (avoiding divide-by-zero issues) and keeps
    // the best move found by the search.
    if (LambdaType == LambdaType.ProbabilityDistribution && Lambda > 0)
    {
        double sum = 0.0;
        double[] weights = new double[columnEvaluations.Count];
        for (int i = 0; i < columnEvaluations.Count; i++)
        {
            // The closer this column's evaluation is to the best, the greater its weight.
            // Columns left at NegativeInfinity by the search get a weight of zero.
            double w = 1 / (Lambda + (score - columnEvaluations[i]));
            weights[i] = w;
            sum += w;
        }

        double r = RANDOM.NextDouble() * sum;

        // Roulette-wheel selection over the positive weights only. Zero-weight columns are
        // skipped so that a draw of exactly 0, or rounding that exhausts the loop, can never
        // land on an unplayable column; the fallback is the last column with a positive
        // weight (or the search result if there is none).
        int chosen = column;
        for (int c = 0; c < weights.Length; c++)
        {
            if (weights[c] <= 0)
            {
                continue;
            }

            chosen = c;
            r -= weights[c];
            if (r <= 0)
            {
                break;
            }
        }

        column = chosen;
        score = columnEvaluations[chosen];
    }
}
/// <summary>
/// Plays one game to completion between two neural-net bots that share a network.
/// </summary>
/// <param name="board">Board the bots play on; it does not have to be empty.</param>
/// <param name="network">Neural network both bots use to choose their moves.</param>
/// <returns>
/// The positions reached during the game as Neural Net Examples, after
/// <c>UpdateTraceLabels</c> has been applied separately to each bot's moves.
/// </returns>
public List<Example> Play(Board board, Network network)
{
    Bot allen = new NeuralNetBot(Checker.Blue, network);
    Bot jason = new NeuralNetBot(Checker.Green, network);
    List<Example> moves = new List<Example>();
    Turns = 0;

    // The bot whose colour is next on the board moves first.
    Bot mover;
    if (allen.MyColor == board.NextPlayer)
    {
        mover = allen;
    }
    else
    {
        mover = jason;
    }

    while (!board.IsGameOver)
    {
        int pick;
        double value;
        mover.SelectMove(board, out pick, out value);

        string moverName = mover == allen ? "Allen" : "Jason";
        Log(String.Format("{0} picks column {1} (Score: {2:f2})", moverName, pick, value));

        board.AddChecker(mover.MyColor, pick);

        // Record the position after the move together with the score the mover predicted for it.
        Example snapshot = Transform.ToNormalizedExample(board, mover.MyColor);
        snapshot.Predictions.Add(value);
        moves.Add(snapshot);

        if (mover == allen)
        {
            mover = jason;
        }
        else
        {
            mover = allen;
        }
        ++Turns;
    }

    if (Viewer != null)
    {
        Viewer.BatchAddCheckers(Checker.Blue, board.MoveHistory, completedBoard: board);
    }

    TotalTurns += Turns;

    // With a winner, the final position is a win for the player who made the last move and
    // the position before it a loss for the other player; without one, both are draws.
    Checker winner;
    bool decided = board.TryGetWinner(out winner);
    GameResult lastLabel = decided ? GameResult.Win : GameResult.Draw;
    GameResult priorLabel = decided ? GameResult.Loss : GameResult.Draw;

    int last = moves.Count - 1;
    if (last >= 0)
    {
        moves[last].Predictions[0] = Transform.ToValue(lastLabel);
    }
    if (last >= 1)
    {
        moves[last - 1].Predictions[0] = Transform.ToValue(priorLabel);
    }

    if (!decided)
    {
        Log("TIE");
        ++Ties;
    }
    else if (winner == allen.MyColor)
    {
        Log("WINNER: Allen");
        ++AllenWon;
    }
    else
    {
        Log("WINNER: Jason");
        ++JasonWon;
    }

    // Running statistics over all games played so far.
    ++TotalGames;
    double averageTurns = (double)TotalTurns / TotalGames;
    double allenRate = (double)AllenWon / TotalGames;
    double jasonRate = (double)JasonWon / TotalGames;
    double tieRate = (double)Ties / TotalGames;
    Log(string.Format("Turns: {0} ({1:f2})", Turns, averageTurns));
    Log(string.Format("Allen: {0}({1:f2}) Jason: {2}({3:f2}) Ties {4}({5:f2}) TOTAL: {6}", AllenWon, allenRate, JasonWon, jasonRate, Ties, tieRate, TotalGames));
    Log("");

    // Moves alternate between the bots, so even and odd indices separate the two players.
    List<Example> evenPlies = moves.Where((example, index) => index % 2 == 0).ToList();
    List<Example> oddPlies = moves.Where((example, index) => index % 2 != 0).ToList();

    const double lambda = .7;
    const double alpha = .1;
    const double gamma = .5;
    UpdateTraceLabels(evenPlies, lambda, alpha, gamma);
    UpdateTraceLabels(oddPlies, lambda, alpha, gamma);

    return evenPlies.Union(oddPlies).ToList();
}