// A state whose first three cells are all X is expected to expose no actions.
public void Actions_NoneWhenWinnerOnBoard()
{
    // arrange: X in cells 0-2, every remaining cell is null
    var cells = new[]
    {
        TicTacToePlayer.X, TicTacToePlayer.X, TicTacToePlayer.X,
        null, null, null,
        null, null, null
    };
    var state = new TicTacToeState(cells, TicTacToePlayer.O);

    // act
    var available = state.Actions.ToList();

    // assert
    AssertActionsEqual(new TicTacToeAction[0], available);
}
// With exactly one (state, action) value recorded, that action must be reported as the best one.
public void GivenASingleStateAndAction_HighestValueAction_ReturnsOnlyAction()
{
    // arrange
    var table = new ActionValueTable();
    var board = Board.CreateEmptyBoard();
    var onlyAction = new TicTacToeAction();
    table.Set(board, onlyAction, 1.0);

    // act
    var best = table.HighestValueAction(board);

    // assert
    Assert.AreEqual(onlyAction, best);
}
// Returns of 2 and 4 recorded for the same board/action pair must average to 3.
public void AverageReturnFrom_ReturnsAverage()
{
    // arrange
    var tracker = new Returns();
    var board = Board.CreateEmptyBoard();
    var move = new TicTacToeAction();
    tracker.Add(board, move, 2);
    tracker.Add(board, move, 4);

    // act
    var average = tracker.AverageReturnFrom(board, move);

    // assert
    Assert.AreEqual(3, average);
}
// Stepping with a second X on a board that already holds an X must be rejected
// with an InvalidOperationException.
public void CreatingInvalidState_Throws()
{
    // arrange: the starting board already contains one X
    var boardWithOneX = Board.CreateFromString("x |" + " |" + " ");
    _env.SetState(boardWithOneX);
    var placeAnotherX = new TicTacToeAction { Position = 1, Tile = BoardTile.X };

    // act + assert
    Assert.Throws<InvalidOperationException>(() => _env.Step(placeAnotherX));
}
// A step that does not end the game must report a reward of 0.
public void RewardIs0_WhenGameIsNotOver()
{
    // arrange
    var placeXAtMiddleRight = new TicTacToeAction { Position = 5, Tile = BoardTile.X };

    // act
    var result = _env.Step(placeXAtMiddleRight);

    // assert
    Assert.AreEqual(0, result.Reward);
}
// Two separately created empty boards must address the same entries in the table:
// a value stored through one board has to be visible through the other.
public void DuplicateBoards_AreTreatedAsEqual()
{
    // arrange
    var values = new ActionValueTable();
    var emptyBoard1 = Board.CreateEmptyBoard();
    var emptyBoard2 = Board.CreateEmptyBoard();

    // The two actions use different positions so they can never compare equal,
    // even if TicTacToeAction implements value equality. With indistinguishable
    // actions the assertions below would pass whether or not the boards are
    // treated as the same key.
    var lowerValueAction = new TicTacToeAction { Position = 0, Tile = BoardTile.X };
    var higherValueAction = new TicTacToeAction { Position = 1, Tile = BoardTile.X };

    // act: store one value through each board instance
    values.Set(emptyBoard1, lowerValueAction, 1.0);
    values.Set(emptyBoard2, higherValueAction, 2.0);

    // assert: both boards see the higher-valued action
    Assert.AreEqual(higherValueAction, values.HighestValueAction(emptyBoard1));
    Assert.AreEqual(higherValueAction, values.HighestValueAction(emptyBoard2));
}
// Among several actions recorded for one state, HighestValueAction must return
// the one with the largest value.
public void HighestValueAction_ReturnsHighestValueAction()
{
    // arrange
    var values = new ActionValueTable();
    var state = Board.CreateEmptyBoard();

    // Each action targets a different position so the three are distinguishable
    // even if TicTacToeAction implements value equality; otherwise they would
    // collapse into a single entry and the assertion could not fail.
    var action0 = new TicTacToeAction { Position = 0, Tile = BoardTile.X };
    var action1 = new TicTacToeAction { Position = 1, Tile = BoardTile.X };
    var action2 = new TicTacToeAction { Position = 2, Tile = BoardTile.X };

    // The highest value is deliberately not written last, so an implementation
    // that simply returns the most recently stored action fails this test.
    values.Set(state, action0, 0);
    values.Set(state, action2, 2);
    values.Set(state, action1, 1);

    // act
    var best = values.HighestValueAction(state);

    // assert
    Assert.AreEqual(action2, best);
}
// Placing the X that finishes the game in X's favour must yield a reward of 1.
public void RewardIs1_ForWin()
{
    // arrange
    var board = Board.CreateFromString("xx |" + "oo |" + " ");
    _env.SetState(board);
    var winningMove = new TicTacToeAction { Position = 2, Tile = BoardTile.X };

    // act
    var result = _env.Step(winningMove);

    // assert
    Assert.AreEqual(1, result.Reward);
}
// When the step ends with O as the winner, the reported reward must be -1.
public void RewardIsNegative1_ForLoss()
{
    // arrange: same position as the win scenario, but created with BoardTile.O
    var startingBoard = Board.CreateFromString("xx |" + "oo |" + " ", BoardTile.O);
    _env.SetState(startingBoard);
    var losingMove = new TicTacToeAction { Position = 5, Tile = BoardTile.O };

    // act
    var result = _env.Step(losingMove);

    // assert
    Assert.AreEqual(-1, result.Reward);
}
// Separately constructed actions with the same Position and Tile must behave as
// one and the same action, both as table keys and in equality comparisons.
public void DuplicateActions_AreTreatedAsEqual()
{
    // arrange: two distinct instances describing the same move
    var table = new ActionValueTable();
    var state = Board.CreateEmptyBoard();
    var first = new TicTacToeAction { Position = 2, Tile = BoardTile.X };
    var second = new TicTacToeAction { Position = 2, Tile = BoardTile.X };
    table.Set(state, first, 1.0);
    table.Set(state, second, 2.0);

    // act
    var best = table.HighestValueAction(state);

    // assert: compared against yet another instance of the same move
    var equivalentAction = new TicTacToeAction { Position = 2, Tile = BoardTile.X };
    Assert.AreEqual(equivalentAction, best);
}
/// <summary>
/// Applies the given action to the current state, lets the opponent reply when the
/// game is not over afterwards, and reports the resulting board with its reward.
/// </summary>
/// <param name="action">The action to apply to the current state.</param>
/// <returns>
/// A step carrying the current state after the move(s) and a reward of 1.0 when the
/// winner is <c>BoardTile.X</c>, -1.0 when it is <c>BoardTile.O</c>, and 0.0 otherwise.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when applying <paramref name="action"/> fails (the original exception is
/// available as <c>InnerException</c>), or when the state after the action is not valid.
/// </exception>
public TicTacToeEnvironmentStep Step(TicTacToeAction action)
{
    try
    {
        DoAction(action);
    }
    catch (Exception e)
    {
        // Same exception type and message as before, but the cause is chained so
        // its type and stack trace are not lost.
        throw new InvalidOperationException(e.Message, e);
    }

    // NOTE(review): DoAction has already replaced CurrentState at this point, so the
    // environment keeps the invalid state after this throw — confirm that is intended.
    if (!CurrentState.IsValid())
    {
        throw new InvalidOperationException($"Action caused invalid state: '{CurrentState}'");
    }

    // The opponent only answers while the game is still running.
    if (!CurrentState.IsGameOver)
    {
        DoAction(_opponent.GetAction(CurrentState));
    }

    // Evaluate the winner once, after both moves have been applied.
    var winner = CurrentState.Winner();
    var reward = 0.0;
    if (winner == BoardTile.X)
    {
        reward = 1.0;
    }
    else if (winner == BoardTile.O)
    {
        reward = -1.0;
    }

    return new TicTacToeEnvironmentStep { Board = CurrentState, Reward = reward };
}
// One Step call must apply the agent's move followed by the opponent's reply.
public void Step_DoesAgentAndOpponentMoves()
{
    // arrange: the opponent stub always answers with the same O move
    var agentMove = new TicTacToeAction { Position = 0, Tile = BoardTile.X };
    var opponentMove = new TicTacToeAction { Position = 1, Tile = BoardTile.O };
    _opponent.GetAction(Arg.Any<Board>()).Returns(opponentMove);

    // expected result: both moves applied, in order, to an empty board
    var boardAfterAgent = Board.CreateEmptyBoard().DoAction(agentMove);
    var expectedBoard = boardAfterAgent.DoAction(opponentMove);

    // act
    var result = _env.Step(agentMove);

    // assert: boards are compared through their string form
    Assert.AreEqual(expectedBoard.ToString(), result.Board.ToString());
}
// Replaces CurrentState with the state produced by applying the action to it.
private void DoAction(TicTacToeAction action) =>
    CurrentState = CurrentState.DoAction(action);