/// <summary>
/// Checks that the first action obtained through <see cref="ExploringStartPolicy"/> is not
/// always the same.
/// </summary>
/// <remarks>
/// Ten freshly constructed policies, all wrapping the same inner agent, are each asked for one
/// action against the same environment. The test passes when more than one distinct action
/// is observed among those ten results.
/// </remarks>
public void FirstActionIsRandom()
{
    var opponent = new FirstAvailableSlotPlayer(BoardTile.O);
    var environment = new TicTacToeEnvironment(opponent);
    var innerPolicy = new MonteCarloTicTacToeAgent(BoardTile.X);

    // A new policy is built for every sample, so each call below is that policy's first action.
    var firstActions = Enumerable.Range(0, 10)
        .Select(_ =>
        {
            var policy = new ExploringStartPolicy(innerPolicy);
            return policy.GetAction(environment);
        });

    var distinctActionCount = firstActions.Distinct().Count();

    Assert.Greater(distinctActionCount, 1);
}
/// <summary>
/// Trains an agent, saves its trained values to a file, and checks that a state-action table
/// can be loaded back from that file.
/// </summary>
/// <remarks>
/// The file is deleted before the save and again once the test finishes, so a leftover file
/// from an earlier run cannot make the load succeed and no artifact remains on disk.
/// </remarks>
public void Saves_And_Loads()
{
    var agent = new MonteCarloTicTacToeAgent(BoardTile.X);
    var opponent = new FirstAvailableSlotPlayer(BoardTile.O);
    agent.Train(opponent, 1);

    var path = $"{nameof(MonteCarloTicTacToeAgentTests)}.{nameof(Saves_And_Loads)}.agent.json";

    // Remove any stale file first; File.Delete is a no-op when the file does not exist.
    // NOTE(review): assumes the save/load helpers resolve this relative path against the
    // current working directory, the same way File.Delete does - confirm.
    System.IO.File.Delete(path);

    try
    {
        agent.SaveTrainedValues("asdf", path);

        var stateActionTable = PolicyFileIo.LoadStateActionTable(path);

        Assert.NotNull(stateActionTable);
    }
    finally
    {
        // Clean up regardless of the outcome so repeated runs start from the same state.
        System.IO.File.Delete(path);
    }
}