/// <summary>
/// Returns the intent with the highest expected value from <paramref name="currentState"/>.
/// </summary>
/// <remarks>
/// Only intents whose target cell is reported empty by the controller are scored.
/// An intent's score is the sum, over the outcomes returned by
/// <c>GetPossibleStatesFromIntent</c>, of outcome value times outcome probability.
/// If no intent scores strictly above <c>float.MinValue</c>, the state's current
/// policy is returned unchanged.
/// </remarks>
/// <param name="currentState">State whose grid and current policy are examined.</param>
/// <returns>The best scoring intent, or the state's existing policy as a fallback.</returns>
private Intent GetBestIntent(State currentState)
{
    Intent chosen = currentState.ticTacToePolicy;
    float bestScore = float.MinValue;

    for (int tile = 0; tile < 9; ++tile)
    {
        Intent candidate = (Intent)tile;

        // Occupied cells cannot be played: skip them.
        if (!ticTacToeController.GridIsEmpty(ticTacToeController.GetPositionFromIntent(candidate), currentState.currentGrid))
        {
            continue;
        }

        Dictionary<State, float> outcomes = GetPossibleStatesFromIntent(currentState, candidate);

        // Expected value of the candidate: value of each outcome weighted by its probability.
        float expected = 0;
        foreach (KeyValuePair<State, float> outcome in outcomes)
        {
            expected += outcome.Key.stateValue * outcome.Value;
        }

        // Strict comparison: on ties the lowest-numbered candidate wins.
        if (expected > bestScore)
        {
            bestScore = expected;
            chosen = candidate;
        }
    }

    return chosen;
}
/// <summary>
/// Picks, uniformly at random, an intent whose target cell is empty in <paramref name="currentGrid"/>.
/// </summary>
/// <remarks>
/// The previous implementation drew random tiles up to 20 times and returned the last
/// draw even when it was occupied; with a single empty cell left that happened in
/// roughly 9.5% of calls ((8/9)^20). The empty tiles are now enumerated first so a
/// valid intent is always returned whenever one exists.
/// </remarks>
/// <param name="currentGrid">Grid to inspect; it is not modified.</param>
/// <returns>
/// A random intent pointing at an empty cell, or <c>Intent.Tile0</c> when the controller
/// reports no empty cell at all (no valid answer exists in that case, so callers must
/// still check the cell before playing it).
/// </returns>
private Intent GetRandomValidIntent(Cell[][] currentGrid)
{
    List<Intent> validIntents = new List<Intent>(9);
    for (int i = 0; i < 9; ++i)
    {
        Intent candidate = (Intent)i;
        if (ticTacToeController.GridIsEmpty(ticTacToeController.GetPositionFromIntent(candidate), currentGrid))
        {
            validIntents.Add(candidate);
        }
    }

    if (validIntents.Count == 0)
    {
        return Intent.Tile0;
    }

    // Integer Random.Range excludes the upper bound, so the index is always in range.
    return validIntents[Random.Range(0, validIntents.Count)];
}
/// <summary>
/// Writes <paramref name="type"/> into the cell of <paramref name="grid"/> addressed by
/// <paramref name="intent"/> and returns that same grid instance.
/// </summary>
/// <remarks>
/// Tile0..Tile2 use second index 2, Tile3..Tile5 use second index 1 and Tile6..Tile8 use
/// second index 0; within each group the first index runs 0, 1, 2.
/// An intent outside Tile0..Tile8 leaves the grid untouched.
/// </remarks>
/// <param name="intent">Tile to mark.</param>
/// <param name="grid">Grid modified in place.</param>
/// <param name="type">Cell type stored in the addressed cell.</param>
/// <returns>The <paramref name="grid"/> argument.</returns>
private Cell[][] ApplyIntentToGrid(Intent intent, Cell[][] grid, CellType type)
{
    int first;
    int second;

    switch (intent)
    {
        case Intent.Tile0: first = 0; second = 2; break;
        case Intent.Tile1: first = 1; second = 2; break;
        case Intent.Tile2: first = 2; second = 2; break;
        case Intent.Tile3: first = 0; second = 1; break;
        case Intent.Tile4: first = 1; second = 1; break;
        case Intent.Tile5: first = 2; second = 1; break;
        case Intent.Tile6: first = 0; second = 0; break;
        case Intent.Tile7: first = 1; second = 0; break;
        case Intent.Tile8: first = 2; second = 0; break;
        default:
            // Unknown intent: nothing to mark.
            return grid;
    }

    grid[first][second].cellTicTacToeType = type;
    return grid;
}
/// <summary>
/// Re-derives the policy of every state in <c>_allStates</c> with <c>GetBestIntent</c>.
/// </summary>
/// <remarks>
/// When at least one state's policy changed, <c>MonteCarloPrediction</c> is run again
/// from <paramref name="currentState"/> using the class-level <c>episodesNumber</c>,
/// <c>everyVisit</c> and <c>onPolicy</c> settings; its return value is ignored.
/// </remarks>
/// <param name="currentState">State handed to the follow-up prediction pass.</param>
/// <returns><c>true</c> when no policy changed, <c>false</c> otherwise.</returns>
private bool MonteCarloImprovement(State currentState)
{
    bool anyPolicyChanged = false;

    foreach (State known in _allStates)
    {
        Intent previous = known.ticTacToePolicy;
        known.ticTacToePolicy = GetBestIntent(known);

        if (previous != known.ticTacToePolicy)
        {
            anyPolicyChanged = true;
        }
    }

    if (anyPolicyChanged)
    {
        // The policy moved, so the value estimates are refreshed under the new policy.
        MonteCarloPrediction(currentState, episodesNumber, everyVisit, onPolicy);
    }

    return !anyPolicyChanged;
}
/// <summary>
/// Computes the state reached from <paramref name="currentState"/> when Circle plays
/// <paramref name="intent"/> and Cross answers with a random valid move.
/// </summary>
/// <remarks>
/// The grid of <paramref name="currentState"/> is copied before being modified.
/// A state already registered for the resulting grid is reused. The lookup is done both
/// right after the Circle move and again after the Cross reply: the states created here
/// store the grid *after* the reply, so checking only the pre-reply grid (as before)
/// appended a duplicate State for the same grid to <c>_allStates</c>.
/// </remarks>
/// <param name="currentState">State the move is played from; it is not modified.</param>
/// <param name="intent">Tile played by Circle.</param>
/// <returns>The existing state for the resulting grid, or a newly registered one.</returns>
private State GetNextState(State currentState, Intent intent)
{
    Cell[][] grid = CopyGrid(currentState.currentGrid);
    grid = ApplyIntentToGrid(intent, grid, CellType.Circle);

    State nextState = GetStateFromGrid(grid);
    if (nextState != null)
    {
        return nextState;
    }

    // Opponent reply. The emptiness check is kept because GetRandomValidIntent cannot
    // return a playable tile when the grid has no empty cell left.
    Intent rdmIntent = GetRandomValidIntent(grid);
    Vector3 player1Pos = ticTacToeController.GetPositionFromIntent(rdmIntent);
    if (ticTacToeController.GridIsEmpty(player1Pos, grid))
    {
        grid[(int)player1Pos.x][(int)player1Pos.z].cellTicTacToeType = CellType.Cross;

        // The grid changed, so it may now match a state that is already known.
        nextState = GetStateFromGrid(grid);
        if (nextState != null)
        {
            return nextState;
        }
    }

    State newState = new State();
    newState.currentGrid = grid;
    newState.ticTacToePolicy = GetRandomValidIntent(grid);
    newState.stateValue = 0;
    _allStates.Add(newState);
    return newState;
}
/// <summary>
/// Lists the states that can follow <paramref name="currentState"/> when Circle plays
/// <paramref name="intent"/>, each paired with its probability.
/// </summary>
/// <remarks>
/// The Circle move is applied to a copy of the state's grid. Every cell still empty
/// afterwards is then tried as the Cross reply, and all replies get the same probability
/// (1 / number of empty cells). A grid with no registered state is represented by a fresh
/// State with zeroed statistics; that State is NOT added to <c>_allStates</c> here.
/// The result is empty when no cell is empty after the Circle move.
/// </remarks>
/// <param name="currentState">State the move is played from; it is not modified.</param>
/// <param name="intent">Tile played by Circle.</param>
/// <returns>Map from reachable state to its probability.</returns>
private Dictionary<State, float> GetPossibleStatesFromIntent(State currentState, Intent intent)
{
    Dictionary<State, float> outcomes = new Dictionary<State, float>();
    Cell[][] afterCircle = ApplyIntentToGrid(intent, CopyGrid(currentState.currentGrid), CellType.Circle);

    // First pass: count the replies available to Cross.
    int openCells = 0;
    for (int tile = 0; tile < 9; tile++)
    {
        if (ticTacToeController.GridIsEmpty(ticTacToeController.GetPositionFromIntent((Intent)tile), afterCircle))
        {
            ++openCells;
        }
    }

    // Uniform probability per reply; only read inside the loop, i.e. when openCells > 0.
    float share = 1.0f / openCells;

    // Second pass: build one outcome per available reply.
    for (int tile = 0; tile < 9; tile++)
    {
        Intent reply = (Intent)tile;
        if (!ticTacToeController.GridIsEmpty(ticTacToeController.GetPositionFromIntent(reply), afterCircle))
        {
            continue;
        }

        Cell[][] afterCross = ApplyIntentToGrid(reply, CopyGrid(afterCircle), CellType.Cross);

        State outcome = GetStateFromGrid(afterCross);
        if (outcome == null)
        {
            // Unknown grid: use a neutral placeholder state.
            outcome = new State();
            outcome.stateValue = 0.0f;
            outcome.currentGrid = afterCross;
            outcome.ticTacToePolicy = GetRandomValidIntent(afterCross);
            outcome.nS = 0.0f;
            outcome.returnS = 0.0f;
        }

        outcomes.Add(outcome, share);
    }

    return outcomes;
}
/// <summary>
/// Estimates state values by simulating games from <paramref name="currentState"/> and
/// averaging the returns observed for each visited state, then makes every known state's
/// policy greedy with respect to those values.
/// </summary>
/// <remarks>
/// Changes compared to the previous version:
/// - States that were not visited (nS == 0) keep their previous value instead of
///   receiving returnS / nS = 0 / 0 = NaN, which made every comparison in
///   GetBestIntent fail for intents leading to them.
/// - The first-visit test compares the visited *states*; it used to compare the
///   episode entries themselves.
/// - The value/policy update that was duplicated in both branches is written once.
/// NOTE(review): in on-policy mode the original returned from inside the episode loop,
/// so only ONE episode is simulated regardless of <paramref name="iteration"/>. That
/// behaviour is preserved here; confirm whether it is intended.
/// </remarks>
/// <param name="currentState">State every simulated game starts from.</param>
/// <param name="iteration">Number of episodes to simulate (off-policy mode).</param>
/// <param name="everyVisit">When true, every occurrence of a state in an episode
/// contributes to its average; when false, only its first occurrence does.</param>
/// <param name="onPolicy">When true, values and policies are updated after the first
/// simulated episode and the method returns.</param>
/// <returns>
/// <c>true</c> when no state's policy changed during the update; <c>false</c> when at
/// least one changed, or when on-policy mode was requested with no episode to run.
/// </returns>
private bool MonteCarloPrediction(State currentState, int iteration, bool everyVisit = false, bool onPolicy = false)
{
    // Reset the per-state statistics accumulated by a previous call.
    foreach (var state in _allStates)
    {
        state.nS = 0;
        state.returnS = 0;
    }

    bool episodeSimulated = false;
    for (int i = 0; i < iteration; ++i)
    {
        _simulatedSARs = new List<SAR>();
        SimulateGame(currentState);
        episodeSimulated = true;

        // Walk the episode backwards, accumulating the return that follows each step.
        float g = 0;
        for (int j = _simulatedSARs.Count - 2; j >= 0; --j)
        {
            g += _simulatedSARs[j + 1].reward;
            var visited = _simulatedSARs[j].state;

            // First-visit mode: ignore this step if the same state occurs earlier in the episode.
            bool seenEarlier = false;
            if (!everyVisit)
            {
                for (int k = 0; k < j; k++)
                {
                    if (_simulatedSARs[k].state == visited)
                    {
                        seenEarlier = true;
                        break;
                    }
                }
            }

            if (everyVisit || !seenEarlier)
            {
                visited.returnS += g;
                ++visited.nS;
            }
        }

        if (onPolicy)
        {
            // Preserved behaviour: on-policy mode stops after the first episode.
            break;
        }
    }

    if (onPolicy && !episodeSimulated)
    {
        // Preserved behaviour: nothing was simulated, so nothing is updated.
        return false;
    }

    // Average return per state; unvisited states keep their previous estimate.
    foreach (var state in _allStates)
    {
        if (state.nS > 0)
        {
            state.stateValue = state.returnS / state.nS;
        }
    }

    // Greedy policy update over every known state.
    bool policyStable = true;
    foreach (var state in _allStates)
    {
        if (state == currentState)
        {
            Debug.Log("updateStatePolicy");
        }

        Intent tempPolicy = state.ticTacToePolicy;
        state.ticTacToePolicy = GetBestIntent(state);
        if (tempPolicy != state.ticTacToePolicy)
        {
            policyStable = false;
        }
    }

    return policyStable;
}