/// <summary>
/// Runs one policy-improvement pass over <c>gameStateWithActions</c>.
/// For every state that is neither the goal position nor an obstacle cell, the stored
/// intent is replaced by the intent returned by <c>GetIntentWithBestValue</c>.
/// When at least one intent changed, <c>PolicyEvaluation</c> is called before returning.
/// </summary>
/// <returns>
/// <c>true</c> when no stored intent changed during this pass; otherwise <c>false</c>.
/// </returns>
public bool PolicyImprovement()
{
    bool policyStable = true;
    IPlayer fakePlayer = new GridWoldPlayer();

    foreach (GameStateWithAction gameStateWithAction in gameStateWithActions)
    {
        Vector2Int currentPos = gameStateWithAction.gameState.GetPos();
        ICell cell = worldCells[currentPos.x][currentPos.y];

        // The goal and obstacle cells keep whatever intent they already have.
        bool isGoal = goalX == currentPos.x && goalY == currentPos.y;
        if (isGoal || cell.GetCellType() == CellType.Obstacle)
        {
            continue;
        }

        // Place the throw-away player on the cell under review before asking for the best intent.
        fakePlayer.SetCell(cell);

        Intent previousIntent = gameStateWithAction.intent;
        Intent intentWithBestValue = GetIntentWithBestValue(gameStateWithAction, fakePlayer).intent;
        gameStateWithAction.intent = intentWithBestValue;

        if (previousIntent != intentWithBestValue)
        {
            Debug.Log("State " + gameStateWithAction.gameState.GetPos() + " has " + previousIntent + " redirect to " + intentWithBestValue);
            policyStable = false;
        }
    }

    if (!policyStable)
    {
        // The policy changed, so the state values are re-evaluated.
        Debug.Log(">>>>>>>>> Call PolicyEvaluation");
        PolicyEvaluation();
    }

    return policyStable;
}
/// <summary>
/// Runs in-place value iteration over <c>gameStateWithActions</c> and then stores, for every
/// state, the intent returned by <c>GetIntentWithBestValue</c>.
/// </summary>
/// <remarks>
/// Each sweep sets the value of every state that is neither the goal position nor an obstacle
/// cell to the maximum, over the intents accepted by <c>GridWORDOGame.CanMove</c>, of
/// <c>reward(next) + gamma * value(next)</c>. Sweeps repeat until the largest value change in
/// a sweep drops below the threshold or the sweep cap is reached. If the cap is reached
/// without convergence an error is logged and the stored intents are left untouched.
/// </remarks>
public void ValueIteration()
{
    const float theta = 0.1f;    // convergence threshold on the largest per-sweep change
    const float gamma = 0.8f;    // discount factor
    const int maxSweeps = 5000;  // safety cap on the number of sweeps

    IPlayer fakePlayer = new GridWoldPlayer();
    float delta;
    int sweepCount = 0;

    do
    {
        ++sweepCount;
        delta = 0;

        foreach (GameStateWithAction gameStateWithAction in gameStateWithActions)
        {
            Vector2Int currentPos = gameStateWithAction.gameState.GetPos();
            ICell currentCell = worldCells[currentPos.x][currentPos.y];
            fakePlayer.SetCell(currentCell);

            bool isGoal = goalX == currentPos.x && goalY == currentPos.y;
            if (isGoal || currentCell.GetCellType() == CellType.Obstacle)
            {
                continue;
            }

            float previousValue = gameStateWithAction.gameState.GetValue();

            // Start below every finite candidate so that negative returns are not floored to 0.
            float bestValue = float.NegativeInfinity;

            // Intent values 1..4 are the four movement intents probed here.
            for (int i = 1; i < 5; ++i)
            {
                Intent intent = (Intent)i;
                if (!GridWORDOGame.CanMove(fakePlayer, worldCells, intent))
                {
                    continue;
                }

                IGameState nextGameState = gameStateWithAction.GetNextState(gameStates, intent);
                Vector2Int nextPos = nextGameState.GetPos();
                float nextReward = worldCells[nextPos.x][nextPos.y].GetReward();
                float candidateValue = nextReward + (gamma * nextGameState.GetValue());

                if (candidateValue > bestValue)
                {
                    bestValue = candidateValue;
                }
            }

            // No intent was accepted for this state: fall back to 0.
            if (float.IsNegativeInfinity(bestValue))
            {
                bestValue = 0;
            }

            gameStateWithAction.gameState.SetValue(bestValue);
            delta = Math.Max(delta, Math.Abs(previousValue - gameStateWithAction.gameState.GetValue()));
        }
    } while (delta >= theta && sweepCount < maxSweeps);

    // Test delta rather than the sweep counter: a run that converges exactly on the last
    // allowed sweep is a success and must still have its policy extracted.
    if (delta >= theta)
    {
        Debug.LogError("Safe loop iteration trigger, exit valueIteration");
        return;
    }

    // Store the intent chosen by GetIntentWithBestValue for every state.
    foreach (GameStateWithAction gameStateWithAction in gameStateWithActions)
    {
        Vector2Int currentPos = gameStateWithAction.gameState.GetPos();
        fakePlayer.SetCell(worldCells[currentPos.x][currentPos.y]);
        gameStateWithAction.intent = GetIntentWithBestValue(gameStateWithAction, fakePlayer).intent;
    }
}
/// <summary>
/// Creates one <c>GridWorldState</c> per grid position and pairs it with a randomly chosen
/// initial intent, appending the results to <c>gameStates</c> and <c>gameStateWithActions</c>.
/// </summary>
/// <param name="maxX">Exclusive upper bound of the first grid index.</param>
/// <param name="maxY">Exclusive upper bound of the second grid index.</param>
/// <param name="goalX">First index of the goal position; that state starts with value 1000.</param>
/// <param name="goalY">Second index of the goal position.</param>
/// <param name="cells">Grid cells, indexed as <c>cells[i][j]</c>.</param>
/// <remarks>
/// Every state other than the goal starts with value 0. The initial intent is drawn uniformly
/// among the four movement intents that the player accepts from that cell, and is stored with
/// probability 1.
/// </remarks>
public void InitIntent(int maxX, int maxY, int goalX, int goalY, List<List<ICell>> cells)
{
    IPlayer fakePlayer = new GridWoldPlayer();
    List<Intent> playableIntents = new List<Intent>(4);

    for (int i = 0; i < maxX; ++i)
    {
        for (int j = 0; j < maxY; ++j)
        {
            GridWorldState gridWorldState = new GridWorldState();
            gridWorldState.SetPos(new Vector2Int(i, j));
            gridWorldState.SetValue(i == goalX && j == goalY ? 1000 : 0);
            gameStates.Add(gridWorldState);

            fakePlayer.SetCell(cells[i][j]);

            // Probe each movement intent exactly once instead of redrawing until one is
            // accepted: the redraw loop never ends for a cell with no accepted move.
            // NOTE(review): assumes the WantToGo* calls are side-effect-free queries - confirm.
            playableIntents.Clear();
            for (int candidate = 1; candidate < 5; ++candidate)
            {
                bool canMove;
                switch ((Intent)candidate)
                {
                    case Intent.WantToGoTop:
                        canMove = fakePlayer.WantToGoTop(cells);
                        break;
                    case Intent.WantToGoBot:
                        canMove = fakePlayer.WantToGoBot(cells);
                        break;
                    case Intent.WantToGoLeft:
                        canMove = fakePlayer.WantToGoLeft(cells);
                        break;
                    case Intent.WantToGoRight:
                        canMove = fakePlayer.WantToGoRight(cells);
                        break;
                    default:
                        canMove = false;
                        break;
                }

                if (canMove)
                {
                    playableIntents.Add((Intent)candidate);
                }
            }

            // Uniform pick among the accepted intents. When none is accepted, keep an
            // arbitrary draw from the same 1..4 range so the state still gets an entry.
            // NOTE(review): that fallback intent is not validated - confirm callers tolerate it.
            Intent chosenIntent = playableIntents.Count > 0
                ? playableIntents[Random.Range(0, playableIntents.Count)]
                : (Intent)Random.Range(1, 5);

            Dictionary<Intent, float> intentByProba = new Dictionary<Intent, float>
            {
                { chosenIntent, 1 }
            };

            gameStateWithActions.Add(new GameStateWithAction
            {
                intent = chosenIntent,
                gameState = gridWorldState,
                intentProbability = intentByProba
            });
        }
    }
}