Example #1
0
        /// <summary>
        /// Runs one policy-improvement sweep over <c>gameStateWithActions</c>.
        /// For every state that is neither the goal cell nor an obstacle, the stored intent
        /// is replaced by the intent returned by <c>GetIntentWithBestValue</c>. When at least
        /// one intent changed, <c>PolicyEvaluation</c> is called before returning.
        /// </summary>
        /// <returns>
        /// <c>true</c> when no state changed its intent during the sweep, <c>false</c> otherwise.
        /// </returns>
        public bool PolicyImprovement()
        {
            bool    policyStable = true;
            IPlayer fakePlayer   = new GridWoldPlayer();

            foreach (GameStateWithAction gameStateWithAction in gameStateWithActions)
            {
                Vector2Int currentPos = gameStateWithAction.gameState.GetPos();
                ICell      cell       = worldCells[currentPos.x][currentPos.y];

                // The goal and obstacle cells keep whatever intent they already have.
                bool isGoal = goalX == currentPos.x && goalY == currentPos.y;
                if (isGoal || cell.GetCellType() == CellType.Obstacle)
                {
                    continue;
                }

                // Place the scratch player on the inspected cell before asking for the best intent.
                fakePlayer.SetCell(cell);

                Intent previousIntent = gameStateWithAction.intent;
                Intent bestIntent     = GetIntentWithBestValue(gameStateWithAction, fakePlayer).intent;
                gameStateWithAction.intent = bestIntent;

                if (previousIntent != bestIntent)
                {
                    Debug.Log("State " + gameStateWithAction.gameState.GetPos() + " has " + previousIntent + " redirect to " + bestIntent);
                    policyStable = false;
                }
            }

            if (!policyStable)
            {
                // The policy changed, so the state values are re-evaluated under the new policy.
                Debug.Log(">>>>>>>>> Call PolicyEvaluation");
                PolicyEvaluation();
            }

            return policyStable;
        }
Example #2
0
        /// <summary>
        /// Runs value iteration over <c>gameStateWithActions</c> and then derives the intent
        /// of every state from the resulting values.
        /// </summary>
        /// <remarks>
        /// Each sweep sets the value of every non-goal, non-obstacle state to the best
        /// <c>reward(next) + gamma * value(next)</c> over the moves accepted by
        /// <c>GridWORDOGame.CanMove</c>. Sweeps repeat until the largest change in a sweep
        /// drops below the threshold or the sweep cap is reached. If the cap is reached
        /// without converging, an error is logged and the intents are left untouched.
        /// </remarks>
        public void ValueIteration()
        {
            const float theta     = 0.1f; // convergence threshold on the largest per-sweep change
            const float gamma     = 0.8f; // discount applied to the value of the next state
            const int   maxSweeps = 5000; // safety cap so a non-converging run cannot loop forever

            IPlayer fakePlayer = new GridWoldPlayer();

            float delta;
            int   sweepCount = 0;

            do
            {
                ++sweepCount;
                delta = 0;

                foreach (GameStateWithAction gameStateWithAction in gameStateWithActions)
                {
                    Vector2Int currentPos  = gameStateWithAction.gameState.GetPos();
                    ICell      currentCell = worldCells[currentPos.x][currentPos.y];
                    fakePlayer.SetCell(currentCell);

                    if ((goalX == currentPos.x && goalY == currentPos.y) || currentCell.GetCellType() == CellType.Obstacle)
                    {
                        continue;
                    }

                    float previousValue = gameStateWithAction.gameState.GetValue();

                    // Start below every possible return so that a state whose moves all have a
                    // negative return keeps its true maximum instead of being clamped to 0.
                    float bestValue  = float.NegativeInfinity;
                    bool  hasAnyMove = false;

                    // Intent values 1..4 are cast directly; presumably these are the four
                    // movement intents - confirm against the Intent enum.
                    for (int i = 1; i < 5; ++i)
                    {
                        Intent candidateIntent = (Intent)i;
                        if (!GridWORDOGame.CanMove(fakePlayer, worldCells, candidateIntent))
                        {
                            continue;
                        }

                        IGameState nextGameState = gameStateWithAction.GetNextState(gameStates, candidateIntent);
                        Vector2Int nextPos       = nextGameState.GetPos();

                        float nextReward     = worldCells[nextPos.x][nextPos.y].GetReward();
                        float candidateValue = nextReward + (gamma * nextGameState.GetValue());

                        if (!hasAnyMove || candidateValue > bestValue)
                        {
                            bestValue  = candidateValue;
                            hasAnyMove = true;
                        }
                    }

                    // A state without any accepted move keeps the historical value of 0.
                    gameStateWithAction.gameState.SetValue(hasAnyMove ? bestValue : 0);

                    delta = Math.Max(delta, Math.Abs(previousValue - gameStateWithAction.gameState.GetValue()));
                }
            } while (delta >= theta && sweepCount < maxSweeps);

            // Only a run that is still above the threshold was stopped by the cap; a run that
            // converged exactly on the last allowed sweep is a success and must extract its policy.
            if (delta >= theta)
            {
                Debug.LogError("Safe loop iteration trigger, exit valueIteration");
                return;
            }

            foreach (GameStateWithAction gameStateWithAction in gameStateWithActions)
            {
                Vector2Int currentPos = gameStateWithAction.gameState.GetPos();
                fakePlayer.SetCell(worldCells[currentPos.x][currentPos.y]);

                gameStateWithAction.intent = GetIntentWithBestValue(gameStateWithAction, fakePlayer).intent;
            }
        }
Example #3
0
        /// <summary>
        /// Creates one <c>GridWorldState</c> per grid position and registers it in
        /// <c>gameStates</c>, together with a <c>GameStateWithAction</c> holding a randomly
        /// chosen playable intent (with probability 1) in <c>gameStateWithActions</c>.
        /// </summary>
        /// <param name="maxX">Number of positions along the first index of <paramref name="cells"/>.</param>
        /// <param name="maxY">Number of positions along the second index of <paramref name="cells"/>.</param>
        /// <param name="goalX">First index of the goal position; its state starts with value 1000.</param>
        /// <param name="goalY">Second index of the goal position.</param>
        /// <param name="cells">Grid cells, indexed as <c>cells[x][y]</c>.</param>
        /// <remarks>
        /// A position from which none of the four moves is playable no longer blocks the
        /// method forever: an error is logged and the state is registered with
        /// <c>default(Intent)</c>. NOTE(review): confirm that this fallback intent is
        /// acceptable to the code consuming <c>gameStateWithActions</c>.
        /// </remarks>
        public void InitIntent(int maxX, int maxY, int goalX, int goalY, List <List <ICell> > cells)
        {
            IPlayer fakePlayer = new GridWoldPlayer();

            for (int i = 0; i < maxX; ++i)
            {
                for (int j = 0; j < maxY; ++j)
                {
                    GridWorldState gridWorldState = new GridWorldState();
                    gridWorldState.SetPos(new Vector2Int(i, j));

                    // Only the goal state starts with a non-zero value.
                    if (i == goalX && j == goalY)
                    {
                        gridWorldState.SetValue(1000);
                    }
                    else
                    {
                        gridWorldState.SetValue(0);
                    }

                    gameStates.Add(gridWorldState);

                    fakePlayer.SetCell(cells[i][j]);

                    Intent initialIntent;
                    if (!TryPickRandomPlayableIntent(fakePlayer, cells, out initialIntent))
                    {
                        Debug.LogError("InitIntent: no playable move from cell (" + i + ", " + j + "), using default intent");
                    }

                    Dictionary <Intent, float> intentByProba = new Dictionary <Intent, float> {
                        { initialIntent, 1 }
                    };

                    gameStateWithActions.Add(new GameStateWithAction
                    {
                        intent            = initialIntent,
                        gameState         = gridWorldState,
                        intentProbability = intentByProba
                    });
                }
            }
        }

        /// <summary>
        /// Picks, uniformly at random, one of the four movement intents that the player
        /// reports as playable from its current cell, trying each intent at most once.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a playable intent was found; otherwise <c>false</c>, with
        /// <paramref name="intent"/> set to <c>default(Intent)</c>.
        /// </returns>
        private static bool TryPickRandomPlayableIntent(IPlayer player, List <List <ICell> > cells, out Intent intent)
        {
            // Assumes these four intents are the values 1..4 the random draw used to
            // cover - confirm against the Intent enum.
            Intent[] candidates =
            {
                Intent.WantToGoTop,
                Intent.WantToGoBot,
                Intent.WantToGoLeft,
                Intent.WantToGoRight
            };

            // Lazy Fisher-Yates shuffle: visiting the intents in a random order and keeping
            // the first playable one gives a uniform pick among the playable intents while
            // guaranteeing termination after at most four attempts.
            for (int k = 0; k < candidates.Length; ++k)
            {
                int    swapIndex = Random.Range(k, candidates.Length);
                Intent candidate = candidates[swapIndex];
                candidates[swapIndex] = candidates[k];
                candidates[k]         = candidate;

                if (IsIntentPlayable(player, cells, candidate))
                {
                    intent = candidate;
                    return true;
                }
            }

            intent = default(Intent);
            return false;
        }

        /// <summary>
        /// Asks the player whether the given movement intent is playable on the given grid.
        /// </summary>
        private static bool IsIntentPlayable(IPlayer player, List <List <ICell> > cells, Intent intent)
        {
            switch (intent)
            {
            case Intent.WantToGoTop:
                return player.WantToGoTop(cells);

            case Intent.WantToGoBot:
                return player.WantToGoBot(cells);

            case Intent.WantToGoLeft:
                return player.WantToGoLeft(cells);

            case Intent.WantToGoRight:
                return player.WantToGoRight(cells);

            default:
                return false;
            }
        }