Beispiel #1
0
        /// <summary>
        /// Examines the intents whose integer values are 1 through 4 and selects the one that
        /// <c>GridWORDOGame.CanMove</c> allows and whose successor state reports the highest value.
        /// </summary>
        /// <param name="gameStateWithAction">State whose successor states are evaluated.</param>
        /// <param name="fakePlayer">Player passed to <c>GridWORDOGame.CanMove</c> for the legality check.</param>
        /// <returns>
        /// The selected intent together with its value. When none of the examined intents is selected,
        /// the result holds <c>Intent.Nothing</c> and <c>float.MinValue</c>.
        /// </returns>
        public IntentWithValueState GetIntentWithBestValue(GameStateWithAction gameStateWithAction, IPlayer fakePlayer)
        {
            Intent bestIntent = Intent.Nothing;
            float  bestValue  = float.MinValue;

            for (int intentIndex = 1; intentIndex <= 4; intentIndex++)
            {
                Intent candidate = (Intent)intentIndex;

                if (!GridWORDOGame.CanMove(fakePlayer, worldCells, candidate))
                {
                    continue;
                }

                float candidateValue = gameStateWithAction.GetNextState(gameStates, candidate).GetValue();

                // Strictly greater: on a tie the earlier intent is kept.
                if (candidateValue > bestValue)
                {
                    bestValue  = candidateValue;
                    bestIntent = candidate;
                }
            }

            IntentWithValueState result = new IntentWithValueState
            {
                intent = bestIntent,
                value = bestValue
            };

            return result;
        }
Beispiel #2
0
        /// <summary>
        /// Runs value iteration over every entry of <c>gameStateWithActions</c> and, once the values
        /// have converged, stores the best intent on each entry.
        /// </summary>
        /// <remarks>
        /// Each sweep replaces a state's value with the maximum, over the legal intents (integer
        /// values 1 through 4), of <c>reward(next cell) + gamma * value(next state)</c>. The goal cell
        /// and obstacle cells are skipped. Sweeps repeat until the largest change in a sweep drops
        /// below <c>theta</c> or the sweep limit is reached; in the latter case an error is logged
        /// and no intents are assigned.
        /// </remarks>
        public void ValueIteration()
        {
            const float theta     = 0.1f; // convergence threshold on the largest per-sweep change
            const float gamma     = 0.8f; // discount applied to the successor state's value
            const int   maxSweeps = 5000; // safety cap so a non-converging run cannot loop forever

            IPlayer fakePlayer = new GridWoldPlayer();

            float delta;
            int   sweepCount = 0;

            do
            {
                ++sweepCount;
                delta = 0;

                foreach (GameStateWithAction gameStateWithAction in gameStateWithActions)
                {
                    Vector2Int currentPos  = gameStateWithAction.gameState.GetPos();
                    ICell      currentCell = worldCells[currentPos.x][currentPos.y];
                    fakePlayer.SetCell(currentCell);

                    bool isGoal = goalX == currentPos.x && goalY == currentPos.y;
                    if (isGoal || currentCell.GetCellType() == CellType.Obstacle)
                    {
                        continue;
                    }

                    float oldValue = gameStateWithAction.gameState.GetValue();

                    // Seed the maximum below every possible backup instead of at 0, so that
                    // states whose best backup is negative are not clamped to 0.
                    float bestValue    = float.NegativeInfinity;
                    bool  hasLegalMove = false;

                    for (int i = 1; i < 5; ++i)
                    {
                        Intent intent = (Intent)i;

                        if (!GridWORDOGame.CanMove(fakePlayer, worldCells, intent))
                        {
                            continue;
                        }

                        IGameState nextGameState = gameStateWithAction.GetNextState(gameStates, intent);
                        Vector2Int nextPos       = nextGameState.GetPos();

                        float nextReward = worldCells[nextPos.x][nextPos.y].GetReward();
                        float candidate  = nextReward + (gamma * nextGameState.GetValue());

                        if (!hasLegalMove || candidate > bestValue)
                        {
                            bestValue    = candidate;
                            hasLegalMove = true;
                        }
                    }

                    // A state without any legal move keeps the previous behavior: its value becomes 0.
                    float newValue = hasLegalMove ? bestValue : 0f;

                    gameStateWithAction.gameState.SetValue(newValue);

                    delta = Math.Max(delta, Math.Abs(oldValue - gameStateWithAction.gameState.GetValue()));
                }
            } while (delta >= theta && sweepCount < maxSweeps);

            // Test convergence itself rather than the sweep counter: a run that converges exactly
            // on the last allowed sweep is a success and must still get its intents assigned.
            if (delta >= theta)
            {
                Debug.LogError("Safe loop iteration trigger, exit valueIteration");
                return;
            }

            foreach (GameStateWithAction gameStateWithAction in gameStateWithActions)
            {
                Vector2Int currentPos = gameStateWithAction.gameState.GetPos();
                fakePlayer.SetCell(worldCells[currentPos.x][currentPos.y]);

                gameStateWithAction.intent = GetIntentWithBestValue(gameStateWithAction, fakePlayer).intent;
            }
        }