示例#1
0
    /// <summary>
    /// Applies one Q-learning update to the value stored for <paramref name="observedPlay"/>
    /// in the Q table belonging to <paramref name="owner"/>.
    /// </summary>
    /// <remarks>
    /// The update is <c>Q += LearningRate * (reward + DiscountFactor * bestNextQ - Q)</c>, where
    /// <c>bestNextQ</c> comes from <see cref="CheckBestQValueAtGameState"/> evaluated on
    /// <c>GameManager.I.GetCellsOwner(GameManager.I.Cells)</c> (presumably the board as it stands
    /// when this method is called - confirm against the caller). A play that is not yet in the
    /// table is treated as having a Q value of 0.
    /// </remarks>
    /// <param name="observedPlay">The state/action entry whose Q value is updated.</param>
    /// <param name="owner">
    /// Selects the table: <c>Agent1</c> uses <c>QDictionaryAgent1</c>, any other value uses
    /// <c>QDictionaryAgent2</c>. Also passed to the reward lookup.
    /// </param>
    public void UpdateQValue(GameState observedPlay, Cell.CellOwner owner)
    {
        int reward = GameManager.I.LearningSession.Reward(owner);

        // Both agents follow the exact same update rule; only the backing table differs.
        var qTable = owner == Cell.CellOwner.Agent1 ? QDictionaryAgent1 : QDictionaryAgent2;

        // Single lookup instead of ContainsKey + repeated indexer reads.
        float currentQ;
        if (!qTable.TryGetValue(observedPlay, out currentQ))
        {
            currentQ = 0;
        }

        float bestNextQ = CheckBestQValueAtGameState(GameManager.I.GetCellsOwner(GameManager.I.Cells), owner);

        qTable[observedPlay] = currentQ + LearningRate * (reward + DiscountFactor * bestNextQ - currentQ);
    }
示例#2
0
    /// <summary>
    /// Returns the largest Q value recorded for <paramref name="owner"/> among the nine
    /// candidate actions (indices 0-8) on the given board.
    /// </summary>
    /// <remarks>
    /// The running maximum starts at 0, so the result is never negative: actions missing
    /// from the table and actions with a non-positive Q value do not affect it.
    /// </remarks>
    /// <param name="owners">Cell owners used to build each candidate <c>GameState</c>.</param>
    /// <param name="owner">
    /// <c>Agent1</c> reads <c>QDictionaryAgent1</c>; any other value reads <c>QDictionaryAgent2</c>.
    /// </param>
    /// <returns>The highest stored Q value above 0, or 0 when there is none.</returns>
    private float CheckBestQValueAtGameState(Cell.CellOwner[] owners, Cell.CellOwner owner)
    {
        var qTable = owner == Cell.CellOwner.Agent1 ? QDictionaryAgent1 : QDictionaryAgent2;

        float bestQ = 0;

        for (int action = 0; action < 9; action++)
        {
            GameState candidate = new GameState(owners, action);

            float storedQ;
            if (!qTable.TryGetValue(candidate, out storedQ))
            {
                continue;
            }

            if (storedQ > bestQ)
            {
                bestQ = storedQ;
            }
        }

        return bestQ;
    }
示例#3
0
    /// <summary>
    /// Chooses the action (cell index 0-8) with the highest Q value for <paramref name="owner"/>
    /// on the given board.
    /// </summary>
    /// <remarks>
    /// <para>
    /// Only candidates accepted by <c>IsValidAction</c> are considered. A valid candidate that
    /// is not yet in the agent's table is inserted with a Q value of 0 as a side effect.
    /// </para>
    /// <para>
    /// Candidates whose Q value is exactly 0 are collected separately and one of them is picked
    /// uniformly at random whenever no candidate has a strictly positive Q value. Otherwise the
    /// candidate with the highest non-zero Q value is returned (first one found wins ties).
    /// </para>
    /// </remarks>
    /// <param name="owners">Cell owners used to build each candidate <c>GameState</c>.</param>
    /// <param name="owner">
    /// <c>Agent1</c> uses <c>QDictionaryAgent1</c>; any other value uses <c>QDictionaryAgent2</c>.
    /// </param>
    /// <returns>The selected candidate, or <c>null</c> when no candidate is a valid action.</returns>
    public GameState CheckBestActionAtGameState(Cell.CellOwner[] owners, Cell.CellOwner owner)
    {
        // Both agents use the same selection rule; only the backing table differs.
        var qTable = owner == Cell.CellOwner.Agent1 ? QDictionaryAgent1 : QDictionaryAgent2;

        GameState bestAction = null;
        float bestQ = float.NegativeInfinity;
        List<GameState> zeroValuedActions = new List<GameState>();

        for (int i = 0; i < 9; i++)
        {
            GameState gameState = new GameState(owners, i);

            if (!IsValidAction(gameState))
            {
                continue;
            }

            float q;
            if (!qTable.TryGetValue(gameState, out q))
            {
                // First time this action is seen: register it with a neutral value.
                q = 0;
                qTable[gameState] = 0;
            }

            // Exact comparison is intentional: entries are initialised to literal 0.
            if (q == 0)
            {
                zeroValuedActions.Add(gameState);
            }
            else if (q > bestQ)
            {
                bestQ = q;
                bestAction = gameState;
            }
        }

        // bestAction only ever holds a non-zero Q value, so a negative bestQ means every
        // learned action is worse than the zero-valued ones. Prefer those in that case
        // instead of returning a move already known to be bad.
        bool noPositiveAction = bestAction == null || bestQ < 0;
        if (noPositiveAction && zeroValuedActions.Count > 0)
        {
            // Random.Range(int, int) excludes the upper bound, so pass Count (not Count - 1)
            // to make the last candidate selectable.
            return zeroValuedActions[Random.Range(0, zeroValuedActions.Count)];
        }

        return bestAction;
    }