예제 #1
0
    /// <summary>
    /// Coroutine that runs the Q-learning update loop against <paramref name="problem"/>,
    /// performing one update per iteration and yielding once at the start of each iteration.
    /// </summary>
    /// <param name="problem">Supplies random states, the actions available in a state, and the resulting state/reward for taking an action.</param>
    /// <param name="numIterations">Number of learning iterations to perform.</param>
    /// <param name="alpha">Learning rate: weight of the new sample versus the previously stored Q-value.</param>
    /// <param name="gamma">Discount factor applied to the best Q-value of the resulting state.</param>
    /// <param name="explorationRandomness">Compared against <c>Random.value</c>; when the random value is lower, a random action is taken instead of the stored best action.</param>
    /// <param name="walkLength">Compared against <c>Random.value</c>; when the random value is lower, the walk restarts from a new random state.</param>
    /// <returns>An enumerator that yields <c>null</c> once per iteration; it ends immediately when <c>store</c> is null.</returns>
    public IEnumerator Learn(ReinforcementProblem problem,
                             int numIterations,
                             float alpha,
                             float gamma,
                             float explorationRandomness,
                             float walkLength)
    {
        // Nothing can be learned without a place to keep the Q-values.
        if (store == null)
        {
            yield break;
        }

        // Start the walk from a random state.
        GameState state = problem.GetRandomState();

        for (int i = 0; i < numIterations; i++)
        {
            // Hand control back to the caller so a single iteration runs per resume.
            yield return null;

            // Occasionally abandon the current walk and restart from a random state.
            if (Random.value < walkLength)
            {
                state = problem.GetRandomState();
            }

            // Get the actions available from the current game state.
            GameAction[] actions = problem.GetAvailableActions(state);
            GameAction   action;

            // Explore with a random action, or exploit the best action known so far.
            if (Random.value < explorationRandomness)
            {
                action = GetRandomAction(actions);
            }
            else
            {
                action = store.GetBestAction(state);
            }

            // Take the selected action: obtain the resulting state and its reward.
            float     reward   = 0f;
            GameState newState = problem.TakeAction(state, action, ref reward);

            // Current estimate for (state, action) and the best estimate
            // reachable from the resulting state.
            float      q          = store.GetQValue(state, action);
            GameAction bestAction = store.GetBestAction(newState);
            float      maxQ       = store.GetQValue(newState, bestAction);

            // Q-learning update: blend the previous estimate q with the new sample
            // (reward + gamma * maxQ). The (1 - alpha) term must weight q itself,
            // otherwise the stored value is ignored and nothing accumulates.
            q = (1f - alpha) * q + alpha * (reward + gamma * maxQ);

            // Store the updated Q-value, indexed by the state and action it belongs to.
            store.StoreQValue(state, action, q);
            state = newState;
        }
    }
예제 #2
0
    /// <summary>
    /// Q-learning coroutine: performs <paramref name="numIterations"/> updates against
    /// <paramref name="problem"/>, yielding <c>null</c> at the start of every iteration.
    /// </summary>
    /// <param name="problem">Source of random states, available actions, and action outcomes.</param>
    /// <param name="numIterations">How many update steps to run.</param>
    /// <param name="alpha">Weight of the new sample in the Q-value blend.</param>
    /// <param name="gamma">Multiplier applied to the best Q-value of the resulting state.</param>
    /// <param name="rho">Compared against <c>Random.value</c>; a lower random value selects a random action.</param>
    /// <param name="nu">Compared against <c>Random.value</c>; a lower random value jumps to a new random state.</param>
    /// <returns>An enumerator that finishes immediately when <c>store</c> is null.</returns>
    public IEnumerator Learn(
        ReinforcementProblem problem,
        int numIterations,
        float alpha,
        float gamma,
        float rho,
        float nu)
    {
        if (store == null)
        {
            yield break;
        }

        GameState current = problem.GetRandomState();

        for (int remaining = numIterations; remaining > 0; remaining--)
        {
            yield return null;

            // Random restart of the walk.
            if (Random.value < nu)
            {
                current = problem.GetRandomState();
            }

            // Available actions are queried before the exploration roll, every iteration.
            GameAction[] candidates = problem.GetAvailableActions(current);
            GameAction chosen = Random.value < rho
                ? GetRandomAction(candidates)
                : store.GetBestAction(current);

            // Apply the action; the reward comes back through the ref argument.
            float reward = 0f;
            GameState next = problem.TakeAction(current, chosen, ref reward);

            // Stored estimate for this pair, and the best estimate from the next state.
            float oldQ = store.GetQValue(current, chosen);
            GameAction bestNext = store.GetBestAction(next);
            float bestNextQ = store.GetQValue(next, bestNext);

            // Q-learning blend of the old estimate and the new sample.
            float updatedQ = (1f - alpha) * oldQ + alpha * (reward + gamma * bestNextQ);
            store.StoreQValue(current, chosen, updatedQ);

            current = next;
        }
    }