Example #1
    public IEnumerator Learn(ReinforcementProblem problem,
                             int numIterations,
                             float alpha,
                             float gamma,
                             float explorationRandomness,
                             float walkLength)
    {
        if (store == null)
        {
            yield break;
        }
        // start from a random state
        GameState state = problem.GetRandomState();

        for (int i = 0; i < numIterations; i++)
        {
            // yield the current frame so the game keeps running
            yield return null;

            // cut the walk short and restart from a random state with probability walkLength
            if (Random.value < walkLength)
            {
                state = problem.GetRandomState();
            }
            // get the available actions for the current game state
            GameAction[] actions = problem.GetAvailableActions(state);
            GameAction   action;

            // explore a random action with probability explorationRandomness,
            // otherwise exploit the best known action
            if (Random.value < explorationRandomness)
            {
                action = GetRandomAction(actions);
            }
            else
            {
                action = store.GetBestAction(state);
            }

            // take the selected action on the current state, obtaining the new state and the reward
            float     reward   = 0f;
            GameState newState = problem.TakeAction(state, action, ref reward);

            // get the Q value of the current state/action pair, and the maximum
            // Q value attainable from the new state (via its best action)
            float      q          = store.GetQValue(state, action);
            GameAction bestAction = store.GetBestAction(newState);
            float      maxQ       = store.GetQValue(newState, bestAction);

            // apply the Q-learning update rule
            q = (1f - alpha) * q + alpha * (reward + gamma * maxQ);
            // store the computed Q value, keyed by its state/action pair
            store.StoreQValue(state, action, q);
            state = newState;
        }
    }
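
A minimal sketch of how this coroutine might be driven from a Unity script. The host class name QLearner, the field wiring, and the parameter values are assumptions for illustration; the original example only shows the Learn method itself.

    using UnityEngine;

    public class QLearningRunner : MonoBehaviour
    {
        // hypothetical fields; the original snippet does not show its host class
        public QLearner learner;              // object exposing the Learn coroutine above
        public ReinforcementProblem problem;  // the problem to explore

        private void Start()
        {
            // run 10,000 learning steps (one per frame): alpha = 0.1, gamma = 0.9,
            // explore randomly 20% of the time, restart the walk 5% of the time
            StartCoroutine(learner.Learn(problem, 10000, 0.1f, 0.9f, 0.2f, 0.05f));
        }
    }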
Example #2
    public IEnumerator Learn(
        ReinforcementProblem problem,
        int numIterations,
        float alpha,
        float gamma,
        float rho,
        float nu)
    {
        if (store == null)
        {
            yield break;
        }

        GameState state = problem.GetRandomState();

        for (int i = 0; i < numIterations; i++)
        {
            yield return null;

            // restart the walk from a random state with probability nu
            if (Random.value < nu)
            {
                state = problem.GetRandomState();
            }
            GameAction[] actions = problem.GetAvailableActions(state);
            GameAction action;
            // explore with probability rho, otherwise exploit the best known action
            if (Random.value < rho)
            {
                action = GetRandomAction(actions);
            }
            else
            {
                action = store.GetBestAction(state);
            }
            float     reward   = 0f;
            GameState newState = problem.TakeAction(state, action, ref reward);
            float      q          = store.GetQValue(state, action);
            GameAction bestAction = store.GetBestAction(newState);
            float      maxQ       = store.GetQValue(newState, bestAction);
            // perform QLearning
            q = (1f - alpha) * q + alpha * (reward + gamma * maxQ);
            store.StoreQValue(state, action, q);
            state = newState;
        }
        yield break;
    }
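
Both examples above implement the same tabular Q-learning update; only the parameter names differ (rho plays the role of explorationRandomness, nu the role of walkLength). Written out, the update applied on each iteration is

    Q(s,a) \leftarrow (1-\alpha)\,Q(s,a) + \alpha\left(r + \gamma \max_{a'} Q(s',a')\right)

where s' is the state reached by taking action a in state s, r is the reward returned by TakeAction, alpha is the learning rate, and gamma is the discount factor.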
Example #3
        // Updates the store by investigating the problem
        // nu: probability in [0..1] of restarting from a random state
        private static readonly Random rnd = new Random(); // shared so repeated calls don't reseed with the same value

        public void QLearning(ReinforcementProblem problem, float alpha, float gamma, float rho, float nu)
        {
            // Pick a new state every once in a while
            float random = (float)rnd.NextDouble();

            if (random < nu)
            {
                current_state = problem.getRandomState();
            }

            List<Action> avaliableActions = problem.getAvaliableActions(current_state);

            // index of a random action, used when exploring
            int randomActionIndex = rnd.Next(0, avaliableActions.Count);

            Action bestAction;

            random = (float)rnd.NextDouble();

            // explore with probability rho, otherwise exploit the stored best action
            if (random < rho)
            {
                bestAction = avaliableActions[randomActionIndex];
            }
            }
            else
            {
                bestAction = store.getBestAction(current_state);
            }

            Pair par = problem.takeAction(current_state, bestAction);

            // Get the current q from the store
            float Q = store.getQValue(current_state, bestAction);

            // Get the q of the best action from the new state
            float maxQ = store.getQValue(par.new_state, store.getBestAction(par.new_state));

            // Perform the q learning
            Q = (1 - alpha) * Q + alpha * (par.reward + gamma * maxQ);

            // Store the new Q-value
            store.storeQValue(current_state, bestAction, Q);

            // And update the state
            current_state = par.new_state;
        }
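
Example #3's takeAction returns a Pair that the snippet never defines; from its usage (par.new_state and par.reward) a minimal definition might look like the following. The field types are assumptions: State stands in for whatever state type the original project uses.

        // hypothetical Pair type, inferred from how it is used above
        public class Pair
        {
            public State new_state;   // the state reached after taking the action
            public float reward;      // the reward received for that transition

            public Pair(State newState, float reward)
            {
                this.new_state = newState;
                this.reward    = reward;
            }
        }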
Example #4
        private void Start()
        {
            // get all obstacles and save them into obstacles
            getBadPositions();

            // get all mid positions
            positions = new MidPoint[10, 10];
            calculate_mid_positions(positions);

            // save all mid positions to a file (left commented out)

            //var data_positions = JsonWriter.Serialize(positions);
            //var streamWriter = new StreamWriter(@"D:\midpositions.json");
            //streamWriter.Write(data_positions);
            //streamWriter.Close();

            // generate all possible states
            createStates();

            // initialize q-learning matrix
            store = new QValueStore(n_states, n_actions);

            // read q-learning matrix
            store.readMatrix();

            // initialize problem
            problem = new ReinforcementProblem();

            //store.printQValueStore();

            // one iteration test
            //QLearning(problem, 1, 1, 0, 0, 0);

            hp_text_object      = GameObject.Find("HP").GetComponent<Text>();
            energy_text_object  = GameObject.Find("Energy").GetComponent<Text>();
            actions_text_object = GameObject.Find("Actions").GetComponent<Text>();
        }
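
Example #4 constructs a QValueStore(n_states, n_actions) but does not show its implementation. A minimal array-backed sketch, assuming integer state and action ids and the accessor names used in Example #3, could look like this; readMatrix's on-disk format is not shown in the source, so it is omitted here.

        // hypothetical tabular store: one Q value per (state, action) pair
        public class QValueStore
        {
            private readonly float[,] q;

            public QValueStore(int nStates, int nActions)
            {
                q = new float[nStates, nActions];
            }

            public float getQValue(int state, int action)
            {
                return q[state, action];
            }

            public void storeQValue(int state, int action, float value)
            {
                q[state, action] = value;
            }

            // the action index with the highest stored Q value for this state
            public int getBestAction(int state)
            {
                int best = 0;
                for (int a = 1; a < q.GetLength(1); a++)
                {
                    if (q[state, a] > q[state, best])
                    {
                        best = a;
                    }
                }
                return best;
            }
        }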