/*
 * Returns the largest Q value stored for nextState, considering only the
 * actions the controller reports as available in that state.
 *
 * Returns 0 when no action is available, so that such a state contributes
 * no future value instead of leaking the float.MinValue search sentinel
 * into the caller's arithmetic.
 */
float maxQ(ObjectState nextState) {
    bool[] possibleActions = controller.getAvailableActions(nextState.getState());
    float[] Qactions = (float[])Q[nextState.getId()];

    float maxValue = float.MinValue;
    bool anyAvailable = false;

    for (int i = 0; i < possibleActions.Length; i++) {
        if (!possibleActions[i]) {
            continue;
        }

        anyAvailable = true;
        if (Qactions[i] > maxValue) {
            maxValue = Qactions[i];
        }
    }

    // Without this check an action-less state would yield float.MinValue.
    return anyAvailable ? maxValue : 0f;
} // end of maxQ
/*
 * Chooses an action for state s with an epsilon-greedy policy.
 *
 * With probability epsilon a random action is drawn from the actions the
 * controller reports as available (exploration). Otherwise the available
 * action with the highest Q value is returned; when several actions share
 * that highest value one of them is picked at random.
 *
 * Returns 0 when no action is available for s.
 */
public int bestAction(ObjectState s) {
    bool[] possibleActions = controller.getAvailableActions(s.getState());

    // Indices of the actions that may be taken in this state.
    List<int> actions = new List<int>();
    for (int i = 0; i < possibleActions.Length; i++) {
        if (possibleActions[i]) {
            actions.Add(i);
        }
    }

    float[] Qactions = (float[])Q[s.getId()];

    // Nothing to choose from: fall back to action 0, which is what the
    // greedy branch yields for an empty list. Without this guard the
    // exploration branch would index into an empty list and throw.
    if (actions.Count == 0) {
        return 0;
    }

    // add an element of randomness for exploration
    if (Random.Range(0.0f, 1.0f) < epsilon) {
        return actions[Random.Range(0, actions.Count)];
    }

    // Collect every action that attains the maximum Q value, in case the
    // maximum is shared by more than one action.
    float maxValue = float.MinValue;
    List<int> bestActions = new List<int>();
    foreach (int a in actions) {
        float candidate = Qactions[a];
        if (candidate > maxValue) {
            maxValue = candidate;
            bestActions.Clear();
            bestActions.Add(a);
        }
        else if (candidate == maxValue) {
            // Exact comparison is intended: only true ties count.
            bestActions.Add(a);
        }
    }

    // choose a random action from the best ones
    if (bestActions.Count > 1) {
        return bestActions[Random.Range(0, bestActions.Count)];
    }
    if (bestActions.Count == 1) {
        return bestActions[0];
    }

    // No candidate compared as a maximum (e.g. NaN entries): default action.
    return 0;
}// end of ba
} // end of Constructor

/*
 * Updates the Q table for the transition oldState --action--> newState.
 * rl = reinforcement learning
 *
 * Applies the Q-learning rule
 *     Q(s,a) = Q(s,a) + alpha * (r(s,a) + gamma * maxQ(s') - Q(s,a))
 * where r is the controller's reward for reaching newState with action.
 * Afterwards the (state, action) pair is recorded through addMemory /
 * maintainMemory and the reward is added to totalReward.
 */
public void rl(ObjectState oldState, int action, ObjectState newState) {
    updateStates(); // add state in the array if new state

    float q = getQvalue(oldState.getId(), action);
    float maxq = maxQ(newState);
    int r = controller.reward(newState.getState(), action);

    // If maxQ found no available action it may hand back float.MinValue;
    // treat that as "no future value" so the update stays finite.
    float futureValue = maxq > float.MinValue ? maxq : 0f;

    // Use the best future value as the documented rule requires; the
    // update previously substituted the constant 1 for maxQ(s').
    float value = q + alpha * (r + gamma * futureValue - q);

    Debug.Log("q: " + q + " maxq: " + maxq);
    Debug.Log("reward: " + r + " value: " + value);

    updateQvalue(oldState.getId(), action, value); // q[oldstate,action] = value

    addMemory(oldState.getId(), action);
    maintainMemory();

    totalReward += r;
} // end of rl
}// end of ba

/*
 * Returns a uniformly random action among those the controller reports as
 * available in state s, or 0 when no action is available.
 */
int randomAction(ObjectState s) {
    updateStates();

    bool[] possibleActions = controller.getAvailableActions(s.getState());

    int availableCount = 0;
    foreach (bool available in possibleActions) {
        if (available) {
            availableCount++;
        }
    }

    if (availableCount == 0) {
        return 0;
    }

    // The integer Random.Range excludes its upper bound, so this yields
    // 0 .. availableCount - 1. (The previous "Length - 1" bound could
    // never select the last action.)
    int pick = Random.Range(0, availableCount);

    // Walk to the pick-th available action.
    for (int i = 0; i < possibleActions.Length; i++) {
        if (!possibleActions[i]) {
            continue;
        }
        if (pick == 0) {
            return i;
        }
        pick--;
    }

    return 0; // not reached: pick is always below availableCount
} // end of randomaction