} // end of Constructor

/*
 * Applies one Q-learning step for the transition (oldState, action) -> newState:
 *
 *     Q(s,a) = Q(s,a) + alpha * (r(s,a) + gamma * maxQ(s') - Q(s,a))
 *
 * rl = reinforcement learning
 *
 * oldState: state the action was taken in; its Q entry is the one rewritten
 * action:   index of the action that was taken
 * newState: state reached afterwards; the reward is asked from the controller
 *           for this state and the future value is taken from its Q row
 *
 * Besides rewriting the Q entry, the (oldState, action) pair is passed to
 * addMemory(), maintainMemory() is called and the reward is added to totalReward.
 */
public void rl(ObjectState oldState, int action, ObjectState newState) {
    updateStates(); // add state in the array if new state

    float q = getQvalue(oldState.getId(), action);
    float maxq = maxQ(newState);
    int r = controller.reward(newState.getState(), action);

    // float.MinValue is the seed of a max search, i.e. nothing was selectable
    // in newState. It must never enter the update (it would drive the entry
    // towards -3.4e38), so such a state contributes no future value.
    float futureValue = maxq > float.MinValue ? maxq : 0f;

    // Bootstrap from the best value of the next state, as the formula above
    // requires (a constant 1 was used here before, which ignored newState).
    float value = q + alpha * (r + gamma * futureValue - q);

    Debug.Log("q: " + q + " maxq: " + maxq);
    Debug.Log("reward: " + r + " value: " + value);

    updateQvalue(oldState.getId(), action, value); // q[oldstate,action] = value

    addMemory(oldState.getId(), action);
    maintainMemory();

    totalReward += r;

    // NOTE: only the latest transition is updated; no replay pass over the
    // entries stored in memory is performed here.
} // end of rl
/*
 * Returns the maximum Q value from the q table for the next state, looking
 * only at the actions the controller reports as available in that state.
 *
 * nextState: state whose Q row is inspected
 *
 * Returns 0 when no action is available at all: there is nothing to
 * bootstrap from, and handing back the float.MinValue search seed would
 * poison any Q update that uses the result.
 */
float maxQ(ObjectState nextState) {
    bool[] possibleActions = controller.getAvailableActions(nextState.getState());
    float[] Qactions = (float[])Q[nextState.getId()];

    float maxValue = float.MinValue;
    bool anyAvailable = false;

    for (int i = 0; i < possibleActions.Length; i++) {
        if (!possibleActions[i]) {
            continue;
        }

        anyAvailable = true;
        float value = Qactions[i];
        if (value > maxValue) {
            maxValue = value;
        }
    }

    return anyAvailable ? maxValue : 0f;
} // end of maxQ
/*
 * Picks an action for state s with an epsilon-greedy rule.
 *
 * With probability epsilon a random available action is returned
 * (exploration). Otherwise the available action with the highest Q value is
 * returned; when several actions share that highest value, one of them is
 * chosen at random.
 *
 * s: state to choose an action for
 *
 * Returns the index of the chosen action, or 0 when the controller reports
 * no available action for s.
 */
public int bestAction(ObjectState s) {
    bool[] possibleActions = controller.getAvailableActions(s.getState());

    List<int> actions = new List<int>();
    for (int i = 0; i < possibleActions.Length; i++) {
        if (possibleActions[i]) {
            actions.Add(i);
        }
    }

    // Nothing to choose from. Without this guard the exploration branch would
    // index an empty list and throw, while the greedy branch returned 0; both
    // now return 0.
    if (actions.Count == 0) {
        return 0;
    }

    float[] Qactions = (float[])Q[s.getId()];

    // add an element of randomness for exploration
    if (Random.Range(0.0f, 1.0f) < epsilon) {
        return actions[Random.Range(0, actions.Count)];
    }

    // get a list of all the max q,
    // in case the max q is shared by more than one action
    int action = 0;
    float maxValue = float.MinValue;
    List<int> bestActions = new List<int>();
    foreach (int a in actions) {
        float tempValue = Qactions[a];
        if (tempValue > maxValue) {
            maxValue = tempValue;
            bestActions.Clear();
            bestActions.Add(a);
        }
        else if (tempValue == maxValue) { // exact tie, e.g. untouched entries
            bestActions.Add(a);
        }
    }

    // choose a random action from the best ones
    if (bestActions.Count > 1) {
        action = bestActions[Random.Range(0, bestActions.Count)];
    }
    else if (bestActions.Count == 1) {
        action = bestActions[0];
    }

    return action;
} // end of bestAction
/*
 * Update is called once per frame.
 *
 * Per frame, in this order: refresh the compass, apply hunger and stamina
 * changes for the current State, look up (or register) the ObjectState for
 * the freshly generated state values, feed the transition to the Q algorithm,
 * and finally select the next action (keyboard or Q table).
 */
void Update() {
    compass = compass();

    // dummies only run their own behaviour and take no part in learning
    if (dummy) {
        dummyBehaviour();
        return;
    }

    hunger -= 0.01f;

    // The Seek and Attack cases below assign moveSpeed again, so this reduced
    // speed only persists in the remaining states.
    if (stamina < 25 || hunger > 75) {
        moveSpeed = 2f;
    }

    if (hunger < 0) {
        // starved to death or sploded from too much food
        Dead();
    }

    switch (currentState) {
        case State.Idle:
            rest(0.01f);
            break;

        case State.Seek:
            consumeStamina(0.01f);
            moveSpeed = SeekMoveSpeed;
            break;

        case State.Attack:
            consumeStamina(0.3f);
            moveSpeed = AttackMoveSpeed;
            break;
    }

    // resolve the ObjectState for this frame, registering it when it is new
    int[] stateValues = generateStates();
    ObjectState current = stateArray.findState(stateValues);
    if (current == null) {
        current = stateArray.addState(stateValues);
    }

    // Learn on the very first observation (the state then acts as its own
    // predecessor) and whenever the state id differs from the previous one.
    bool firstObservation = oldState == null;
    if (firstObservation || oldState.getId() != current.getId()) {
        qAlgorithm.rl(firstObservation ? current : oldState, (int)currentAction, current);
    }

    // the transition above is still learned from, but a dead agent acts no more
    if (currentState == State.Dead) {
        dead = true;
        return;
    }

    // get the available actions
    bool[] allowed = getAvailableActions((int)currentState);

    if (keyboard) {
        // if boolean variable is true use the keyboard commands
        Action typed = keyboardActions(allowed);
        selectAction((int)typed);
    }
    else if (oldState != current) {
        // get best action according to q table
        Action chosen = intToAction(qAlgorithm.nextAction(current));
        Debug.Log("selected action: " + chosen);
        selectAction((int)chosen);
    }

    oldState = current;
    changeFieldOfView();
} // end of Update