/// <summary>
/// Selects the agent's next action with an epsilon-greedy rule.
/// A random value in [0, 1] is drawn; if it is below <c>controller.epsilon</c> a random
/// action is requested from the active grid type (square or hex), otherwise the action
/// with the highest Q-value for the agent's current state is used.
/// Every call also increments <c>controller.steps</c> and sets <c>controller.end</c>
/// once the step count exceeds <c>controller.maxSteps</c>.
/// </summary>
/// <returns>
/// The chosen action as a 2D vector; <see cref="PerformAction"/> adds it to the agent position.
/// </returns>
public Vector2 ChooseAction()
{
    // Random draw that decides between exploring and exploiting.
    float v = Random.Range(0f, 1f);
    Vector2 action;

    if (v < controller.epsilon)
    {
        // Explore: pick a random action. The hex variant is given the parity of the
        // agent's row (presumably because hex neighbours differ per row - confirm in ActionHex).
        action = controller.isSquare
            ? ActionSquare.ChooseRandomAction()
            : ActionHex.ChooseRandomAction((int)(controller.agentPosition.y) % 2);

        // Decay epsilon after each exploratory move, clamping right away so the value
        // never dips below the configured minimum, not even for a single step.
        var decayedEpsilon = controller.epsilon - controller.epsilonDecay;
        controller.epsilon = decayedEpsilon > controller.minEplison
            ? decayedEpsilon
            : controller.minEplison;

        print("Random Action Chosen: " + action);
    }
    else
    {
        // Exploit: flatten the 2D position (x, y) into the 1D state index (x + y * n),
        // using controller.n exactly as PerformAction does so both address the same Q-table row.
        int stateIndex = (int)(controller.agentPosition.x + controller.agentPosition.y * controller.n);

        // Only the best action is needed here; its Q-value is discarded.
        float maxValue;
        FindMaxValue(stateIndex, out action, out maxValue);

        print("Action Chosen: " + action);
    }

    // Count this decision and flag the end of the episode once the step budget is exceeded.
    controller.steps++;
    if (controller.steps > controller.maxSteps)
    {
        controller.end = true;
    }

    return action;
}
/// <summary>
/// Moves the agent by <paramref name="action"/> and updates the Q-value of the
/// (state, action) pair it just left.
/// If the new cell is inside the grid, the agent is placed on that platform and collects
/// its reward; otherwise the episode is ended with a reward of -10.
/// </summary>
/// <param name="action">Offset added to the agent's current grid position.</param>
public void PerformAction(Vector2 action)
{
    Vector2 origin = controller.agentPosition;

    // Column of the Q-table that corresponds to the action being taken.
    // The hex lookup is given the parity of the row the agent starts from.
    int actionIdx;
    if (controller.isSquare)
    {
        actionIdx = ActionSquare.GetIndexByVector(action);
    }
    else
    {
        actionIdx = ActionHex.GetIndexByVector(action, (int)(origin.y) % 2);
    }

    // Flatten the 2D position (x, y) into a 1D state index (x + y * n), before and after the move.
    int fromState = (int)(origin.x + origin.y * controller.n);
    Vector2 destination = origin + action;
    int toState = (int)(destination.x + destination.y * controller.n);

    // Publish the new position to the controller.
    controller.agentPosition += action;

    int col = (int)destination.x;
    int row = (int)destination.y;

    float gained;
    float bestNextValue;

    if (!controller.checkIndexBoundaries(col, row))
    {
        // The move left the platform grid: end the episode with a fixed penalty
        // and no future value.
        controller.end = true;
        gained = -10;
        bestNextValue = 0;
        print("Out of Boundary");
    }
    else
    {
        // Step onto the destination platform, remembering the one just left.
        controller.previousPlatform = controller.currentPlatform;
        controller.currentPlatform = controller.platforms[col, row];

        // Place the agent's object on the platform, shifted slightly along Vector3.back.
        controller.agent.transform.position =
            controller.currentPlatform.transform.position + Vector3.back * 0.1f;

        gained = controller.currentPlatform.reward;
        controller.totalReward += gained;

        // The min reward can be collected only once, so disable it after this visit.
        if (controller.currentPlatform.minRewardPoint)
        {
            controller.currentPlatform.DisableMinReward();
        }

        // Only the best Q-value of the next state is needed; the action itself is ignored.
        Vector2 ignoredAction;
        FindMaxValue(toState, out ignoredAction, out bestNextValue);
    }

    // Bellman update: Q <- Q + learningRate * (reward + discountRate * max Q(next) - Q).
    float previousQ = qTable[fromState, actionIdx];
    qTable[fromState, actionIdx] = previousQ + controller.learningRate * (gained + controller.discountRate * bestNextValue - previousQ);

    // Keep an already-ended episode ended; otherwise let the current platform decide.
    controller.end = controller.end || controller.currentPlatform.CheckGameState();
}
/// <summary>
/// Scans one row of the Q-table and reports the highest Q-value together with the
/// action it belongs to. Ties keep the lowest action index because only a strictly
/// greater value replaces the current best.
/// </summary>
/// <param name="index">Row of the Q-table to scan (the flattened state index).</param>
/// <param name="action">
/// Receives the vector of the best action, translated by the active grid type. The hex
/// translation uses the parity of the controller's current agent row.
/// </param>
/// <param name="maxValue">
/// Receives the highest Q-value found in the row. It stays at negative infinity if
/// <c>controller.nActions</c> is zero or less.
/// </param>
public void FindMaxValue(int index, out Vector2 action, out float maxValue)
{
    // Start below every finite value so that any stored Q-value can win. A fixed
    // sentinel such as -999 would leave no action selected for rows whose values
    // are all at or below it.
    maxValue = float.NegativeInfinity;

    // Stays -1 only when the loop never finds a candidate.
    int indexAction = -1;

    for (int i = 0; i < controller.nActions; i++)
    {
        float candidate = qTable[index, i];
        if (maxValue < candidate)
        {
            maxValue = candidate;
            indexAction = i;
        }
    }

    action = controller.isSquare
        ? ActionSquare.GetVectorByIndex(indexAction)
        : ActionHex.GetVectorByIndex(indexAction, (int)(controller.agentPosition.y) % 2);
}