// Picks uniformly at random among the movements available from the given cell.
private Movement GetRandomMovement(int fromCell)
{
    List<Movement> options = LabryntRules.GetPossibleMovements(fromCell);
    return options[Random.Range(0, options.Count)];
}
// Returns the highest Q-value obtainable from the given state (cell),
// or 0 when the cell is a terminal state (no future reward to estimate).
private double CalculateEstimate(int nextCell)
{
    if (LabryntRules.IsFinalState(nextCell))
    {
        return 0;
    }
    return qMat.GetQValue(nextCell, GetBestMovement(nextCell));
}
// Looks up the Q-Matrix and returns the best movement among those available
// from the cell. "Best" means highest Q-value; when several movements share
// the highest value, one of them is chosen at random.
private Movement GetBestMovement(int fromCell)
{
    List<Movement> candidates = LabryntRules.GetPossibleMovements(fromCell);

    // Terminal states stop the algorithm before reaching this method, so every
    // cell seen here has at least one movement; seed the comparison with it.
    List<Movement> tied = new List<Movement> { candidates[0] };
    double bestValue = qMat.GetQValue(fromCell, candidates[0]);

    // Scan the rest, keeping every movement whose Q-value matches the running maximum.
    for (int i = 1; i < candidates.Count; i++)
    {
        double value = qMat.GetQValue(fromCell, candidates[i]);
        if (value > bestValue)
        {
            // New strict maximum: discard previous ties.
            tied.Clear();
            tied.Add(candidates[i]);
            bestValue = value;
        }
        else if (value == bestValue)
        {
            tied.Add(candidates[i]);
        }
    }

    // More than one movement holds the highest value: break the tie randomly.
    return tied.Count > 1 ? tied[Random.Range(0, tied.Count)] : tied[0];
}
// Update is called once per frame.
// Accumulates frame time and performs one Q-learning step for every "dt"
// interval owed, where dt is the inverse of the user-selected speed.
void Update()
{
    if (!paused)
    {
        timer += Time.deltaTime;
        float dt = speedSelector.GetInverse();
        if (timer > dt)
        {
            // Calculate how many learning steps we owe according to the speed selector.
            int loops = (int)(timer / dt);
            // BUG FIX: consume the time for every step about to run, not just one.
            // The previous code subtracted a single dt after executing `loops`
            // steps, so the leftover accumulated time kept re-triggering extra
            // steps on subsequent frames and the simulation drifted faster than
            // the selected speed.
            timer -= loops * dt;
            while (loops > 0)
            {
                if (restart)
                {
                    InitialiseAlgorithm();
                    restart = false;
                    newEpisode = true;
                }
                if (newEpisode)
                {
                    SetValuesForNewEpisode();
                    newEpisode = false;
                }

                // Choose action (a) based on policy (p): epsilon-greedy.
                Movement currentMove = ShouldExplore()
                    ? GetRandomMovement(currentCell)
                    : GetBestMovement(currentCell);

                // Observe reward (r) and the Q-value being updated.
                int reward = LabryntRules.GetReward(currentCell, currentMove);
                double oldQValue = qMat.GetQValue(currentCell, currentMove);

                // Estimate the best Q-value obtainable from the next state.
                int nextCell = LabryntRules.GetLandingCell(currentCell, currentMove);
                double estimation = CalculateEstimate(nextCell);

                // Recalculate the Q-value for the current cell based on the estimation.
                double updatedQValue = CalculateNewQValue(oldQValue, alpha, gamma, reward, estimation);
                qMat.SetQValue(currentCell, currentMove, updatedQValue);

                cumulativeReward += reward;
                currentCell = nextCell;
                UpdatePlayerPositionInDisplay(); // Refresh the display with the new player position.

                // If the player landed on a final state cell, update epsilon and start a new episode.
                if (LabryntRules.IsFinalState(currentCell))
                {
                    // Shift from exploration to exploitation gradually between episodes.
                    // NOTE(review): when the first decay drops epsilon below 0.3,
                    // both branches fire and epsilon decays twice in the same
                    // episode; epsilon exactly equal to 0.3 decays in neither.
                    // Confirm whether this is intended.
                    if (epsilon > 0.3)
                    {
                        epsilon *= epsilonDecay1;
                    }
                    if (epsilon < 0.3)
                    {
                        epsilon *= epsilonDecay2;
                    }
                    newEpisode = true;
                }
                loops--;
            }
        }
    }
}