/// <summary>
/// Scores every <see cref="Direction"/> value from the given state and reports the
/// highest-scoring one together with its score.
/// </summary>
/// <param name="currstate">Feature list of the current state. A copy (via copyState) is
/// handed to addActionToCurrState for each candidate, so this list is not passed to it directly.</param>
/// <param name="weight">Weight list passed to linearCombination for scoring.</param>
/// <returns>
/// A pair holding the winning direction and its score. Only a strictly greater score replaces
/// the current winner, so ties keep the earlier enum value; if no score exceeds negative
/// infinity the result is <c>Direction.STAY</c> with a q-value of negative infinity.
/// </returns>
public Qvalue_Direction_Pair FindVOpt(List<int> currstate, List<float> weight)
{
    Direction winner = Direction.STAY;
    float winnerScore = -Mathf.Infinity;

    System.Array candidates = System.Enum.GetValues(typeof(Direction));
    for (int i = 0; i < candidates.Length; i++)
    {
        Direction candidate = (Direction)candidates.GetValue(i);

        // Build the state+action feature list on a copy of the state.
        List<int> features = addActionToCurrState(copyState(currstate), candidate);
        float score = linearCombination(weight, features);

        // Written as a negated '>' so a NaN score is skipped exactly like a non-improving one.
        if (!(score > winnerScore))
        {
            continue;
        }

        winnerScore = score;
        winner = candidate;
    }

    Qvalue_Direction_Pair result = new Qvalue_Direction_Pair { dir = winner, qvalue = winnerScore };
    Debug.Log(winner);
    return result;
}
/// <summary>
/// Evaluates linearCombination(weight, state + action) for every <see cref="Direction"/>
/// value and returns the direction with the largest value together with that value.
/// </summary>
/// <param name="currstate">Feature list of the current state. It is copied with copyState
/// before each action is added, so this list is not passed to addActionToCurrState directly.</param>
/// <param name="weight">Weight list used to score each state + action feature list.</param>
/// <returns>
/// The best direction and its q-value. Ties keep the earlier enum value because only a
/// strictly greater score replaces the current best. If no score exceeds negative infinity
/// (for example, every score is NaN), the result is <c>Direction.STAY</c> with a q-value of
/// negative infinity.
/// </returns>
private Qvalue_Direction_Pair FindVOpt(List<int> currstate, List<float> weight)
{
    // Start below every finite score. Seeding with 0 would make the method ignore all
    // actions whose q-value is negative and report a made-up q-value of 0 for STAY.
    float maxValue = float.NegativeInfinity;
    Direction bestdir = Direction.STAY;

    foreach (Direction dir in System.Enum.GetValues(typeof(Direction)))
    {
        // Work on a copy so the caller's state list is not the one handed to addActionToCurrState.
        List<int> tempstate = copyState(currstate);
        tempstate = addActionToCurrState(tempstate, dir);

        float Q_opt = linearCombination(weight, tempstate);
        if (Q_opt > maxValue)
        {
            maxValue = Q_opt;
            bestdir = dir;
        }
    }

    Qvalue_Direction_Pair returnValue = new Qvalue_Direction_Pair();
    returnValue.dir = bestdir;
    returnValue.qvalue = maxValue;
    return returnValue;
}
/// <summary>
/// Plays one step: picks an action (greedy via FindVOpt, or a random one with probability
/// <c>epsilon</c>), applies it, advances the other objects, computes a reward, and updates
/// <c>weight</c> from the resulting temporal-difference error. If the player is dead after the
/// step, the weights and score are saved and the game is restarted. Sets <c>Moved</c> to true.
/// </summary>
public void MakeMove()
{
    RLGameState rlGameState = new RLGameState();
    List<int> currstate = rlGameState.GetCurrentFAState();

    // Greedy action under the current weights.
    Qvalue_Direction_Pair qvalueDirectionPair = FindVOpt(currstate, weight);
    Direction ourChoice = qvalueDirectionPair.dir;

    // Exploration: with probability epsilon pick uniformly among the candidate directions.
    float randFloat = Random.Range(0.0f, 1.0f);
    if (randFloat < epsilon)
    {
        // NOTE(review): indices 1/3/4/6 presumably flag the front/left/right/back cells,
        // with 0 meaning "free" - confirm against RLGameState.GetCurrentFAState.
        List<Direction> epsilonList = new List<Direction>();
        if (currstate[1] == 0)
        {
            epsilonList.Add(Direction.FRONT);
        }
        if (currstate[3] == 0)
        {
            epsilonList.Add(Direction.LEFT);
        }
        if (currstate[4] == 0)
        {
            epsilonList.Add(Direction.RIGHT);
        }
        if (currstate[6] == 0)
        {
            epsilonList.Add(Direction.BACK);
        }
        epsilonList.Add(Direction.STAY); // always a candidate, so the list is never empty

        int rand = Random.Range(0, epsilonList.Count);
        ourChoice = epsilonList[rand];
    }

    // The returned success flag is currently not part of the reward, so it is not stored.
    successfullyMovedPos(ourChoice);
    manualMoveAllObjects();

    // Value estimate of the state reached after the move.
    float vopt = FindVOpt(rlGameState.GetCurrentFAState(), weight).qvalue;

    // Reward shaping: +7 for FRONT, -9 for BACK, -8 for STAY, and -300 on death.
    float r = 0;
    if (ourChoice == Direction.FRONT)
    {
        r += 7;
    }
    else if (ourChoice == Direction.BACK)
    {
        r -= 9;
    }
    else if (ourChoice == Direction.STAY)
    {
        r -= 8;
    }

    // Look the player up once instead of repeating the scene search for every use.
    // Assumes IsDead() is a side-effect-free query, since it is now evaluated once.
    PlayerControl player = GameObject.FindGameObjectWithTag("Player").GetComponent<PlayerControl>();
    bool playerDied = player.IsDead();
    if (playerDied)
    {
        r -= 300;
    }

    // The prediction must be the q-value of the action that was actually executed. Using the
    // greedy q-value here is wrong whenever the exploration branch picked another action,
    // because the update below is applied to the executed action's features.
    float eta = 0.01f;
    List<int> takenFeatures = addActionToCurrState(currstate, ourChoice);
    float predicted = linearCombination(weight, takenFeatures);
    float constant = eta * (predicted - (r + vopt));
    weight = updateWeight(weight, takenFeatures, constant);

    if (playerDied)
    {
        // Persist the weight vector and score at death, then start over.
        saveWeight();
        saveScore();
        player.RestartGame();
    }

    Moved = true;
}
/// <summary>
/// Chooses a move with recurseFunction, applies it, advances the other objects, updates
/// <c>weight</c> from the temporal-difference error of that transition, saves the weights, and
/// appends a "state|move|reward|next state" record through saveData. Finally clears the
/// cached states and sets <c>Moved</c> to true.
/// </summary>
public void FindBestMove()
{
    RLGameState rlGameState = new RLGameState();
    List<int> currstate = rlGameState.GetCurrentFAState();

    // Initialization for the colliders used by the search.
    playerCollider = gameState.GetPlayer();
    carColliders = gameState.GetCarColliders(playerCollider, lookRadius);
    logColliders = gameState.GetLogColliders(playerCollider, lookRadius);

    var bestMove = recurseFunction(0, depthSetting, playerCollider.transform.position, true);
    // bestMove[1] is parsed as the name of the chosen Direction.
    var move = (Direction)System.Enum.Parse(typeof(Direction), bestMove[1]);

    movePlayer(move);
    GameObject.FindGameObjectWithTag("Player").GetComponent<PlayerControl>().clip();
    manualMoveAllObjects();

    var switchup = gameState.isPlayerDead(playerCollider, logColliders, carColliders);
    var r = reward(move, switchup);

    // Value estimate of the state reached after the move.
    List<int> some = rlGameState.GetCurrentFAState();
    float vopt = FindVOpt(some, weight).qvalue;

    // The prediction must be the q-value of the move that was executed. The move comes from
    // the search above, not from the greedy policy, so the greedy q-value of the old state
    // is not the right prediction for the features being updated.
    float eta = 0.01f;
    List<int> takenFeatures = addActionToCurrState(currstate, move);
    float predicted = linearCombination(weight, takenFeatures);
    float constant = eta * (predicted - (r + vopt));
    weight = updateWeight(weight, takenFeatures, constant);
    saveWeight();

    // Record layout: state | move | reward | statedash
    // e.g. 0,0,0,0,0,0,0,0,0,0,0,0|1|0.233|0,0,0,0,0,0,0,0,0
    // currstate is serialized after addActionToCurrState was called on it, as before.
    // The reward uses the invariant culture so the decimal separator is always '.'.
    var record = new System.Text.StringBuilder();
    record.Append(JoinWithCommas(currstate));
    record.Append('|');
    record.Append((int)move);
    record.Append('|');
    record.Append(System.Convert.ToString(r, System.Globalization.CultureInfo.InvariantCulture));
    record.Append('|');
    record.Append(JoinWithCommas(some));
    saveData(record.ToString());

    clearStates(); // after finding a best move, clear all states so the next search starts fresh
    Moved = true;
}

/// <summary>Formats the values as a comma-separated list with no trailing comma.</summary>
private static string JoinWithCommas(List<int> values)
{
    var text = new System.Text.StringBuilder();
    for (int i = 0; i < values.Count; i++)
    {
        if (i > 0)
        {
            text.Append(',');
        }
        text.Append(values[i]);
    }
    return text.ToString();
}