/// <summary>
/// Returns the largest stored Q-value over every <see cref="Direction"/> for the
/// state currently reported by <paramref name="rlGameState"/>.
/// </summary>
/// <param name="rlGameState">Provider of the state to evaluate.</param>
/// <returns>
/// The maximum Q-value among the state/action keys present in <c>qvalues</c>,
/// or 0 when no action of this state yielded a value above negative infinity
/// (in particular when the state has no entries yet).
/// </returns>
private float findVopt(RLGameState rlGameState) {
    // The state does not depend on the direction, so read it once instead of once per action.
    var state = rlGameState.GetCurrentState();

    float best = float.NegativeInfinity;
    foreach (Direction dir in System.Enum.GetValues(typeof(Direction))) {
        // Single lookup; unseen state/action pairs are skipped rather than inserted.
        float q;
        if (qvalues.TryGetValue(stateActionToString(state, dir), out q) && q > best) {
            best = q;
        }
    }

    // Nothing raised the running maximum: fall back to 0.
    return float.IsNegativeInfinity(best) ? 0 : best;
}
/// <summary>
/// Performs one epsilon-greedy step of tabular Q-learning: picks an action for the
/// current state, applies it, advances the world, and updates the Q-value of the
/// (state, action) pair that was taken. When the player is dead after the step,
/// the learned data and scores are saved and the game is restarted.
/// </summary>
/// <remarks>
/// Rewards: FRONT +7, BACK -9, STAY -8, and an additional -300 when the player is dead.
/// NOTE(review): Vopt of the follow-up state is still used when the player died;
/// confirm whether a terminal transition should bootstrap from it.
/// </remarks>
public void MakeMove() {
    // Snapshot the state before acting.
    RLGameState rlGameState = new RLGameState();
    List<int> currstate = rlGameState.GetCurrentState();

    // Greedy action by default; with probability epsilon replace it by a random one.
    Direction ourChoice = makeDeterministicChoice(currstate);
    float randFloat = Random.Range(0.0f, 1.0f);
    if (randFloat < epsilon) {
        // Presumably a 0 at these indices marks the neighbouring cell as free —
        // TODO confirm against RLGameState.GetCurrentState().
        List<Direction> epsilonList = new List<Direction>();
        if (currstate[1] == 0) {
            epsilonList.Add(Direction.FRONT);
        }
        if (currstate[3] == 0) {
            epsilonList.Add(Direction.LEFT);
        }
        if (currstate[4] == 0) {
            epsilonList.Add(Direction.RIGHT);
        }
        if (currstate[6] == 0) {
            epsilonList.Add(Direction.BACK);
        }
        epsilonList.Add(Direction.STAY); // always a candidate, so the list is never empty
        ourChoice = epsilonList[Random.Range(0, epsilonList.Count)];
    }

    // The result of the move attempt is not used by the update below.
    successfullyMovedPos(ourChoice);
    manualMoveAllObjects();

    // Best known value of the state reached after the move.
    float Vopt = findVopt(rlGameState);

    // Reward for the chosen direction.
    float r = 0;
    if (ourChoice == Direction.FRONT) {
        countFront++;
        r += 7;
    } else if (ourChoice == Direction.BACK) {
        countFront--;
        r -= 9;
    } else if (ourChoice == Direction.STAY) {
        r -= 8;
    }

    // Resolve the player once instead of repeating the tag search for every use.
    PlayerControl player = GameObject.FindGameObjectWithTag("Player").GetComponent<PlayerControl>();
    bool isDead = player.IsDead();

    // Discourage dying with a large penalty.
    if (isDead) {
        r -= 300;
    }

    float eta = 0.01f;
    var key = stateActionToString(currstate, ourChoice);

    // Unseen (state, action) pairs start from 0.
    float oldQ;
    if (!qvalues.TryGetValue(key, out oldQ)) {
        oldQ = 0;
    }

    // Q-learning update: blend the old estimate with the new target.
    qvalues[key] = (1 - eta) * oldQ + eta * (r + discountFactor * Vopt);

    if (isDead) {
        saveDictionary();
        saveIteration();
        saveHighestScore();
        saveScore();
        player.RestartGame();
    }

    Moved = true;
}
/// <summary>
/// Performs one epsilon-greedy learning step using the weight vector
/// (function approximation): picks an action for the current feature state,
/// applies it, advances the world, and updates <c>weight</c>. When the player is
/// dead after the step, the weights and score are saved and the game is restarted.
/// </summary>
/// <remarks>
/// Rewards: FRONT +7, BACK -9, STAY -8, and an additional -300 when the player is dead.
/// NOTE(review): the update term uses the q-value of the greedy pair returned by
/// FindVOpt even when exploration replaced the action, and applies no discount
/// factor to the follow-up value — confirm both are intended.
/// </remarks>
public void MakeMove() {
    // Snapshot the feature state before acting.
    RLGameState rlGameState = new RLGameState();
    List<int> currstate = rlGameState.GetCurrentFAState();

    // Greedy action (and its q-value) under the current weights.
    Qvalue_Direction_Pair qvalueDirectionPair = FindVOpt(currstate, weight);
    Direction ourChoice = qvalueDirectionPair.dir;

    // With probability epsilon replace the greedy action by a random one.
    float randFloat = Random.Range(0.0f, 1.0f);
    if (randFloat < epsilon) {
        // Presumably a 0 at these indices marks the neighbouring cell as free —
        // TODO confirm against RLGameState.GetCurrentFAState().
        List<Direction> epsilonList = new List<Direction>();
        if (currstate[1] == 0) {
            epsilonList.Add(Direction.FRONT);
        }
        if (currstate[3] == 0) {
            epsilonList.Add(Direction.LEFT);
        }
        if (currstate[4] == 0) {
            epsilonList.Add(Direction.RIGHT);
        }
        if (currstate[6] == 0) {
            epsilonList.Add(Direction.BACK);
        }
        epsilonList.Add(Direction.STAY); // always a candidate, so the list is never empty
        ourChoice = epsilonList[Random.Range(0, epsilonList.Count)];
    }

    // The result of the move attempt is not used by the update below.
    successfullyMovedPos(ourChoice);
    manualMoveAllObjects();

    // Best q-value of the state reached after the move.
    float vopt = FindVOpt(rlGameState.GetCurrentFAState(), weight).qvalue;

    // Reward for the chosen direction.
    float r = 0;
    if (ourChoice == Direction.FRONT) {
        r += 7;
    } else if (ourChoice == Direction.BACK) {
        r -= 9;
    } else if (ourChoice == Direction.STAY) {
        r -= 8;
    }

    // Resolve the player once instead of repeating the tag search for every use.
    PlayerControl player = GameObject.FindGameObjectWithTag("Player").GetComponent<PlayerControl>();
    bool isDead = player.IsDead();

    // Discourage dying with a large penalty.
    if (isDead) {
        r -= 300;
    }

    float eta = 0.01f;
    // Step size times (prediction - target); passed to updateWeight as the scaling constant.
    float constant = eta * (qvalueDirectionPair.qvalue - (r + vopt));
    weight = updateWeight(weight, addActionToCurrState(currstate, ourChoice), constant);

    if (isDead) {
        // Save the weight vector at death.
        saveWeight();
        saveScore();
        player.RestartGame();
    }

    Moved = true;
}
/// <summary>
/// Performs one epsilon-greedy step of tabular Q-learning: picks an action for the
/// current state, applies it, advances the world, and updates the Q-value of the
/// (state, action) pair that was taken. When the player is dead after the step,
/// the Q-table, iteration count and scores are saved and the game is restarted.
/// </summary>
/// <remarks>
/// Rewards actually applied: FRONT +7, BACK -9, STAY -8, and an additional -300
/// when the player is dead.
/// NOTE(review): Vopt of the follow-up state is still used when the player died;
/// confirm whether a terminal transition should bootstrap from it.
/// </remarks>
public void MakeMove() {
    // Current state, captured before the move.
    RLGameState rlGameState = new RLGameState();
    List<int> currstate = rlGameState.GetCurrentState();

    // Start from the greedy action ...
    Direction ourChoice = makeDeterministicChoice(currstate);

    // ... and explore with probability epsilon.
    float randFloat = Random.Range(0.0f, 1.0f);
    if (randFloat < epsilon) {
        // Presumably a 0 at these indices marks the neighbouring cell as free —
        // TODO confirm against RLGameState.GetCurrentState().
        List<Direction> epsilonList = new List<Direction>();
        if (currstate[1] == 0) {
            epsilonList.Add(Direction.FRONT);
        }
        if (currstate[3] == 0) {
            epsilonList.Add(Direction.LEFT);
        }
        if (currstate[4] == 0) {
            epsilonList.Add(Direction.RIGHT);
        }
        if (currstate[6] == 0) {
            epsilonList.Add(Direction.BACK);
        }
        epsilonList.Add(Direction.STAY); // always a candidate, so the list is never empty
        ourChoice = epsilonList[Random.Range(0, epsilonList.Count)];
    }

    // The result of the move attempt is not used by the update below.
    successfullyMovedPos(ourChoice);
    manualMoveAllObjects();

    // Best known value of the state reached after the move.
    float Vopt = findVopt(rlGameState);

    // Reward for the chosen direction; countFront tracks net forward steps.
    float r = 0;
    if (ourChoice == Direction.FRONT) {
        countFront++;
        r += 7;
    } else if (ourChoice == Direction.BACK) {
        countFront--;
        r -= 9;
    } else if (ourChoice == Direction.STAY) {
        r -= 8;
    }

    // Resolve the player once instead of repeating the tag search for every use.
    PlayerControl player = GameObject.FindGameObjectWithTag("Player").GetComponent<PlayerControl>();
    bool isDead = player.IsDead();

    // Large penalty for dying.
    if (isDead) {
        r -= 300;
    }

    // Fixed learning rate.
    float eta = 0.01f;
    var key = stateActionToString(currstate, ourChoice);

    // Unseen (state, action) pairs start from 0.
    float oldQ;
    if (!qvalues.TryGetValue(key, out oldQ)) {
        oldQ = 0;
    }

    // Q-learning update: blend the old estimate with the new target.
    qvalues[key] = (1 - eta) * oldQ + eta * (r + discountFactor * Vopt);

    if (isDead) {
        saveDictionary();
        saveIteration();
        saveHighestScore();
        saveScore();
        player.RestartGame();
    }

    Moved = true;
}
/// <summary>
/// Chooses a move through the recursive search, applies it, advances the world,
/// updates the weight vector from the observed transition, and saves both the
/// weights and a text record of the transition.
/// </summary>
/// <remarks>
/// The record passed to saveData has the form
/// <c>state|move|reward|nextState</c>, where both states are comma-separated
/// integers and the move is the integer value of the <see cref="Direction"/>.
/// NOTE(review): the update term uses the q-value of the greedy pair from FindVOpt
/// rather than the q-value of the move actually taken, and applies no discount
/// factor — confirm both are intended.
/// </remarks>
public void FindBestMove() {
    // Feature state and greedy q-value before the move.
    RLGameState rlGameState = new RLGameState();
    List<int> currstate = rlGameState.GetCurrentFAState();
    Qvalue_Direction_Pair qvalueDirectionPair = FindVOpt(currstate, weight);

    // Initialization for the colliders used by the search and the death check.
    playerCollider = gameState.GetPlayer();
    carColliders = gameState.GetCarColliders(playerCollider, lookRadius);
    logColliders = gameState.GetLogColliders(playerCollider, lookRadius);

    // Search from the player's position; element 1 of the result names the direction.
    var bestMove = recurseFunction(0, depthSetting, playerCollider.transform.position, true);
    var move = (Direction)System.Enum.Parse(typeof(Direction), bestMove[1]);

    movePlayer(move);
    GameObject.FindGameObjectWithTag("Player").GetComponent<PlayerControl>().clip();
    manualMoveAllObjects();

    var isDead = gameState.isPlayerDead(playerCollider, logColliders, carColliders);
    var r = reward(move, isDead);

    // Feature state and best q-value after the move.
    List<int> some = rlGameState.GetCurrentFAState();
    float vopt = FindVOpt(some, weight).qvalue;

    float eta = 0.01f;
    // Step size times (prediction - target); passed to updateWeight as the scaling constant.
    float constant = eta * (qvalueDirectionPair.qvalue - (r + vopt));
    weight = updateWeight(weight, addActionToCurrState(currstate, move), constant);
    saveWeight();

    // state | move | reward | statedash
    string state = string.Join(",", currstate.ConvertAll(v => v.ToString()).ToArray())
        + "|" + (int)move
        + "|" + r
        + "|" + string.Join(",", some.ConvertAll(v => v.ToString()).ToArray());
    saveData(state);

    // After finding a best move, clear all states so the next search starts fresh.
    clearStates();
    Moved = true;
}