/// <summary>
/// Looks up the stored Q value for a state and a pair of player actions.
/// </summary>
/// <param name="state">State component of the lookup key.</param>
/// <param name="currplayer1Act">First action component of the lookup key.</param>
/// <param name="player2act">Second action component of the lookup key.</param>
/// <returns>The value stored in <c>Q</c> for the key, or 1.0 when no entry exists.</returns>
public double getQval(State state, Action currplayer1Act, Action player2act)
{
    var lookupKey = new keyTable(state, currplayer1Act, player2act);

    double stored;
    if (Q.TryGetValue(lookupKey, out stored))
    {
        return stored;
    }

    // Entries that have never been written default to 1.0.
    return 1.0;
}
/// <summary>
/// Updates the Q entry for (currState, currplayer1Act, player2act) by blending the
/// existing entry with the observed reward plus the discounted value that
/// <c>FoeQVal.GetValue</c> reports for <paramref name="nextState"/>.
/// </summary>
/// <param name="currState">State in which the actions were taken.</param>
/// <param name="nextState">State passed to <c>FoeQVal.GetValue</c>.</param>
/// <param name="currplayer1Act">First action component of the table key.</param>
/// <param name="player2act">Second action component of the table key.</param>
/// <param name="currPlayer1reward">Reward term added to the discounted next-state value.</param>
public void UpdateQValue(State currState, State nextState, Action currplayer1Act, Action player2act, double currPlayer1reward)
{
    var entryKey = new keyTable(currState, currplayer1Act, player2act);

    double previousQ;
    if (!Q.TryGetValue(entryKey, out previousQ))
    {
        // Entries that have never been written default to 1.0.
        previousQ = 1.0;
    }

    // Value of the next state as computed by the foe function.
    var nextStateValue = FoeQVal.GetValue(nextState, this);

    // new = (1 - rate) * old + rate * (reward + gama * nextValue)
    var retainedPart = (1 - learning_Rate) * previousQ;
    var targetPart = learning_Rate * (currPlayer1reward + gama * nextStateValue);

    Q[entryKey] = retainedPart + targetPart;
}
/// <summary>
/// Returns the largest Q value found for <paramref name="nextState"/> across every
/// value of the <c>Action</c> enum. Missing entries count as 1.0.
/// </summary>
/// <param name="nextState">State whose per-action entries are scanned.</param>
/// <returns>
/// The maximum entry, or <see cref="double.MinValue"/> if the enum has no values.
/// </returns>
private double getMaxQval(State nextState)
{
    double best = double.MinValue;

    foreach (Action candidate in Enum.GetValues(typeof(Action)))
    {
        double candidateQ;
        if (!Q.TryGetValue(new keyTable(nextState, candidate), out candidateQ))
        {
            // Entries that have never been written default to 1.0.
            candidateQ = 1.0;
        }

        // Keep the running maximum unless it is strictly greater than the candidate.
        if (!(best > candidateQ))
        {
            best = candidateQ;
        }
    }

    return best;
}
/// <summary>
/// Updates the Q entry for (state, currplayer1Act) by blending the existing entry
/// with the reward plus the discounted maximum Q value of <paramref name="nextState"/>.
/// </summary>
/// <param name="state">State in which the action was taken.</param>
/// <param name="nextState">State whose maximum Q value feeds the update.</param>
/// <param name="currplayer1Act">Action component of the table key.</param>
/// <param name="currPlayer1reward">Reward term added to the discounted next-state value.</param>
/// <returns>The value written to the table.</returns>
public double UpdateQValue(State state, State nextState, Action currplayer1Act, double currPlayer1reward)
{
    var entryKey = new keyTable(state, currplayer1Act);

    double previousQ;
    if (!Q.TryGetValue(entryKey, out previousQ))
    {
        // Entries that have never been written default to 1.0.
        previousQ = 1.0;
    }

    // Best value obtainable from the next state according to the current table.
    var bestNext = getMaxQval(nextState);

    // new = (1 - rate) * old + rate * (reward + gama * bestNext)
    var retainedPart = (1 - learning_Rate) * previousQ;
    var targetPart = learning_Rate * (currPlayer1reward + gama * bestNext);
    var refreshed = retainedPart + targetPart;

    Q[entryKey] = refreshed;
    return refreshed;
}
/// <summary>
/// Returns the largest Q value found for <paramref name="state"/> across every
/// pair of <c>Action</c> enum values. Missing entries count as 1.0.
/// </summary>
/// <param name="state">State whose joint-action entries are scanned.</param>
/// <returns>
/// The maximum entry, or <see cref="double.MinValue"/> if the enum has no values.
/// </returns>
private double getMaxQval(State state)
{
    var allActions = Enum.GetValues(typeof(Action));
    double best = double.MinValue;

    foreach (Action firstAction in allActions)
    {
        foreach (Action secondAction in allActions)
        {
            double pairQ;
            if (!Q.TryGetValue(new keyTable(state, firstAction, secondAction), out pairQ))
            {
                // Entries that have never been written default to 1.0.
                pairQ = 1.0;
            }

            // Keep the running maximum unless it is strictly greater than the candidate.
            if (!(best > pairQ))
            {
                best = pairQ;
            }
        }
    }

    return best;
}
/// <summary>
/// Updates both players' Q entries for (currState, currplayer1Act, player2act) and
/// then decays the learning rate.
/// </summary>
/// <remarks>
/// Each table entry is blended with its player's reward plus the discounted
/// next-state value returned by <c>CorrValue.GetValue</c>. When
/// <paramref name="done"/> is true the next-state values are taken as (0.0, 0.0).
/// </remarks>
/// <param name="currState">State in which the actions were taken.</param>
/// <param name="nextState">State passed to <c>CorrValue.GetValue</c> when not done.</param>
/// <param name="currplayer1Act">First action component of the table key.</param>
/// <param name="player2act">Second action component of the table key.</param>
/// <param name="currPlayer1reward">Reward term used for the <c>Q</c> update.</param>
/// <param name="Player2reward">Reward term used for the <c>Player2Q</c> update.</param>
/// <param name="done">When true, the next-state contribution is zero for both players.</param>
public void UpdateQValue(State currState, State nextState, Action currplayer1Act, Action player2act, double currPlayer1reward, double Player2reward, bool done)
{
    var Qkey = new keyTable(currState, currplayer1Act, player2act);
    var currQ_val = getCurrPQvalue(currState, currplayer1Act, player2act);
    var player2CurrQ_val = getPlayer2Qval(currState, currplayer1Act, player2act);

    // With done == true there is no next-state contribution for either player.
    Tuple<double, double> newStateVal = new Tuple<double, double>(0.0, 0.0);
    if (!done)
    {
        // Item1 feeds the Q update, Item2 feeds the Player2Q update.
        newStateVal = CorrValue.GetValue(nextState, this);
    }

    var nextQValueCurrent = (1 - learningrate) * currQ_val + learningrate * (currPlayer1reward + gama * newStateVal.Item1);
    var nextQValueOpponent = (1 - learningrate) * player2CurrQ_val + learningrate * (Player2reward + gama * newStateVal.Item2);

    Q[Qkey] = nextQValueCurrent;
    Player2Q[Qkey] = nextQValueOpponent;

    // Decay the learning rate, never letting it drop to 0.001 or below.
    // The counter is advanced exactly once per update and the decayed rate is
    // computed once, so the value tested against the floor is the value stored.
    // (Previously ++totalcount ran up to three times per call, so the comparison
    // and the assignment used different counts.)
    ++totalcount;
    var decayedRate = learningrate / (1 + 0.0000000001 * totalcount);
    learningrate = decayedRate > 0.001 ? decayedRate : 0.001;
}