/// <summary>
/// Looks up the value stored in <c>Q</c> for a state and a pair of player actions.
/// </summary>
/// <param name="state">State component of the lookup key.</param>
/// <param name="currplayer1Act">Action of the current player (player 1).</param>
/// <param name="player2act">Action of the second player.</param>
/// <returns>The stored value for the key, or 1.0 when <c>Q</c> has no entry for it.</returns>
public double getQval(State state, Action currplayer1Act, Action player2act)
        {
            double storedValue;
            var    lookupKey = new keyTable(state, currplayer1Act, player2act);

            // Keys that were never written fall back to the default of 1.0.
            return Q.TryGetValue(lookupKey, out storedValue) ? storedValue : 1.0;
        }
Esempio n. 2
0
        /// <summary>
        /// Rewrites the <c>Q</c> entry for (currState, currplayer1Act, player2act) as a blend of
        /// its previous value and the reward plus the scaled value of <paramref name="nextState"/>.
        /// </summary>
        /// <param name="currState">State the actions were taken in.</param>
        /// <param name="nextState">State passed to <c>FoeQVal.GetValue</c> to obtain the next-state value.</param>
        /// <param name="currplayer1Act">Action of the current player (player 1).</param>
        /// <param name="player2act">Action of the second player.</param>
        /// <param name="currPlayer1reward">Reward added to the scaled next-state value.</param>
        public void UpdateQValue(State currState, State nextState, Action currplayer1Act, Action player2act, double currPlayer1reward)
        {
            var entryKey = new keyTable(currState, currplayer1Act, player2act);

            // Entries that have never been written start from the default of 1.0.
            double storedQ;
            var previousQ = Q.TryGetValue(entryKey, out storedQ) ? storedQ : 1.0;

            // Next-state value as reported by the foe function.
            var nextStateValue = FoeQVal.GetValue(nextState, this);

            // Target the entry is moved towards; gama is presumably the discount factor (confirm).
            var target = currPlayer1reward + gama * nextStateValue;

            Q[entryKey] = (1 - learning_Rate) * previousQ + learning_Rate * target;
        }
Esempio n. 3
0
        /// <summary>
        /// Finds the largest <c>Q</c> value over every member of the <c>Action</c> enum
        /// for the given state.
        /// </summary>
        /// <param name="nextState">State whose per-action values are scanned.</param>
        /// <returns>
        /// The greatest value found, where actions without an entry in <c>Q</c> count as 1.0.
        /// </returns>
        private double getMaxQval(State nextState)
        {
            double best = double.MinValue;

            foreach (Action candidate in Enum.GetValues(typeof(Action)))
            {
                double candidateQ;

                // Missing entries use the default Q value of 1.0.
                if (!Q.TryGetValue(new keyTable(nextState, candidate), out candidateQ))
                {
                    candidateQ = 1.0;
                }

                // Keep the running maximum only when it is strictly greater.
                if (best > candidateQ)
                {
                    continue;
                }

                best = candidateQ;
            }

            return best;
        }
Esempio n. 4
0
        /// <summary>
        /// Rewrites the <c>Q</c> entry for (state, currplayer1Act) as a blend of its previous
        /// value and the reward plus the scaled maximum value of <paramref name="nextState"/>.
        /// </summary>
        /// <param name="state">State the action was taken in.</param>
        /// <param name="nextState">State whose maximum value is obtained from <c>getMaxQval</c>.</param>
        /// <param name="currplayer1Act">Action of the current player (player 1).</param>
        /// <param name="currPlayer1reward">Reward added to the scaled next-state value.</param>
        /// <returns>The value that was written to <c>Q</c>.</returns>
        public double UpdateQValue(State state, State nextState, Action currplayer1Act, double currPlayer1reward)
        {
            var entryKey = new keyTable(state, currplayer1Act);

            // Entries that have never been written start from the default of 1.0.
            double storedQ;
            var previousQ = Q.TryGetValue(entryKey, out storedQ) ? storedQ : 1.0;

            // Target the entry is moved towards; gama is presumably the discount factor (confirm).
            var target = currPlayer1reward + gama * getMaxQval(nextState);

            var blended = (1 - learning_Rate) * previousQ + learning_Rate * target;
            Q[entryKey] = blended;

            return blended;
        }
        /// <summary>
        /// Finds the largest <c>Q</c> value over every pair of <c>Action</c> enum members
        /// (player 1 action, player 2 action) for the given state.
        /// </summary>
        /// <param name="state">State whose joint-action values are scanned.</param>
        /// <returns>
        /// The greatest value found, where action pairs without an entry in <c>Q</c> count as 1.0.
        /// </returns>
        private double getMaxQval(State state)
        {
            // The same set of enum values drives both loops, so fetch it once.
            var allActions = Enum.GetValues(typeof(Action));
            double best = double.MinValue;

            foreach (Action ownAction in allActions)
            {
                foreach (Action otherAction in allActions)
                {
                    double pairQ;

                    // Missing entries use the default of 1.0.
                    if (!Q.TryGetValue(new keyTable(state, ownAction, otherAction), out pairQ))
                    {
                        pairQ = 1.0;
                    }

                    // Keep the running maximum only when it is strictly greater.
                    if (best > pairQ)
                    {
                        continue;
                    }

                    best = pairQ;
                }
            }

            return best;
        }
Esempio n. 6
0
        /// <summary>
        /// Updates both players' entries for (currState, currplayer1Act, player2act): <c>Q</c> for
        /// the current player and <c>Player2Q</c> for the second player. Each entry becomes a blend
        /// of its previous value and the player's reward plus the scaled next-state value. The
        /// learning rate is then decayed once.
        /// </summary>
        /// <param name="currState">State the actions were taken in.</param>
        /// <param name="nextState">State passed to <c>CorrValue.GetValue</c> when the episode is not done.</param>
        /// <param name="currplayer1Act">Action of the current player (player 1).</param>
        /// <param name="player2act">Action of the second player.</param>
        /// <param name="currPlayer1reward">Reward used for the current player's update.</param>
        /// <param name="Player2reward">Reward used for the second player's update.</param>
        /// <param name="done">
        /// When true, the next-state value is taken as (0.0, 0.0) and <c>CorrValue.GetValue</c> is not called.
        /// </param>
        public void UpdateQValue(State currState, State nextState, Action currplayer1Act, Action player2act, double currPlayer1reward, double Player2reward, bool done)
        {
            // Decay schedule parameters (same literals the original used inline).
            const double DecayPerUpdate  = 0.0000000001;
            const double MinLearningRate = 0.001;

            var Qkey             = new keyTable(currState, currplayer1Act, player2act);
            var currQ_val        = getCurrPQvalue(currState, currplayer1Act, player2act);
            var player2CurrQ_val = getPlayer2Qval(currState, currplayer1Act, player2act);

            // With done == true there is no next-state contribution for either player.
            Tuple <double, double> newStateVal = new Tuple <double, double>(0.0, 0.0);

            if (!done)
            {
                // Item1 is used for the current player, Item2 for the second player.
                newStateVal = CorrValue.GetValue(nextState, this);
            }

            var nextQValueCurrent  = (1 - learningrate) * currQ_val + learningrate * (currPlayer1reward + gama * newStateVal.Item1);
            var nextQValueOpponent = (1 - learningrate) * player2CurrQ_val + learningrate * (Player2reward + gama * newStateVal.Item2);

            Q[Qkey]        = nextQValueCurrent;
            Player2Q[Qkey] = nextQValueOpponent;

            // Decay the learning rate. The counter is advanced exactly once per update and the
            // decayed rate is computed once, so the floor check and the assignment agree.
            // (Previously ++totalcount also appeared inside the conditional expression, which
            // advanced the counter two or three times per call.)
            ++totalcount;
            var decayedRate = learningrate / (1 + DecayPerUpdate * totalcount);
            learningrate = decayedRate > MinLearningRate ? decayedRate : MinLearningRate;
        }