/// <summary>
        /// Applies one tabular Q-learning step for <paramref name="transition"/>:
        /// adds the reward to the running statistics, makes sure both the
        /// successor and the origin state have a row in <c>Qtable</c>, and moves
        /// Q(oldState, action) towards reward + gamma * max Q(newState, ·) by a
        /// fraction alpha.
        /// </summary>
        /// <param name="transition">Supplies oldState, action, reward and newState.</param>
        /// <returns>
        /// Absolute difference between the value stored after the step and the
        /// estimate returned by <c>value</c> before it.
        /// </returns>
        public override double update(StateTransition <stateType, actionType> transition)
        {
            stats.cumulativeReward += transition.reward;

            // Estimate as reported by value() before any row is created or changed.
            double priorEstimate = value(transition.oldState, transition.action);

            // Successor row first, then origin row; a missing row is registered in
            // the table and then filled with defaultQ for every available action.
            if (!Qtable.ContainsKey(transition.newState))
            {
                var successorRow = new Dictionary <actionType, double>(actionComparer);
                Qtable.Add(transition.newState, successorRow);
                foreach (actionType candidate in availableActions)
                {
                    successorRow.Add(candidate, defaultQ);
                }
            }
            if (!Qtable.ContainsKey(transition.oldState))
            {
                var freshOriginRow = new Dictionary <actionType, double>(actionComparer);
                Qtable.Add(transition.oldState, freshOriginRow);
                foreach (actionType candidate in availableActions)
                {
                    freshOriginRow.Add(candidate, defaultQ);
                }
            }

            // Greedy value of the successor state, taken before the origin row is written.
            double bestSuccessorQ = Qtable[transition.newState].Values.Max();

            var originRow = Qtable[transition.oldState];
            originRow[transition.action] = priorEstimate + alpha * (transition.reward + gamma * bestSuccessorQ - priorEstimate);

            // Read the entry back so the reported change reflects what is actually stored.
            double storedEstimate = originRow[transition.action];
            return Math.Abs(storedEstimate - priorEstimate);
        }
        /// <summary>
        /// Recomputes Q(state, action) from the models <c>T</c> and <c>R</c>: each
        /// successor recorded in T for (state, action) contributes
        /// (T(s,a,s2) / P) * (average R(s,a,s2) + gamma * max Q(s2, ·)),
        /// where P is the sum of all T entries for (state, action). The result
        /// replaces the current table entry.
        /// </summary>
        /// <param name="state">State whose Q entry is recomputed.</param>
        /// <param name="action">Action whose Q entry is recomputed.</param>
        /// <remarks>
        /// Does nothing when P is zero. Successor states, and <paramref name="state"/>
        /// itself, that are missing from <c>Qtable</c> get a row filled with
        /// <c>defaultQ</c> for every available action.
        /// NOTE(review): T presumably holds transition counts and R observed
        /// rewards per (s, a, s2) — confirm against their definitions.
        /// </remarks>
        private void updateQ(stateType state, actionType action)
        {
            // Fetched once and reused for both the normaliser and the successor enumeration.
            var successorTable = T.GetStateValueTable(state, action);
            double P = successorTable.Values.Sum();

            // Nothing recorded for this pair: leave the table untouched.
            if (P == 0)
            {
                return;
            }

            double newQ = 0;

            foreach (stateType s2 in successorTable.Keys)
            {
                if (!Qtable.ContainsKey(s2))
                {
                    Qtable.Add(s2, new Dictionary <actionType, double>(actionComparer));
                    foreach (actionType act in availableActions)
                    {
                        Qtable[s2].Add(act, defaultQ);
                    }
                }

                // Greedy value of the successor; only meaningful within this iteration.
                double maxQ = Qtable[s2].Values.Max();

                double thisT    = T.Get(state, action, s2);
                double thisR    = R.Get(state, action, s2).Average();
                double thisProb = thisT / P;
                newQ += thisProb * (thisR + gamma * maxQ);
            }

            if (!Qtable.ContainsKey(state))
            {
                Qtable.Add(state, new Dictionary <actionType, double>(actionComparer));
                foreach (actionType act in availableActions)
                {
                    Qtable[state].Add(act, defaultQ);
                }
            }

            Qtable[state][action] = newQ;
        }