/// <summary>
        /// Performs one learning step. Reads the current state and reward from
        /// <paramref name="env"/>, asks <paramref name="policy"/> for an action, and, when a
        /// previous state/action pair has been recorded, adjusts that pair's value in
        /// <c>qFunction</c> by <c>Alpha * (reward + Discount * Qtp1max - Qt)</c>, where
        /// <c>Qtp1max</c> is the utility reported by <c>GetBestActionAndUtilityForState</c>
        /// for the current state and <c>Qt</c> is the current value of the previous pair.
        /// </summary>
        /// <param name="env">Environment queried for the current state and reward.</param>
        /// <param name="policy">Policy that picks the action for the current state, given <c>qFunction</c>.</param>
        /// <returns>
        /// The action chosen by <paramref name="policy"/>; it is also stored, together with the
        /// current state, as the "previous" pair used by the next call.
        /// </returns>
        /// <exception cref="ArithmeticException">The computed value adjustment is NaN.</exception>
        public virtual ReinforcementLearningAction Step(ReinforcementLearningEnvironment env, ReinforcementLearningPolicy policy)
        {
            ReinforcementLearningState state = env.State();

            float reward = env.Reward();

            // The action is selected before the update below, so the policy is
            // consulted with the value function as it was on entry to this call.
            ReinforcementLearningAction action = policy.ActionForState(state, qFunction);

            // No update is possible on the first call, when nothing has been recorded yet.
            if ((prevState != null) && (prevAction != null))
            {
                // bestAction is required by the out-parameter signature; only the utility is used.
                ReinforcementLearningAction bestAction;
                float Qtp1max;
                qFunction.GetBestActionAndUtilityForState(state, out bestAction, out Qtp1max);

                float Qt = qFunction.Evaluate(prevState, prevAction);

                float deltaQ = Alpha * (reward + Discount * Qtp1max - Qt);

                // Fail fast with diagnostic context instead of writing NaN into the value function.
                if (float.IsNaN(deltaQ))
                {
                    throw new ArithmeticException(
                        "Q-value update is NaN (Alpha=" + Alpha
                        + ", Discount=" + Discount
                        + ", reward=" + reward
                        + ", Qtp1max=" + Qtp1max
                        + ", Qt=" + Qt + ").");
                }

                qFunction.ModifyValue(prevState, prevAction, deltaQ);
            }

            prevAction = action;
            prevState = state;

            return action;
        }
        /// <summary>
        /// Performs one learning step. Reads the current state and reward from
        /// <paramref name="env"/>, asks <paramref name="policy"/> for an action, and, when a
        /// previous state/action pair has been recorded, overwrites that pair's value in the
        /// store with <c>Alpha * (reward + Discount * best - old) + old</c>. Here <c>best</c>
        /// is the utility reported by <c>GetBestActionAndUtilityForState</c> for the current
        /// state and <c>old</c> is the value currently stored for the previous pair.
        /// </summary>
        /// <param name="env">Environment queried for the current state and reward.</param>
        /// <param name="policy">Policy that picks the action for the current state, given <c>qFunction</c>.</param>
        /// <returns>
        /// The action chosen by <paramref name="policy"/>; it is also stored, together with the
        /// current state, as the "previous" pair used by the next call.
        /// </returns>
        public override ReinforcementLearningAction Step(ReinforcementLearningEnvironment env, ReinforcementLearningPolicy policy)
        {
            ReinforcementLearningState currentState = env.State();
            float currentReward = env.Reward();

            // Chosen before the store is touched below.
            ReinforcementLearningAction chosenAction = policy.ActionForState(currentState, qFunction);

            bool hasHistory = (prevState != null) && (prevAction != null);
            if (hasHistory)
            {
                // greedyAction only satisfies the out-parameter signature; the utility is what is used.
                ReinforcementLearningAction greedyAction;
                float bestNextValue;
                qFunction.GetBestActionAndUtilityForState(currentState, out greedyAction, out bestNextValue);

                float previousValue = qFunction.Evaluate(prevState, prevAction);
                float updatedValue = Alpha * (currentReward + Discount * bestNextValue - previousValue) + previousValue;

                // The full new value is written (not a delta); this requires the concrete
                // inverted-pendulum store, state and action types, hence the casts.
                ((InvertedPendulumESIGMNQStore)qFunction).ReplaceValue(
                    (InvertedPendulumState)prevState,
                    (InvertedPendulumAction2)prevAction,
                    updatedValue);
            }

            prevAction = chosenAction;
            prevState = currentState;

            return chosenAction;
        }