// Epsilon-greedy action selection for the inverted pendulum's continuous
// action space: with probability EPSILON explore with a uniformly random
// action, otherwise exploit the best action under the current Q-function.
// (The original compared r.NextDouble() > EPSILON and returned the random
// action on that branch, which made the agent random most of the time for
// a conventional small EPSILON; the condition and branches are fixed here.)
// It also constructed a new Random on every call, which yields correlated
// values when called in quick succession; the shared field r is used instead.
public override ReinforcementLearningAction ActionForState(ReinforcementLearningState state, ReinforcementLearningQStore qFunction)
{
    if (r.NextDouble() < EPSILON)
    {
        // Explore: random action parameter in [0, 1).
        return new InvertedPendulumAction2((float)r.NextDouble());
    }
    else
    {
        // Exploit: greedy action with respect to the current Q-function.
        ReinforcementLearningAction ret;
        float value;
        qFunction.GetBestActionAndUtilityForState(state, out ret, out value);
        return ret;
    }
}
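// The selection methods above and below both read from a field named r.
// Its declaration is not part of the original fragments; a shared RNG on
// the policy base class along these lines is assumed:

protected readonly Random r = new Random();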
// Generic stochastic action selection: sample an action according to the
// distribution produced by ActionProbabilities, using roulette-wheel
// selection over the cumulative probabilities.
public virtual ReinforcementLearningAction ActionForState(ReinforcementLearningState state, ReinforcementLearningQStore qFunction)
{
    ReinforcementLearningAction[] actions;
    float[] probs;
    ActionProbabilities(state, qFunction, out actions, out probs);

    float rand = (float)r.NextDouble();

    // Walk the cumulative distribution until it exceeds the random sample.
    // The i < actions.Length guard also terminates the loop at the last
    // action when floating-point round-off leaves the cumulative sum
    // slightly below 1.
    int i = 0;
    float sum = 0;
    do
    {
        sum += probs[i]; ++i;
    }
    while ((i < actions.Length) && (sum < rand));

    return actions[i - 1];
}
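// A worked example of the roulette-wheel walk above (illustrative values,
// not from the original): with probs = { 0.2f, 0.5f, 0.3f } and
// rand = 0.65f, the cumulative sum passes 0.2 (i = 1), then 0.7 (i = 2),
// at which point sum >= rand and actions[1] is returned. Draws below 0.2
// return actions[0]; draws of 0.7 or above return actions[2].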
// Epsilon-greedy action distribution: the greedy action receives
// probability 1 - Epsilon and the exploration mass Epsilon is spread
// uniformly over the remaining actions. (The original assigned Epsilon to
// the greedy action and 1 - Epsilon to the rest, inverting the usual
// convention; it is flipped here to match the selection method above.)
// Assumes at least two available actions.
public override void ActionProbabilities(ReinforcementLearningState state, ReinforcementLearningQStore qFunction, out ReinforcementLearningAction[] actions, out float[] probabilities)
{
    ReinforcementLearningAction action;
    float utility;

    actions = state.GetActions();
    probabilities = new float[actions.Length];

    qFunction.GetBestActionAndUtilityForState(state, out action, out utility);

    for (int i = 0; i < actions.Length; ++i)
    {
        if (actions[i].Equals(action))
        {
            probabilities[i] = 1 - Epsilon;
        }
        else
        {
            probabilities[i] = Epsilon / (actions.Length - 1);
        }
    }
}
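// For example (illustrative numbers, not from the original): with four
// actions and Epsilon = 0.1f, the greedy action gets probability 0.9 and
// each of the three remaining actions gets 0.1 / 3 ≈ 0.0333, so the
// distribution sums to 1 as required by the sampling loop above.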
// Purely greedy policy: always exploit the best action under the current
// Q-function. Declared override rather than new so that callers holding a
// base-class reference actually reach the greedy behaviour; with new, the
// method would merely hide the virtual base implementation.
public override ReinforcementLearningAction ActionForState(ReinforcementLearningState state, ReinforcementLearningQStore qFunction)
{
    ReinforcementLearningAction action;
    float utility;
    qFunction.GetBestActionAndUtilityForState(state, out action, out utility);
    return action;
}
// Each concrete policy describes itself as a probability distribution over
// the actions available in a state.
public abstract void ActionProbabilities(ReinforcementLearningState state, ReinforcementLearningQStore qFunction, out ReinforcementLearningAction[] actions, out float[] probabilities);

// Builds the Q-store for the given environment before learning starts.
public void Initialize(ReinforcementLearningEnvironment env)
{
    qFunction = CreateQStore(env);
}
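// A minimal usage sketch of how these pieces fit together (hypothetical
// driver code; EpsilonGreedyPolicy, env and state are assumptions, not
// part of the fragments above):
//
//     var policy = new EpsilonGreedyPolicy();
//     policy.Initialize(env);                          // creates the Q-store
//     var a = policy.ActionForState(state, qFunction); // sample an action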
// Softmax (Boltzmann) action distribution: each action is weighted by
// exp(Q(s, a) / Temperature). Subtracting the maximum Q-value before
// exponentiating does not change the resulting distribution but keeps
// Math.Exp from overflowing for large Q-values.
public override void ActionProbabilities(ReinforcementLearningState state, ReinforcementLearningQStore qFunction, out ReinforcementLearningAction[] actions, out float[] probabilities)
{
    actions = state.GetActions();
    probabilities = new float[actions.Length];

    // First pass: collect the Q-values and track their maximum.
    float maxq = float.MinValue;
    for (int i = 0; i < actions.Length; ++i)
    {
        float q = qFunction.Evaluate(state, actions[i]);
        probabilities[i] = q;
        if (q > maxq) maxq = q;
    }

    // Second pass: exponentiate the shifted Q-values and accumulate the sum.
    float sum = 0;
    for (int i = 0; i < actions.Length; ++i)
    {
        probabilities[i] = (float)Math.Exp((probabilities[i] - maxq) / Temperature);
        sum += probabilities[i];
    }

    // Third pass: normalize so the probabilities sum to 1.
    for (int i = 0; i < actions.Length; ++i)
    {
        probabilities[i] /= sum;
    }
}
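// A worked example (illustrative numbers, not from the original): with
// Q-values { 1.0f, 2.0f, 3.0f } and Temperature = 1, the shifted exponents
// are e^-2 ≈ 0.135, e^-1 ≈ 0.368 and e^0 = 1, giving probabilities of
// roughly 0.090, 0.245 and 0.665 after normalization. Lowering the
// temperature sharpens the distribution toward the greedy action; raising
// it flattens the distribution toward uniform.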
// Presumably from the pendulum policy: a continuous action space cannot be
// enumerated into a probability table, so ActionForState is overridden
// directly (see above) and this method is left unimplemented.
public override void ActionProbabilities(ReinforcementLearningState state, ReinforcementLearningQStore qFunction, out ReinforcementLearningAction[] actions, out float[] probabilities)
{
    throw new NotImplementedException();
}