/// <summary>
/// Scores a fixed number of randomly generated candidate actions against
/// <paramref name="state"/> using <c>Evaluate</c> and reports the highest-scoring one.
/// </summary>
/// <param name="state">State handed unchanged to <c>Evaluate</c> for every candidate.</param>
/// <param name="action">Receives the highest-scoring candidate; never null on return.</param>
/// <param name="value">
/// Receives the score of <paramref name="action"/>. This is NaN only when every
/// candidate evaluated to NaN.
/// </param>
public void GetBestActionAndUtilityForState(ReinforcementLearningState state, out ReinforcementLearningAction action, out float value)
        {
            // Number of random candidates scored per call.
            const int CandidateCount = 10;

            InvertedPendulumAction2 bestaction = null;
            float bestq = float.MinValue;
            bool haveCandidate = false;

            for (int i = 0; i < CandidateCount; ++i)
            {
                // Candidate parameter is r.NextDouble() scaled by 2 and shifted by -1
                // (presumably the range [-1, 1) if r is a System.Random -- confirm).
                InvertedPendulumAction2 act = new InvertedPendulumAction2((float)r.NextDouble() * 2 - 1);
                float q = Evaluate(state, act);

                // A plain "q > bestq" test rejects NaN, negative infinity and float.MinValue
                // itself, which could leave the result null. Always take the first candidate,
                // and let any later score displace a NaN incumbent so a real number wins
                // whenever at least one candidate produced one.
                if (!haveCandidate || float.IsNaN(bestq) || q > bestq)
                {
                    bestq = q;
                    bestaction = act;
                    haveCandidate = true;
                }
            }

            action = bestaction;
            value = bestq;
        }
 /// <summary>
 /// Feeds one training sample to <c>igmn</c>, built from the state's <c>a</c> and
 /// <c>w</c> fields, the action's <c>action</c> field and the supplied value, in that order.
 /// </summary>
 /// <param name="state">State whose <c>a</c> and <c>w</c> fields form the first two components.</param>
 /// <param name="action">Action whose <c>action</c> field forms the third component.</param>
 /// <param name="value">Value stored as the fourth and last component.</param>
 public void ReplaceValue(InvertedPendulumState state, InvertedPendulumAction2 action, float value)
 {
     // TODO (carried over from the original author): "delta??" -- still unresolved;
     // possibly a question of whether a delta should be trained instead of the raw value.
     double[] sample = { state.a, state.w, action.action, value };
     Vector trainingPoint = new Vector(sample);
     igmn.Train(trainingPoint);
 }