예제 #1
0
 /// <summary>
 /// Abstract predicate over a <see cref="State"/>; every concrete subclass must supply the test.
 /// </summary>
 /// <param name="s">The state to test.</param>
 /// <returns>
 /// Presumably <c>true</c> when <paramref name="s"/> is a goal (terminal) state of the domain —
 /// NOTE(review): inferred from the method name only; confirm against the overrides.
 /// </returns>
 public abstract bool IsGoalState(State s);
예제 #2
0
 /// <summary>
 /// Probability of ending in <paramref name="sTag"/> when action <paramref name="a"/> is taken
 /// from this state.
 /// </summary>
 /// <remarks>
 /// Two outcomes are considered: the copy of this state after <c>Apply(action, true)</c>, weighted by
 /// <c>RaceTrack.ACTION_SUCCESS_PROBABILITY</c>, and the copy after <c>Apply(action, false)</c>,
 /// weighted by the complement. When both outcomes are the same state, the two weights are combined
 /// so the reported probability is the full mass (1.0) instead of only the first match.
 /// </remarks>
 /// <param name="a">The action taken; cast to <c>VelocityAction</c>.</param>
 /// <param name="sTag">The candidate successor state.</param>
 /// <returns>
 /// 1.0 if <paramref name="sTag"/> equals both outcomes, the matching weight if it equals exactly one,
 /// and 0.0 if it equals neither.
 /// </returns>
 public override double TransitionProbability(Action a, State sTag)
 {
     VelocityAction velocityAction = (VelocityAction)a;

     // Outcome weighted by ACTION_SUCCESS_PROBABILITY.
     RaceCarState sTagApply = new RaceCarState(this);
     sTagApply.Apply(velocityAction, true);

     // Outcome weighted by 1 - ACTION_SUCCESS_PROBABILITY.
     RaceCarState sTagNoApply = new RaceCarState(this);
     sTagNoApply.Apply(velocityAction, false);

     bool matchesApplied = sTag.Equals(sTagApply);
     bool matchesNotApplied = sTag.Equals(sTagNoApply);

     // Both outcomes collapse onto the same state: the whole probability mass lands there.
     // Returned as a literal to avoid rounding from p + (1 - p).
     if (matchesApplied && matchesNotApplied)
     {
         return 1.0;
     }
     if (matchesApplied)
     {
         return RaceTrack.ACTION_SUCCESS_PROBABILITY;
     }
     if (matchesNotApplied)
     {
         return 1 - RaceTrack.ACTION_SUCCESS_PROBABILITY;
     }
     return 0.0;
 }
 /// <summary>
 /// Looks up the value currently stored for <paramref name="s"/> in <c>ViByS</c>.
 /// </summary>
 /// <param name="s">The state whose stored value is requested.</param>
 /// <returns>The entry of <c>ViByS</c> indexed by <paramref name="s"/>.</returns>
 public double ValueAt(State s)
 {
     double storedValue = ViByS[s];
     return storedValue;
 }
        /// <summary>
        /// Performs one in-place backup of the value stored for <paramref name="s"/>.
        /// For every action in the domain it computes
        /// <c>Reward(a) + DiscountFactor * sum(TransitionProbability(a, s') * ViByS[s'])</c>
        /// over the successors of <paramref name="s"/>, and keeps the best qualifying action.
        /// </summary>
        /// <remarks>
        /// The running best starts at the value already stored for <paramref name="s"/>, so the stored
        /// value is only replaced by one that compared greater than or equal to it. An action also has
        /// to pass the check that <c>s.Apply(a)</c> is not equal to <paramref name="s"/>.
        /// </remarks>
        /// <param name="s">The state to back up.</param>
        /// <returns>
        /// The absolute change written to <c>ViByS[s]</c>, or 0 when no action qualified
        /// (in which case nothing is written).
        /// </returns>
        private double update(State s)
        {
            double previousValue = ViByS[s];
            double bestValue = previousValue;
            Action bestAction = null;

            foreach (Action candidate in m_dDomain.Actions)
            {
                // Expected value of the successor states under this action.
                double expectedNext = 0;
                foreach (State successor in s.Successors(candidate))
                {
                    expectedNext += s.TransitionProbability(candidate, successor) * ViByS[successor];
                }
                double backedUp = s.Reward(candidate) + (m_dDomain.DiscountFactor * expectedNext);

                if (backedUp >= bestValue)
                {
                    // Apply is evaluated only for candidates that already passed the value test.
                    bool staysInPlace = s.Apply(candidate).Equals(s);
                    if (!staysInPlace)
                    {
                        bestValue = backedUp;
                        bestAction = candidate;
                    }
                }
            }

            if (bestAction == null)
            {
                return 0;
            }

            ViByS[s] = bestValue;
            ViBySActions[s] = bestAction;
            return Math.Abs(bestValue - previousValue);
        }
예제 #5
0
 /// <summary>
 /// Abstract selector that maps a <see cref="State"/> to an <see cref="Action"/>; every concrete
 /// subclass must supply the choice.
 /// </summary>
 /// <param name="s">The state for which an action is requested.</param>
 /// <returns>The action chosen by the implementing class for <paramref name="s"/>.</returns>
 public abstract Action GetAction(State s);
 /// <summary>
 /// Returns the action stored for <paramref name="s"/> in <c>ViBySActions</c>.
 /// </summary>
 /// <param name="s">The state for which an action is requested.</param>
 /// <returns>The entry of <c>ViBySActions</c> indexed by <paramref name="s"/>.</returns>
 public override Action GetAction(State s)
 {
     Action storedAction = ViBySActions[s];
     return storedAction;
 }
예제 #7
0
 /// <summary>
 /// Abstract query taking an action and a candidate state; every concrete subclass must supply it.
 /// </summary>
 /// <param name="a">The action taken.</param>
 /// <param name="sTag">The candidate resulting state.</param>
 /// <returns>
 /// Presumably the probability of reaching <paramref name="sTag"/> from this state under
 /// <paramref name="a"/> — NOTE(review): inferred from the name; confirm against the overrides.
 /// </returns>
 public abstract double TransitionProbability(Action a, State sTag);
예제 #8
0
 /// <summary>
 /// Picks an entry of <c>m_lActions</c> at an index drawn from <c>RandomGenerator</c>;
 /// <paramref name="s"/> is not consulted.
 /// </summary>
 /// <param name="s">The current state (unused).</param>
 /// <returns>The element of <c>m_lActions</c> at the drawn index.</returns>
 public override Action GetAction(State s)
 {
     return m_lActions[RandomGenerator.Next(m_lActions.Count)];
 }
 /// <summary>
 /// Computes the next-iteration value V(i+1) for <paramref name="s"/>: the maximum over all domain
 /// actions of <c>Reward(a) + DiscountFactor * sum(TransitionProbability(a, s') * ViByS[s'])</c>.
 /// The result is written to <c>Vi_1ByS[s]</c> and the maximizing action to <c>ViBySActions[s]</c>.
 /// </summary>
 /// <param name="s">The state to evaluate.</param>
 /// <returns>
 /// The absolute difference between <c>Vi_1ByS[s]</c> and <c>ViByS[s]</c>, or 0 when no action
 /// was selected (in which case nothing is written).
 /// </returns>
 private double updateValueIter(State s)
 {
     double bestValue = Double.MinValue;
     Action bestAction = null;

     foreach (Action candidate in m_dDomain.Actions)
     {
         // Expected value of the successor states under this action.
         double expectedNext = 0;
         foreach (State successor in s.Successors(candidate))
         {
             expectedNext += s.TransitionProbability(candidate, successor) * ViByS[successor];
         }
         double backedUp = s.Reward(candidate) + m_dDomain.DiscountFactor * expectedNext;

         // ">=" means that among tied actions the last one enumerated wins.
         if (backedUp >= bestValue)
         {
             bestValue = backedUp;
             bestAction = candidate;
         }
     }

     if (bestAction == null)
     {
         return 0;
     }

     Vi_1ByS[s] = bestValue;
     ViBySActions[s] = bestAction;
     return Math.Abs(Vi_1ByS[s] - ViByS[s]);
 }
예제 #10
0
 /// <summary>
 /// Chooses between a randomly indexed domain action and the action returned by
 /// <c>findMaxQA</c>, based on a draw from <c>RandomGenerator.NextDouble()</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the random action is taken when the draw is GREATER than
 /// <paramref name="depsilon"/>. If the draw is uniform on [0, 1), that is a probability of about
 /// 1 - depsilon, whereas epsilon-greedy conventionally explores with probability epsilon.
 /// Confirm that callers pass the value with this meaning.
 /// </remarks>
 /// <param name="s">The current state.</param>
 /// <param name="depsilon">Threshold compared against the random draw.</param>
 /// <returns>A random domain action, or the result of <c>findMaxQA(s)</c>.</returns>
 private Action epsilonGreedy(State s,double depsilon)
 {
     bool pickRandom = RandomGenerator.NextDouble() > depsilon;
     if (!pickRandom)
     {
         return findMaxQA(s);
     }

     int actionCount = m_dDomain.Actions.Count();
     int randomIndex = RandomGenerator.Next(actionCount);
     return m_dDomain.Actions.ElementAt(randomIndex);
 }
예제 #11
0
 /// <summary>
 /// Returns an action whose <c>Q[j][a]</c> entry is maximal among the domain actions.
 /// When several actions share the maximum, one of them is picked at an index drawn from
 /// <c>RandomGenerator</c>.
 /// </summary>
 /// <param name="j">The state whose Q entries are compared.</param>
 /// <returns>One of the actions tied for the largest <c>Q[j][a]</c>.</returns>
 private Action findMaxQA(State j)
 {
     List<Action> bestActions = new List<Action>();
     double bestQ = double.MinValue;

     foreach (Action candidate in m_dDomain.Actions)
     {
         double q = Q[j][candidate];
         if (q > bestQ)
         {
             // Strictly better: everything collected so far is no longer maximal.
             bestQ = q;
             bestActions.Clear();
             bestActions.Add(candidate);
         }
         else if (q == bestQ)
         {
             // Exact tie with the current maximum (both values come from the same table).
             bestActions.Add(candidate);
         }
     }

     int pick = RandomGenerator.Next(bestActions.Count);
     return bestActions[pick];
 }
예제 #12
0
 /// <summary>
 /// Folds <c>Math.Max</c> over the <c>Q[stag][a]</c> entries of all domain actions.
 /// </summary>
 /// <param name="stag">The state whose Q entries are scanned.</param>
 /// <returns>
 /// The largest <c>Q[stag][a]</c>, or <c>double.MinValue</c> when the domain has no actions.
 /// </returns>
 private double MaxR(State stag)
 {
     double best = double.MinValue;
     foreach (Action candidate in m_dDomain.Actions)
     {
         double q = Q[stag][candidate];
         best = Math.Max(best, q);
     }
     return best;
 }
예제 #13
0
 /// <summary>
 /// Casts <paramref name="s"/> to <c>RaceCarState</c> and forwards to the <c>IsGoalState</c>
 /// overload that accepts that type.
 /// </summary>
 /// <param name="s">The state to test; cast to <c>RaceCarState</c> before the check.</param>
 /// <returns>The result of the <c>RaceCarState</c> overload.</returns>
 public override bool IsGoalState(State s)
 {
     RaceCarState raceCarState = (RaceCarState)s;
     return IsGoalState(raceCarState);
 }