/// <summary>
/// Abstract predicate over a <see cref="State"/>; the concrete test is supplied by subclasses.
/// NOTE(review): presumably reports whether <paramref name="s"/> is a goal (terminal) state — confirm against the implementers.
/// </summary>
/// <param name="s">The state to test.</param>
/// <returns>The boolean result defined by the overriding class.</returns>
public abstract bool IsGoalState(State s);
/// <summary>
/// Returns the probability assigned to ending in <paramref name="sTag"/> after taking
/// <paramref name="a"/> from this state.
/// </summary>
/// <remarks>
/// Two candidate outcomes are built from a fresh <see cref="RaceCarState"/> constructed from this
/// state: one with the action applied using the flag <c>true</c>, one using the flag <c>false</c>.
/// The first candidate that equals <paramref name="sTag"/> decides the result.
/// NOTE(review): when both candidates are equal, only the success probability is returned
/// (the check for the <c>true</c> outcome wins) — confirm this is consistent with how successors are enumerated.
/// </remarks>
/// <param name="a">The action taken; it is cast to <see cref="VelocityAction"/>.</param>
/// <param name="sTag">The candidate resulting state.</param>
/// <returns>
/// <c>RaceTrack.ACTION_SUCCESS_PROBABILITY</c> when <paramref name="sTag"/> matches the outcome applied with <c>true</c>,
/// <c>1 - RaceTrack.ACTION_SUCCESS_PROBABILITY</c> when it matches the outcome applied with <c>false</c>,
/// and <c>0.0</c> otherwise.
/// </returns>
public override double TransitionProbability(Action a, State sTag)
{
    // Outcome when the action is applied with the flag set to true.
    RaceCarState outcomeWithFlag = new RaceCarState(this);
    outcomeWithFlag.Apply((VelocityAction)a, true);
    if (sTag.Equals(outcomeWithFlag))
    {
        return RaceTrack.ACTION_SUCCESS_PROBABILITY;
    }

    // Outcome when the action is applied with the flag set to false.
    RaceCarState outcomeWithoutFlag = new RaceCarState(this);
    outcomeWithoutFlag.Apply((VelocityAction)a, false);

    return sTag.Equals(outcomeWithoutFlag)
        ? 1 - RaceTrack.ACTION_SUCCESS_PROBABILITY
        : 0.0;
}
/// <summary>
/// Looks up the value currently stored for <paramref name="s"/> in <c>ViByS</c>.
/// </summary>
/// <param name="s">The state whose stored value is requested.</param>
/// <returns>The entry of <c>ViByS</c> indexed by <paramref name="s"/>.</returns>
public double ValueAt(State s)
{
    double storedValue = ViByS[s];
    return storedValue;
}
/// <summary>
/// Performs one in-place backup of the value stored for <paramref name="s"/> in <c>ViByS</c>.
/// </summary>
/// <remarks>
/// For every action in the domain the backed-up value
/// <c>Reward(a) + DiscountFactor * sum(P(a, s') * ViByS[s'])</c> is computed over <c>s.Successors(a)</c>.
/// An action replaces the current best only if its value is at least the best so far (which starts at the
/// currently stored value) and applying it does not yield a state equal to <paramref name="s"/>.
/// On ties, the later action in enumeration order wins.
/// </remarks>
/// <param name="s">The state to back up.</param>
/// <returns>
/// The absolute change of the stored value, or <c>0</c> when no action qualified and nothing was written.
/// </returns>
private double update(State s)
{
    double bestValue = ViByS[s];
    Action bestAction = null;

    foreach (Action candidate in m_dDomain.Actions)
    {
        // Expected stored value of the successors of s under this action.
        double expectedNext = 0;
        foreach (State successor in s.Successors(candidate))
        {
            expectedNext += s.TransitionProbability(candidate, successor) * ViByS[successor];
        }

        double backedUp = s.Reward(candidate) + (m_dDomain.DiscountFactor * expectedNext);

        // Written as a negated >= so that a NaN value is skipped exactly as before.
        if (!(backedUp >= bestValue))
        {
            continue;
        }

        // Only evaluated for candidates that passed the value test; skips actions that leave s unchanged.
        if (s.Apply(candidate).Equals(s))
        {
            continue;
        }

        bestValue = backedUp;
        bestAction = candidate;
    }

    if (bestAction == null)
    {
        return 0;
    }

    double change = bestValue - ViByS[s];
    ViByS[s] = bestValue;
    ViBySActions[s] = bestAction;
    return Math.Abs(change);
}
/// <summary>
/// Abstract hook that maps a <see cref="State"/> to an <see cref="Action"/>; the selection
/// rule is defined entirely by the overriding class.
/// </summary>
/// <param name="s">The state for which an action is requested.</param>
/// <returns>The action chosen by the overriding class.</returns>
public abstract Action GetAction(State s);
/// <summary>
/// Returns the action recorded for <paramref name="s"/> in <c>ViBySActions</c>.
/// </summary>
/// <param name="s">The state for which an action is requested.</param>
/// <returns>The entry of <c>ViBySActions</c> indexed by <paramref name="s"/>.</returns>
public override Action GetAction(State s)
{
    Action recordedAction = ViBySActions[s];
    return recordedAction;
}
/// <summary>
/// Abstract hook returning a probability for the pair (<paramref name="a"/>, <paramref name="sTag"/>);
/// the transition model is supplied by the overriding class.
/// NOTE(review): presumably the probability of reaching <paramref name="sTag"/> when taking
/// <paramref name="a"/> from the current state — confirm against the implementers.
/// </summary>
/// <param name="a">The action being taken.</param>
/// <param name="sTag">The candidate resulting state.</param>
/// <returns>The probability value defined by the overriding class.</returns>
public abstract double TransitionProbability(Action a, State sTag);
/// <summary>
/// Picks an entry of <c>m_lActions</c> at an index drawn from
/// <c>RandomGenerator.Next(m_lActions.Count)</c>; the state is not consulted.
/// </summary>
/// <param name="s">Unused by this implementation.</param>
/// <returns>The action stored at the drawn index.</returns>
public override Action GetAction(State s)
{
    return m_lActions[RandomGenerator.Next(m_lActions.Count)];
}
/// <summary>
/// Computes the next-iteration value V(i+1)(s) for <paramref name="s"/> from the current table <c>ViByS</c>.
/// </summary>
/// <remarks>
/// For every action in the domain the value
/// <c>Reward(a) + DiscountFactor * sum(P(a, s') * ViByS[s'])</c> is computed over <c>s.Successors(a)</c>.
/// The maximum is written to <c>Vi_1ByS[s]</c> and its action to <c>ViBySActions[s]</c>;
/// <c>ViByS</c> itself is only read here. On ties, the later action in enumeration order wins.
/// </remarks>
/// <param name="s">The state to evaluate.</param>
/// <returns>
/// The absolute difference between the new and the current value of <paramref name="s"/>,
/// or <c>0</c> when no action was selected and nothing was written.
/// </returns>
private double updateValueIter(State s)
{
    double bestValue = Double.MinValue;
    Action bestAction = null;

    foreach (Action candidate in m_dDomain.Actions)
    {
        // Expected current value of the successors of s under this action.
        double expectedNext = 0;
        foreach (State successor in s.Successors(candidate))
        {
            expectedNext += s.TransitionProbability(candidate, successor) * ViByS[successor];
        }

        double backedUp = s.Reward(candidate) + m_dDomain.DiscountFactor * expectedNext;

        // Keep the running maximum.
        if (backedUp >= bestValue)
        {
            bestValue = backedUp;
            bestAction = candidate;
        }
    }

    if (bestAction == null)
    {
        return 0;
    }

    Vi_1ByS[s] = bestValue;
    ViBySActions[s] = bestAction;
    return Math.Abs(Vi_1ByS[s] - ViByS[s]);
}
/// <summary>
/// Epsilon-greedy action selection: explores with probability <paramref name="depsilon"/>
/// and otherwise exploits the current Q estimates.
/// </summary>
/// <remarks>
/// The previous comparison (<c>NextDouble() &gt; depsilon</c>) chose a random action with probability
/// <c>1 - depsilon</c>, which is the inverse of epsilon-greedy. The draw is now compared with
/// <c>&lt;</c>, so <c>depsilon = 0</c> never explores.
/// NOTE(review): assumes <c>RandomGenerator.NextDouble()</c> yields values in [0, 1) — confirm;
/// under that assumption <c>depsilon = 1</c> always explores.
/// </remarks>
/// <param name="s">The state in which an action must be chosen.</param>
/// <param name="depsilon">Exploration probability, expected in [0, 1].</param>
/// <returns>
/// An action drawn by index from the domain's actions when exploring, otherwise the result of
/// <c>findMaxQA(s)</c>.
/// </returns>
private Action epsilonGreedy(State s, double depsilon)
{
    // Explore: pick one of the domain's actions at a random index.
    if (RandomGenerator.NextDouble() < depsilon)
    {
        return m_dDomain.Actions.ElementAt(RandomGenerator.Next(m_dDomain.Actions.Count()));
    }

    // Exploit: defer to the greedy choice over Q.
    return findMaxQA(s);
}
/// <summary>
/// Returns an action whose Q value in state <paramref name="j"/> is maximal, choosing among
/// tied actions by an index drawn from <c>RandomGenerator.Next</c>.
/// </summary>
/// <remarks>
/// The maximum and the list of actions attaining it are collected in a single pass: a strictly
/// larger value restarts the tie list, an equal value extends it. The tie list therefore holds the
/// same actions, in the same order, as a separate max pass followed by a filter pass.
/// </remarks>
/// <param name="j">The state whose Q row is inspected.</param>
/// <returns>One of the actions with the highest Q value for <paramref name="j"/>.</returns>
private Action findMaxQA(State j)
{
    double bestQ = double.MinValue;
    List<Action> bestActions = new List<Action>();

    foreach (Action candidate in m_dDomain.Actions)
    {
        double q = Q[j][candidate];
        if (q > bestQ)
        {
            // New strict maximum: earlier ties are no longer maximal.
            bestQ = q;
            bestActions.Clear();
            bestActions.Add(candidate);
        }
        else if (q == bestQ)
        {
            bestActions.Add(candidate);
        }
    }

    return bestActions[RandomGenerator.Next(bestActions.Count)];
}
/// <summary>
/// Returns the largest Q value stored for <paramref name="stag"/> across all actions of the domain.
/// </summary>
/// <param name="stag">The state whose Q row is scanned.</param>
/// <returns>
/// The maximum of <c>Q[stag][a]</c> over the domain's actions, or <c>double.MinValue</c>
/// when the domain has no actions.
/// </returns>
private double MaxR(State stag)
{
    double best = double.MinValue;
    foreach (Action candidate in m_dDomain.Actions)
    {
        double q = Q[stag][candidate];
        best = Math.Max(best, q);
    }
    return best;
}
/// <summary>
/// Casts <paramref name="s"/> to <see cref="RaceCarState"/> and forwards it to the
/// <see cref="RaceCarState"/> overload of <c>IsGoalState</c>.
/// </summary>
/// <param name="s">The state to test; the cast throws if it is not a <see cref="RaceCarState"/>.</param>
/// <returns>The result of the <see cref="RaceCarState"/> overload.</returns>
public override bool IsGoalState(State s)
{
    RaceCarState raceCarState = (RaceCarState)s;
    return IsGoalState(raceCarState);
}