// Performs one in-place Bellman backup for state s:
// scans every action, computes R(s,a) + gamma * sum_s' P(s'|s,a) * V(s'),
// and keeps the best action that actually moves the agent to a different state
// (the Apply/Equals check filters out self-loop actions).
// Updates ViByS / ViBySActions on improvement and returns the absolute change
// in the value of s, or 0 when no qualifying action improved on the current value.
private double update(State s)
{
    double bestValue = ViByS[s];
    Action bestAction = null;

    foreach (Action a in m_dDomain.Actions)
    {
        // Expected value of the successor states under action a.
        double expected = 0;
        foreach (State next in s.Successors(a))
        {
            expected += s.TransitionProbability(a, next) * ViByS[next];
        }

        double backup = s.Reward(a) + m_dDomain.DiscountFactor * expected;

        // Accept ties (>=) but reject actions that leave the state unchanged.
        if (backup >= bestValue && !s.Apply(a).Equals(s))
        {
            bestValue = backup;
            bestAction = a;
        }
    }

    if (bestAction == null)
    {
        return 0;
    }

    double change = Math.Abs(bestValue - ViByS[s]);
    ViByS[s] = bestValue;
    ViBySActions[s] = bestAction;
    return change;
}
// One value-iteration step for state s:
//   Vi+1(s) = max_a [ R(s,a) + gamma * sum_s' P(s'|s,a) * Vi(s') ]
// Reads the old values from ViByS, writes the new value into Vi_1ByS,
// records the greedy action in ViBySActions, and returns |Vi+1(s) - Vi(s)|
// (0 if the action set was empty and no maximizer was found).
private double updateValueIter(State s)
{
    Action argMax = null;
    double best = Double.MinValue;

    foreach (Action a in m_dDomain.Actions)
    {
        // Expected next-state value under action a.
        double expectedNext = 0;
        foreach (State successor in s.Successors(a))
        {
            expectedNext += s.TransitionProbability(a, successor) * ViByS[successor];
        }

        double value = s.Reward(a) + m_dDomain.DiscountFactor * expectedNext;
        if (value >= best)
        {
            best = value;
            argMax = a;
        }
    }

    // Guard kept from the original: only applies when there are no actions at all.
    if (argMax == null)
    {
        return 0;
    }

    Vi_1ByS[s] = best;
    ViBySActions[s] = argMax;
    return Math.Abs(Vi_1ByS[s] - ViByS[s]);
}
// On-policy SARSA learning: runs cTrials episodes of at most cStepsPerTrial
// steps each, choosing actions epsilon-greedily and applying the update
//   Q(s,a) += alpha * (r + gamma * Q(s',a') - Q(s,a))
// where a' is the action actually selected at the next state.
// After learning, the greedy action for every state is stored in ViBySActions.
// Returns the undiscounted sum of all rewards collected during learning.
// NOTE(review): the debug message says "ADR" but the returned value is a raw
// reward sum — neither discounted nor averaged; confirm intended semantics.
public double Sarsa(double dEpsilon, int cTrials, int cStepsPerTrial)
{
    double dSumRewards = 0.0;

    initV0();
    initQ();

    for (int trial = 0; trial < cTrials; trial++)
    {
        State current = m_dDomain.StartState;
        Action action = epsilonGreedy(current, dEpsilon);
        double alpha = 0.7;   // fixed learning rate; decay is disabled below
        int step = 1;

        while (!m_dDomain.IsGoalState(current) && step <= cStepsPerTrial)
        {
            double reward = current.Reward(action);
            dSumRewards += reward;

            State next = current.Apply(action);
            Action nextAction = epsilonGreedy(next, dEpsilon);

            // SARSA target uses the action actually chosen at the next state
            // (unlike Q-learning, which would use the max over actions).
            Q[current][action] += alpha * (reward + m_dDomain.DiscountFactor * Q[next][nextAction] - Q[current][action]);

            current = next;
            action = nextAction;
            step++;
            // alpha = alpha / step;   // learning-rate decay, disabled in the original
        }
    }

    // Extract the greedy policy from the learned Q table.
    foreach (State ss in m_dDomain.States)
    {
        ViBySActions[ss] = findMaxQA(ss);
    }

    Debug.WriteLine("\nDone computing ADR");
    return dSumRewards;
}
// Runs one simulated trial of policy p from the start state, for at most
// cStepsPerTrial steps or until a goal state is reached, and returns the
// accumulated discounted reward: sum_i gamma^i * R(s_i, p(s_i)).
//
// Bug fix: the successor state is now SAMPLED from the transition
// distribution of the current state. The original loop assigned
// `s = stag` for every state with nonzero probability, so it (a) always
// ended on the last such state instead of drawing randomly, and (b) mutated
// s mid-scan, so later TransitionProbability calls were made from the wrong
// source state.
private double CompOneExperimant(Policy p, int cStepsPerTrial)
{
    // NOTE(review): consider hoisting this to a field (or Random.Shared on
    // .NET 6+) if this method is called in a tight loop — rapid construction
    // of Random instances can repeat seeds on older runtimes.
    Random rnd = new Random();

    State s = StartState;
    double r = 0;
    int i = 0;

    while (!IsGoalState(s) && i <= cStepsPerTrial)
    {
        Action a = p.GetAction(s);
        r += Math.Pow(DiscountFactor, i) * s.Reward(a);
        i++;

        // Inverse-CDF sampling of the next state: walk the cumulative
        // transition probabilities from the CURRENT state until the random
        // draw falls inside a state's interval.
        double roll = rnd.NextDouble();
        double cumulative = 0.0;
        State next = s;   // fall back to staying put if probabilities sum < roll
        foreach (State stag in States)
        {
            cumulative += s.TransitionProbability(a, stag);
            if (roll < cumulative)
            {
                next = stag;
                break;
            }
        }
        s = next;
    }

    return r;
}
// In-place value-update step for a single state.
// For each action: builds the probability-weighted sum of successor values,
// forms the discounted backup R(s,a) + gamma * E[V(s')], and tracks the best
// backup among actions whose deterministic application changes the state.
// On success the value table and greedy-action table are updated and the
// magnitude of the value change is returned; otherwise returns 0.
private double update(State s)
{
    double incumbent = ViByS[s];
    Action chosen = null;

    foreach (Action act in m_dDomain.Actions)
    {
        double weighted = 0;
        foreach (State succ in s.Successors(act))
        {
            weighted += s.TransitionProbability(act, succ) * ViByS[succ];
        }

        double candidate = s.Reward(act) + m_dDomain.DiscountFactor * weighted;

        // Self-loop actions (Apply leaves the state unchanged) are never chosen.
        bool changesState = !s.Apply(act).Equals(s);
        if (candidate >= incumbent && changesState)
        {
            incumbent = candidate;
            chosen = act;
        }
    }

    if (chosen != null)
    {
        double shift = incumbent - ViByS[s];
        ViByS[s] = incumbent;
        ViBySActions[s] = chosen;
        return Math.Abs(shift);
    }

    return 0;
}
// Computes the value-iteration formula for Vi+1(s):
//   Vi+1(s) = max over actions of [ R(s,a) + gamma * sum_s' P(s'|s,a) * Vi(s') ]
// The result goes into the next-iteration table Vi_1ByS (the current table
// ViByS is left untouched), the maximizing action is stored in ViBySActions,
// and the absolute Bellman residual |Vi+1(s) - Vi(s)| is returned
// (0 when there were no actions to evaluate).
private double updateValueIter(State s)
{
    double bestSoFar = Double.MinValue;
    Action greedy = null;

    foreach (Action act in m_dDomain.Actions)
    {
        // Expected value of s's successors under act, weighted by transition probability.
        double futureValue = 0;
        foreach (State succ in s.Successors(act))
        {
            futureValue += s.TransitionProbability(act, succ) * ViByS[succ];
        }

        double qValue = s.Reward(act) + m_dDomain.DiscountFactor * futureValue;
        if (qValue >= bestSoFar)
        {
            bestSoFar = qValue;
            greedy = act;
        }
    }

    if (greedy == null)
    {
        return 0;
    }

    Vi_1ByS[s] = bestSoFar;
    ViBySActions[s] = greedy;
    return Math.Abs(Vi_1ByS[s] - ViByS[s]);
}