/// <summary>
/// Performs one in-place value backup for <paramref name="s"/>.
/// For every action in m_dDomain.Actions the method computes
/// s.Reward(a) + DiscountFactor * sum(TransitionProbability(a, s') * ViByS[s'])
/// over s.Successors(a). An action is accepted only when that value is at least
/// the best value seen so far (starting from the current ViByS[s]) and
/// s.Apply(a) is not equal to s. Because ties are accepted, a later action
/// replaces an earlier one with the same value.
/// </summary>
/// <param name="s">State whose entries in ViByS and ViBySActions are updated.</param>
/// <returns>
/// The absolute change written to ViByS[s], or 0 when no action was accepted
/// (in which case nothing is written).
/// </returns>
private double update(State s)
{
    double previousValue = ViByS[s];
    double bestValue = previousValue;
    Action bestAction = null;

    foreach (Action candidate in m_dDomain.Actions)
    {
        // Expected value of the successor states under the current estimates.
        double expectedNext = 0;
        foreach (State next in s.Successors(candidate))
        {
            expectedNext += s.TransitionProbability(candidate, next) * ViByS[next];
        }

        double actionValue = s.Reward(candidate) + (m_dDomain.DiscountFactor * expectedNext);

        // Negated ">=" (rather than "<") so a NaN value is rejected exactly like
        // a failed ">=" comparison would reject it.
        if (!(actionValue >= bestValue))
        {
            continue;
        }

        // Apply is only consulted for actions that passed the value test.
        if (s.Apply(candidate).Equals(s))
        {
            continue;
        }

        bestValue = actionValue;
        bestAction = candidate;
    }

    if (bestAction == null)
    {
        return 0;
    }

    ViByS[s] = bestValue;
    ViBySActions[s] = bestAction;
    return Math.Abs(bestValue - previousValue);
}
/// <summary>
/// Computes the Vi+1(s) value for <paramref name="s"/> from the current estimates
/// in ViByS. For every action in m_dDomain.Actions the value is
/// s.Reward(a) + DiscountFactor * sum(TransitionProbability(a, s') * ViByS[s'])
/// over s.Successors(a). The largest value is stored in Vi_1ByS[s] and its action
/// in ViBySActions[s]; on ties the action enumerated last is kept.
/// </summary>
/// <param name="s">State to evaluate.</param>
/// <returns>
/// The absolute difference between the new Vi_1ByS[s] and ViByS[s], or 0 when
/// no action was selected (nothing is written in that case).
/// </returns>
private double updateValueIter(State s)
{
    // Double.MinValue is the most negative finite double, so any finite value replaces it.
    double bestValue = Double.MinValue;
    Action bestAction = null;

    foreach (Action candidate in m_dDomain.Actions)
    {
        // Expected value of the successor states for this action.
        double expectedNext = 0;
        foreach (State next in s.Successors(candidate))
        {
            expectedNext += s.TransitionProbability(candidate, next) * ViByS[next];
        }

        double actionValue = s.Reward(candidate) + m_dDomain.DiscountFactor * expectedNext;

        // Negated ">=" keeps NaN values rejected, same as a failed ">=" test.
        if (!(actionValue >= bestValue))
        {
            continue;
        }

        bestValue = actionValue;
        bestAction = candidate;
    }

    if (bestAction == null)
    {
        return 0;
    }

    Vi_1ByS[s] = bestValue;
    ViBySActions[s] = bestAction;
    return Math.Abs(bestValue - ViByS[s]);
}
/// <summary>
/// Simulates one trial of policy <paramref name="p"/> starting at StartState and
/// returns the accumulated discounted reward, i.e. the sum over the executed steps
/// of DiscountFactor^step * Reward(action).
/// </summary>
/// <param name="p">Policy queried for the action to take in each visited state.</param>
/// <param name="cStepsPerTrial">
/// Maximum number of steps to execute. The trial ends earlier when IsGoalState
/// reports the current state as a goal. Zero or negative values execute no steps.
/// </param>
/// <returns>The discounted reward collected during the trial.</returns>
private double CompOneExperimant(Policy p, int cStepsPerTrial)
{
    State current = StartState;
    double discountedReward = 0;
    int step = 0;

    // Strict "<" so that exactly cStepsPerTrial steps are executed at most.
    while (!IsGoalState(current) && step < cStepsPerTrial)
    {
        Action action = p.GetAction(current);
        discountedReward += Math.Pow(DiscountFactor, step) * current.Reward(action);
        step++;

        // Look up the successor relative to the state the action was taken in.
        // The search must not reassign the state it is querying, otherwise the
        // remaining probability checks are made from the new state and the
        // simulation can move more than once per step.
        // NOTE(review): the first state in States with a non-zero transition
        // probability is taken; stochastic transitions are not sampled.
        State next = current;
        foreach (State candidate in States)
        {
            if (current.TransitionProbability(action, candidate) != 0)
            {
                next = candidate;
                break;
            }
        }

        // When no successor is found the state is left unchanged.
        current = next;
    }

    return discountedReward;
}
/// <summary>
/// Updates the value estimate of <paramref name="s"/> in place.
/// Each action in m_dDomain.Actions is scored as
/// s.Reward(a) + DiscountFactor * sum(TransitionProbability(a, s') * ViByS[s'])
/// over s.Successors(a). The running best starts at the current ViByS[s]; an
/// action replaces it when its score is not lower than the running best and
/// s.Apply(a) differs from s. Equal scores therefore favour the later action.
/// </summary>
/// <param name="s">State whose ViByS and ViBySActions entries may be rewritten.</param>
/// <returns>
/// Absolute difference between the new and the old ViByS[s]; 0 when no action
/// qualified and nothing was stored.
/// </returns>
private double update(State s)
{
    double oldEstimate = ViByS[s];
    double topScore = oldEstimate;
    Action topAction = null;

    foreach (Action act in m_dDomain.Actions)
    {
        // Probability-weighted value of the states reachable with this action.
        double weighted = 0;
        foreach (State succ in s.Successors(act))
        {
            weighted += s.TransitionProbability(act, succ) * ViByS[succ];
        }

        double score = s.Reward(act) + (m_dDomain.DiscountFactor * weighted);

        // The value test comes first so Apply is only called for actions that pass it.
        bool notWorse = score >= topScore;
        if (notWorse && !s.Apply(act).Equals(s))
        {
            topScore = score;
            topAction = act;
        }
    }

    // Nothing qualified: leave both tables untouched.
    if (topAction == null)
    {
        return 0;
    }

    double change = topScore - oldEstimate;
    ViByS[s] = topScore;
    ViBySActions[s] = topAction;
    return Math.Abs(change);
}
/// <summary>
/// Evaluates the formula for Vi+1(s): the maximum over the actions in
/// m_dDomain.Actions of
/// s.Reward(a) + DiscountFactor * sum(TransitionProbability(a, s') * ViByS[s'])
/// taken over s.Successors(a). The maximum is written to Vi_1ByS[s] and the
/// action that produced it to ViBySActions[s]. When several actions share the
/// maximum, the one enumerated last is stored.
/// </summary>
/// <param name="s">State for which the next-iteration value is computed.</param>
/// <returns>
/// Absolute difference between Vi_1ByS[s] and ViByS[s] after the update, or 0
/// when no action was chosen and nothing was written.
/// </returns>
private double updateValueIter(State s)
{
    // Seed with the most negative finite double so any finite score wins.
    double topScore = Double.MinValue;
    Action topAction = null;

    foreach (Action act in m_dDomain.Actions)
    {
        // Probability-weighted value of the states reachable with this action.
        double weighted = 0;
        foreach (State succ in s.Successors(act))
        {
            weighted += s.TransitionProbability(act, succ) * ViByS[succ];
        }

        double score = s.Reward(act) + m_dDomain.DiscountFactor * weighted;

        // ">=" (not ">") so that a tie moves the choice to the later action.
        if (score >= topScore)
        {
            topScore = score;
            topAction = act;
        }
    }

    if (topAction == null)
    {
        return 0;
    }

    Vi_1ByS[s] = topScore;
    ViBySActions[s] = topAction;
    return Math.Abs(topScore - ViByS[s]);
}