/// <summary>
/// Updates, in place, the value stored for <paramref name="s"/> in ViByS.
/// For every action in m_dDomain.Actions the method computes
/// s.Reward(a) + m_dDomain.DiscountFactor * sum(TransitionProbability(a, s') * ViByS[s'])
/// over s.Successors(a). An action is accepted only when that value is at least
/// the best value seen so far (starting from the current ViByS[s]) and
/// s.Apply(a) is not equal to s. Because the comparison is "greater or equal",
/// a later action replaces an earlier one on ties.
/// </summary>
/// <param name="s">State whose entries in ViByS and ViBySActions may be overwritten.</param>
/// <returns>
/// The absolute difference between the new and the previous ViByS[s] when an
/// action was accepted; 0 when no action was accepted (nothing is written).
/// </returns>
private double update(State s)
{
    double bestValue = ViByS[s];
    Action bestAction = null;

    foreach (Action candidate in m_dDomain.Actions)
    {
        // Probability-weighted sum of the current estimates of the successors.
        double expectedNext = 0;
        foreach (State next in s.Successors(candidate))
        {
            expectedNext += s.TransitionProbability(candidate, next) * ViByS[next];
        }

        double backedUp = s.Reward(candidate) + m_dDomain.DiscountFactor * expectedNext;

        // Negated >= (rather than <) so that a NaN value is never selected.
        if (!(backedUp >= bestValue))
        {
            continue;
        }

        // Apply is consulted only for candidates that passed the value test.
        if (s.Apply(candidate).Equals(s))
        {
            continue;
        }

        bestValue = backedUp;
        bestAction = candidate;
    }

    if (bestAction == null)
    {
        return 0;
    }

    // Read the old value before overwriting it so the change can be reported.
    double change = bestValue - ViByS[s];
    ViByS[s] = bestValue;
    ViBySActions[s] = bestAction;
    return Math.Abs(change);
}
/// <summary>
/// Computes the formula for Vi+1(s) from the values currently held in ViByS.
/// For every action in m_dDomain.Actions the method computes
/// s.Reward(a) + m_dDomain.DiscountFactor * sum(TransitionProbability(a, s') * ViByS[s'])
/// over s.Successors(a) and keeps the largest result (ties go to the later action;
/// the search starts from Double.MinValue). The winning value is written to
/// Vi_1ByS[s] and the winning action to ViBySActions[s]; ViByS is only read.
/// </summary>
/// <param name="s">State for which the next-iteration value is computed.</param>
/// <returns>
/// |Vi_1ByS[s] - ViByS[s]| after the write, or 0 when no action was selected
/// (in which case nothing is written).
/// </returns>
private double updateValueIter(State s)
{
    double bestValue = Double.MinValue;
    Action bestAction = null;

    foreach (Action candidate in m_dDomain.Actions)
    {
        // Evaluate the formula for this single action.
        double expectedNext = 0;
        foreach (State next in s.Successors(candidate))
        {
            expectedNext += s.TransitionProbability(candidate, next) * ViByS[next];
        }

        double actionValue = s.Reward(candidate) + m_dDomain.DiscountFactor * expectedNext;

        // Negated >= (rather than <) so that a NaN value is never selected.
        if (!(actionValue >= bestValue))
        {
            continue;
        }

        bestValue = actionValue;
        bestAction = candidate;
    }

    if (bestAction == null)
    {
        return 0;
    }

    Vi_1ByS[s] = bestValue;
    ViBySActions[s] = bestAction;

    // The new value is read back from Vi_1ByS before the old one is read from ViByS.
    double updated = Vi_1ByS[s];
    double previous = ViByS[s];
    return Math.Abs(updated - previous);
}
// Overwrites ViByS[s] in place when some action offers a value that is at least
// the one already stored.
//
// For each action in m_dDomain.Actions the candidate value is
//   s.Reward(a) + m_dDomain.DiscountFactor * SUM over s.Successors(a) of
//   s.TransitionProbability(a, s') * ViByS[s'].
// A candidate is taken only if it is >= the best value so far (initially the
// current ViByS[s]) and s.Apply(a) does not equal s. On ties the later action wins.
//
// Returns the absolute change written to ViByS[s], or 0 if no action was taken
// (ViByS and ViBySActions are then left untouched).
private double update(State s)
{
    double currentBest = ViByS[s];
    Action chosen = null;

    foreach (Action act in m_dDomain.Actions)
    {
        double weighted = 0;
        foreach (State succ in s.Successors(act))
        {
            weighted += s.TransitionProbability(act, succ) * ViByS[succ];
        }

        double candidateValue = s.Reward(act) + m_dDomain.DiscountFactor * weighted;

        if (candidateValue >= currentBest)
        {
            // Apply is evaluated only after the value test has passed.
            bool differsFromS = !s.Apply(act).Equals(s);
            if (differsFromS)
            {
                currentBest = candidateValue;
                chosen = act;
            }
        }
    }

    if (chosen != null)
    {
        // Capture the difference before the stored value is replaced.
        double diff = currentBest - ViByS[s];
        ViByS[s] = currentBest;
        ViBySActions[s] = chosen;
        return Math.Abs(diff);
    }

    return 0;
}
// Calculates the formula for Vi+1(s).
//
// For each action in m_dDomain.Actions the candidate value is
//   s.Reward(a) + m_dDomain.DiscountFactor * SUM over s.Successors(a) of
//   s.TransitionProbability(a, s') * ViByS[s'].
// The largest candidate (search seeded with Double.MinValue, later action wins
// ties) is stored in Vi_1ByS[s] and its action in ViBySActions[s].
//
// Returns |Vi_1ByS[s] - ViByS[s]| after the store, or 0 if no action was
// selected, in which case nothing is stored.
private double updateValueIter(State s)
{
    double topValue = Double.MinValue;
    Action topAction = null;

    foreach (Action act in m_dDomain.Actions)
    {
        // Calculate the formula for this action.
        double weighted = 0;
        foreach (State succ in s.Successors(act))
        {
            weighted += s.TransitionProbability(act, succ) * ViByS[succ];
        }

        double candidateValue = s.Reward(act) + m_dDomain.DiscountFactor * weighted;

        // Keep the maximum seen so far.
        if (candidateValue >= topValue)
        {
            topValue = candidateValue;
            topAction = act;
        }
    }

    if (topAction != null)
    {
        Vi_1ByS[s] = topValue;
        ViBySActions[s] = topAction;

        // Difference between the freshly stored value and the previous iteration's value.
        return Math.Abs(Vi_1ByS[s] - ViByS[s]);
    }

    return 0;
}