Beispiel #1
0
        // Re-evaluates the value stored for s, writing the result straight back
        // into ViByS / ViBySActions.
        //
        // For each action a in m_dDomain.Actions the candidate value is
        //     s.Reward(a) + DiscountFactor * sum over s.Successors(a) of
        //     TransitionProbability(a, s') * ViByS[s'].
        // The search starts from the value currently stored for s, so a candidate
        // is only accepted when it is at least that value (ties go to the later
        // action) and s.Apply(a) is not equal to s.
        //
        // Returns the absolute change of ViByS[s], or 0 when no action was accepted
        // (in which case nothing is written).
        private double update(State s)
        {
            Action bestAction = null;
            double bestValue = ViByS[s];

            foreach (Action candidate in m_dDomain.Actions)
            {
                double expectedNext = 0;
                foreach (State next in s.Successors(candidate))
                {
                    expectedNext += s.TransitionProbability(candidate, next) * ViByS[next];
                }

                double candidateValue = s.Reward(candidate) + (m_dDomain.DiscountFactor * expectedNext);

                // Numeric test first: Apply is only consulted for candidates that pass it.
                // Written as a negated >= so that a NaN candidate is rejected as well.
                if (!(candidateValue >= bestValue))
                {
                    continue;
                }

                // Skip actions whose Apply result equals the state itself.
                if (s.Apply(candidate).Equals(s))
                {
                    continue;
                }

                bestValue = candidateValue;
                bestAction = candidate;
            }

            if (bestAction == null)
            {
                return 0;
            }

            double change = bestValue - ViByS[s];
            ViByS[s] = bestValue;
            ViBySActions[s] = bestAction;
            return Math.Abs(change);
        }
Beispiel #2
0
        // Calculates the formula for Vi+1(s).
        //
        // Every action a in m_dDomain.Actions is scored as
        //     s.Reward(a) + DiscountFactor * sum over s.Successors(a) of
        //     TransitionProbability(a, s') * ViByS[s'].
        // The highest score (ties go to the later action) is stored in Vi_1ByS[s]
        // and its action in ViBySActions[s]; ViByS itself is only read.
        //
        // Returns |Vi_1ByS[s] - ViByS[s]|, or 0 when no action was selected
        // (nothing is written in that case).
        private double updateValueIter(State s)
        {
            Action bestAction = null;
            double bestValue = Double.MinValue;

            foreach (Action candidate in m_dDomain.Actions)
            {
                // Expected value of the successors reached through this action.
                double expectedNext = 0;
                foreach (State next in s.Successors(candidate))
                {
                    double probability = s.TransitionProbability(candidate, next);
                    expectedNext += probability * ViByS[next];
                }

                double candidateValue = s.Reward(candidate) + m_dDomain.DiscountFactor * expectedNext;

                // Keep the running maximum.
                if (candidateValue >= bestValue)
                {
                    bestAction = candidate;
                    bestValue = candidateValue;
                }
            }

            if (bestAction == null)
            {
                return 0;
            }

            Vi_1ByS[s] = bestValue;
            ViBySActions[s] = bestAction;

            double difference = Vi_1ByS[s] - ViByS[s];
            return Math.Abs(difference);
        }
Beispiel #3
0
        // Runs a single trial that follows policy p from StartState and returns
        // the discounted reward accumulated along the way.
        //
        // p              - policy queried (p.GetAction) for the action in each visited state.
        // cStepsPerTrial - maximum number of steps to take; the trial also stops as
        //                  soon as IsGoalState accepts the current state.
        //
        // Step i (zero-based) contributes DiscountFactor^i * Reward(action).
        //
        // The successor is picked deterministically, not sampled: it is the last
        // entry of States that has a non-zero transition probability from the
        // state the action was taken in. If no entry qualifies the state is kept.
        private double CompOneExperimant(Policy p, int cStepsPerTrial)
        {
            State current = StartState;
            double discountedReward = 0;
            int step = 0;

            // Strictly-less-than: steps are counted from 0, so "<=" would execute
            // cStepsPerTrial + 1 steps.
            while (!IsGoalState(current) && step < cStepsPerTrial)
            {
                Action chosen = p.GetAction(current);
                discountedReward += Math.Pow(DiscountFactor, step) * current.Reward(chosen);
                step++;

                // Always probe from the pre-transition state. Reassigning the
                // current state while still scanning States would make later
                // probes start from the new state and chain several transitions
                // into a single step.
                State next = current;
                foreach (State candidate in States)
                {
                    if (current.TransitionProbability(chosen, candidate) != 0)
                    {
                        next = candidate;
                    }
                }
                current = next;
            }

            return discountedReward;
        }
        // In-place update of the value and action stored for s.
        //
        // Scores every action a in m_dDomain.Actions as
        //     s.Reward(a) + DiscountFactor * sum over s.Successors(a) of
        //     TransitionProbability(a, s') * ViByS[s'],
        // starting the maximum at the value currently held in ViByS[s]. An action
        // replaces the maximum only if its score is >= the maximum so far and
        // s.Apply(a) does not equal s.
        //
        // Returns the absolute difference between the new and the old ViByS[s],
        // or 0 (without writing anything) when no action qualified.
        private double update(State s)
        {
            double best = ViByS[s];
            Action bestChoice = null;

            foreach (Action act in m_dDomain.Actions)
            {
                double weighted = 0;
                foreach (State succ in s.Successors(act))
                {
                    double probability = s.TransitionProbability(act, succ);
                    weighted += probability * ViByS[succ];
                }

                double score = s.Reward(act) + (m_dDomain.DiscountFactor * weighted);

                // Apply is evaluated only once the score has passed the numeric test.
                if (score >= best)
                {
                    bool returnsSameState = s.Apply(act).Equals(s);
                    if (!returnsSameState)
                    {
                        best = score;
                        bestChoice = act;
                    }
                }
            }

            if (bestChoice == null)
            {
                return 0;
            }

            double delta = best - ViByS[s];
            ViByS[s] = best;
            ViBySActions[s] = bestChoice;
            return Math.Abs(delta);
        }
 // Calculates the formula for Vi+1(s).
 //
 // Each action a in m_dDomain.Actions gets the score
 //     s.Reward(a) + DiscountFactor * sum over s.Successors(a) of
 //     TransitionProbability(a, s') * ViByS[s'].
 // The largest score (a later action wins a tie) is written to Vi_1ByS[s] and
 // the matching action to ViBySActions[s]; ViByS is read but not modified.
 //
 // Returns |Vi_1ByS[s] - ViByS[s]|, or 0 without writing when no action was chosen.
 private double updateValueIter(State s)
 {
     Action chosen = null;
     double highest = Double.MinValue;

     foreach (Action act in m_dDomain.Actions)
     {
         // Probability-weighted sum of the current successor values.
         double weighted = 0;
         foreach (State succ in s.Successors(act))
         {
             weighted += s.TransitionProbability(act, succ) * ViByS[succ];
         }

         double score = s.Reward(act) + m_dDomain.DiscountFactor * weighted;

         // Track the maximum seen so far.
         if (score >= highest)
         {
             chosen = act;
             highest = score;
         }
     }

     if (chosen == null)
     {
         return 0;
     }

     Vi_1ByS[s] = highest;
     ViBySActions[s] = chosen;

     double gap = Vi_1ByS[s] - ViByS[s];
     return Math.Abs(gap);
 }