Beispiel #1
0
        /// <summary>
        /// Performs one value backup for <paramref name="s"/>: every action in
        /// <c>m_dDomain.Actions</c> is scored as its reward plus the discounted,
        /// probability-weighted sum of the stored successor values, and the best
        /// acceptable score replaces the stored value of the state.
        /// </summary>
        /// <remarks>
        /// The running best starts at the value currently stored for the state, so an
        /// action is accepted only if its score is at least that value. An action is
        /// also rejected when <c>s.Apply(action)</c> returns a state equal to
        /// <paramref name="s"/>.
        /// </remarks>
        /// <param name="s">State whose entries in <c>ViByS</c> and <c>ViBySActions</c> may be overwritten.</param>
        /// <returns>
        /// The absolute difference between the new and the previous stored value, or
        /// 0 when no action was accepted (nothing is written in that case).
        /// </returns>
        private double update(State s)
        {
            double bestValue = ViByS[s];
            Action bestAction = null;

            foreach (Action candidate in m_dDomain.Actions)
            {
                // Probability-weighted sum of the stored values of all successors.
                double expectedNext = 0;
                foreach (State next in s.Successors(candidate))
                {
                    double weight = s.TransitionProbability(candidate, next);
                    expectedNext += weight * ViByS[next];
                }

                double backedUp = s.Reward(candidate) + (m_dDomain.DiscountFactor * expectedNext);

                // Negated >= (rather than <) so a NaN score is rejected here.
                if (!(backedUp >= bestValue))
                {
                    continue;
                }

                // Apply is consulted only for candidates that passed the value test.
                if (s.Apply(candidate).Equals(s))
                {
                    continue;
                }

                bestValue = backedUp;
                bestAction = candidate;
            }

            if (bestAction == null)
            {
                return 0;
            }

            double change = bestValue - ViByS[s];
            ViByS[s] = bestValue;
            ViBySActions[s] = bestAction;
            return Math.Abs(change);
        }
Beispiel #2
0
        /// <summary>
        /// Runs <paramref name="cTrials"/> learning trials from the domain's start
        /// state, adjusting the <c>Q</c> table after every step with an on-policy
        /// temporal-difference update, and then records for every domain state the
        /// action returned by <c>findMaxQA</c>.
        /// </summary>
        /// <param name="dEpsilon">Value forwarded to <c>epsilonGreedy</c> whenever an action is picked.</param>
        /// <param name="cTrials">Number of trials to run.</param>
        /// <param name="cStepsPerTrial">Upper bound on the number of steps taken in a single trial.</param>
        /// <returns>The sum of all rewards observed over every step of every trial.</returns>
        public double Sarsa(double dEpsilon, int cTrials, int cStepsPerTrial)
        {
            // Fixed step size applied to every temporal-difference correction.
            const double learningRate = 0.7;

            initV0();
            initQ();

            double totalReward = 0.0;
            int trial = 0;
            while (trial < cTrials)
            {
                State current = m_dDomain.StartState;
                Action chosen = epsilonGreedy(current, dEpsilon);

                // A trial ends at a goal state or once the step budget is used up.
                for (int step = 1; !m_dDomain.IsGoalState(current) && step <= cStepsPerTrial; step++)
                {
                    double reward = current.Reward(chosen);
                    totalReward += reward;

                    State successor = current.Apply(chosen);
                    Action followUp = epsilonGreedy(successor, dEpsilon);

                    // Move Q(current, chosen) toward reward + discount * Q(successor, followUp).
                    Q[current][chosen] += learningRate * (reward + m_dDomain.DiscountFactor * Q[successor][followUp] - Q[current][chosen]);

                    current = successor;
                    chosen = followUp;
                }

                trial++;
            }

            // Store the action findMaxQA selects for each state.
            foreach (State state in m_dDomain.States)
            {
                ViBySActions[state] = findMaxQA(state);
            }

            Debug.WriteLine("\nDone computing ADR");
            return totalReward;
        }
        /// <summary>
        /// Recomputes the stored value of <paramref name="s"/> from the stored values
        /// of its successors and remembers the action that produced it.
        /// </summary>
        /// <remarks>
        /// Each action's score is its reward plus <c>m_dDomain.DiscountFactor</c> times
        /// the probability-weighted sum of successor values. A score is accepted only
        /// if it is at least the best seen so far (initially the current stored value)
        /// and <c>s.Apply(action)</c> does not return a state equal to <paramref name="s"/>.
        /// </remarks>
        /// <param name="s">State to back up.</param>
        /// <returns>
        /// Absolute change of the stored value, or 0 if no action was accepted and the
        /// tables were left untouched.
        /// </returns>
        private double update(State s)
        {
            Action chosen = null;
            double best = ViByS[s];

            foreach (Action act in m_dDomain.Actions)
            {
                double weighted = 0;
                foreach (State succ in s.Successors(act))
                {
                    weighted = weighted + s.TransitionProbability(act, succ) * ViByS[succ];
                }

                double immediate = s.Reward(act);
                double candidateValue = immediate + (m_dDomain.DiscountFactor * weighted);

                if (candidateValue >= best)
                {
                    // Apply is evaluated only after the value test passes; an action
                    // whose result equals the state itself is not accepted.
                    bool staysPut = s.Apply(act).Equals(s);
                    if (!staysPut)
                    {
                        best = candidateValue;
                        chosen = act;
                    }
                }
            }

            if (chosen == null)
            {
                return 0;
            }

            double previous = ViByS[s];
            ViByS[s] = best;
            ViBySActions[s] = chosen;
            return Math.Abs(best - previous);
        }