// Performs one in-place Bellman backup for state s:
// scans every action, computes R(s,a) + gamma * sum_s' P(s'|s,a) * V(s'),
// and keeps the best action that actually moves the agent to a different state
// (the Apply/Equals check filters out self-loop actions).
// Updates ViByS / ViBySActions on improvement and returns the absolute change
// in the value of s, or 0 when no qualifying action improved on the current value.
private double update(State s)
{
    double bestValue = ViByS[s];
    Action bestAction = null;

    foreach (Action a in m_dDomain.Actions)
    {
        // Expected value of the successor states under action a.
        double expected = 0;
        foreach (State next in s.Successors(a))
        {
            expected += s.TransitionProbability(a, next) * ViByS[next];
        }

        double backup = s.Reward(a) + m_dDomain.DiscountFactor * expected;

        // Accept ties (>=) but reject actions that leave the state unchanged.
        if (backup >= bestValue && !s.Apply(a).Equals(s))
        {
            bestValue = backup;
            bestAction = a;
        }
    }

    if (bestAction == null)
    {
        return 0;
    }

    double change = Math.Abs(bestValue - ViByS[s]);
    ViByS[s] = bestValue;
    ViBySActions[s] = bestAction;
    return change;
}
// One value-iteration step for state s:
//   Vi+1(s) = max_a [ R(s,a) + gamma * sum_s' P(s'|s,a) * Vi(s') ]
// Reads the old values from ViByS, writes the new value into Vi_1ByS,
// records the greedy action in ViBySActions, and returns |Vi+1(s) - Vi(s)|
// (0 if the action set was empty and no maximizer was found).
private double updateValueIter(State s)
{
    Action argMax = null;
    double best = Double.MinValue;

    foreach (Action a in m_dDomain.Actions)
    {
        // Expected next-state value under action a.
        double expectedNext = 0;
        foreach (State successor in s.Successors(a))
        {
            expectedNext += s.TransitionProbability(a, successor) * ViByS[successor];
        }

        double value = s.Reward(a) + m_dDomain.DiscountFactor * expectedNext;
        if (value >= best)
        {
            best = value;
            argMax = a;
        }
    }

    // Guard kept from the original: only applies when there are no actions at all.
    if (argMax == null)
    {
        return 0;
    }

    Vi_1ByS[s] = best;
    ViBySActions[s] = argMax;
    return Math.Abs(Vi_1ByS[s] - ViByS[s]);
}
// On-policy SARSA learning: runs cTrials episodes of at most cStepsPerTrial
// steps each, choosing actions epsilon-greedily and applying the update
//   Q(s,a) += alpha * (r + gamma * Q(s',a') - Q(s,a))
// where a' is the action actually selected at the next state.
// After learning, the greedy action for every state is stored in ViBySActions.
// Returns the undiscounted sum of all rewards collected during learning.
// NOTE(review): the debug message says "ADR" but the returned value is a raw
// reward sum — neither discounted nor averaged; confirm intended semantics.
public double Sarsa(double dEpsilon, int cTrials, int cStepsPerTrial)
{
    double dSumRewards = 0.0;

    initV0();
    initQ();

    for (int trial = 0; trial < cTrials; trial++)
    {
        State current = m_dDomain.StartState;
        Action action = epsilonGreedy(current, dEpsilon);
        double alpha = 0.7;   // fixed learning rate; decay is disabled below
        int step = 1;

        while (!m_dDomain.IsGoalState(current) && step <= cStepsPerTrial)
        {
            double reward = current.Reward(action);
            dSumRewards += reward;

            State next = current.Apply(action);
            Action nextAction = epsilonGreedy(next, dEpsilon);

            // SARSA target uses the action actually chosen at the next state
            // (unlike Q-learning, which would use the max over actions).
            Q[current][action] += alpha * (reward + m_dDomain.DiscountFactor * Q[next][nextAction] - Q[current][action]);

            current = next;
            action = nextAction;
            step++;
            // alpha = alpha / step;   // learning-rate decay, disabled in the original
        }
    }

    // Extract the greedy policy from the learned Q table.
    foreach (State ss in m_dDomain.States)
    {
        ViBySActions[ss] = findMaxQA(ss);
    }

    Debug.WriteLine("\nDone computing ADR");
    return dSumRewards;
}
// Runs one simulated trial of policy p from the start state, for at most
// cStepsPerTrial steps or until a goal state is reached, and returns the
// accumulated discounted reward: sum_i gamma^i * R(s_i, p(s_i)).
//
// Bug fix: the successor state is now SAMPLED from the transition
// distribution of the current state. The original loop assigned
// `s = stag` for every state with nonzero probability, so it (a) always
// ended on the last such state instead of drawing randomly, and (b) mutated
// s mid-scan, so later TransitionProbability calls were made from the wrong
// source state.
private double CompOneExperimant(Policy p, int cStepsPerTrial)
{
    // NOTE(review): consider hoisting this to a field (or Random.Shared on
    // .NET 6+) if this method is called in a tight loop — rapid construction
    // of Random instances can repeat seeds on older runtimes.
    Random rnd = new Random();

    State s = StartState;
    double r = 0;
    int i = 0;

    while (!IsGoalState(s) && i <= cStepsPerTrial)
    {
        Action a = p.GetAction(s);
        r += Math.Pow(DiscountFactor, i) * s.Reward(a);
        i++;

        // Inverse-CDF sampling of the next state: walk the cumulative
        // transition probabilities from the CURRENT state until the random
        // draw falls inside a state's interval.
        double roll = rnd.NextDouble();
        double cumulative = 0.0;
        State next = s;   // fall back to staying put if probabilities sum < roll
        foreach (State stag in States)
        {
            cumulative += s.TransitionProbability(a, stag);
            if (roll < cumulative)
            {
                next = stag;
                break;
            }
        }
        s = next;
    }

    return r;
}
// In-place value-update step for a single state.
// For each action: builds the probability-weighted sum of successor values,
// forms the discounted backup R(s,a) + gamma * E[V(s')], and tracks the best
// backup among actions whose deterministic application changes the state.
// On success the value table and greedy-action table are updated and the
// magnitude of the value change is returned; otherwise returns 0.
private double update(State s)
{
    double incumbent = ViByS[s];
    Action chosen = null;

    foreach (Action act in m_dDomain.Actions)
    {
        double weighted = 0;
        foreach (State succ in s.Successors(act))
        {
            weighted += s.TransitionProbability(act, succ) * ViByS[succ];
        }

        double candidate = s.Reward(act) + m_dDomain.DiscountFactor * weighted;

        // Self-loop actions (Apply leaves the state unchanged) are never chosen.
        bool changesState = !s.Apply(act).Equals(s);
        if (candidate >= incumbent && changesState)
        {
            incumbent = candidate;
            chosen = act;
        }
    }

    if (chosen != null)
    {
        double shift = incumbent - ViByS[s];
        ViByS[s] = incumbent;
        ViBySActions[s] = chosen;
        return Math.Abs(shift);
    }

    return 0;
}
// Computes the value-iteration formula for Vi+1(s):
//   Vi+1(s) = max over actions of [ R(s,a) + gamma * sum_s' P(s'|s,a) * Vi(s') ]
// The result goes into the next-iteration table Vi_1ByS (the current table
// ViByS is left untouched), the maximizing action is stored in ViBySActions,
// and the absolute Bellman residual |Vi+1(s) - Vi(s)| is returned
// (0 when there were no actions to evaluate).
private double updateValueIter(State s)
{
    double bestSoFar = Double.MinValue;
    Action greedy = null;

    foreach (Action act in m_dDomain.Actions)
    {
        // Expected value of s's successors under act, weighted by transition probability.
        double futureValue = 0;
        foreach (State succ in s.Successors(act))
        {
            futureValue += s.TransitionProbability(act, succ) * ViByS[succ];
        }

        double qValue = s.Reward(act) + m_dDomain.DiscountFactor * futureValue;
        if (qValue >= bestSoFar)
        {
            bestSoFar = qValue;
            greedy = act;
        }
    }

    if (greedy == null)
    {
        return 0;
    }

    Vi_1ByS[s] = bestSoFar;
    ViBySActions[s] = greedy;
    return Math.Abs(Vi_1ByS[s] - ViByS[s]);
}