コード例 #1
0
        /**
         * Runs one step of the passive adaptive-dynamic-programming agent.
         *
         * The percept's state s' and reward r' are recorded in U and R the
         * first time s' is seen. When a previous state/action pair is
         * remembered, its visit counters are incremented and the transition
         * estimates P(t|s,a) are refreshed for every state t of the MDP that
         * has a nonzero count. The utilities are then re-evaluated under the
         * fixed policy, and the remembered state/action pair is advanced (or
         * reset when s' is terminal).
         *
         * @param percept
         *            a percept supplying the current state s' and the reward
         *            signal r'.
         * @return the action stored for the next step: the policy's action
         *         for s', or default(A) when s' is terminal.
         */
        public override A execute(IPerceptStateReward <S> percept)
        {
            S      nextState = percept.state();
            double reward    = percept.reward();

            // A state seen for the first time gets its reward as both the
            // initial utility estimate and the recorded reward.
            if (!U.ContainsKey(nextState))
            {
                U.Put(nextState, reward);
                R.Put(nextState, reward);
            }

            // Only update the model when there is a remembered previous state.
            if (s != null)
            {
                // Count the (s, a) visit and the observed (s', s, a) outcome.
                var stateAction = new Pair<S, A>(s, a);
                Nsa.incrementFor(stateAction);
                NsDelta_sa.incrementFor(new Pair<S, Pair<S, A>>(nextState, stateAction));

                // Refresh P(t|s,a) for every t whose outcome count is nonzero.
                foreach (S candidate in mdp.states())
                {
                    var transition = new Pair<S, Pair<S, A>>(candidate, stateAction);
                    if (NsDelta_sa.getCount(transition) == 0)
                    {
                        continue;
                    }

                    // P(t|s,a) = N[t,s,a] / N[s,a]; the casts force
                    // floating-point division.
                    double outcomeCount = (double)NsDelta_sa.getCount(transition);
                    double visitCount   = (double)Nsa.getCount(stateAction);
                    P.Put(transition, outcomeCount / visitCount);
                }
            }

            // Re-evaluate the utilities under the fixed policy pi.
            U = policyEvaluation.evaluate(pi, U, mdp);

            // Remember (s', pi[s']) for the next step, or clear the memory
            // when s' is terminal.
            if (!isTerminal(nextState))
            {
                s = nextState;
                a = pi.Get(nextState);
            }
            else
            {
                s = default(S);
                a = default(A);
            }

            return a;
        }