コード例 #1
0
        /* simulates your policy for a number of iterations multiple times, and computes the average reward obtained.
         * To generate a single trial:
         *   1. Sample a starting state s from the initial belief state.
         *   2. Repeat until goal is reached
         *        a) compute the action a for the belief state.
         *        b) sample the result of applying a to s, obtaining s'.
         *        c) sample an observation o based on a and s'
         *        d) compute the new belief state given your old belief state, a, and o.
         *        e) accumulate the reward
         * cStepsPerTrial = maximum number of steps (iterations) performed in a single trial.
         * cTrials = number of trials, each running for at most cStepsPerTrial steps. */
        /// <summary>
        /// Simulates policy <paramref name="p"/> for <paramref name="cTrials"/> trials and returns the
        /// reward accumulated over all trials divided by the number of trials.
        /// </summary>
        /// <param name="p">Policy queried for an action at every belief state.</param>
        /// <param name="cTrials">Number of trials to simulate.</param>
        /// <param name="cStepsPerTrial">Upper bound on the number of steps taken in one trial.</param>
        /// <returns>
        /// The summed reward divided by <paramref name="cTrials"/>, or 0 when
        /// <paramref name="cTrials"/> is not positive.
        /// </returns>
        public double ComputeAverageDiscountedReward(Policy p, int cTrials, int cStepsPerTrial)
        {
            // Without any trial there is nothing to average; avoid the 0/0 division (NaN).
            if (cTrials <= 0)
            {
                return 0.0;
            }

            // NOTE(review): despite the method name, no discount factor is applied to the
            // rewards summed here - confirm whether discounting is intended.
            double accumulatedReward = 0;

            for (int i = 1; i <= cTrials; i++)
            {
                int         remainingSteps = cStepsPerTrial;
                BeliefState bs             = InitialBelief;
                // Step 1: sample the hidden starting state from the initial belief.
                State s = bs.RandomState();
                // Step 2: run until the goal is reached or the step budget is used up.
                while (!IsGoalState(s) && remainingSteps > 0)
                {
                    // Step 2a: the policy only sees the belief state, never the hidden state.
                    Action a = p.GetAction(bs);
                    // Step 2b: sample the successor state s' of applying a to s.
                    State  sTag   = s.Apply(a);
                    // The reward is read from the belief state before it is updated.
                    double reward = bs.Reward(a);
                    // Step 2c: the observation must be sampled from the successor state s',
                    // not from the state the action was applied in.
                    Observation o = sTag.RandomObservation(a);
                    // Step 2d: update the belief with the action taken and the observation received.
                    bs = bs.Next(a, o);
                    // Step 2e: accumulate the reward.
                    accumulatedReward += reward;
                    s = sTag;
                    remainingSteps--;
                }
            }
            return accumulatedReward / cTrials;
        }
コード例 #2
0
ファイル: Domain.cs プロジェクト: abadied/AI_POMDP
        /// <summary>
        /// Simulates policy <paramref name="p"/> for <paramref name="cTrials"/> trials and returns the
        /// average, over the trials, of the discounted reward collected in each trial.
        /// </summary>
        /// <param name="p">Policy queried for an action at every belief state.</param>
        /// <param name="cTrials">Number of trials to simulate.</param>
        /// <param name="cStepsPerTrial">Upper bound on the number of steps taken in one trial.</param>
        /// <returns>
        /// The sum of the per-trial discounted rewards divided by <paramref name="cTrials"/>,
        /// or 0 when <paramref name="cTrials"/> is not positive.
        /// </returns>
        public double ComputeAverageDiscountedReward(Policy p, int cTrials, int cStepsPerTrial)
        {
            // Without any trial there is nothing to average; avoid the 0/0 division (NaN).
            if (cTrials <= 0)
            {
                return 0.0;
            }

            double totalReward = 0.0;

            for (int trial = 0; trial < cTrials; trial++)
            {
                State       current     = sampleInitialState();
                BeliefState belief      = InitialBelief;
                double      trialReward = 0.0;
                int         step        = 0;

                // A trial ends when the goal is reached or the step budget is used up.
                while (!IsGoalState(current) && step < cStepsPerTrial)
                {
                    Action a    = p.GetAction(belief);
                    State  next = current.Apply(a: a);

                    // Pair every observation with the running (cumulative) sum of the
                    // observation probabilities in the successor state.
                    List <KeyValuePair <Observation, double> > cumulative = new List <KeyValuePair <Observation, double> >();
                    double runningSum = 0.0;
                    foreach (Observation obs in Observations)
                    {
                        runningSum += next.ObservationProbability(a: a, o: obs);
                        cumulative.Add(new KeyValuePair <Observation, double>(obs, runningSum));
                    }
                    // NOTE(review): presumably draws one observation from the cumulative
                    // table - confirm against samplingObservations.
                    Observation observation = samplingObservations(cumulative);

                    // The reward is read from the belief state before it is updated.
                    double reward = belief.Reward(a);
                    belief = belief.Next(a: a, o: observation);

                    // Discount the reward by DiscountFactor^step (step is 0 for the first action).
                    trialReward += reward * Math.Pow(DiscountFactor, step);
                    step++;
                    current = next;
                }
                totalReward += trialReward;
            }

            return totalReward / cTrials;
        }