C# (CSharp) POMDP BeliefState.Reward примеры использования

Язык программирования: C# (CSharp)

Пространство имен/Пакет: POMDP

Класс/Тип: BeliefState

Метод/Функция: Reward

Примеров на hotexamples.com: 2

C# (CSharp) POMDP BeliefState.Reward - 2 примера найдено. Это лучшие примеры C# (CSharp) кода для POMDP.BeliefState.Reward, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Next(6)

AddBelief(3)

RandomState(3)

Reward(2)

Validate(2)

sampleState(2)

Beliefs(1)

RandomObservation(1)

Пример #1

Показать файл

        /* Simulates policy p for a number of trials and returns the average of the
         * total reward collected per trial.
         * To generate a single trial:
         *   1. Sample a starting state s from the initial belief state.
         *   2. Repeat until a goal state is reached or the step budget is exhausted:
         *        a) compute the action a for the current belief state.
         *        b) sample the result of applying a to s, obtaining s'.
         *        c) sample an observation o based on a and s'.
         *        d) compute the new belief state given the old belief state, a, and o.
         *        e) accumulate the reward.
         * p              = policy that maps a belief state to an action.
         * cTrials        = number of independent trials to run.
         * cStepsPerTrial = maximum number of steps in each trial.
         * Returns the sum of all collected rewards divided by cTrials.
         * NOTE(review): despite the method name, no discount factor is applied here;
         * rewards are summed as-is. Confirm whether discounting is intended. */
        public double ComputeAverageDiscountedReward(Policy p, int cTrials, int cStepsPerTrial)
        {
            double accumulatedReward = 0;

            for (int i = 1; i <= cTrials; i++)
            {
                int         remainingSteps = cStepsPerTrial;
                BeliefState bs             = InitialBelief;
                // step 1: sample a starting state s from the initial belief state.
                State s = bs.RandomState();
                // step 2: repeat until the goal is reached or no steps remain.
                while (!IsGoalState(s) && remainingSteps > 0)
                {
                    // step 2a: compute the action a for the belief state.
                    Action a = p.GetAction(bs);
                    // step 2b: sample the result of applying a to s, obtaining s'.
                    State sTag = s.Apply(a);
                    // The reward is taken from the belief state *before* it is updated.
                    double reward = bs.Reward(a);
                    // step 2c: sample an observation o based on a and s'. The observation
                    // must come from the successor state, not from the state we just left.
                    Observation o = sTag.RandomObservation(a);
                    // step 2d: compute the new belief state given the old one, a, and o.
                    bs = bs.Next(a, o);
                    // step 2e: accumulate the reward.
                    accumulatedReward += reward;
                    s = sTag;
                    remainingSteps--;
                }
            }
            return(accumulatedReward / cTrials);
        }

Пример #2

Показать файл

Файл: Domain.cs Проект: abadied/AI_POMDP

        /// <summary>
        /// Runs <paramref name="cTrials"/> simulated trials of policy <paramref name="p"/>
        /// and returns the mean discounted sum of rewards per trial.
        /// </summary>
        /// <param name="p">Policy queried for an action at every belief state.</param>
        /// <param name="cTrials">Number of trials to simulate.</param>
        /// <param name="cStepsPerTrial">Upper bound on the number of steps in one trial.</param>
        /// <returns>Sum of the per-trial discounted rewards divided by <paramref name="cTrials"/>.</returns>
        public double ComputeAverageDiscountedReward(Policy p, int cTrials, int cStepsPerTrial)
        {
            // Running total of the discounted reward over every trial.
            double totalReturn = 0.0;

            for (int trial = 0; trial < cTrials; trial++)
            {
                State       state       = sampleInitialState();
                BeliefState belief      = InitialBelief;
                double      trialReturn = 0.0;

                // A trial ends when a goal state is reached or the step budget runs out.
                for (int step = 0; !IsGoalState(state) && step < cStepsPerTrial; step++)
                {
                    Action action    = p.GetAction(belief);
                    State  successor = state.Apply(action);

                    // Pair each observation with the running (cumulative) sum of the
                    // observation probabilities in the successor state; the sampler
                    // receives this cumulative table.
                    var    cumulative = new List<KeyValuePair<Observation, double>>();
                    double running    = 0.0;
                    foreach (Observation candidate in Observations)
                    {
                        running += successor.ObservationProbability(a: action, o: candidate);
                        cumulative.Add(new KeyValuePair<Observation, double>(candidate, running));
                    }
                    Observation observed = samplingObservations(cumulative);

                    // Reward is read from the belief before it is advanced.
                    double stepReward = belief.Reward(action);
                    belief = belief.Next(a: action, o: observed);

                    // Discount by DiscountFactor^step (step is the zero-based step index).
                    trialReturn += stepReward * Math.Pow(DiscountFactor, step);
                    state = successor;
                }

                totalReturn += trialReturn;
            }

            return totalReturn / cTrials;
        }