/* Simulates your policy for a number of steps, multiple times, and computes the average discounted reward obtained.
 * To generate a single trial:
 * 1. Sample a starting state s from the initial belief state.
 * 2. Repeat until the goal is reached:
 *    a) compute the action a for the belief state.
 *    b) sample the result of applying a to s, obtaining s'.
 *    c) sample an observation o based on a and s'.
 *    d) compute the new belief state given the old belief state, a, and o.
 *    e) accumulate the (discounted) reward.
 * cStepsPerTrial = maximal number of steps per trial.
 * cTrials = number of trials over which the reward is averaged.
 */
public double ComputeAverageDiscountedReward(Policy p, int cTrials, int cStepsPerTrial)
{
    double accumulatedReward = 0;
    for (int i = 1; i <= cTrials; i++)
    {
        int remainingSteps = cStepsPerTrial;
        BeliefState bs = InitialBelief;
        // Step 1: sample a starting state s from the initial belief state.
        State s = bs.RandomState();
        // Step 2: repeat until the goal is reached or the step budget is exhausted.
        while (!IsGoalState(s) && remainingSteps > 0)
        {
            // Step 2a: compute the action a for the belief state.
            Action a = p.GetAction(bs);
            // Step 2b: sample the result of applying a to s, obtaining s'.
            State sTag = s.Apply(a);
            double reward = bs.Reward(a);
            // Step 2c: sample an observation o based on a and s'.
            Observation o = sTag.RandomObservation(a);
            // Step 2d: compute the new belief state given the old belief state, a, and o.
            bs = bs.Next(a, o);
            // Step 2e: accumulate the reward, discounted by the number of steps taken so far.
            accumulatedReward += reward * Math.Pow(DiscountFactor, cStepsPerTrial - remainingSteps);
            s = sTag;
            remainingSteps--;
        }
    }
    return accumulatedReward / cTrials;
}
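/* A hypothetical usage sketch (not part of the original code): assuming this method and a
 * Policy implementation live on the same domain class, an evaluation run might be wrapped
 * as follows. The method name EvaluatePolicy and the trial counts are illustrative only.
 */
public void EvaluatePolicy(Policy p)
{
    double avgReward = ComputeAverageDiscountedReward(p, 100, 50);
    Console.WriteLine("Average discounted reward over 100 trials: " + avgReward);
}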
private List<BeliefState> GenerateB(int cBeliefs, Random rand)
{
    List<BeliefState> B = new List<BeliefState>();
    BeliefState initB = m_dDomain.InitialBelief;
    int n = cBeliefs;
    while (n > 0)
    {
        Action a = m_dDomain.GetRandomAction(rand);
        Observation oCurr = initB.RandomObservation(a);
        BeliefState bNext = initB.Next(a, oCurr);
        B.Add(bNext);
        initB = bNext;
        n--;
    }
    return B;
}
private List<BeliefState> SimulateTrial(Policy p, int cMaxSteps)
{
    BeliefState bsCurrent = m_dDomain.InitialBelief, bsNext = null;
    State sCurrent = bsCurrent.RandomState(), sNext = null;
    Action a = null;
    Observation o = null;
    List<BeliefState> lBeliefs = new List<BeliefState>();
    while (!m_dDomain.IsGoalState(sCurrent) && lBeliefs.Count < cMaxSteps)
    {
        a = p.GetAction(bsCurrent);
        sNext = sCurrent.Apply(a);
        o = sNext.RandomObservation(a);
        bsNext = bsCurrent.Next(a, o);
        bsCurrent = bsNext;
        lBeliefs.Add(bsCurrent);
        sCurrent = sNext;
    }
    return lBeliefs;
}
public double ComputeAverageDiscountedReward(Policy p, int cTrials, int cStepsPerTrial)
{
    double to_ret = 0.0;
    List<double> rewards = new List<double>();
    for (int i = 0; i < cTrials; i++)
    {
        // Sample a hidden starting state from the initial belief.
        State target = sampleInitialState();
        BeliefState currentBeliefState = InitialBelief;
        double sumRewards = 0.0;
        int counter = 0;
        while (!IsGoalState(target) && counter < cStepsPerTrial)
        {
            // Choose an action for the current belief and apply it to the hidden state.
            Action a = p.GetAction(currentBeliefState);
            State newState = target.Apply(a: a);
            // Build the cumulative observation distribution for the resulting state.
            List<KeyValuePair<Observation, double>> probabilitiesForObservation = new List<KeyValuePair<Observation, double>>();
            double sum = 0.0;
            foreach (Observation obs in Observations)
            {
                double prob = newState.ObservationProbability(a: a, o: obs);
                sum += prob;
                probabilitiesForObservation.Add(new KeyValuePair<Observation, double>(obs, sum));
            }
            // Sample an observation from the cumulative distribution and update the belief.
            Observation newObservation = samplingObservations(probabilitiesForObservation);
            double reward = currentBeliefState.Reward(a);
            currentBeliefState = currentBeliefState.Next(a: a, o: newObservation);
            // Accumulate the discounted reward.
            sumRewards += reward * Math.Pow(DiscountFactor, counter);
            counter++;
            target = newState;
        }
        rewards.Add(sumRewards);
    }
    foreach (double r in rewards)
    {
        to_ret += r;
    }
    return to_ret / cTrials;
}
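/* A hedged sketch (an assumption, not part of the original submission) of the
 * samplingObservations helper referenced above: given (observation, cumulative probability)
 * pairs, it performs inverse-transform sampling. The Random field name m_rndObservations
 * is hypothetical.
 */
private static readonly Random m_rndObservations = new Random();

private Observation samplingObservations(List<KeyValuePair<Observation, double>> cumulative)
{
    // The last cumulative value is the total mass (it may fall slightly below 1.0 due to rounding).
    double total = cumulative[cumulative.Count - 1].Value;
    double r = m_rndObservations.NextDouble() * total;
    foreach (KeyValuePair<Observation, double> pair in cumulative)
    {
        // Return the first observation whose cumulative probability covers the sampled point.
        if (r <= pair.Value)
            return pair.Key;
    }
    // Numerical fallback: return the last observation.
    return cumulative[cumulative.Count - 1].Key;
}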
private void SimulateTrial(Policy p, MazeViewer viewer)
{
    BeliefState bsCurrent = InitialBelief, bsNext = null;
    State sCurrent = bsCurrent.sampleState(), sNext = null;
    Action a = null;
    Observation o = null;
    viewer.CurrentState = (MazeState)sCurrent;
    viewer.CurrentBelief = bsCurrent;
    while (!IsGoalState(sCurrent))
    {
        a = p.GetAction(bsCurrent);
        sNext = sCurrent.Apply(a);
        o = sNext.RandomObservation(a);
        bsNext = bsCurrent.Next(a, o);
        bsCurrent = bsNext;
        sCurrent = sNext;
        viewer.CurrentState = (MazeState)sCurrent;
        viewer.CurrentBelief = bsCurrent;
        viewer.CurrentObservation = (MazeObservation)o;
        Thread.Sleep(500);
    }
}
/**
 * A recursive function performing one trial with stepsLeft steps, given a policy p,
 * stepsLeft, the current state, and the current belief state bs:
 * a) compute the action for the belief state bs.
 * b) sample the result of applying a to s, obtaining nextState.
 * c) sample an observation o based on a and nextState.
 * d) compute the new belief state given the old belief state, a, and o.
 * e) call the function recursively with the same policy p, stepsLeft-1, the new state,
 *    and the new belief state, and finally accumulate the reward.
 */
private double calcTrialReward(Policy p, int stepsLeft, State state, BeliefState bs)
{
    // If we are already in a goal state or no steps are left, the reward is 0.
    if (IsGoalState(state) || stepsLeft == 0)
        return 0;
    // Compute the action for the belief state based on the policy.
    Action a = p.GetAction(bs);
    // Apply a to the current state, obtaining nextState.
    State nextState = state.Apply(a);
    // The reward of performing a in this step.
    double reward = nextState.Reward(a);
    // Sample an observation based on nextState and a.
    Observation o = nextState.RandomObservation(a);
    // Continue the forward simulation recursively toward a goal state,
    // discounting the reward accumulated from future steps.
    reward += this.DiscountFactor * calcTrialReward(p, stepsLeft - 1, nextState, bs.Next(a, o));
    return reward;
}
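/* A hedged sketch (not part of the original code) of how calcTrialReward could be driven to
 * estimate the average discounted reward: for each trial, sample a starting state from the
 * initial belief and average the recursively computed trial rewards.
 */
public double ComputeAverageDiscountedReward(Policy p, int cTrials, int cStepsPerTrial)
{
    double accumulated = 0.0;
    for (int iTrial = 0; iTrial < cTrials; iTrial++)
    {
        // Sample a hidden starting state from the initial belief and run one trial.
        State sStart = InitialBelief.RandomState();
        accumulated += calcTrialReward(p, cStepsPerTrial, sStart, InitialBelief);
    }
    return accumulated / cTrials;
}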