Example #1
        public BeliefState Next(Action a, Observation o)
        {
            BeliefState bsNext = new BeliefState(m_dDomain);
            // Compute the unnormalized updated belief for each state, then normalize.
            double normalizationFactor = 0.0;

            foreach (State stateTag in m_dDomain.States)
            {
                double updateProbabilityForState = 0.0;
                foreach (State state in m_dDomain.States) // sum over all predecessor states s
                {
                    if (state.Successors(a).Contains(stateTag))
                    {
                        double transitionProbability = state.TransitionProbability(a: a, sTag: stateTag);
                        double beliefOfstate         = this.m_dBeliefs[state];
                        updateProbabilityForState += transitionProbability * beliefOfstate;
                    }
                }
                updateProbabilityForState *= stateTag.ObservationProbability(a: a, o: o);
                bsNext.AddBelief(stateTag, updateProbabilityForState);
                normalizationFactor += updateProbabilityForState;
            }
            for (int i = 0; i < bsNext.m_dBeliefs.Keys.Count; i++)
            {
                State stateToNormalize = bsNext.m_dBeliefs.Keys.ElementAt(i);
                bsNext.m_dBeliefs[stateToNormalize] /= normalizationFactor;
            }
            Debug.Assert(bsNext.Validate());
            return(bsNext);
        }
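
For reference, the belief update that these Next implementations compute can be written in standard POMDP notation. This is only a restatement of the code above, with Tr the transition function, O the observation function, and b the current belief:

    b'(s') = \frac{O(s',a,o)\,\sum_{s} Tr(s,a,s')\, b(s)}{\Pr(o \mid a,b)},
    \qquad
    \Pr(o \mid a,b) = \sum_{s'} O(s',a,o)\,\sum_{s} Tr(s,a,s')\, b(s)

The loop over stateTag accumulates the unnormalized numerator for each state, and the final loop divides by normalizationFactor, which equals Pr(o|a,b).
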
Example #2
        // t(b,a,b') = pr(b'| a,b) = (sum over all o in omega) pr(b'|a,o,b) * pr(o|a,b). lecture 13, page 3
        public BeliefState Next(Action a, Observation o)
        {
            BeliefState bsNext = new BeliefState(m_dDomain);

            //double sumOfBTag = 0;
            foreach (State sTag in m_dDomain.States)
            {
                double stateProbabilityInBtag;
                stateProbabilityInBtag = sTag.ObservationProbability(a, o) * transitionProbabilityForEachState(sTag, a) / probabilityOfObservationGivenAB(a, o);
                bsNext.AddBelief(sTag, stateProbabilityInBtag);
            }
            return(bsNext);
            //foreach (State sTag in m_dDomain.States)
            //{
            //    //pr(b'|a,o,b)
            //    double currBTag = CalculateBTagForEachState(sTag, a, o);
            //    sumOfBTag += currBTag;
            //    bsNext.AddBelief(sTag, currBTag);
            //}
            //foreach (State sTag in m_dDomain.States)
            //{
            //    bsNext.m_dBeliefs[sTag] = bsNext.m_dBeliefs[sTag]/sumOfBTag;
            //}
            //Debug.Assert(bsNext.Validate());
            //return bsNext;
        }
Example #3
        //Problem: at the end of GetAction, avBest may still be null; perhaps m_lVectors is null or empty?
        public override Action GetAction(BeliefState bs)
        {
            AlphaVector avBest = null;

            ValueOf(bs, m_lVectors, out avBest);
            return(avBest.Action);
        }
Example #4
        /* simulates your policy for a number of iterations multiple times, and computes the average reward obtained.
         * To generate a single trial:
         *   1. Sample a starting state s from the initial belief state.
         *   2. Repeat until goal is reached
         *        a) compute the action a for the belief state.
         *        b) sample the result of applying a to s, obtaining s'.
         *        c) sample an observation o based on a and s'
         *        d) compute the new belief state given your old belief state, a, and o.
         *        e) accumulate the reward
         * cStepsPerTrial = number of iterations (steps) per trial
         * cTrials = number of trials, each running cStepsPerTrial iterations. */
        public double ComputeAverageDiscountedReward(Policy p, int cTrials, int cStepsPerTrial)
        {
            double accumulatedReward = 0;

            for (int i = 1; i <= cTrials; i++)
            {
                int         remainingSteps = cStepsPerTrial;
                BeliefState bs             = InitialBelief;
                // step 1: Sample a starting state s from the initial belief state.
                State s = bs.RandomState();
                // step 2: Repeat until goal is reached
                while (!IsGoalState(s) && remainingSteps > 0)
                {
                    //step 2a: compute the action a for the belief state.
                    Action a = p.GetAction(bs);
                    //step 2b: sample the result of applying a to s, obtaining s'.
                    State  sTag   = s.Apply(a);
                    double reward = bs.Reward(a);
                    // step 2c: sample an observation o based on a and s' (implemented with RandomObservation)
                    Observation o = sTag.RandomObservation(a);
                    //step 2d: compute the new belief state given your old belief state, a, and o.
                    BeliefState newBeliefState = bs.Next(a, o);
                    bs = newBeliefState; //change bs for next iteration
                    //step 2e: accumulate the discounted reward
                    accumulatedReward += reward * Math.Pow(DiscountFactor, cStepsPerTrial - remainingSteps);
                    s = sTag;
                    remainingSteps--;
                }
            }
            return(accumulatedReward / cTrials);
        }
Example #5
        //generate a new alpha vector from a belief state and an action
        private AlphaVector G(BeliefState bs, Action a)
        {
            AlphaVector avSum = new AlphaVector(a);
            AlphaVector avGMax = null;
            double      dValue = 0.0, dMaxValue = double.NegativeInfinity;

            foreach (Observation o in m_dDomain.Observations)
            {
                dMaxValue = double.NegativeInfinity;
                avGMax    = null;
                foreach (AlphaVector avCurrent in m_lVectors)
                {
                    AlphaVector avG = G(a, o, avCurrent);
                    dValue = avG.InnerProduct(bs);
                    if (dValue > dMaxValue)
                    {
                        dMaxValue = dValue;
                        avGMax    = avG;
                    }
                }
                avSum += avGMax;
            }
            avSum *= m_dDomain.DiscountFactor;
            AlphaVector avResult = new AlphaVector(a);

            foreach (State s in m_dDomain.States)
            {
                avResult[s] = avSum[s] + s.Reward(a);
            }
            return(avResult);
        }
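
A sketch of the equations this G operator (and the backup methods in the later examples) implements, in the usual point-based value iteration notation, where alpha ranges over the current vector set V and r_a(s) = R(s,a); the helper G(a, o, avCurrent) is assumed to compute the per-observation vector g:

    g_{a,o}^{\alpha}(s) = \sum_{s'} O(s',a,o)\, Tr(s,a,s')\, \alpha(s')

    \alpha_{a}^{b} = r_a + \gamma \sum_{o} \operatorname*{arg\,max}_{\alpha \in V} \; b \cdot g_{a,o}^{\alpha}

    backup(V, b) = \operatorname*{arg\,max}_{\alpha_{a}^{b},\; a \in A} \; b \cdot \alpha_{a}^{b}
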
Example #6
 public BeliefState(BeliefState bs)
 {
     this.m_dDomain  = bs.m_dDomain;
     this.m_dBeliefs = new Dictionary <State, double>();
     foreach (KeyValuePair <State, double> p in bs.m_dBeliefs)
     {
         m_dBeliefs.Add(p.Key, p.Value);
     }
 }
Example #7
        private void pruneAlphaVector(List <BeliefState> bsSet)
        {
            List <BeliefState> copyBset      = new List <BeliefState>(bsSet);
            List <AlphaVector> temp_lVectors = new List <AlphaVector>();

            while (copyBset.Any())
            {
                BeliefState _bs     = copyBset.ElementAt(0);
                AlphaVector _alpha  = backup(_bs);
                double      _reward = _alpha.InnerProduct(_bs);
                if (this.m_valueFunction[_bs].InnerProduct(_bs) < _reward)
                {
                    this.m_valueFunction[_bs] = _alpha;
                    temp_lVectors.Add(_alpha);
                    copyBset.Remove(_bs);
                    List <BeliefState> copyBset_inner = new List <BeliefState>(copyBset);
                    foreach (BeliefState temp_bs in copyBset_inner)
                    {
                        double __reward = _alpha.InnerProduct(temp_bs);
                        double curr_val = this.m_valueFunction[temp_bs].InnerProduct(temp_bs);
                        if (curr_val < __reward)
                        {
                            this.m_valueFunction[temp_bs] = _alpha;
                            copyBset.Remove(temp_bs);
                        }
                    }
                }
                else
                {
                    copyBset.Remove(_bs);
                    double      max_reward = double.NegativeInfinity;
                    AlphaVector max_alpha  = null;
                    foreach (AlphaVector alpha in m_lVectors)
                    {
                        double reward = alpha.InnerProduct(_bs);
                        if (reward > max_reward)
                        {
                            max_reward = reward;
                            max_alpha  = alpha;
                        }
                    }
                    if (!temp_lVectors.Contains(max_alpha))
                    {
                        temp_lVectors.Add(max_alpha);
                    }
                    this.m_valueFunction[_bs] = max_alpha;
                }
            }
            //this.m_lVectors = new List<AlphaVector>();
            this.m_lVectors = temp_lVectors;
            //foreach (AlphaVector updated_alpha in m_valueFunction.Values)
            //{
            //   if(!this.m_lVectors.Contains(updated_alpha))
            //      this.m_lVectors.Add(updated_alpha);
            //}
        }
Example #8
        public double InnerProduct(BeliefState bs)
        {
            double dSum = 0.0;

            foreach (KeyValuePair <State, double> p in m_dValues)
            {
                dSum += p.Value * bs[p.Key];
            }
            return(dSum);
        }
Example #9
        private List <BeliefState> CopyB(List <BeliefState> B)
        {
            List <BeliefState> Btag = new List <BeliefState>();

            foreach (BeliefState bs in B)
            {
                BeliefState newBeliefState = new BeliefState(bs);
                Btag.Add(newBeliefState);
            }
            return(Btag);
        }
Example #10
        /**
         * Computes the best alpha vector with action on root for belief state bs
         * (alpha_action_bs)
         */
        private AlphaVector computeBestAlpha(Action action, BeliefState bs)
        {
            //initializing an alpha vector with action on its root
            AlphaVector discountedRewardVector = new AlphaVector(action);

            // We loop over all observations and alpha vectors for each observation obs,
            // we find the alpha vector maximizing dot(bs,alpha_action_obs) - we will use
            // these vectors (their sum) in order to calculate alpha_a_b
            foreach (Observation obs in m_dDomain.Observations)
            { //We compute alpha_a_o for every observation o, according to the equation in the slides
                AlphaVector cur_alpha_ao = null;
                AlphaVector best_alpha_ao = new AlphaVector();
                double      best_val = double.NegativeInfinity;
                double      cur_val  = 0;

                //Looping over all alpha vectors, finding the best alpha that maximizes dot(bs,alpha_action_obs)
                foreach (AlphaVector av in m_lVectors)
                {
                    //We compute av_action_obs for every av
                    cur_alpha_ao = computeAlphaAO(av, action, obs);
                    // dot product between av_action_obs and the belief state bs
                    cur_val = cur_alpha_ao.InnerProduct(bs);
                    //We take the vector maximizing the dot product
                    if (cur_val > best_val)
                    {
                        best_alpha_ao = cur_alpha_ao;
                        best_val      = cur_val;
                    }
                }
                //We compute the sum of these vectors, (SUM(arg max(dot(bs,alpha_bs_a))))
                discountedRewardVector += best_alpha_ao;
            }
            // Multiplying it with the discount factor
            discountedRewardVector = discountedRewardVector * m_dDomain.DiscountFactor;

            AlphaVector rA; //the action's reward vector; we add it to the sum and return the result

            if (rewardsVectors.ContainsKey(action))
            {
                rA = rewardsVectors[action];
            }
            else
            {
                rA = new AlphaVector();
                foreach (State s in m_dDomain.States)
                {
                    rA[s] = s.Reward(action);
                }
                rewardsVectors[action] = rA;
            }

            return(discountedRewardVector + rA);
        }
Example #11
        private AlphaVector ArgMax(List <AlphaVector> m_lVectors, BeliefState b)
        {
            AlphaVector maxAlphaVector = null;
            double      dMaxValue      = double.NegativeInfinity;

            foreach (AlphaVector aVector in m_lVectors)
            {
                double dValue = aVector.InnerProduct(b);
                if (dValue > dMaxValue)
                {
                    dMaxValue      = dValue;
                    maxAlphaVector = aVector;
                }
            }
            return(maxAlphaVector);
        }
Example #12
        private double ValueOf(BeliefState bs, List <AlphaVector> lVectors, out AlphaVector avBest)
        {
            double dValue = 0.0, dMaxValue = double.NegativeInfinity;

            avBest = null;
            foreach (AlphaVector av in lVectors)
            {
                dValue = av.InnerProduct(bs);
                if (dValue > dMaxValue)
                {
                    dMaxValue = dValue;
                    avBest    = av;
                }
            }
            return(dMaxValue);
        }
Example #13
        public BeliefState Next(Action a, Observation o)
        {
            BeliefState bsNext = new BeliefState(m_dDomain); //Represents the new belief state b_o_s

            double normalizing_factor = 0;                   //We will divide our resulted belief state by this factor, instead of calculating Pr(o|a,b)

            HashSet <State> reachableStates = new HashSet <State>();

            // The neighboring states are the union of all neighboring states
            // of states with positive probability on current belief state.
            // When we calculate the new distribution over states, we just need
            // to look on S' such that Tr(S,a,S')>0
            foreach (KeyValuePair <State, double> entry in m_dBeliefs)
            {
                if (entry.Value > 0)
                {
                    foreach (State s in entry.Key.Successors(a))
                    {
                        reachableStates.Add(s);
                        // We optimize the calculation by adding the weighted transition value as we build the reachableStates set,
                        // instead of first building the set and only then visiting all its ancestors to perform the calculation
                        bsNext.AddBelief(s, entry.Value * entry.Key.TransitionProbability(a, s));
                    }
                }
            }

            foreach (State s_prime in reachableStates)
            {
                double trans_prob = 0;
                double obs_prob   = s_prime.ObservationProbability(a, o); // O(s',a,o); multiplied below by sum_s b(s)*Tr(s,a,s')
                trans_prob = bsNext[s_prime];
                // for each state s_prime trans_prob equals O(s_prime,a,o)*dot(b,Tr(s,a,s_prime))
                trans_prob *= obs_prob;
                //The normalizing factor is sum of all values, we divide the vector by this number to make it a distribution
                normalizing_factor += trans_prob;
                // Updating the new belief state
                bsNext[s_prime] = trans_prob;
            }

            foreach (State s in reachableStates)
            {
                bsNext[s] /= normalizing_factor;
            }

            Debug.Assert(bsNext.Validate());
            return(bsNext);
        }
Example #14
        /**
         * Calculates the value of a belief state bs w.r.t. a list of alpha vectors,
         * i.e. finds the alpha vector that maximizes dot(bs, alpha), returns the value of
         * this dot product, and returns the vector through avBest
         *
         *
         */
        private double ValueOf(BeliefState bs, List <AlphaVector> lVectors, out AlphaVector avBest)
        {
            double dValue = 0.0, dMaxValue = double.NegativeInfinity;

            avBest = null;
            //We loop over all alpha vectors
            foreach (AlphaVector av in lVectors)
            {
                dValue = av.InnerProduct(bs);
                if (dValue > dMaxValue) //taking the maximum dot product
                {
                    dMaxValue = dValue;
                    avBest    = av;
                }
            }
            return(dMaxValue);
        }
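
In other words, both ValueOf implementations evaluate the piecewise-linear value function represented by the current vector set Gamma:

    V(b) = \max_{\alpha \in \Gamma} \; \alpha \cdot b = \max_{\alpha \in \Gamma} \sum_{s} \alpha(s)\, b(s)
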
Example #15
        private List <BeliefState> GenerateB(int cBeliefs, Random rand)
        {
            List <BeliefState> B     = new List <BeliefState>();
            BeliefState        initB = m_dDomain.InitialBelief;
            int n = cBeliefs;

            while (n > 0)
            {
                Action      a     = (m_dDomain.GetRandomAction(rand));
                Observation oCurr = initB.RandomObservation(a);
                BeliefState bNext = initB.Next(a, oCurr);
                B.Add(bNext);
                initB = bNext;
                n--;
            }
            return(B);
        }
Example #16
        private AlphaVector backup(BeliefState bs)
        {
            AlphaVector avBest = null, avCurrent = null;
            double      dMaxValue = double.NegativeInfinity, dValue = 0.0;

            // For each action, generate its backup vector and keep the one maximizing the value at bs.
            foreach (var action in m_dDomain.Actions)
            {
                avCurrent = G(bs, action);
                dValue    = avCurrent.InnerProduct(bs);
                if (dValue > dMaxValue)
                {
                    dMaxValue = dValue;
                    avBest    = avCurrent;
                }
            }
            return(avBest);
        }
Example #17
        /**
         *  The Backup operation, receives a belief state bs, and returns
         *  Backup(m_lVectors,bs)
         * (The best alpha vector alpha_a_bs maximizing
         * dot(b,alpha_a_bs))
         *
         */
        private AlphaVector backup(BeliefState bs)
        {
            AlphaVector avBest = new AlphaVector(), avCurrent = new AlphaVector();
            double      dMaxValue = double.NegativeInfinity, dValue = 0.0;

            //We loop over all actions in domain, and for every action a
            //we take the best alpha vector with a on its root
            foreach (Action a in m_dDomain.Actions)
            {
                avCurrent = computeBestAlpha(a, bs);    // alpha_a_b
                dValue    = avCurrent.InnerProduct(bs); // dot product with bs
                if (dValue > dMaxValue)
                {                                       // taking the vector alpha_a_b that maximizes the dot product
                    avBest    = avCurrent;
                    dMaxValue = dValue;
                }
            }
            return(avBest); //returns the best alpha_a_bs
        }
Example #18
        private List <BeliefState> SimulateTrial(Policy p, int cMaxSteps)
        {
            BeliefState        bsCurrent = m_dDomain.InitialBelief, bsNext = null;
            State              sCurrent = bsCurrent.RandomState(), sNext = null;
            Action             a        = null;
            Observation        o        = null;
            List <BeliefState> lBeliefs = new List <BeliefState>();

            while (!m_dDomain.IsGoalState(sCurrent) && lBeliefs.Count < cMaxSteps)
            {
                a         = p.GetAction(bsCurrent);
                sNext     = sCurrent.Apply(a);
                o         = sNext.RandomObservation(a);
                bsNext    = bsCurrent.Next(a, o);
                bsCurrent = bsNext;
                lBeliefs.Add(bsCurrent);
                sCurrent = sNext;
            }
            return(lBeliefs);
        }
Example #19
         /**
         * a recursive function performing one trial with stepsLeft steps, we are given a policy p,
         * stepsLeft, current state state, and current belief state bs
         * 
         * a) compute the action for the belief state bs
         * b) sample the result of applying a to s, obtaining nextState.
         * c) sample an observation o based on a and nextState
         * d) compute the new belief state given your old belief state, a, and o.
         * e) call the function recursively with the same policy p, stepsLeft-1, the new state, and the new
         * belief state. Finally, we accumulate the reward.
         * 
         * 
         */
        private double calcTrialReward(Policy p, int stepsLeft, State state, BeliefState bs)
        {
            // If we are already in a goal state or no steps are left then the reward is 0
            if (IsGoalState(state) || stepsLeft == 0)
                return 0;

            //Calculating the action for the belief state based on the policy
            Action a = p.GetAction(bs);
            
            //applying a on state, resulting in a new state nextState
            State nextState = state.Apply(a);
            //The reward collected at nextState for performing a
            double reward = nextState.Reward(a);
            // We sample an observation based on nextState and a
            Observation o = nextState.RandomObservation(a);

            // Updating the reward, calling the function recursively so we continue the "forward search" to goal state 
            reward += this.DiscountFactor * calcTrialReward(p, stepsLeft-1, nextState, bs.Next(a,o));
            return reward;
        }
Example #20
        public double ComputeAverageDiscountedReward(Policy p, int cTrials, int cStepsPerTrial)
        {
            double        to_ret  = 0.0;
            List <double> rewards = new List <double>();

            for (int i = 0; i < cTrials; i++)
            {
                State       target             = sampleInitialState();
                BeliefState currentBeliefState = InitialBelief;
                double      sumRewards         = 0.0;
                int         counter            = 0;
                //while ((!IsGoalState(target)))
                while ((!IsGoalState(target) && counter < cStepsPerTrial))
                {
                    Action a        = p.GetAction(currentBeliefState);
                    State  newState = target.Apply(a: a);
                    // build the cumulative distribution over observations; samplingObservations draws from it below
                    List <KeyValuePair <Observation, double> > probabilitiesForObservation = new List <KeyValuePair <Observation, double> >();
                    double sum = 0.0;
                    foreach (Observation obs in Observations)
                    {
                        double prob = newState.ObservationProbability(a: a, o: obs);
                        sum += prob;
                        probabilitiesForObservation.Add(new KeyValuePair <Observation, double>(obs, sum));
                    }
                    Observation newObservation = samplingObservations(probabilitiesForObservation);
                    double      reward         = currentBeliefState.Reward(a);
                    currentBeliefState = currentBeliefState.Next(a: a, o: newObservation);
                    sumRewards        += reward * Math.Pow(DiscountFactor, counter);
                    counter++;
                    target = newState;
                }
                rewards.Add(sumRewards);
            }

            foreach (double r in rewards)
            {
                to_ret += r;
            }
            return((to_ret) / cTrials);
        }
Example #21
        //returns the best alpha vector corresponding to a certain belief state
        private AlphaVector Backup(BeliefState bs)
        {
            AlphaVector avBest = null;
            double      dMaxValue = double.NegativeInfinity, dValue = 0.0;

            foreach (Action aCurr in m_dDomain.Actions)
            {
                // G(bs, aCurr) does not depend on any existing vector, so no inner loop over m_lVectors is needed;
                // keep the generated vector that maximizes the dot product with bs
                AlphaVector avBA = G(bs, aCurr);
                dValue = avBA.InnerProduct(bs);
                if (dMaxValue < dValue)
                {
                    dMaxValue = dValue;
                    avBest    = avBA;
                }
            }
            return(avBest);
        }
Example #22
        public void PointBasedVI(int cBeliefs, int cMaxIterations)
        {
            Random             rand = new Random();
            List <BeliefState> B    = GenerateB(cBeliefs, rand);

            InitV();
            m_dGCache = new Dictionary <AlphaVector, Dictionary <Action, Dictionary <Observation, AlphaVector> > >();
            List <BeliefState> BTag;

            while (cMaxIterations > 0)
            {
                BTag = CopyB(B);
                List <AlphaVector> VTag = new List <AlphaVector>();
                while (BTag.Count != 0)
                {
                    //choose arbitrary point in BTag to improve
                    BeliefState bCurr     = RandomBeliefState(BTag, rand);
                    AlphaVector newAV     = Backup(bCurr);
                    AlphaVector avBest    = new AlphaVector();
                    double      currValue = ValueOf(bCurr, m_lVectors, out avBest);
                    double      AlphaDotb = newAV.InnerProduct(bCurr);
                    if (AlphaDotb > currValue)
                    {
                        //remove from BTag the points whose value was improved by newAV
                        BTag = BTag.Where(b => newAV.InnerProduct(b) < ValueOf(b, m_lVectors, out AlphaVector avTmp)).ToList();
                        avBest = newAV;
                    }
                    else
                    {
                        BTag.Remove(bCurr);
                        avBest = ArgMax(m_lVectors, b: bCurr);
                    }
                    VTag.Add(avBest);
                }
                m_lVectors = VTag;
                cMaxIterations--;
            }
        }
Example #23
        private void SimulateTrial(Policy p, MazeViewer viewer)
        {
            BeliefState bsCurrent = InitialBelief, bsNext = null;
            State       sCurrent = bsCurrent.sampleState(), sNext = null;
            Action      a = null;
            Observation o = null;

            viewer.CurrentState  = (MazeState)sCurrent;
            viewer.CurrentBelief = bsCurrent;
            while (!IsGoalState(sCurrent))
            {
                a                         = p.GetAction(bsCurrent);
                sNext                     = sCurrent.Apply(a);
                o                         = sNext.RandomObservation(a);
                bsNext                    = bsCurrent.Next(a, o);
                bsCurrent                 = bsNext;
                sCurrent                  = sNext;
                viewer.CurrentState       = (MazeState)sCurrent;
                viewer.CurrentBelief      = bsCurrent;
                viewer.CurrentObservation = (MazeObservation)o;
                Thread.Sleep(500);
            }
        }
Example #24
 public abstract Action GetAction(BeliefState bs);
Example #25
        public override Action GetAction(BeliefState bs)
        {
            int idx = RandomGenerator.Next(m_lActions.Count);

            return(m_lActions[idx]);
        }
Example #26
 public override Action GetAction(BeliefState bs)
 {
     //your code here
     throw new NotImplementedException();
 }
Example #27
        /**
         * Performs the value iteration algorithm using the Perseus update scheme:
         * generates a set containing cBeliefs belief states, and performs value iteration
         * for at most cMaxIterations iterations.
         *
         */
        public void PointBasedVI(int cBeliefs, int cMaxIterations)
        {
            //Generates an initial set containing cBelief belief states
            List <BeliefState> beliefStates = CollectBeliefs(cBeliefs);
            List <AlphaVector> vTag; //V'

            //const double EPSILON = 0.1; //The convergence boundary
            int iterationsLeft = cMaxIterations;

            while (iterationsLeft > 0)
            {
                vTag = new List <AlphaVector>();
                List <BeliefState> beliefStatesLeftToImprove = new List <BeliefState>(beliefStates); // B'
                while (beliefStatesLeftToImprove.Count() > 0)
                {                                                                                    //While there are belief states to improve
                  //Console.WriteLine("Improvable belief states left");
                  //Console.WriteLine(beliefStatesLeftToImprove.Count());

                    //selecting a random index of a belief state to improve
                    int ri = RandomGenerator.Next(beliefStatesLeftToImprove.Count());

                    //We want to iterate over the belief state set and retrieve the ri'th item
                    List <BeliefState> .Enumerator e = beliefStatesLeftToImprove.GetEnumerator();
                    for (int i = 0; i < ri + 1; i++) //iterating until the belief state at index ri
                    {
                        e.MoveNext();
                    }
                    BeliefState sampledBS = e.Current; //sampledBS is a randomly chosen belief state selected for improvement

                    //Console.WriteLine("Iterations left: " + iterationsLeft);
                    //Console.WriteLine("Improvable bs left: " + beliefStatesLeftToImprove.Count());

                    //We calculate the backup of sampledBS
                    AlphaVector alpha = backup(sampledBS);
                    AlphaVector alphaToAdd;//It will contain the alpha vector to add to V'

                    AlphaVector prevBestAlphaVector = null;
                    //calculating the value of sampledBS (V(sampledBS)), which is the best dot product alpha*b
                    double prevValue = ValueOf(sampledBS, m_lVectors, out prevBestAlphaVector);

                    if (alpha.InnerProduct(sampledBS) >= prevValue) // alpha is dominating, remove all belief states that are improved by it
                    {
                        //Console.WriteLine("Found an improving vec");
                        List <BeliefState> beliefStatesToKeep = new List <BeliefState>();
                        foreach (BeliefState b_prime in beliefStatesLeftToImprove)
                        {
                            AlphaVector a = null;
                            if (alpha.InnerProduct(b_prime) < ValueOf(b_prime, m_lVectors, out a))
                            {
                                beliefStatesToKeep.Add(b_prime);
                            }
                        }
                        beliefStatesLeftToImprove = beliefStatesToKeep;
                        //In the case alpha is dominating, we add alpha to V'
                        alphaToAdd = alpha;
                    }

                    else
                    { //alpha does not improve,we remove sampledBS from the set
                        beliefStatesLeftToImprove.Remove(sampledBS);
                        alphaToAdd = prevBestAlphaVector;
                    }

                    if (!vTag.Contains(alphaToAdd))
                    {
                        vTag.Add(alphaToAdd);
                    }
                }

                /**
                 * //We estimate how the alpha vectors set was changed
                 * double diff = estimateDiff(m_lVectors, vTag, beliefStates);
                 * Console.WriteLine(diff);
                 *
                 * //The difference between the current set, and the previous is less than epsilon
                 * //We finish the update algorithm
                 * //if (diff < EPSILON)
                 * //   break;
                 **/

                Console.WriteLine("Iterations left {0}", iterationsLeft);
                m_lVectors = vTag;
                iterationsLeft--;
            }
        }
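
The inner loop of both PointBasedVI implementations is the Perseus improvement step: sample a belief b from the set of points still to improve, compute alpha = backup(V, b), and apply the following update (a sketch matching the code above):

    \text{if } \alpha \cdot b \ge V(b): \text{ add } \alpha \text{ to } V' \text{ and drop every } b' \text{ with } \alpha \cdot b' \ge V(b')
    \text{otherwise: add } \operatorname*{arg\,max}_{\alpha' \in V} \alpha' \cdot b \text{ to } V' \text{ and drop only } b

The iteration ends when no points remain to improve, and V' becomes the vector set for the next iteration.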