예제 #1
0
        /// <summary>
        /// Rebuilds <c>m_lVectors</c> by visiting every belief state in <paramref name="bsSet"/>.
        /// For each belief, the backed-up vector is kept when it scores strictly higher at that
        /// belief than the vector currently stored in <c>m_valueFunction</c>; otherwise the
        /// highest-scoring vector from the current <c>m_lVectors</c> is kept instead.
        /// <c>m_valueFunction</c> is updated along the way.
        /// </summary>
        /// <param name="bsSet">Belief states to process. The list itself is not modified.</param>
        private void pruneAlphaVector(List <BeliefState> bsSet)
        {
            List<BeliefState> pending = new List<BeliefState>(bsSet);
            List<AlphaVector> survivors = new List<AlphaVector>();

            while (pending.Count > 0)
            {
                BeliefState current = pending[0];
                AlphaVector backedUp = backup(current);
                double backedUpValue = backedUp.InnerProduct(current);
                bool improves = this.m_valueFunction[current].InnerProduct(current) < backedUpValue;

                // The belief being processed leaves the work list whichever branch is taken.
                pending.Remove(current);

                if (!improves)
                {
                    // No improvement: fall back to the best vector already in m_lVectors.
                    AlphaVector fallback = null;
                    double fallbackValue = double.NegativeInfinity;
                    foreach (AlphaVector existing in m_lVectors)
                    {
                        double existingValue = existing.InnerProduct(current);
                        if (existingValue > fallbackValue)
                        {
                            fallbackValue = existingValue;
                            fallback = existing;
                        }
                    }

                    if (!survivors.Contains(fallback))
                    {
                        survivors.Add(fallback);
                    }
                    this.m_valueFunction[current] = fallback;
                    continue;
                }

                this.m_valueFunction[current] = backedUp;
                survivors.Add(backedUp);

                // Walk a snapshot, because beliefs improved by the new vector are dropped
                // from the work list while we iterate.
                foreach (BeliefState other in pending.ToArray())
                {
                    double valueUnderBackedUp = backedUp.InnerProduct(other);
                    double storedValue = this.m_valueFunction[other].InnerProduct(other);
                    if (storedValue < valueUnderBackedUp)
                    {
                        this.m_valueFunction[other] = backedUp;
                        pending.Remove(other);
                    }
                }
            }

            this.m_lVectors = survivors;
        }
예제 #2
0
        // Generates a new alpha vector from a belief state and an action.
        // For every observation, the G(a, o, alpha) vector with the largest inner product at bs
        // is added to a running sum; the sum is multiplied by the discount factor and the
        // per-state reward of the action is added on top.
        private AlphaVector G(BeliefState bs, Action a)
        {
            AlphaVector accumulated = new AlphaVector(a);

            foreach (Observation observation in m_dDomain.Observations)
            {
                // Best projected vector for this observation (stays null if m_lVectors is empty).
                AlphaVector bestForObservation = null;
                double bestScore = double.NegativeInfinity;

                foreach (AlphaVector existing in m_lVectors)
                {
                    AlphaVector projected = G(a, observation, existing);
                    double score = projected.InnerProduct(bs);
                    if (score > bestScore)
                    {
                        bestScore = score;
                        bestForObservation = projected;
                    }
                }

                accumulated = accumulated + bestForObservation;
            }

            accumulated = accumulated * m_dDomain.DiscountFactor;

            // Add the immediate reward of the action, state by state.
            AlphaVector result = new AlphaVector(a);
            foreach (State state in m_dDomain.States)
            {
                result[state] = accumulated[state] + state.Reward(a);
            }

            return result;
        }
예제 #3
0
        /**
         * Computes the best alpha vector with action on root for belief state bs
         * (alpha_action_bs).
         *
         * For every observation, the vector computeAlphaAO(av, action, obs) with the
         * largest inner product at bs (over all av in m_lVectors) is added to a running
         * sum. The sum is multiplied by the discount factor and the reward vector of
         * the action is added.
         *
         * The reward vector of each action is built once and cached in rewardsVectors.
         */
        private AlphaVector computeBestAlpha(Action action, BeliefState bs)
        {
            // Running sum, initialised as an alpha vector with action on its root.
            AlphaVector discountedRewardVector = new AlphaVector(action);

            foreach (Observation obs in m_dDomain.Observations)
            {
                // Starts from a default-constructed vector, so an empty m_lVectors
                // contributes that vector to the sum rather than null.
                AlphaVector best_alpha_ao = new AlphaVector();
                double      best_val      = double.NegativeInfinity;

                // Find the alpha_action_obs maximizing dot(bs, alpha_action_obs).
                foreach (AlphaVector av in m_lVectors)
                {
                    AlphaVector cur_alpha_ao = computeAlphaAO(av, action, obs);
                    double      cur_val      = cur_alpha_ao.InnerProduct(bs);
                    if (cur_val > best_val)
                    {
                        best_alpha_ao = cur_alpha_ao;
                        best_val      = cur_val;
                    }
                }

                discountedRewardVector += best_alpha_ao;
            }

            // Apply the discount factor to the summed vectors.
            discountedRewardVector = discountedRewardVector * m_dDomain.DiscountFactor;

            // Reward vector of the action: a single TryGetValue lookup replaces the
            // ContainsKey + indexer pair; on a miss the vector is built and cached.
            AlphaVector rA;
            if (!rewardsVectors.TryGetValue(action, out rA))
            {
                rA = new AlphaVector();
                foreach (State s in m_dDomain.States)
                {
                    rA[s] = s.Reward(action);
                }
                rewardsVectors[action] = rA;
            }

            return(discountedRewardVector + rA);
        }
예제 #4
0
        /// <summary>
        /// Returns the vector in <paramref name="m_lVectors"/> whose inner product with
        /// <paramref name="b"/> is largest. On ties the earliest vector in the list wins.
        /// </summary>
        /// <param name="m_lVectors">Candidate alpha vectors.</param>
        /// <param name="b">Belief state at which the candidates are evaluated.</param>
        /// <returns>
        /// The maximizing vector, or a default-constructed <c>AlphaVector</c> when the list is
        /// empty (or no candidate scores above negative infinity).
        /// </returns>
        private AlphaVector ArgMax(List <AlphaVector> m_lVectors, BeliefState b)
        {
            // Track the best score explicitly. Seeding the search with a default-constructed
            // vector (as before) let that placeholder win whenever every real candidate scored
            // at or below it, e.g. when all values are negative
            // (presumably the default vector scores 0 - TODO confirm).
            AlphaVector maxAlphaVector = null;
            double      maxValue       = double.NegativeInfinity;

            foreach (AlphaVector aVector in m_lVectors)
            {
                // Each candidate is evaluated exactly once.
                double value = aVector.InnerProduct(b);
                if (value > maxValue)
                {
                    maxValue       = value;
                    maxAlphaVector = aVector;
                }
            }

            // Keep the previous non-null contract for an empty candidate list.
            return maxAlphaVector ?? new AlphaVector();
        }
예제 #5
0
        /// <summary>
        /// Runs point-based value iteration over a generated set of belief states.
        /// Each iteration builds a new vector set V' (<c>VTag</c>): a belief is picked from the
        /// not-yet-improved set B' (<c>BTag</c>) and backed up; if the backed-up vector beats the
        /// current value at that belief, it is kept and every belief it improves is removed from
        /// B'; otherwise the best existing vector for that belief is kept. V' then replaces
        /// <c>m_lVectors</c>.
        /// </summary>
        /// <param name="cBeliefs">Number of belief states passed to <c>GenerateB</c>.</param>
        /// <param name="cMaxIterations">Number of iterations to run.</param>
        public void PointBasedVI(int cBeliefs, int cMaxIterations)
        {
            Random             rand = new Random();
            List <BeliefState> B    = GenerateB(cBeliefs, rand);

            InitV();
            m_dGCache = new Dictionary <AlphaVector, Dictionary <Action, Dictionary <Observation, AlphaVector> > >();

            while (cMaxIterations > 0)
            {
                List <BeliefState> BTag = CopyB(B);
                List <AlphaVector> VTag = new List <AlphaVector>();
                while (BTag.Count != 0)
                {
                    // Choose a point in BTag to improve.
                    BeliefState bCurr = RandomBeliefState(BTag, rand);
                    AlphaVector newAV = Backup(bCurr);
                    AlphaVector avBest;
                    double      currValue = ValueOf(bCurr, m_lVectors, out avBest);
                    double      AlphaDotb = newAV.InnerProduct(bCurr);
                    if (AlphaDotb > currValue)
                    {
                        // Remove from BTag every point whose value newAV matches or improves.
                        // This must mutate BTag: the previous Where(...).ToList() built a
                        // filtered copy and threw it away, so improved points (bCurr included)
                        // were never removed and the loop could not terminate.
                        BTag.RemoveAll(b => newAV.InnerProduct(b) >= ValueOf(b, m_lVectors, out AlphaVector avTmp));
                        avBest = newAV;
                    }
                    else
                    {
                        BTag.Remove(bCurr);
                        avBest = ArgMax(m_lVectors, b: bCurr);
                    }

                    // Several beliefs can share the same best vector; keep a single copy.
                    if (!VTag.Contains(avBest))
                    {
                        VTag.Add(avBest);
                    }
                }
                m_lVectors = VTag;
                cMaxIterations--;
            }
        }
예제 #6
0
        /// <summary>
        /// Backs up a belief state: evaluates <c>G(bs, action)</c> for every action in the domain
        /// and returns the resulting vector with the largest inner product at <paramref name="bs"/>.
        /// </summary>
        /// <param name="bs">Belief state to back up.</param>
        /// <returns>The best vector found, or <c>null</c> if none scored above negative infinity.</returns>
        private AlphaVector backup(BeliefState bs)
        {
            AlphaVector winner = null;
            double      winnerScore = double.NegativeInfinity;

            foreach (var candidateAction in m_dDomain.Actions)
            {
                AlphaVector candidate = G(bs, candidateAction);
                double      score     = candidate.InnerProduct(bs);

                // Only a strictly better score replaces the current winner.
                if (!(score > winnerScore))
                {
                    continue;
                }

                winnerScore = score;
                winner      = candidate;
            }

            return winner;
        }
예제 #7
0
        /**
         * Backup(m_lVectors, bs): evaluates computeBestAlpha(a, bs) for every action a
         * in the domain and returns the vector alpha_a_bs whose inner product with bs
         * is the largest.
         *
         * When no action produces a value above negative infinity (for example, the
         * domain has no actions), a default-constructed AlphaVector is returned.
         */
        private AlphaVector backup(BeliefState bs)
        {
            AlphaVector winner      = new AlphaVector();
            AlphaVector candidate   = new AlphaVector();
            double      winnerScore = double.NegativeInfinity;

            foreach (Action a in m_dDomain.Actions)
            {
                candidate = computeBestAlpha(a, bs); // alpha_a_b for this action
                double score = candidate.InnerProduct(bs);

                // Skip anything that does not strictly beat the best value so far.
                if (!(score > winnerScore))
                {
                    continue;
                }

                winner      = candidate;
                winnerScore = score;
            }

            return winner;
        }
예제 #8
0
        // Returns the best alpha vector for a belief state: G(bs, a) is computed for every
        // action a in the domain, and the result with the highest inner product at bs is
        // returned. Returns null when no action yields a value above negative infinity
        // (for example, when the domain has no actions).
        private AlphaVector Backup(BeliefState bs)
        {
            AlphaVector avBest    = null;
            double      dMaxValue = double.NegativeInfinity;

            foreach (Action aCurr in m_dDomain.Actions)
            {
                // G(bs, aCurr) takes no existing vector as an argument, so it is evaluated
                // once per action. The previous inner loop over m_lVectors recomputed the
                // same value for every vector and then returned the loop's *existing* vector
                // instead of the newly backed-up one.
                AlphaVector avBA   = G(bs, aCurr);
                double      dValue = avBA.InnerProduct(bs);
                if (dMaxValue < dValue)
                {
                    dMaxValue = dValue;
                    avBest    = avBA;
                }
            }
            return(avBest);
        }
예제 #9
0
        /**
         * Performs the Value Iteration algorithm using the Perseus update algorithm:
         * generates a set containing cBeliefs belief states and performs
         * cMaxIterations value-iteration rounds.
         *
         * Each round builds a new vector set V' (vTag). A belief state is sampled from
         * the set B' of beliefs still to improve and backed up. If the backed-up vector
         * is at least as good as the current value at that belief, it goes into V' and
         * every belief it does not make worse is removed from B'; otherwise only the
         * sampled belief is removed and its previous best vector goes into V'.
         * After the round, V' replaces m_lVectors.
         *
         * The iteration count is the only stopping criterion; no convergence test runs.
         */
        public void PointBasedVI(int cBeliefs, int cMaxIterations)
        {
            // Initial set of cBeliefs belief states.
            List <BeliefState> beliefStates = CollectBeliefs(cBeliefs);

            for (int iterationsLeft = cMaxIterations; iterationsLeft > 0; iterationsLeft--)
            {
                List <AlphaVector> vTag = new List <AlphaVector>();                                   // V'
                List <BeliefState> beliefStatesLeftToImprove = new List <BeliefState>(beliefStates); // B'

                while (beliefStatesLeftToImprove.Count > 0)
                {
                    // Sample a random belief state from B' by index.
                    int         ri        = RandomGenerator.Next(beliefStatesLeftToImprove.Count);
                    BeliefState sampledBS = beliefStatesLeftToImprove[ri];

                    // Backup of the sampled belief, and its value V(sampledBS) under the
                    // current vector set (the best dot product alpha * b).
                    AlphaVector alpha = backup(sampledBS);
                    AlphaVector prevBestAlphaVector = null;
                    double      prevValue = ValueOf(sampledBS, m_lVectors, out prevBestAlphaVector);

                    AlphaVector alphaToAdd; // vector that goes into V' for this step
                    if (alpha.InnerProduct(sampledBS) >= prevValue)
                    {
                        // alpha is dominating: keep in B' only the beliefs for which alpha is
                        // strictly worse than their current value. sampledBS itself always
                        // leaves B' here, which guarantees progress.
                        beliefStatesLeftToImprove.RemoveAll(b_prime =>
                        {
                            AlphaVector ignored;
                            return !(alpha.InnerProduct(b_prime) < ValueOf(b_prime, m_lVectors, out ignored));
                        });
                        alphaToAdd = alpha;
                    }
                    else
                    {
                        // alpha does not improve: drop sampledBS and keep its previous best vector.
                        beliefStatesLeftToImprove.Remove(sampledBS);
                        alphaToAdd = prevBestAlphaVector;
                    }

                    if (!vTag.Contains(alphaToAdd))
                    {
                        vTag.Add(alphaToAdd);
                    }
                }

                Console.WriteLine("Iterations left {0}", iterationsLeft);
                m_lVectors = vTag;
            }
        }