예제 #1
0
        /// <summary>
        /// Returns the action attached to the alpha vector that <c>ValueOf</c> selects
        /// for <paramref name="bs"/> among <c>m_lVectors</c>.
        /// </summary>
        /// <param name="bs">Belief state to act on.</param>
        /// <returns>The <c>Action</c> of the selected alpha vector.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// <c>ValueOf</c> handed back no alpha vector, so there is nothing to read an action from.
        /// </exception>
        public override Action GetAction(BeliefState bs)
        {
            AlphaVector avBest;

            ValueOf(bs, m_lVectors, out avBest);

            // The original dereferenced avBest unconditionally and crashed with a bare
            // NullReferenceException when no vector was selected; fail with a message that
            // points at the likely cause instead.
            if (ReferenceEquals(avBest, null))
            {
                throw new System.InvalidOperationException(
                          "GetAction: no alpha vector was selected for the belief state (is m_lVectors empty or not yet computed?).");
            }
            return(avBest.Action);
        }
예제 #2
0
        /// <summary>
        /// Returns the vector derived from <paramref name="av"/> for action <paramref name="a"/> and
        /// observation <paramref name="o"/>, memoized in <c>m_dGCache</c> (keyed by vector, then action,
        /// then observation). The entry for state s is the sum over all states s' of
        /// O(a, s', o) * T(s, a, s') * av[s'].
        /// </summary>
        /// <param name="a">Action used for the transition and observation probabilities.</param>
        /// <param name="o">Observation whose probability weights each successor state.</param>
        /// <param name="av">Alpha vector supplying the successor-state values.</param>
        /// <returns>The cached or newly computed alpha vector (created with action <paramref name="a"/>).</returns>
        private AlphaVector G(Action a, Observation o, AlphaVector av)
        {
            // First cache level: per source vector.
            Dictionary <Action, Dictionary <Observation, AlphaVector> > dByAction;
            if (!m_dGCache.TryGetValue(av, out dByAction))
            {
                dByAction     = new Dictionary <Action, Dictionary <Observation, AlphaVector> >();
                m_dGCache[av] = dByAction;
            }

            // Second cache level: per action.
            Dictionary <Observation, AlphaVector> dByObservation;
            if (!dByAction.TryGetValue(a, out dByObservation))
            {
                dByObservation = new Dictionary <Observation, AlphaVector>();
                dByAction[a]   = dByObservation;
            }

            // Third cache level: per observation; a hit ends the call.
            AlphaVector avCached;
            if (dByObservation.TryGetValue(o, out avCached))
            {
                return(avCached);
            }

            AlphaVector avResult = new AlphaVector(a);
            foreach (State sFrom in m_dDomain.States)
            {
                double dTotal = 0.0;
                foreach (State sTo in m_dDomain.States)
                {
                    dTotal += sTo.ObservationProbability(a, o) * sFrom.TransitionProbability(a, sTo) * av[sTo];
                }
                avResult[sFrom] = dTotal;
            }

            dByObservation[o] = avResult;
            return(avResult);
        }
예제 #3
0
        /**
         * Returns alpha_a_o for the given alpha vector, action and observation, using the
         * m_dAlphaA_O dictionary as a cache keyed by the (alpha, a, o) triple.
         * For every state s of the domain:
         * alpha_a_o[s] = SUM over s' in s.Successors(a) of alpha[s'] * O(a,s',o) * T(s,a,s')
         */
        private AlphaVector computeAlphaAO(AlphaVector alpha, Action a, Observation o)
        {
            Tuple <AlphaVector, Action, Observation> cacheKey = new Tuple <AlphaVector, Action, Observation>(alpha, a, o);
            AlphaVector cached;

            //Cache hit: nothing to compute
            if (m_dAlphaA_O.TryGetValue(cacheKey, out cached))
            {
                return(cached);
            }

            AlphaVector alphaAO = new AlphaVector(a);
            foreach (State source in m_dDomain.States)
            {
                //The entry is written as 0 first and overwritten with the final sum below
                alphaAO[source] = 0;
                double total = 0;
                //Only the states returned by Successors(a) contribute (per the original author's
                //note, these are the only ones with T(s,a,succ)>0)
                foreach (State next in source.Successors(a))
                {
                    total += (alpha[next] * next.ObservationProbability(a, o) * source.TransitionProbability(a, next));
                }
                alphaAO[source] = total;
            }

            m_dAlphaA_O.Add(cacheKey, alphaAO);
            return(alphaAO);
        }
예제 #4
0
        /// <summary>
        /// Builds a new alpha vector for belief state <paramref name="bs"/> and action <paramref name="a"/>:
        /// for every observation, the vector G(a, o, v) with the largest inner product with
        /// <paramref name="bs"/> (v ranging over <c>m_lVectors</c>) is summed up; the sum is scaled by the
        /// domain's discount factor and the per-state reward of <paramref name="a"/> is added.
        /// </summary>
        /// <param name="bs">Belief state used to pick the best vector per observation.</param>
        /// <param name="a">Action the resulting vector is created with.</param>
        /// <returns>A fresh alpha vector holding the discounted sum plus reward for every domain state.</returns>
        private AlphaVector G(BeliefState bs, Action a)
        {
            AlphaVector avAccumulated = new AlphaVector(a);

            foreach (Observation o in m_dDomain.Observations)
            {
                // Per-observation arg-max over the current vector set.
                AlphaVector avBestForObs = null;
                double      dBestForObs  = double.NegativeInfinity;

                foreach (AlphaVector avCandidate in m_lVectors)
                {
                    AlphaVector avProjected = G(a, o, avCandidate);
                    double      dScore      = avProjected.InnerProduct(bs);
                    if (dScore > dBestForObs)
                    {
                        avBestForObs = avProjected;
                        dBestForObs  = dScore;
                    }
                }
                avAccumulated += avBestForObs;
            }
            avAccumulated *= m_dDomain.DiscountFactor;

            // Add the immediate reward of the action state by state.
            AlphaVector avBackedUp = new AlphaVector(a);
            foreach (State s in m_dDomain.States)
            {
                avBackedUp[s] = avAccumulated[s] + s.Reward(a);
            }
            return(avBackedUp);
        }
예제 #5
0
        // Performs one sweep over the belief states in bsSet and rebuilds m_lVectors from it.
        // Every belief is handled once via a work list (copyBset):
        //  - if backup(belief) scores higher at that belief than the vector currently stored for it
        //    in m_valueFunction, the backup vector is stored for the belief, added to the new vector
        //    list, and also assigned to every remaining belief it improves (those are dropped from
        //    the work list as well);
        //  - otherwise the vector from m_lVectors with the largest inner product at that belief is
        //    stored for the belief and added to the new list if it is not already there.
        // At the end m_lVectors is replaced by the new list. The caller's bsSet is not modified.
        private void pruneAlphaVector(List <BeliefState> bsSet)
        {
            // Work on a copy so removals below do not touch the caller's list.
            List <BeliefState> copyBset      = new List <BeliefState>(bsSet);
            // Vectors collected during this sweep; becomes m_lVectors at the end.
            List <AlphaVector> temp_lVectors = new List <AlphaVector>();

            while (copyBset.Any())
            {
                // Always take the first remaining belief.
                BeliefState _bs     = copyBset.ElementAt(0);
                AlphaVector _alpha  = backup(_bs);
                double      _reward = _alpha.InnerProduct(_bs);
                // Strict improvement over the vector currently stored for this belief?
                if (this.m_valueFunction[_bs].InnerProduct(_bs) < _reward)
                {
                    this.m_valueFunction[_bs] = _alpha;
                    // NOTE(review): added without a Contains check, unlike the else branch below.
                    temp_lVectors.Add(_alpha);
                    copyBset.Remove(_bs);
                    // Iterate over a snapshot because copyBset is modified inside the loop.
                    List <BeliefState> copyBset_inner = new List <BeliefState>(copyBset);
                    foreach (BeliefState temp_bs in copyBset_inner)
                    {
                        double __reward = _alpha.InnerProduct(temp_bs);
                        double curr_val = this.m_valueFunction[temp_bs].InnerProduct(temp_bs);
                        // The new vector also strictly improves this belief: adopt it and skip the belief.
                        if (curr_val < __reward)
                        {
                            this.m_valueFunction[temp_bs] = _alpha;
                            copyBset.Remove(temp_bs);
                        }
                    }
                }

                else
                {
                    copyBset.Remove(_bs);
                    // No improvement: fall back to the best vector of the current set for this belief.
                    double      max_reward = double.NegativeInfinity;
                    AlphaVector max_alpha  = null;
                    foreach (AlphaVector alpha in m_lVectors)
                    {
                        double reward = alpha.InnerProduct(_bs);
                        if (reward > max_reward)
                        {
                            max_reward = reward;
                            max_alpha  = alpha;
                        }
                    }
                    // NOTE(review): max_alpha stays null when m_lVectors is empty (or no inner product
                    // compares greater than negative infinity); null would then be added and stored.
                    if (!temp_lVectors.Contains(max_alpha))
                    {
                        temp_lVectors.Add(max_alpha);
                    }
                    this.m_valueFunction[_bs] = max_alpha;
                }
            }
            // Disabled alternative (kept from the original): rebuild m_lVectors from m_valueFunction.Values.
            //this.m_lVectors = new List<AlphaVector>();
            this.m_lVectors = temp_lVectors;
            //foreach (AlphaVector updated_alpha in m_valueFunction.Values)
            //{
            //   if(!this.m_lVectors.Contains(updated_alpha))
            //      this.m_lVectors.Add(updated_alpha);
            //}
        }
예제 #6
0
        /// <summary>
        /// Scalar multiplication: returns a new alpha vector with the same action as
        /// <paramref name="av"/> whose entries are the entries of <paramref name="av"/>
        /// multiplied by <paramref name="dScalar"/>. The operand is not modified.
        /// </summary>
        public static AlphaVector operator *(AlphaVector av, double dScalar)
        {
            AlphaVector avScaled = new AlphaVector(av.Action);

            foreach (KeyValuePair <State, double> entry in av.Values)
            {
                State  sKey    = entry.Key;
                double dScaled = entry.Value * dScalar;
                avScaled.m_dValues[sKey] = dScaled;
            }
            return(avScaled);
        }
예제 #7
0
 /// <summary>
 /// Resets <c>m_lVectors</c> to a fresh list holding one alpha vector per domain action,
 /// each initialised through <c>InitAlphaVector</c> with the domain's states.
 /// </summary>
 private void InitV()
 {
     m_lVectors = new List <AlphaVector>();

     foreach (Action aCurrent in m_dDomain.Actions)
     {
         AlphaVector avForAction = new AlphaVector(aCurrent);

         avForAction.InitAlphaVector(m_dDomain.States);
         m_lVectors.Add(avForAction);
     }
 }
예제 #8
0
        /**
         * Builds the alpha vector for the given action and belief state bs (alpha_action_bs):
         * for each observation the alpha_a_o vector with the largest dot product with bs is
         * picked, those vectors are summed, the sum is multiplied by the discount factor and
         * the action's reward vector is added.
         */
        private AlphaVector computeBestAlpha(Action action, BeliefState bs)
        {
            //Running sum of the per-observation winners; carries the action on its root
            AlphaVector discountedSum = new AlphaVector(action);

            foreach (Observation obs in m_dDomain.Observations)
            {
                //Winner for this observation; stays an empty vector if nothing beats -infinity
                AlphaVector winner      = new AlphaVector();
                double      winnerValue = double.NegativeInfinity;

                foreach (AlphaVector av in m_lVectors)
                {
                    AlphaVector candidate      = computeAlphaAO(av, action, obs);
                    double      candidateValue = candidate.InnerProduct(bs);
                    if (candidateValue > winnerValue)
                    {
                        winnerValue = candidateValue;
                        winner      = candidate;
                    }
                }
                discountedSum += winner;
            }
            discountedSum *= m_dDomain.DiscountFactor;

            //Reward vector of the action, built once and then served from rewardsVectors
            AlphaVector rewardVector;
            if (!rewardsVectors.TryGetValue(action, out rewardVector))
            {
                rewardVector = new AlphaVector();
                foreach (State s in m_dDomain.States)
                {
                    rewardVector[s] = s.Reward(action);
                }
                rewardsVectors[action] = rewardVector;
            }

            return(discountedSum + rewardVector);
        }
예제 #9
0
        /// <summary>
        /// Returns the vector of <paramref name="m_lVectors"/> with the largest inner product with
        /// belief state <paramref name="b"/>; the first such vector wins ties.
        /// </summary>
        /// <remarks>
        /// The previous version seeded the search with an empty <c>AlphaVector</c> and compared every
        /// candidate against that placeholder's inner product, so a real candidate was only returned
        /// when it beat the placeholder. The maximum is now taken over the candidates alone.
        /// </remarks>
        /// <param name="m_lVectors">Candidate alpha vectors.</param>
        /// <param name="b">Belief state the candidates are evaluated at.</param>
        /// <returns>
        /// The maximizing candidate, or a new empty <c>AlphaVector</c> when no candidate was selected
        /// (e.g. the list is empty), so the method still never returns null.
        /// </returns>
        private AlphaVector ArgMax(List <AlphaVector> m_lVectors, BeliefState b)
        {
            AlphaVector avBest = null;
            double      dBest  = double.NegativeInfinity;

            foreach (AlphaVector avCandidate in m_lVectors)
            {
                // One inner product per candidate; the incumbent's value is remembered, not recomputed.
                double dValue = avCandidate.InnerProduct(b);
                if (dValue > dBest)
                {
                    dBest  = dValue;
                    avBest = avCandidate;
                }
            }

            if (ReferenceEquals(avBest, null))
            {
                return(new AlphaVector());
            }
            return(avBest);
        }
예제 #10
0
        /// <summary>
        /// Entry-wise addition: starts from a copy of <paramref name="av1"/> and adds every entry of
        /// <paramref name="av2"/> to it, treating states missing from the copy as 0. Neither operand
        /// is modified.
        /// </summary>
        public static AlphaVector operator +(AlphaVector av1, AlphaVector av2)
        {
            AlphaVector avSum = new AlphaVector(av1);

            foreach (KeyValuePair <State, double> term in av2.Values)
            {
                double dExisting;
                if (!avSum.m_dValues.TryGetValue(term.Key, out dExisting))
                {
                    // State not present in the left operand: start from zero.
                    dExisting = 0.0;
                }
                avSum.m_dValues[term.Key] = dExisting + term.Value;
            }
            return(avSum);
        }
예제 #11
0
        /// <summary>
        /// Evaluates belief state <paramref name="bs"/> against <paramref name="lVectors"/>: returns the
        /// largest inner product and hands the vector that achieved it back through
        /// <paramref name="avBest"/>.
        /// </summary>
        /// <param name="bs">Belief state to evaluate.</param>
        /// <param name="lVectors">Alpha vectors to maximize over.</param>
        /// <param name="avBest">The maximizing vector, or null when no vector was selected.</param>
        /// <returns>The maximal inner product, or negative infinity when no vector was selected.</returns>
        private double ValueOf(BeliefState bs, List <AlphaVector> lVectors, out AlphaVector avBest)
        {
            avBest = null;
            double dBestSoFar = double.NegativeInfinity;

            foreach (AlphaVector avCandidate in lVectors)
            {
                double dCandidate = avCandidate.InnerProduct(bs);
                if (!(dCandidate > dBestSoFar))
                {
                    continue;
                }
                avBest     = avCandidate;
                dBestSoFar = dCandidate;
            }
            return(dBestSoFar);
        }
예제 #12
0
        /**
         * Value of belief state bs with respect to a list of alpha vectors:
         * scans the list for the vector alpha with the largest dot(bs,alpha), returns that
         * dot product and passes the vector back as avBest (null and negative infinity when
         * no vector is selected).
         */
        private double ValueOf(BeliefState bs, List <AlphaVector> lVectors, out AlphaVector avBest)
        {
            double dTopValue = double.NegativeInfinity;

            avBest = null;
            for (int iVector = 0; iVector < lVectors.Count; iVector++)
            {
                AlphaVector avCandidate = lVectors[iVector];
                double      dDot        = avCandidate.InnerProduct(bs);
                //Strictly greater, so the earliest maximizer is kept on ties
                if (dDot > dTopValue)
                {
                    avBest    = avCandidate;
                    dTopValue = dDot;
                }
            }
            return(dTopValue);
        }
예제 #13
0
        /// <summary>
        /// Backup for belief state <paramref name="bs"/>: builds G(bs, action) for every domain action
        /// and returns the one with the largest inner product with <paramref name="bs"/>.
        /// </summary>
        /// <param name="bs">Belief state to back up.</param>
        /// <returns>The best vector found, or null when no action produced a selected vector.</returns>
        private AlphaVector backup(BeliefState bs)
        {
            AlphaVector avWinner     = null;
            double      dWinnerValue = double.NegativeInfinity;

            foreach (var candidateAction in m_dDomain.Actions)
            {
                AlphaVector avCandidate     = G(bs, candidateAction);
                double      dCandidateValue = avCandidate.InnerProduct(bs);
                if (!(dCandidateValue > dWinnerValue))
                {
                    continue;
                }
                avWinner     = avCandidate;
                dWinnerValue = dCandidateValue;
            }
            return(avWinner);
        }
예제 #14
0
        /**
         *  The Backup operation: receives a belief state bs and returns the alpha vector
         *  alpha_a_bs (computed by computeBestAlpha for every action a of the domain) that
         *  maximizes dot(bs,alpha_a_bs).
         *  When no vector is selected (e.g. the domain has no actions) an empty AlphaVector is
         *  returned, as before.
         */
        private AlphaVector backup(BeliefState bs)
        {
            //Fallback result; only the winner needs a placeholder, the per-action candidate
            //no longer gets a throwaway allocation on every call
            AlphaVector avBest    = new AlphaVector();
            double      dMaxValue = double.NegativeInfinity;

            foreach (Action a in m_dDomain.Actions)
            {
                AlphaVector avCurrent = computeBestAlpha(a, bs);    // alpha_a_b
                double      dValue    = avCurrent.InnerProduct(bs); // dot product with bs
                if (dValue > dMaxValue)
                {
                    avBest    = avCurrent;
                    dMaxValue = dValue;
                }
            }
            return(avBest);
        }
예제 #15
0
        /// <summary>
        /// Backup for belief state <paramref name="bs"/>: builds G(bs, a) for every domain action and
        /// returns the resulting vector with the largest inner product with <paramref name="bs"/>.
        /// </summary>
        /// <remarks>
        /// The previous version wrapped the computation in a loop over <c>m_lVectors</c>, recomputing
        /// the same G(bs, a) once per existing vector, and returned the loop's current OLD vector
        /// instead of the newly built one. It now returns the backed-up vector itself.
        /// </remarks>
        /// <param name="bs">Belief state to back up.</param>
        /// <returns>
        /// The best backed-up vector, or null when <c>m_lVectors</c> is null or empty or no candidate
        /// was selected.
        /// </returns>
        private AlphaVector Backup(BeliefState bs)
        {
            // With an empty vector set the old inner loop never ran, so null was returned; keep that.
            if (m_lVectors == null || m_lVectors.Count == 0)
            {
                return(null);
            }

            AlphaVector avBest    = null;
            double      dMaxValue = double.NegativeInfinity;

            foreach (Action aCurr in m_dDomain.Actions)
            {
                // G(bs, aCurr) does not depend on any single existing vector: compute it once per action.
                AlphaVector avBA   = G(bs, aCurr);
                double      dValue = avBA.InnerProduct(bs);
                if (dMaxValue < dValue)
                {
                    dMaxValue = dValue;
                    avBest    = avBA;
                }
            }
            return(avBest);
        }
예제 #16
0
        /// <summary>
        /// Point-based value iteration: generates <paramref name="cBeliefs"/> belief points, resets the
        /// value function and the G cache, then performs <paramref name="cMaxIterations"/> sweeps. In each
        /// sweep belief points are picked from a working copy until it is empty; every pick contributes
        /// one vector to the next vector set, which replaces <c>m_lVectors</c> at the end of the sweep.
        /// </summary>
        /// <remarks>
        /// The previous version called <c>BTag.Where(...).ToList()</c> and discarded the result, so
        /// improved belief points were never removed and the inner loop could not finish once a backup
        /// improved a point. The points are now actually removed from the working copy.
        /// </remarks>
        /// <param name="cBeliefs">Number of belief points passed to <c>GenerateB</c>.</param>
        /// <param name="cMaxIterations">Number of sweeps; values of zero or less perform none.</param>
        public void PointBasedVI(int cBeliefs, int cMaxIterations)
        {
            Random             rand = new Random();
            List <BeliefState> B    = GenerateB(cBeliefs, rand);

            InitV();
            m_dGCache = new Dictionary <AlphaVector, Dictionary <Action, Dictionary <Observation, AlphaVector> > >();

            for (int iIteration = 0; iIteration < cMaxIterations; iIteration++)
            {
                List <BeliefState> BTag = CopyB(B);
                List <AlphaVector> VTag = new List <AlphaVector>();
                while (BTag.Count != 0)
                {
                    //choose arbitrary point in BTag to improve
                    BeliefState bCurr = RandomBeliefState(BTag, rand);
                    AlphaVector newAV = Backup(bCurr);
                    // Assigned by ValueOf through the out parameter; no placeholder allocation needed.
                    AlphaVector avBest;
                    double      currValue = ValueOf(bCurr, m_lVectors, out avBest);
                    double      AlphaDotb = newAV.InnerProduct(bCurr);
                    if (AlphaDotb > currValue)
                    {
                        //remove from B points whose value was improved by new newAV
                        BTag.RemoveAll(b => newAV.InnerProduct(b) >= ValueOf(b, m_lVectors, out AlphaVector avTmp));
                        // bCurr satisfies the predicate above; removing it explicitly as well
                        // guarantees the working set shrinks on every pass.
                        BTag.Remove(bCurr);
                        avBest = newAV;
                    }
                    else
                    {
                        BTag.Remove(bCurr);
                        avBest = ArgMax(m_lVectors, b: bCurr);
                    }
                    VTag.Add(avBest);
                }
                m_lVectors = VTag;
            }
        }
예제 #17
0
 /// <summary>
 /// Tolerance-based equality: <paramref name="obj"/> is equal when it is an <c>AlphaVector</c> and,
 /// for every state stored in either vector, the two vectors' entries differ by at most 0.001.
 /// Only the entries are compared.
 /// </summary>
 public override bool Equals(object obj)
 {
     if (!(obj is AlphaVector))
     {
         return(false);
     }

     const double dTolerance = 0.001;
     AlphaVector  avOther    = (AlphaVector)obj;

     // Every entry stored here must be matched by the other vector...
     foreach (KeyValuePair <State, double> mine in m_dValues)
     {
         double dGap = Math.Abs(mine.Value - avOther[mine.Key]);
         if (dGap > dTolerance)
         {
             return(false);
         }
     }

     // ...and every entry stored there must be matched by this one.
     foreach (KeyValuePair <State, double> theirs in avOther.m_dValues)
     {
         double dGap = Math.Abs(theirs.Value - this[theirs.Key]);
         if (dGap > dTolerance)
         {
             return(false);
         }
     }
     return(true);
 }
예제 #18
0
        /**
         * Creates the initial alpha vector: every state gets the same value,
         * (1 / (1 - discount factor)) * (smallest reward over all state/action pairs),
         * which the original author describes as a best practice from the provided article.
         */
        private AlphaVector createInitialAlphaVector()
        {
            //Smallest immediate reward over all (state, action) pairs
            double lowestReward = Double.PositiveInfinity;

            foreach (State s in m_dDomain.States)
            {
                foreach (Action a in m_dDomain.Actions)
                {
                    double reward = s.Reward(a);
                    if (lowestReward > reward)
                    {
                        lowestReward = reward;
                    }
                }
            }

            double uniformValue = (1 / (1 - m_dDomain.DiscountFactor)) * lowestReward;

            AlphaVector initial = new AlphaVector();
            foreach (State s in m_dDomain.States)
            {
                initial[s] = uniformValue;
            }
            return(initial);
        }
예제 #19
0
        /// <summary>
        /// Point-based value iteration driver: collects <paramref name="cBeliefs"/> belief states, seeds
        /// <c>m_lVectors</c> with one empty alpha vector per belief, replaces that set with the backup of
        /// every belief, initialises the value function and then runs <c>pruneAlphaVector</c>
        /// <paramref name="cMaxIterations"/> times.
        /// </summary>
        /// <param name="cBeliefs">Number of belief states passed to <c>CollectBeliefs</c>.</param>
        /// <param name="cMaxIterations">Number of pruning sweeps to run.</param>
        public void PointBasedVI(int cBeliefs, int cMaxIterations)
        {
            List <BeliefState> lBeliefs = CollectBeliefs(cBeliefs);

            // Seed set: one empty vector per collected belief (capacity taken from the state count).
            this.m_lVectors = new List <AlphaVector>(this.m_dDomain.States.Count());
            for (int iBelief = 0; iBelief < lBeliefs.Count; iBelief++)
            {
                m_lVectors.Add(new AlphaVector());
            }

            // Back up every belief against the seed set; m_lVectors is only swapped afterwards.
            List <AlphaVector> lBackedUp = new List <AlphaVector>();
            foreach (BeliefState bsCurrent in lBeliefs)
            {
                lBackedUp.Add(backup(bsCurrent));
            }
            this.m_lVectors = new List <AlphaVector>(lBackedUp);

            initialValueFunction(lBeliefs);

            int cSweepsLeft = cMaxIterations;
            while (cSweepsLeft > 0)
            {
                pruneAlphaVector(lBeliefs);
                cSweepsLeft--;
            }
        }
예제 #20
0
 /// <summary>
 /// Copy constructor: takes over the action of <paramref name="av"/> and copies its entries into
 /// a new dictionary, so later changes to one vector's entries do not affect the other.
 /// </summary>
 public AlphaVector(AlphaVector av)
 {
     Dictionary <State, double> dCopiedValues = new Dictionary <State, double>(av.m_dValues);

     m_dValues = dCopiedValues;
     Action    = av.Action;
 }
예제 #21
0
        /**
         * Value iteration with the Perseus update scheme: collects cBeliefs belief states and
         * performs cMaxIterations sweeps. In every sweep a working copy B' of the belief set is
         * emptied step by step: a random belief from B' is backed up; if the backup is at least
         * as good as the current value of that belief, the backup is added to V' and every
         * belief of B' it does not make worse is removed; otherwise only the sampled belief is
         * removed and its current best vector is added to V'. V' replaces m_lVectors at the end
         * of the sweep.
         *
         * A convergence check (estimateDiff against an epsilon bound) was sketched by the
         * original author but is disabled, so all cMaxIterations sweeps are always run.
         */
        public void PointBasedVI(int cBeliefs, int cMaxIterations)
        {
            //Generates an initial set containing cBelief belief states
            List <BeliefState> beliefStates = CollectBeliefs(cBeliefs);

            for (int iterationsLeft = cMaxIterations; iterationsLeft > 0; iterationsLeft--)
            {
                List <AlphaVector> vTag = new List <AlphaVector>();                                  // V'
                List <BeliefState> beliefStatesLeftToImprove = new List <BeliefState>(beliefStates); // B'

                while (beliefStatesLeftToImprove.Count > 0)
                {
                    //Pick a random belief state to improve; the list indexer replaces the former
                    //hand-stepped enumerator walk to the ri'th element
                    int         ri        = RandomGenerator.Next(beliefStatesLeftToImprove.Count);
                    BeliefState sampledBS = beliefStatesLeftToImprove[ri];

                    //Backup of the sampled belief and its value under the current vector set
                    AlphaVector alpha = backup(sampledBS);
                    AlphaVector prevBestAlphaVector;
                    double      prevValue = ValueOf(sampledBS, m_lVectors, out prevBestAlphaVector);

                    AlphaVector alphaToAdd; //the alpha vector that goes into V'

                    if (alpha.InnerProduct(sampledBS) >= prevValue)
                    {
                        //alpha is at least as good here: keep only the beliefs it would make worse
                        List <BeliefState> beliefStatesToKeep = new List <BeliefState>();
                        foreach (BeliefState b_prime in beliefStatesLeftToImprove)
                        {
                            AlphaVector ignoredBest;
                            if (alpha.InnerProduct(b_prime) < ValueOf(b_prime, m_lVectors, out ignoredBest))
                            {
                                beliefStatesToKeep.Add(b_prime);
                            }
                        }
                        beliefStatesLeftToImprove = beliefStatesToKeep;
                        alphaToAdd = alpha;
                    }
                    else
                    {
                        //alpha does not improve: drop the sampled belief and keep its old best vector
                        beliefStatesLeftToImprove.Remove(sampledBS);
                        alphaToAdd = prevBestAlphaVector;
                    }

                    if (!vTag.Contains(alphaToAdd))
                    {
                        vTag.Add(alphaToAdd);
                    }
                }

                Console.WriteLine("Iterations left {0}", iterationsLeft);
                m_lVectors = vTag;
            }
        }