private void pruneAlphaVector(List<BeliefState> bsSet)
{
    List<BeliefState> copyBset = new List<BeliefState>(bsSet);
    List<AlphaVector> temp_lVectors = new List<AlphaVector>();
    while (copyBset.Any())
    {
        BeliefState _bs = copyBset.ElementAt(0);
        AlphaVector _alpha = backup(_bs);
        double _reward = _alpha.InnerProduct(_bs);
        if (this.m_valueFunction[_bs].InnerProduct(_bs) < _reward)
        {
            // The backed-up vector improves _bs: keep it, and also remove every other
            // belief state that it improves.
            this.m_valueFunction[_bs] = _alpha;
            temp_lVectors.Add(_alpha);
            copyBset.Remove(_bs);
            List<BeliefState> copyBset_inner = new List<BeliefState>(copyBset);
            foreach (BeliefState temp_bs in copyBset_inner)
            {
                double __reward = _alpha.InnerProduct(temp_bs);
                double curr_val = this.m_valueFunction[temp_bs].InnerProduct(temp_bs);
                if (curr_val < __reward)
                {
                    this.m_valueFunction[temp_bs] = _alpha;
                    copyBset.Remove(temp_bs);
                }
            }
        }
        else
        {
            // No improvement: keep the best existing vector for _bs.
            copyBset.Remove(_bs);
            double max_reward = double.NegativeInfinity;
            AlphaVector max_alpha = null;
            foreach (AlphaVector alpha in m_lVectors)
            {
                double reward = alpha.InnerProduct(_bs);
                if (reward > max_reward)
                {
                    max_reward = reward;
                    max_alpha = alpha;
                }
            }
            if (!temp_lVectors.Contains(max_alpha))
            {
                temp_lVectors.Add(max_alpha);
            }
            this.m_valueFunction[_bs] = max_alpha;
        }
    }
    this.m_lVectors = temp_lVectors;
}
// Generates a new alpha vector from a belief state and an action.
private AlphaVector G(BeliefState bs, Action a)
{
    AlphaVector avSum = new AlphaVector(a);
    AlphaVector avGMax = null;
    double dValue = 0.0, dMaxValue = double.NegativeInfinity;
    foreach (Observation o in m_dDomain.Observations)
    {
        dMaxValue = double.NegativeInfinity;
        avGMax = null;
        foreach (AlphaVector avCurrent in m_lVectors)
        {
            AlphaVector avG = G(a, o, avCurrent);
            dValue = avG.InnerProduct(bs);
            if (dValue > dMaxValue)
            {
                dMaxValue = dValue;
                avGMax = avG;
            }
        }
        avSum += avGMax;
    }
    avSum *= m_dDomain.DiscountFactor;
    AlphaVector avResult = new AlphaVector(a);
    foreach (State s in m_dDomain.States)
    {
        avResult[s] = avSum[s] + s.Reward(a);
    }
    return avResult;
}
/**
 * Computes the best alpha vector with action on its root for belief state bs
 * (alpha_action_bs).
 */
private AlphaVector computeBestAlpha(Action action, BeliefState bs)
{
    // Initialize an alpha vector with action on its root.
    AlphaVector discountedRewardVector = new AlphaVector(action);
    // For each observation obs we loop over all alpha vectors and find the one maximizing
    // dot(bs, alpha_action_obs); the sum of these vectors is used to compute alpha_action_bs.
    foreach (Observation obs in m_dDomain.Observations)
    {
        // Compute alpha_action_obs for every observation, according to the equation in the slides.
        AlphaVector cur_alpha_ao = null;
        AlphaVector best_alpha_ao = new AlphaVector();
        double best_val = double.NegativeInfinity;
        double cur_val = 0;
        // Loop over all alpha vectors, finding the one that maximizes dot(bs, alpha_action_obs).
        foreach (AlphaVector av in m_lVectors)
        {
            // Compute av_action_obs for every av.
            cur_alpha_ao = computeAlphaAO(av, action, obs);
            // Dot product between av_action_obs and the belief state bs.
            cur_val = cur_alpha_ao.InnerProduct(bs);
            // Keep the vector maximizing the dot product.
            if (cur_val > best_val)
            {
                best_alpha_ao = cur_alpha_ao;
                best_val = cur_val;
            }
        }
        // Sum these maximizing vectors: SUM(arg max(dot(bs, alpha_action_obs))).
        discountedRewardVector += best_alpha_ao;
    }
    // Multiply by the discount factor.
    discountedRewardVector = discountedRewardVector * m_dDomain.DiscountFactor;
    // Add the action's reward vector (cached in rewardsVectors) and return the result.
    AlphaVector rA;
    if (rewardsVectors.ContainsKey(action))
    {
        rA = rewardsVectors[action];
    }
    else
    {
        rA = new AlphaVector();
        foreach (State s in m_dDomain.States)
        {
            rA[s] = s.Reward(action);
        }
        rewardsVectors[action] = rA;
    }
    return discountedRewardVector + rA;
}
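/**
 * Neither G(a, o, alpha) used above nor computeAlphaAO(alpha, action, obs) is defined in this
 * listing. The sketch below is only an assumption of what such a helper typically looks like in
 * point-based value iteration: g_ao(s) = SUM over s' of O(o | a, s') * T(s, a, s') * alpha(s').
 * The accessors m_dDomain.TransitionProbability and m_dDomain.ObservationProbability are assumed
 * names and may differ in the actual domain class.
 */
private AlphaVector computeAlphaAO(AlphaVector alpha, Action a, Observation o)
{
    AlphaVector avResult = new AlphaVector(a);
    foreach (State s in m_dDomain.States)
    {
        double dSum = 0.0;
        foreach (State sTag in m_dDomain.States)
        {
            // Assumed signatures: probability of reaching sTag from s under a,
            // and of observing o in sTag after taking a.
            dSum += m_dDomain.TransitionProbability(s, a, sTag) *
                    m_dDomain.ObservationProbability(a, sTag, o) *
                    alpha[sTag];
        }
        avResult[s] = dSum;
    }
    return avResult;
}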
private AlphaVector ArgMax(List<AlphaVector> m_lVectors, BeliefState b)
{
    AlphaVector maxAlphaVector = new AlphaVector();
    foreach (AlphaVector aVector in m_lVectors)
    {
        if (aVector.InnerProduct(b) > maxAlphaVector.InnerProduct(b))
        {
            maxAlphaVector = aVector;
        }
    }
    return maxAlphaVector;
}
public void PointBasedVI(int cBeliefs, int cMaxIterations)
{
    Random rand = new Random();
    List<BeliefState> B = GenerateB(cBeliefs, rand);
    InitV();
    m_dGCache = new Dictionary<AlphaVector, Dictionary<Action, Dictionary<Observation, AlphaVector>>>();
    List<BeliefState> BTag;
    while (cMaxIterations > 0)
    {
        BTag = CopyB(B);
        List<AlphaVector> VTag = new List<AlphaVector>();
        while (BTag.Count != 0)
        {
            // Choose an arbitrary point in BTag to improve.
            BeliefState bCurr = RandomBeliefState(BTag, rand);
            AlphaVector newAV = Backup(bCurr);
            AlphaVector avBest = new AlphaVector();
            double currValue = ValueOf(bCurr, m_lVectors, out avBest);
            double AlphaDotb = newAV.InnerProduct(bCurr);
            if (AlphaDotb > currValue)
            {
                // Remove from BTag the points whose value was improved by newAV,
                // i.e. keep only the points that newAV does not improve.
                BTag = BTag.Where(b => newAV.InnerProduct(b) < ValueOf(b, m_lVectors, out AlphaVector avTmp)).ToList();
                avBest = newAV;
            }
            else
            {
                BTag.Remove(bCurr);
                avBest = ArgMax(m_lVectors, b: bCurr);
            }
            VTag.Add(avBest);
        }
        m_lVectors = VTag;
        cMaxIterations--;
    }
}
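/**
 * ValueOf is called by the PointBasedVI implementations in this listing, but its body is not
 * shown. A minimal sketch of what it presumably does: return the maximum of dot(alpha, bs) over
 * the given vector set and report the maximizing vector through the out parameter. Only methods
 * already used above (InnerProduct) are assumed here.
 */
private double ValueOf(BeliefState bs, List<AlphaVector> lVectors, out AlphaVector avBest)
{
    avBest = null;
    double dMaxValue = double.NegativeInfinity;
    foreach (AlphaVector av in lVectors)
    {
        double dValue = av.InnerProduct(bs);
        if (dValue > dMaxValue)
        {
            dMaxValue = dValue;
            avBest = av;
        }
    }
    return dMaxValue;
}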
private AlphaVector backup(BeliefState bs)
{
    AlphaVector avBest = null, avCurrent = null;
    double dMaxValue = double.NegativeInfinity, dValue = 0.0;
    // For every action, build its candidate vector G(bs, a) and keep the one
    // maximizing the dot product with bs.
    foreach (var action in m_dDomain.Actions)
    {
        avCurrent = G(bs, action);
        dValue = avCurrent.InnerProduct(bs);
        if (dValue > dMaxValue)
        {
            dMaxValue = dValue;
            avBest = avCurrent;
        }
    }
    return avBest;
}
/**
 * The backup operation: receives a belief state bs and returns Backup(m_lVectors, bs),
 * i.e. the alpha vector alpha_action_bs maximizing dot(bs, alpha_action_bs).
 */
private AlphaVector backup(BeliefState bs)
{
    AlphaVector avBest = new AlphaVector(), avCurrent = new AlphaVector();
    double dMaxValue = double.NegativeInfinity, dValue = 0.0;
    // Loop over all actions in the domain; for every action a take the best
    // alpha vector with a on its root.
    foreach (Action a in m_dDomain.Actions)
    {
        avCurrent = computeBestAlpha(a, bs); // alpha_a_bs
        dValue = avCurrent.InnerProduct(bs); // dot product with bs
        if (dValue > dMaxValue)
        {
            // Keep the vector alpha_a_bs that maximizes the dot product.
            avBest = avCurrent;
            dMaxValue = dValue;
        }
    }
    return avBest; // the best alpha_a_bs
}
// Returns the best alpha vector corresponding to a given belief state.
private AlphaVector Backup(BeliefState bs)
{
    AlphaVector avBest = null;
    double dMaxValue = double.NegativeInfinity, dValue = 0.0;
    foreach (Action aCurr in m_dDomain.Actions)
    {
        // Build the candidate vector for aCurr and keep the one maximizing dot(bs, alpha).
        AlphaVector avBA = G(bs, aCurr);
        dValue = avBA.InnerProduct(bs);
        if (dMaxValue < dValue)
        {
            dMaxValue = dValue;
            avBest = avBA;
        }
    }
    return avBest;
}
/**
 * Performs value iteration using the Perseus update algorithm: generates a set containing
 * cBeliefs belief states and performs at most cMaxIterations iterations.
 */
public void PointBasedVI(int cBeliefs, int cMaxIterations)
{
    // Generate an initial set containing cBeliefs belief states.
    List<BeliefState> beliefStates = CollectBeliefs(cBeliefs);
    List<AlphaVector> vTag; // V'
    //const double EPSILON = 0.1; // convergence boundary for the disabled early-stop check below
    int iterationsLeft = cMaxIterations;
    while (iterationsLeft > 0)
    {
        vTag = new List<AlphaVector>();
        List<BeliefState> beliefStatesLeftToImprove = new List<BeliefState>(beliefStates); // B'
        while (beliefStatesLeftToImprove.Count() > 0) // while there are belief states left to improve
        {
            // Select a random index of a belief state to improve.
            int ri = RandomGenerator.Next(beliefStatesLeftToImprove.Count());
            // Iterate over the belief state set until the item at index ri is reached.
            List<BeliefState>.Enumerator e = beliefStatesLeftToImprove.GetEnumerator();
            for (int i = 0; i < ri + 1; i++)
            {
                e.MoveNext();
            }
            BeliefState sampledBS = e.Current; // sampledBS is a randomly chosen belief state to improve

            // Compute the backup of sampledBS.
            AlphaVector alpha = backup(sampledBS);
            AlphaVector alphaToAdd; // the alpha vector to add to V'
            AlphaVector prevBestAlphaVector = null;
            // The value of sampledBS, V(sampledBS), is the best dot product alpha * b.
            double prevValue = ValueOf(sampledBS, m_lVectors, out prevBestAlphaVector);
            if (alpha.InnerProduct(sampledBS) >= prevValue)
            {
                // alpha is dominating: remove all belief states that it improves.
                List<BeliefState> beliefStatesToKeep = new List<BeliefState>();
                foreach (BeliefState b_prime in beliefStatesLeftToImprove)
                {
                    AlphaVector a = null;
                    if (alpha.InnerProduct(b_prime) < ValueOf(b_prime, m_lVectors, out a))
                    {
                        beliefStatesToKeep.Add(b_prime);
                    }
                }
                beliefStatesLeftToImprove = beliefStatesToKeep;
                // Since alpha is dominating, add it to V'.
                alphaToAdd = alpha;
            }
            else
            {
                // alpha does not improve sampledBS: remove it from the set and keep the previous best vector.
                beliefStatesLeftToImprove.Remove(sampledBS);
                alphaToAdd = prevBestAlphaVector;
            }
            if (!vTag.Contains(alphaToAdd))
            {
                vTag.Add(alphaToAdd);
            }
        }
        /**
         * Optional early stop: estimate how much the alpha vector set changed and
         * finish the update once the difference drops below EPSILON:
         * double diff = estimateDiff(m_lVectors, vTag, beliefStates);
         * if (diff < EPSILON)
         *     break;
         */
        Console.WriteLine("Iterations left {0}", iterationsLeft);
        m_lVectors = vTag;
        iterationsLeft--;
    }
}
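/**
 * A hedged sketch (not part of the original code) of how a policy could be read off the final
 * vector set once PointBasedVI has run: pick the alpha vector maximizing the dot product with
 * the current belief and return the action stored at its root. The property name avBest.Action
 * is an assumption; AlphaVector is only known from the code above to be constructed with an
 * action, so the actual accessor may differ.
 */
public Action GetAction(BeliefState bs)
{
    AlphaVector avBest = null;
    double dMaxValue = double.NegativeInfinity;
    foreach (AlphaVector av in m_lVectors)
    {
        double dValue = av.InnerProduct(bs);
        if (dValue > dMaxValue)
        {
            dMaxValue = dValue;
            avBest = av;
        }
    }
    return avBest == null ? null : avBest.Action;
}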