// Problem: at the end of GetAction, avBest is still null. Perhaps m_lVectors is null or empty?
public override Action GetAction(BeliefState bs)
{
    AlphaVector avBest = null;
    ValueOf(bs, m_lVectors, out avBest);
    return avBest.Action;
}
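If ValueOf never enters its loop (because m_lVectors is null or empty, e.g. InitV was never called or a previous iteration replaced the list with an empty one), avBest stays null and the dereference above throws. A minimal defensive sketch, assuming the same fields and the InitV method defined further down:

public override Action GetAction(BeliefState bs)
{
    // Guard: fall back to the initial per-action vectors instead of dereferencing null.
    if (m_lVectors == null || m_lVectors.Count == 0)
        InitV();
    AlphaVector avBest = null;
    ValueOf(bs, m_lVectors, out avBest);
    return avBest == null ? null : avBest.Action;
}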
// Computes (and caches) the vector alpha^{a,o}, where
// alpha^{a,o}[s] = sum over s' of O(s',a,o) * T(s,a,s') * alpha[s'].
private AlphaVector G(Action a, Observation o, AlphaVector av)
{
    if (!m_dGCache.ContainsKey(av))
    {
        m_dGCache[av] = new Dictionary<Action, Dictionary<Observation, AlphaVector>>();
    }
    if (!m_dGCache[av].ContainsKey(a))
    {
        m_dGCache[av][a] = new Dictionary<Observation, AlphaVector>();
    }
    if (m_dGCache[av][a].ContainsKey(o))
    {
        return m_dGCache[av][a][o];
    }
    AlphaVector avNew = new AlphaVector(a);
    foreach (State s in m_dDomain.States)
    {
        double dSum = 0.0;
        foreach (State sTag in m_dDomain.States)
        {
            dSum += sTag.ObservationProbability(a, o) * s.TransitionProbability(a, sTag) * av[sTag];
        }
        avNew[s] = dSum;
    }
    m_dGCache[av][a][o] = avNew;
    return avNew;
}
/**
 * Receives an alpha vector alpha, an action a, and an observation o.
 * Calculates and returns alpha_a_o, where
 * alpha_a_o[s] = SUM over s' of ( alpha[s'] * O(a,s',o) * T(s,a,s') ).
 */
private AlphaVector computeAlphaAO(AlphaVector alpha, Action a, Observation o)
{
    Tuple<AlphaVector, Action, Observation> key = new Tuple<AlphaVector, Action, Observation>(alpha, a, o);
    if (m_dAlphaA_O.ContainsKey(key))
    {
        return m_dAlphaA_O[key];
    }
    else
    {
        AlphaVector res = new AlphaVector(a);
        // We loop over all states s; for each s we compute alpha_a_o[s].
        foreach (State s in m_dDomain.States)
        {
            double accumulated_sum = 0;
            res[s] = 0;
            // Looping only over successors of s, because only for them T(s,a,succ) > 0.
            foreach (State succ in s.Successors(a))
            {
                accumulated_sum += alpha[succ] * succ.ObservationProbability(a, o) * s.TransitionProbability(a, succ);
            }
            res[s] = accumulated_sum;
        }
        m_dAlphaA_O.Add(key, res);
        return res;
    }
}
// Generates a new alpha vector from a belief state and an action.
private AlphaVector G(BeliefState bs, Action a)
{
    AlphaVector avSum = new AlphaVector(a);
    AlphaVector avGMax = null;
    double dValue = 0.0, dMaxValue = double.NegativeInfinity;
    foreach (Observation o in m_dDomain.Observations)
    {
        dMaxValue = double.NegativeInfinity;
        avGMax = null;
        foreach (AlphaVector avCurrent in m_lVectors)
        {
            AlphaVector avG = G(a, o, avCurrent);
            dValue = avG.InnerProduct(bs);
            if (dValue > dMaxValue)
            {
                dMaxValue = dValue;
                avGMax = avG;
            }
        }
        avSum += avGMax;
    }
    avSum *= m_dDomain.DiscountFactor;
    AlphaVector avResult = new AlphaVector(a);
    foreach (State s in m_dDomain.States)
    {
        avResult[s] = avSum[s] + s.Reward(a);
    }
    return avResult;
}
// Performs one improvement sweep over the belief set: backs up each belief state,
// keeps only improving (or best existing) vectors, and replaces m_lVectors with that set.
private void pruneAlphaVector(List<BeliefState> bsSet)
{
    List<BeliefState> copyBset = new List<BeliefState>(bsSet);
    List<AlphaVector> temp_lVectors = new List<AlphaVector>();
    while (copyBset.Any())
    {
        BeliefState _bs = copyBset.ElementAt(0);
        AlphaVector _alpha = backup(_bs);
        double _reward = _alpha.InnerProduct(_bs);
        if (this.m_valueFunction[_bs].InnerProduct(_bs) < _reward)
        {
            this.m_valueFunction[_bs] = _alpha;
            temp_lVectors.Add(_alpha);
            copyBset.Remove(_bs);
            List<BeliefState> copyBset_inner = new List<BeliefState>(copyBset);
            foreach (BeliefState temp_bs in copyBset_inner)
            {
                double __reward = _alpha.InnerProduct(temp_bs);
                double curr_val = this.m_valueFunction[temp_bs].InnerProduct(temp_bs);
                if (curr_val < __reward)
                {
                    this.m_valueFunction[temp_bs] = _alpha;
                    copyBset.Remove(temp_bs);
                }
            }
        }
        else
        {
            copyBset.Remove(_bs);
            double max_reward = double.NegativeInfinity;
            AlphaVector max_alpha = null;
            foreach (AlphaVector alpha in m_lVectors)
            {
                double reward = alpha.InnerProduct(_bs);
                if (reward > max_reward)
                {
                    max_reward = reward;
                    max_alpha = alpha;
                }
            }
            if (!temp_lVectors.Contains(max_alpha))
            {
                temp_lVectors.Add(max_alpha);
            }
            this.m_valueFunction[_bs] = max_alpha;
        }
    }
    //this.m_lVectors = new List<AlphaVector>();
    this.m_lVectors = temp_lVectors;
    //foreach (AlphaVector updated_alpha in m_valueFunction.Values)
    //{
    //    if (!this.m_lVectors.Contains(updated_alpha))
    //        this.m_lVectors.Add(updated_alpha);
    //}
}
// Scalar multiplication: returns a new alpha vector with every entry scaled by dScalar.
public static AlphaVector operator *(AlphaVector av, double dScalar)
{
    AlphaVector avNew = new AlphaVector(av.Action);
    foreach (KeyValuePair<State, double> p in av.Values)
    {
        avNew.m_dValues[p.Key] = p.Value * dScalar;
    }
    return avNew;
}
// Initializes the value function with one alpha vector per action, initialized over all domain states.
private void InitV()
{
    m_lVectors = new List<AlphaVector>();
    foreach (Action a in m_dDomain.Actions)
    {
        AlphaVector newAV = new AlphaVector(a);
        newAV.InitAlphaVector(m_dDomain.States);
        m_lVectors.Add(newAV);
    }
}
/**
 * Computes the best alpha vector rooted at the given action for belief state bs
 * (alpha_action_bs).
 */
private AlphaVector computeBestAlpha(Action action, BeliefState bs)
{
    // Initializing an alpha vector with action at its root.
    AlphaVector discountedRewardVector = new AlphaVector(action);

    // We loop over all observations; for each observation obs we find the alpha vector
    // maximizing dot(bs, alpha_action_obs). The sum of these vectors is used to build alpha_a_b.
    foreach (Observation obs in m_dDomain.Observations)
    {
        // We compute alpha_a_o for every observation o, according to the equation in the slides.
        AlphaVector cur_alpha_ao = null;
        AlphaVector best_alpha_ao = new AlphaVector();
        double best_val = double.NegativeInfinity;
        double cur_val = 0;
        // Looping over all alpha vectors, finding the one that maximizes dot(bs, alpha_action_obs).
        foreach (AlphaVector av in m_lVectors)
        {
            // We compute av_action_obs for every av.
            cur_alpha_ao = computeAlphaAO(av, action, obs);
            // Dot product between av_action_obs and the belief state bs.
            cur_val = cur_alpha_ao.InnerProduct(bs);
            // We keep the vector maximizing the dot product.
            if (cur_val > best_val)
            {
                best_alpha_ao = cur_alpha_ao;
                best_val = cur_val;
            }
        }
        // We accumulate the sum of these maximizing vectors: SUM over o of argmax dot(bs, alpha_a_o).
        discountedRewardVector += best_alpha_ao;
    }
    // Multiplying by the discount factor.
    discountedRewardVector = discountedRewardVector * m_dDomain.DiscountFactor;

    // The action's reward vector; we add it to the sum and return the result.
    AlphaVector rA;
    if (rewardsVectors.ContainsKey(action))
    {
        rA = rewardsVectors[action];
    }
    else
    {
        rA = new AlphaVector();
        foreach (State s in m_dDomain.States)
        {
            rA[s] = s.Reward(action);
        }
        rewardsVectors[action] = rA;
    }
    return discountedRewardVector + rA;
}
// Returns the alpha vector in the given list that maximizes the dot product with belief state b.
private AlphaVector ArgMax(List<AlphaVector> m_lVectors, BeliefState b)
{
    AlphaVector maxAlphaVector = new AlphaVector();
    foreach (AlphaVector aVector in m_lVectors)
    {
        if (aVector.InnerProduct(b) > maxAlphaVector.InnerProduct(b))
        {
            maxAlphaVector = aVector;
        }
    }
    return maxAlphaVector;
}
// Vector addition: returns a new alpha vector whose entries are the element-wise sum of av1 and av2.
public static AlphaVector operator +(AlphaVector av1, AlphaVector av2)
{
    AlphaVector avNew = new AlphaVector(av1);
    foreach (KeyValuePair<State, double> p in av2.Values)
    {
        if (!avNew.m_dValues.ContainsKey(p.Key))
        {
            avNew.m_dValues[p.Key] = 0.0;
        }
        avNew.m_dValues[p.Key] += p.Value;
    }
    return avNew;
}
private double ValueOf(BeliefState bs, List<AlphaVector> lVectors, out AlphaVector avBest)
{
    double dValue = 0.0, dMaxValue = double.NegativeInfinity;
    avBest = null;
    foreach (AlphaVector av in lVectors)
    {
        dValue = av.InnerProduct(bs);
        if (dValue > dMaxValue)
        {
            dMaxValue = dValue;
            avBest = av;
        }
    }
    return dMaxValue;
}
/**
 * Calculates the value of a belief state bs w.r.t. a list of alpha vectors,
 * i.e. finds the alpha vector alpha that maximizes dot(bs, alpha), returns the value of
 * this dot product, and returns the maximizing vector via avBest.
 */
private double ValueOf(BeliefState bs, List<AlphaVector> lVectors, out AlphaVector avBest)
{
    double dValue = 0.0, dMaxValue = double.NegativeInfinity;
    avBest = null;
    // We loop over all alpha vectors.
    foreach (AlphaVector av in lVectors)
    {
        dValue = av.InnerProduct(bs);
        if (dValue > dMaxValue) // taking the maximum dot product
        {
            dMaxValue = dValue;
            avBest = av;
        }
    }
    return dMaxValue;
}
private AlphaVector backup(BeliefState bs)
{
    AlphaVector avBest = null, avCurrent = null;
    double dMaxValue = double.NegativeInfinity, dValue = 0.0;
    // your code here
    foreach (var action in m_dDomain.Actions)
    {
        avCurrent = G(bs, action);
        dValue = avCurrent.InnerProduct(bs);
        if (dValue > dMaxValue)
        {
            dMaxValue = dValue;
            avBest = avCurrent;
        }
    }
    return avBest;
}
/**
 * The Backup operation: receives a belief state bs and returns Backup(m_lVectors, bs),
 * i.e. the alpha vector alpha_a_bs maximizing dot(bs, alpha_a_bs).
 */
private AlphaVector backup(BeliefState bs)
{
    AlphaVector avBest = new AlphaVector(), avCurrent = new AlphaVector();
    double dMaxValue = double.NegativeInfinity, dValue = 0.0;
    // We loop over all actions in the domain; for every action a
    // we take the best alpha vector rooted at a.
    foreach (Action a in m_dDomain.Actions)
    {
        avCurrent = computeBestAlpha(a, bs); // alpha_a_b
        dValue = avCurrent.InnerProduct(bs); // dot product with bs
        if (dValue > dMaxValue)
        {
            // taking the vector alpha_a_b that maximizes the dot product
            avBest = avCurrent;
            dMaxValue = dValue;
        }
    }
    return avBest; // returns the best alpha_a_bs
}
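For reference, a restatement of the point-based backup these implementations follow (notation: $b$ is the belief, $V$ the current vector set, $O$ and $T$ the observation and transition functions, $\gamma$ the discount factor, $r_a(s) = R(s,a)$); this is the standard PBVI/Perseus backup rather than anything specific to this code:

\[
\alpha^{a,o}(s) = \sum_{s'} O(s',a,o)\, T(s,a,s')\, \alpha(s'), \qquad \alpha \in V
\]
\[
\alpha_b^{a} = r_a + \gamma \sum_{o} \underset{\{\alpha^{a,o} \,:\, \alpha \in V\}}{\arg\max}\; b \cdot \alpha^{a,o}
\]
\[
\mathrm{backup}(V, b) = \underset{\{\alpha_b^{a} \,:\, a \in A\}}{\arg\max}\; b \cdot \alpha_b^{a}
\]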
// Returns the best alpha vector corresponding to a given belief state.
private AlphaVector Backup(BeliefState bs)
{
    AlphaVector avBest = null;
    double dMaxValue = double.NegativeInfinity, dValue = 0.0;
    foreach (Action aCurr in m_dDomain.Actions)
    {
        // G(bs, aCurr) already maximizes over m_lVectors internally, so one evaluation per action suffices.
        AlphaVector avBA = G(bs, aCurr);
        dValue = avBA.InnerProduct(bs);
        if (dMaxValue < dValue)
        {
            dMaxValue = dValue;
            avBest = avBA; // keep the newly generated vector, not a pre-existing one from m_lVectors
        }
    }
    return avBest;
}
public void PointBasedVI(int cBeliefs, int cMaxIterations)
{
    Random rand = new Random();
    List<BeliefState> B = GenerateB(cBeliefs, rand);
    InitV();
    m_dGCache = new Dictionary<AlphaVector, Dictionary<Action, Dictionary<Observation, AlphaVector>>>();
    List<BeliefState> BTag;
    while (cMaxIterations > 0)
    {
        BTag = CopyB(B);
        List<AlphaVector> VTag = new List<AlphaVector>();
        while (BTag.Count != 0)
        {
            // choose an arbitrary point in BTag to improve
            BeliefState bCurr = RandomBeliefState(BTag, rand);
            AlphaVector newAV = Backup(bCurr);
            AlphaVector avBest = new AlphaVector();
            double currValue = ValueOf(bCurr, m_lVectors, out avBest);
            double AlphaDotb = newAV.InnerProduct(bCurr);
            if (AlphaDotb > currValue)
            {
                // remove from BTag the points whose value was improved by the new vector newAV
                BTag.RemoveAll(b => newAV.InnerProduct(b) >= ValueOf(b, m_lVectors, out AlphaVector avTmp));
                avBest = newAV;
            }
            else
            {
                BTag.Remove(bCurr);
                avBest = ArgMax(m_lVectors, b: bCurr);
            }
            VTag.Add(avBest);
        }
        m_lVectors = VTag;
        cMaxIterations--;
    }
}
// Two alpha vectors are considered equal if every entry of one is within 0.001 of the
// corresponding entry of the other (checked in both directions).
public override bool Equals(object obj)
{
    if (obj is AlphaVector)
    {
        AlphaVector av = (AlphaVector)obj;
        foreach (KeyValuePair<State, double> p in m_dValues)
        {
            if (Math.Abs(p.Value - av[p.Key]) > 0.001)
            {
                return false;
            }
        }
        foreach (KeyValuePair<State, double> p in av.m_dValues)
        {
            if (Math.Abs(p.Value - this[p.Key]) > 0.001)
            {
                return false;
            }
        }
        return true;
    }
    return false;
}
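Since AlphaVector overrides Equals and is also used as a dictionary key (m_dGCache above), it should override GetHashCode as well, otherwise lookups can miss cache entries that are equal by value. With a tolerance-based Equals, no value-based hash is consistent with equality, so a safe (if coarse) option is a constant hash. This is a hedged sketch about the rest of the class, not part of the original code:

// Sketch only (an assumption, not part of the original snippets): Equals above ignores Action and
// uses a tolerance, so the only hash code guaranteed to be consistent with it is a constant.
// This keeps Dictionary<AlphaVector, ...> lookups correct, at the cost of hashing performance.
public override int GetHashCode()
{
    return 0;
}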
/**
 * Initializes an alpha vector following a best practice presented in the provided article.
 */
private AlphaVector createInitialAlphaVector()
{
    AlphaVector V0 = new AlphaVector();
    double minReward = Double.PositiveInfinity;
    foreach (State s in m_dDomain.States)
    {
        foreach (Action a in m_dDomain.Actions)
        {
            if (minReward > s.Reward(a))
            {
                minReward = s.Reward(a);
            }
        }
    }
    double defaultVal = (1 / (1 - m_dDomain.DiscountFactor)) * minReward; // best practice
    foreach (State s in m_dDomain.States)
    {
        V0[s] = defaultVal;
    }
    return V0;
}
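For context, the constant computed here matches the standard lower-bound initialization used by Perseus: a single vector whose entries are

\[
V_0(s) = \frac{1}{1-\gamma}\,\min_{s',\,a} R(s',a)
\]

which never overestimates the value of any belief, so subsequent backups can only improve the value function.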
public void PointBasedVI(int cBeliefs, int cMaxIterations)
{
    // your code here
    List<BeliefState> setBeliefStates = CollectBeliefs(cBeliefs);
    this.m_lVectors = new List<AlphaVector>(this.m_dDomain.States.Count());
    foreach (BeliefState bs in setBeliefStates)
    {
        m_lVectors.Add(new AlphaVector());
    }
    List<AlphaVector> _m_lVectors = new List<AlphaVector>();
    foreach (BeliefState bs in setBeliefStates)
    {
        AlphaVector curr = backup(bs);
        _m_lVectors.Add(curr);
    }
    this.m_lVectors = new List<AlphaVector>(_m_lVectors);
    initialValueFunction(setBeliefStates);
    for (int i = 0; i < cMaxIterations; i++)
    {
        pruneAlphaVector(setBeliefStates);
    }
}
// Copy constructor: duplicates the action and the state-to-value map.
public AlphaVector(AlphaVector av)
{
    Action = av.Action;
    m_dValues = new Dictionary<State, double>(av.m_dValues);
}
/**
 * Performs the value iteration algorithm using the Perseus update scheme:
 * generates a set containing cBeliefs belief states and performs value iteration for
 * at most cMaxIterations iterations.
 */
public void PointBasedVI(int cBeliefs, int cMaxIterations)
{
    // Generates an initial set containing cBeliefs belief states.
    List<BeliefState> beliefStates = CollectBeliefs(cBeliefs);
    List<AlphaVector> vTag; // V'
    //const double EPSILON = 0.1; // the convergence boundary
    int iterationsLeft = cMaxIterations;
    while (iterationsLeft > 0)
    {
        vTag = new List<AlphaVector>();
        List<BeliefState> beliefStatesLeftToImprove = new List<BeliefState>(beliefStates); // B'
        while (beliefStatesLeftToImprove.Count() > 0)
        {
            // While there are belief states to improve.
            //Console.WriteLine("Improvable belief states left");
            //Console.WriteLine(beliefStatesLeftToImprove.Count());

            // Selecting a random index of a belief state to improve.
            int ri = RandomGenerator.Next(beliefStatesLeftToImprove.Count());
            // We iterate over the belief state set and take the ri'th item.
            List<BeliefState>.Enumerator e = beliefStatesLeftToImprove.GetEnumerator();
            for (int i = 0; i < ri + 1; i++) // iterating until the belief state at index ri
            {
                e.MoveNext();
            }
            BeliefState sampledBS = e.Current; // sampledBS is a randomly chosen belief state to improve
            //Console.WriteLine("Iterations left: " + iterationsLeft);
            //Console.WriteLine("Improvable bs left: " + beliefStatesLeftToImprove.Count());

            // We calculate the backup of sampledBS.
            AlphaVector alpha = backup(sampledBS);
            AlphaVector alphaToAdd; // will contain the alpha vector to add to V'
            AlphaVector prevBestAlphaVector = null;
            // Calculating the value of sampledBS (V(sampledBS)), which is the best dot product alpha*b.
            double prevValue = ValueOf(sampledBS, m_lVectors, out prevBestAlphaVector);
            if (alpha.InnerProduct(sampledBS) >= prevValue)
            {
                // alpha is dominating; remove all belief states that are improved by it.
                //Console.WriteLine("Found an improving vec");
                List<BeliefState> beliefStatesToKeep = new List<BeliefState>();
                foreach (BeliefState b_prime in beliefStatesLeftToImprove)
                {
                    AlphaVector a = null;
                    if (alpha.InnerProduct(b_prime) < ValueOf(b_prime, m_lVectors, out a))
                    {
                        beliefStatesToKeep.Add(b_prime);
                    }
                }
                beliefStatesLeftToImprove = beliefStatesToKeep;
                // Since alpha is dominating, we add alpha to V'.
                alphaToAdd = alpha;
            }
            else
            {
                // alpha does not improve; we remove sampledBS from the set.
                beliefStatesLeftToImprove.Remove(sampledBS);
                alphaToAdd = prevBestAlphaVector;
            }
            if (!vTag.Contains(alphaToAdd))
            {
                vTag.Add(alphaToAdd);
            }
        }
        /**
         * // We estimate how much the alpha vector set changed.
         * double diff = estimateDiff(m_lVectors, vTag, beliefStates);
         * Console.WriteLine(diff);
         *
         * // If the difference between the current set and the previous one is less than epsilon,
         * // we finish the update algorithm.
         * //if (diff < EPSILON)
         * //    break;
         **/
        Console.WriteLine("Iterations left {0}", iterationsLeft);
        m_lVectors = vTag;
        iterationsLeft--;
    }
}
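Assuming these methods live on a single policy/solver class (called PBVISolver below purely for illustration; the real class name, constructor, and belief accessor are not shown in these snippets), usage might look like the following sketch:

// Hypothetical driver; PBVISolver, domain, and InitialBelief are placeholders for whatever
// class actually hosts PointBasedVI and GetAction in this project.
PBVISolver solver = new PBVISolver(domain);
solver.PointBasedVI(cBeliefs: 100, cMaxIterations: 50); // build the alpha-vector set

BeliefState bs = domain.InitialBelief;  // assumed accessor, for illustration only
Action a = solver.GetAction(bs);        // greedy action w.r.t. the learned value function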