/// <summary>
/// Computes the belief state that follows this one after executing action
/// <paramref name="a"/> and receiving observation <paramref name="o"/>.
/// </summary>
/// <remarks>
/// For every state s' of the domain the unnormalized weight is
/// ObservationProbability(s', a, o) * sum over s of TransitionProbability(s, a, s') * b(s),
/// where the sum only visits states s whose Successors(a) contain s'.
/// The weights are then divided by their total so they sum to one.
/// When that total is not positive (the observation cannot occur after
/// <paramref name="a"/> from this belief) the weights are left unnormalized
/// instead of being turned into NaN by a 0/0 division; the debug assertion
/// at the end still runs on the result.
/// </remarks>
/// <param name="a">The action that was executed.</param>
/// <param name="o">The observation received after executing <paramref name="a"/>.</param>
/// <returns>A new <see cref="BeliefState"/> built over the same domain.</returns>
public BeliefState Next(Action a, Observation o)
{
    BeliefState bsNext = new BeliefState(m_dDomain);
    double normalizationFactor = 0.0;

    foreach (State stateTag in m_dDomain.States)
    {
        // Predicted mass of stateTag before the observation is taken into account.
        double predicted = 0.0;
        foreach (State state in m_dDomain.States)
        {
            if (state.Successors(a).Contains(stateTag))
            {
                // NOTE(review): the indexer is used as in the original; this assumes every
                // domain state has an entry in m_dBeliefs - confirm against BeliefState.
                predicted += state.TransitionProbability(a: a, sTag: stateTag) * m_dBeliefs[state];
            }
        }

        double weighted = predicted * stateTag.ObservationProbability(a: a, o: o);
        bsNext.AddBelief(stateTag, weighted);
        normalizationFactor += weighted;
    }

    // Dividing by zero would fill the belief with NaN, so only normalize a positive total.
    if (normalizationFactor > 0)
    {
        // Snapshot the keys once: indexing Keys with ElementAt(i) re-walks the collection
        // on every iteration, and the entries are rewritten while we loop.
        foreach (State stateToNormalize in bsNext.m_dBeliefs.Keys.ToList())
        {
            bsNext.m_dBeliefs[stateToNormalize] /= normalizationFactor;
        }
    }

    Debug.Assert(bsNext.Validate());
    return bsNext;
}
/// <summary>
/// Computes the belief state that follows this one after executing action
/// <paramref name="a"/> and receiving observation <paramref name="o"/>.
/// </summary>
/// <remarks>
/// Each state s' of the domain is assigned
/// ObservationProbability(s', a, o) * transitionProbabilityForEachState(s', a)
/// / probabilityOfObservationGivenAB(a, o).
/// NOTE(review): both helpers are defined outside this block; presumably they return
/// the sum over s of Tr(s, a, s') * b(s) and Pr(o | a, b) respectively - confirm.
/// The original note on this method referred to lecture 13, page 3.
/// When the observation probability is not positive, 0.0 is stored for every state
/// instead of the NaN/Infinity a division by zero would produce.
/// </remarks>
/// <param name="a">The action that was executed.</param>
/// <param name="o">The observation received after executing <paramref name="a"/>.</param>
/// <returns>A new <see cref="BeliefState"/> built over the same domain.</returns>
public BeliefState Next(Action a, Observation o)
{
    BeliefState bsNext = new BeliefState(m_dDomain);

    // The arguments (a, o) do not change inside the loop, so evaluate the
    // denominator once instead of once per state.
    double observationProbability = probabilityOfObservationGivenAB(a, o);

    foreach (State sTag in m_dDomain.States)
    {
        double unnormalized = sTag.ObservationProbability(a, o) * transitionProbabilityForEachState(sTag, a);

        // Guard the division: a zero denominator would poison the belief with NaN/Infinity.
        double stateProbabilityInBtag = observationProbability > 0
            ? unnormalized / observationProbability
            : 0.0;

        bsNext.AddBelief(sTag, stateProbabilityInBtag);
    }

    return bsNext;
}
/// <summary>
/// Computes the belief state that follows this one after executing action
/// <paramref name="a"/> and receiving observation <paramref name="o"/>.
/// </summary>
/// <remarks>
/// Only successors (under <paramref name="a"/>) of states with positive belief are visited.
/// Step 1 accumulates b(s) * TransitionProbability(s, a, s') into the new belief,
/// step 2 scales each entry by ObservationProbability(s', a, o) and sums the results,
/// step 3 divides every entry by that sum.
/// NOTE(review): step 1 relies on AddBelief adding to an existing entry rather than
/// overwriting it - confirm against BeliefState.AddBelief.
/// When the sum is not positive (the observation cannot occur after
/// <paramref name="a"/> from this belief) step 3 is skipped so the entries are not
/// turned into NaN by a 0/0 division; the debug assertion at the end still runs.
/// </remarks>
/// <param name="a">The action that was executed.</param>
/// <param name="o">The observation received after executing <paramref name="a"/>.</param>
/// <returns>A new <see cref="BeliefState"/> built over the same domain.</returns>
public BeliefState Next(Action a, Observation o)
{
    BeliefState bsNext = new BeliefState(m_dDomain);
    HashSet<State> reachableStates = new HashSet<State>();

    // Step 1: push the current belief through the transition function, remembering
    // which successor states received any contribution.
    foreach (KeyValuePair<State, double> entry in m_dBeliefs)
    {
        if (entry.Value > 0)
        {
            foreach (State successor in entry.Key.Successors(a))
            {
                reachableStates.Add(successor);
                bsNext.AddBelief(successor, entry.Value * entry.Key.TransitionProbability(a, successor));
            }
        }
    }

    // Step 2: weight each reachable state by the probability of the observation and
    // accumulate the total, which serves as the normalizing factor.
    double normalizingFactor = 0;
    foreach (State reachable in reachableStates)
    {
        double observationProbability = reachable.ObservationProbability(a, o);
        double weighted = bsNext[reachable] * observationProbability;
        normalizingFactor += weighted;
        bsNext[reachable] = weighted;
    }

    // Step 3: normalize, but never divide by zero.
    if (normalizingFactor > 0)
    {
        foreach (State reachable in reachableStates)
        {
            bsNext[reachable] /= normalizingFactor;
        }
    }

    Debug.Assert(bsNext.Validate());
    return bsNext;
}