/// <summary>
/// Converts the state matrix, action label vector and transition state matrix into three
/// parallel arrays: entry <c>i</c> of each array is built from row/element <c>i</c> of the inputs.
/// </summary>
/// <param name="states">State matrix; each row is converted into one state.</param>
/// <param name="actions">Action label vector; element <c>i</c> labels the transition out of row <c>i</c> of <paramref name="states"/>.</param>
/// <param name="statesP">Transition states matrix; row <c>i</c> is the state reached from row <c>i</c> of <paramref name="states"/>.</param>
/// <param name="properties">(Optional) Feature summary. When null, the summary is computed from <paramref name="states"/>.</param>
/// <param name="discretizer">Discretization function handed to <c>MDPConverter.GetState</c> for every row.</param>
/// <returns>
/// A tuple of (states, actions, transition states). The arrays are sized from
/// <c>states.Rows</c>, <c>actions.Length</c> and <c>statesP.Rows</c> respectively, and the first
/// <c>states.Rows</c> entries of each are populated.
/// </returns>
public static Tuple<IEnumerable<IState>, IEnumerable<IAction>, IEnumerable<IState>> GetStates(Matrix states, Vector actions, Matrix statesP, Math.Summary properties, IDiscretizer discretizer)
{
    // Fall back to summarizing the state matrix when the caller supplied no summary.
    Math.Summary summary;
    if (properties != null)
    {
        summary = properties;
    }
    else
    {
        summary = Math.Summary.Summarize(states);
    }

    // NOTE(review): the graph-building GetStates overload calls discretizer.Initialize(...)
    // before discretizing; this overload does not. Confirm callers initialize the discretizer.
    var fromStates = new IState[states.Rows];
    var transitions = new IAction[actions.Length];
    var toStates = new IState[statesP.Rows];

    for (int row = 0; row < states.Rows; row++)
    {
        IState origin = MDPConverter.GetState(states[row], summary, discretizer);
        fromStates[row] = origin;

        IState destination = MDPConverter.GetState(statesP[row], summary, discretizer);
        toStates[row] = destination;

        // The action links the two state identifiers produced for this row.
        transitions[row] = MDPConverter.GetAction(actions[row], origin.Id, destination.Id);
    }

    return Tuple.Create<IEnumerable<IState>, IEnumerable<IAction>, IEnumerable<IState>>(fromStates, transitions, toStates);
}
/// <summary>
/// Builds a graph of MDP states from the state matrices, action label vector and reward vector.
/// Rows that discretize to the same identifier share a single <c>MDPState</c> node; every row
/// adds one successor entry (action, transition state, reward) to its source node.
/// </summary>
/// <param name="states">State matrix; row <c>i</c> is the source state of transition <c>i</c>.</param>
/// <param name="actions">Action label vector; element <c>i</c> labels transition <c>i</c>.</param>
/// <param name="statesP">Transition states matrix; row <c>i</c> is the target state of transition <c>i</c>.</param>
/// <param name="reward">Reward value vector; element <c>i</c> is attached to transition <c>i</c>.</param>
/// <param name="properties">Feature properties from the original set. When null, a summary is computed from <paramref name="states"/>.</param>
/// <param name="discretizer">Discretization function for generating unique state identifiers; it is initialized here from <paramref name="states"/> and the summary.</param>
/// <returns>
/// The starting states of the graph: states that occur as a source row and never occur as a
/// transition target. The remaining states are reachable through their successors.
/// </returns>
public static IEnumerable<MDPState> GetStates(Matrix states, Vector actions, Matrix statesP, Vector reward, Math.Summary properties, IDiscretizer discretizer)
{
    Math.Summary summary = (properties != null ? properties : Math.Summary.Summarize(states));

    discretizer.Initialize(states, summary);

    // Every state seen so far (as a source or as a target), keyed by discretized identifier.
    var sdist = new Dictionary<double, MDPState>();
    // Number of observed transitions per (source id, target id) action key.
    var adist = new Dictionary<string, double>();
    // Candidate starting states: added when first seen as a source, dropped once seen as a target.
    var results = new Dictionary<double, MDPState>();

    for (int i = 0; i < states.Rows; i++)
    {
        double sid = discretizer.Discretize(states[i], summary);

        // Single lookup instead of ContainsKey + indexer.
        MDPState source;
        if (!sdist.TryGetValue(sid, out source))
        {
            source = MDPConverter.GetState(states[i], summary, discretizer);
            sdist.Add(sid, source);
            results.Add(sid, source);
        }

        double tsid = discretizer.Discretize(statesP[i], summary);

        MDPState tstate;
        if (!sdist.TryGetValue(tsid, out tstate))
        {
            // First seen as a target, so it is registered but never becomes a starting state.
            tstate = MDPConverter.GetState(statesP[i], summary, discretizer);
            sdist.Add(tsid, tstate);
        }

        string key = GetActionKey((int)sid, (int)tsid);

        // TryGetValue leaves count at 0 for an unseen key, so the first occurrence stores 1.
        double count;
        adist.TryGetValue(key, out count);
        adist[key] = count + 1;

        source.Successors.Add(new MDPSuccessorState(MDPConverter.GetAction(actions[i], (int)sid, (int)tsid), 0, tstate, reward[i]));

        // A state that is the target of any transition is not a starting state.
        // Dictionary.Remove is a no-op when the key is absent, so no ContainsKey guard is needed.
        results.Remove(tsid);
    }

    // Assign each successor's action probability: the transition count for its
    // (source, target) pair divided by the sum of those counts over all successor
    // entries of the same source (one entry per observed row).
    foreach (var state in sdist.Values)
    {
        double sum = state.Successors.Sum(s => adist[GetActionKey(state.Id, s.State.Id)]);

        foreach (var successor in state.Successors)
        {
            var key = GetActionKey(state.Id, successor.State.Id);
            ((Action)successor.Action).Probability = adist[key] / sum;
        }
    }

    // return starting states
    return results.Values;
}