/// <summary>
/// Builds a graph of MDP states from paired source/transition rows and returns its starting states.
/// </summary>
/// <param name="states">State matrix; row i is the source state of transition i.</param>
/// <param name="actions">Action label vector; element i labels transition i.</param>
/// <param name="statesP">Transition states matrix; row i is the state reached by transition i.</param>
/// <param name="reward">Reward value vector; element i is the reward attached to transition i.</param>
/// <param name="properties">Feature properties from the original set; when null, a summary is computed from <paramref name="states"/>.</param>
/// <param name="discretizer">Discretization function for generating unique state identifiers.</param>
/// <returns>The states that occur as a transition source but never as a transition target.</returns>
public static IEnumerable<MDPState> GetStates(Matrix states, Vector actions, Matrix statesP, Vector reward, Math.Summary properties, IDiscretizer discretizer)
{
    // Fall back to summarizing the source states when no summary was supplied.
    Math.Summary summary = properties;
    if (summary == null)
    {
        summary = Math.Summary.Summarize(states);
    }

    discretizer.Initialize(states, summary);

    // Every state encountered so far (as source or target), keyed by its discretized identifier.
    var known = new Dictionary<double, MDPState>();
    // Number of observed transitions per (source, target) action key.
    var transitionCounts = new Dictionary<string, double>();
    // Candidate starting states: first seen as a source and not (yet) seen as a target.
    var roots = new Dictionary<double, MDPState>();

    for (int row = 0; row < states.Rows; row++)
    {
        double sid = discretizer.Discretize(states[row], summary);
        MDPState source;
        if (!known.TryGetValue(sid, out source))
        {
            source = MDPConverter.GetState(states[row], summary, discretizer);
            known.Add(sid, source);
            roots.Add(sid, source);
        }

        double tsid = discretizer.Discretize(statesP[row], summary);
        MDPState target;
        if (!known.TryGetValue(tsid, out target))
        {
            target = MDPConverter.GetState(statesP[row], summary, discretizer);
            known.Add(tsid, target);
        }

        // A missing key leaves the counter at its default of 0, so the first sighting stores 1.
        string key = GetActionKey((int)sid, (int)tsid);
        double seen;
        transitionCounts.TryGetValue(key, out seen);
        transitionCounts[key] = seen + 1;

        source.Successors.Add(
            new MDPSuccessorState(
                MDPConverter.GetAction(actions[row], (int)sid, (int)tsid),
                0,
                target,
                reward[row]));

        // Anything reached by a transition is not a starting state; Remove is a no-op for absent keys.
        roots.Remove(tsid);
    }

    // Turn the raw counts into probabilities: each successor gets its pair count divided by
    // the sum of pair counts taken over every successor entry of the same source state.
    foreach (var state in known.Values)
    {
        double total = state.Successors.Sum(s => transitionCounts[GetActionKey(state.Id, s.State.Id)]);
        foreach (var successor in state.Successors)
        {
            double count = transitionCounts[GetActionKey(state.Id, successor.State.Id)];
            ((AI.Action)successor.Action).Probability = count / total;
        }
    }

    // return starting states
    return roots.Values;
}
/// <summary>
/// Wraps a state vector in an MDP state whose identifier comes from the discretizer.
/// </summary>
/// <param name="state">State vector; stored as the new state's features.</param>
/// <param name="summary">Feature properties from the original set, passed through to the discretizer.</param>
/// <param name="discretizer">Discretization function for generating unique state identifiers.</param>
/// <returns>A new MDPState with the discretized identifier (truncated to int) and <paramref name="state"/> as its features.</returns>
public static MDPState GetState(Vector state, Summary summary, IDiscretizer discretizer)
{
    MDPState result = new MDPState((int)discretizer.Discretize(state, summary));
    result.Features = state;
    return result;
}
/// <summary>
/// Creates the MDP state that corresponds to a single state vector.
/// </summary>
/// <param name="state">State vector; assigned to the resulting state's Features.</param>
/// <param name="summary">Feature properties from the original set, handed to the discretizer.</param>
/// <param name="discretizer">Discretization function for generating unique state identifiers.</param>
/// <returns>MDPState built from the discretized identifier (cast to int) with <paramref name="state"/> as its features.</returns>
public static MDPState GetState(Vector state, Summary summary, IDiscretizer discretizer)
{
    // The discretizer's result is cast to int to form the state identifier.
    int identifier = (int)discretizer.Discretize(state, summary);

    MDPState mdpState = new MDPState(identifier);
    mdpState.Features = state;
    return mdpState;
}
/// <summary>
/// Assembles the MDP state graph described by the transition rows and returns the starting states.
/// </summary>
/// <param name="states">State matrix; row i is the source state of transition i.</param>
/// <param name="actions">Action label vector; element i labels transition i.</param>
/// <param name="statesP">Transition states matrix; row i is the state reached by transition i.</param>
/// <param name="reward">Reward value vector; element i is the reward attached to transition i.</param>
/// <param name="properties">Feature properties from the original set; when null, a summary is computed from <paramref name="states"/>.</param>
/// <param name="discretizer">Discretization function for generating unique state identifiers.</param>
/// <returns>The states that occur as a transition source but never as a transition target.</returns>
public static IEnumerable<MDPState> GetStates(
    Matrix states,
    Vector actions,
    Matrix statesP,
    Vector reward,
    Summary properties,
    IDiscretizer discretizer)
{
    var summary = properties ?? Summary.Summarize(states);
    discretizer.Initialize(states, summary);

    // All states seen so far, whether as source or as target, keyed by discretized identifier.
    var allStates = new Dictionary<double, MDPState>();
    // How many times each (source, target) action key was observed.
    var pairCounts = new Dictionary<string, double>();
    // States first seen as a source that have not been reached by any transition.
    var startStates = new Dictionary<double, MDPState>();

    for (var row = 0; row < states.Rows; row++)
    {
        var sid = discretizer.Discretize(states[row], summary);
        MDPState from;
        if (!allStates.TryGetValue(sid, out from))
        {
            from = GetState(states[row], summary, discretizer);
            allStates.Add(sid, from);
            startStates.Add(sid, from);
        }

        var tsid = discretizer.Discretize(statesP[row], summary);
        MDPState to;
        if (!allStates.TryGetValue(tsid, out to))
        {
            to = GetState(statesP[row], summary, discretizer);
            allStates.Add(tsid, to);
        }

        // TryGetValue leaves the counter at 0 for an unseen key, so the first sighting stores 1.
        var pairKey = GetActionKey((int)sid, (int)tsid);
        double previous;
        pairCounts.TryGetValue(pairKey, out previous);
        pairCounts[pairKey] = previous + 1;

        from.Successors.Add(
            new MDPSuccessorState(GetAction(actions[row], (int)sid, (int)tsid), 0, to, reward[row]));

        // A state that is the target of a transition is not a starting state.
        // Remove simply reports false when the key is absent.
        startStates.Remove(tsid);
    }

    // Normalize: each successor's probability is its pair count over the summed pair counts
    // of all successor entries belonging to the same source state.
    foreach (var state in allStates.Values)
    {
        var total = state.Successors.Sum(s => pairCounts[GetActionKey(state.Id, s.State.Id)]);
        foreach (var successor in state.Successors)
        {
            var pairKey = GetActionKey(state.Id, successor.State.Id);
            var action = (Action)successor.Action;
            action.Probability = pairCounts[pairKey] / total;
        }
    }

    // return starting states
    return startStates.Values;
}