示例#1
0
        /// <summary>
        /// Converts every state row and its paired transition row into state objects and builds the
        /// action linking the two, returning the three collections in row order.
        /// </summary>
        /// <param name="states">State matrix; its row count drives the conversion loop.</param>
        /// <param name="actions">Action label vector, read at the same index as each row of <paramref name="states"/>.</param>
        /// <param name="statesP">Transition states matrix, read at the same index as each row of <paramref name="states"/>.</param>
        /// <param name="properties">(Optional) Feature summary. When null, a summary is computed from <paramref name="states"/>.</param>
        /// <param name="discretizer">Discretization function handed to the state conversion.</param>
        /// <returns>A tuple holding (states, actions, transition states).</returns>
        public static Tuple <IEnumerable <IState>, IEnumerable <IAction>, IEnumerable <IState> > GetStates(Matrix states,
                                                                                                           Vector actions, Matrix statesP, Math.Summary properties, IDiscretizer discretizer)
        {
            // Fall back to summarizing the state matrix when the caller supplied no summary.
            Math.Summary featureSummary;
            if (properties != null)
            {
                featureSummary = properties;
            }
            else
            {
                featureSummary = Math.Summary.Summarize(states);
            }

            // Each array is sized from its own input, yet all three are filled using the row index of "states".
            IState[]  origins      = new IState[states.Rows];
            IAction[] transitions  = new IAction[actions.Length];
            IState[]  destinations = new IState[statesP.Rows];

            for (int row = 0; row < states.Rows; ++row)
            {
                IState origin = MDPConverter.GetState(states[row], featureSummary, discretizer);
                origins[row] = origin;

                IState destination = MDPConverter.GetState(statesP[row], featureSummary, discretizer);
                destinations[row] = destination;

                // The action is identified by its label plus the ids of the two states it connects.
                transitions[row] = MDPConverter.GetAction(actions[row], origin.Id, destination.Id);
            }

            return Tuple.Create <IEnumerable <IState>, IEnumerable <IAction>, IEnumerable <IState> >(origins, transitions, destinations);
        }
示例#2
0
        /// <summary>
        /// Builds a graph of MDP states from the state matrices and the action label vector.
        /// One <see cref="MDPState"/> is created per distinct discretized state id; every row adds a
        /// successor entry (action, transition state, reward) to the state its source row maps to.
        /// </summary>
        /// <param name="states">State matrix; one source state per row.</param>
        /// <param name="actions">Action label vector, read at the same index as each row of <paramref name="states"/>.</param>
        /// <param name="statesP">Transition states matrix, read at the same index as each row of <paramref name="states"/>.</param>
        /// <param name="reward">Reward value vector, read at the same index as each row of <paramref name="states"/>.</param>
        /// <param name="properties">Feature properties from the original set. When null, a summary is computed from <paramref name="states"/>.</param>
        /// <param name="discretizer">Discretization function for generating unique state identifiers; it is initialized here before use.</param>
        /// <returns>
        /// The starting states: states that occur as a source row and never as the transition target of any row.
        /// The rest of the graph is reachable through their successors.
        /// </returns>
        public static IEnumerable <MDPState> GetStates(Matrix states, Vector actions, Matrix statesP, Vector reward,
                                                       Math.Summary properties, IDiscretizer discretizer)
        {
            Math.Summary summary = (properties != null ? properties : Math.Summary.Summarize(states));

            discretizer.Initialize(states, summary);

            var sdist   = new Dictionary <double, MDPState>(); // every state seen so far, keyed by discretized id
            var adist   = new Dictionary <string, double>();   // number of rows observed per (source, target) key
            var results = new Dictionary <double, MDPState>(); // candidate starting states

            for (int i = 0; i < states.Rows; i++)
            {
                double sid = discretizer.Discretize(states[i], summary);

                // A state first seen as a source is a starting-state candidate until it shows up as a target.
                MDPState source;
                if (!sdist.TryGetValue(sid, out source))
                {
                    source = MDPConverter.GetState(states[i], summary, discretizer);
                    sdist.Add(sid, source);
                    results.Add(sid, source);
                }

                double tsid = discretizer.Discretize(statesP[i], summary);

                // Reuse the existing instance for an already-known target so the graph shares nodes.
                MDPState tstate;
                if (!sdist.TryGetValue(tsid, out tstate))
                {
                    tstate = MDPConverter.GetState(statesP[i], summary, discretizer);
                    sdist.Add(tsid, tstate);
                }

                string key = GetActionKey((int)sid, (int)tsid);

                // TryGetValue leaves count at 0 for an unseen key, so this both inserts and increments.
                double count;
                adist.TryGetValue(key, out count);
                adist[key] = count + 1;

                source.Successors.Add(new MDPSuccessorState(MDPConverter.GetAction(actions[i], (int)sid, (int)tsid), 0, tstate, reward[i]));

                // Anything reached by a transition cannot be a starting state; Remove is a no-op when absent.
                results.Remove(tsid);
            }

            foreach (var state in sdist.Values)
            {
                // The total adds the pair count once per successor entry, so a (source, target) pair that
                // occurred on several rows contributes its count once for each of those entries.
                // NOTE(review): confirm this normalization is the intended probability model.
                double sum = state.Successors.Sum(s => adist[GetActionKey(state.Id, s.State.Id)]);
                foreach (var successor in state.Successors)
                {
                    var key = GetActionKey(state.Id, successor.State.Id);
                    ((Action)successor.Action).Probability = adist[key] / sum;
                }
            }

            // return starting states
            return(results.Values);
        }