/// <summary>
/// Initializes a new MDPSuccessorState.
/// </summary>
/// <param name="action">Action leading to the successor state.</param>
/// <param name="cost">Cost of transitioning to the state.</param>
/// <param name="state">The successor (transition) state.</param>
/// <param name="reward">Reward received for the transition.</param>
public MDPSuccessorState(IAction action, double cost, IMDPState state, double reward)
{
    Action = action;
    Cost = cost;
    State = state;
    Reward = reward;
}
/// <summary>
/// Creates a successor-state descriptor for an MDP transition.
/// </summary>
/// <param name="action">Action leading to the successor state.</param>
/// <param name="cost">Cost of transitioning to the state.</param>
/// <param name="state">The successor (transition) state.</param>
/// <param name="reward">Reward received for the transition.</param>
public MDPSuccessorState(IAction action, double cost, IMDPState state, double reward)
{
    this.Action = action;
    this.Cost = cost;
    this.State = state;
    this.Reward = reward;
}
/// <summary>
/// Builds the decision space available from the given state: every decision
/// in the configured decision set that the state can support.
/// </summary>
/// <param name="s">State to evaluate (expected to be a <see cref="State"/>).</param>
/// <returns>The supported subset of decisions.</returns>
public IMDPDecisionSpace GenDecisionSpace(IMDPState s)
{
    IMDPDecisionSpace supported = new DecisionSpace();
    var state = s as State;
    foreach (Decision candidate in _ds)
    {
        if (state.CanSupportDecision(candidate))
        {
            supported.Add(candidate);
        }
    }
    return supported;
}
/// <summary>
/// Converts a Matrix of states into an array of State objects.
/// </summary>
/// <param name="states">State matrix, one state per row.</param>
/// <param name="properties">(Optional) Feature summary; computed from <paramref name="states"/> when null.</param>
/// <param name="discretizer">Discretization function to apply for reducing states.</param>
/// <returns>One IMDPState per matrix row.</returns>
public static IEnumerable<IMDPState> GetStates(Matrix states, Summary properties, IDiscretizer discretizer)
{
    var summary = properties ?? Summary.Summarize(states);
    var result = new IMDPState[states.Rows];
    for (int row = 0; row < states.Rows; row++)
    {
        result[row] = GetState(states[row], summary, discretizer);
    }
    return result;
}
/// <summary>
/// Converts a Matrix of states into an array of State objects.
/// </summary>
/// <param name="states">State matrix, one state per row.</param>
/// <param name="properties">(Optional) Feature summary; computed from <paramref name="states"/> when null.</param>
/// <param name="discretizer">Discretization function to apply for reducing states.</param>
/// <returns>One IMDPState per matrix row.</returns>
public static IEnumerable <IMDPState> GetStates(Matrix states, Math.Summary properties, IDiscretizer discretizer)
{
    // Idiomatic null-coalescing instead of the (x != null ? x : y) ternary.
    Math.Summary summary = properties ?? Math.Summary.Summarize(states);
    var slist = new IMDPState[states.Rows];
    for (int i = 0; i < states.Rows; i++)
    {
        slist[i] = MDPConverter.GetState(states[i], summary, discretizer);
    }
    return (slist);
}
/// <summary>
/// Returns the already-stored state equal to <paramref name="_s"/>,
/// or adds <paramref name="_s"/> to the state set and returns it.
/// </summary>
/// <param name="_s">Candidate state.</param>
/// <returns>The canonical state instance held by the set.</returns>
public IMDPState CreateOrFind(IMDPState _s)
{
    IMDPState existing = _ss.FirstOrDefault(s => s.Equals(_s));
    if (existing != null)
    {
        return existing;
    }
    _ss.Add(_s);
    return _s;
}
/// <summary>
/// Value equality: two states are equal when they hold the same resources
/// with the same counts.
/// </summary>
/// <param name="other">State to compare against.</param>
/// <returns>True when both states map the same resources to the same values.</returns>
public bool Equals(IMDPState other)
{
    // Single cast with a null guard: the original re-cast `other as State` on
    // every access and would throw NullReferenceException for a null argument
    // or a non-State implementation. Per the Equals contract, return false instead.
    var otherState = other as State;
    if (otherState == null)
    {
        return false;
    }
    if (this.Keys.Count != otherState.Keys.Count)
    {
        return false;
    }
    foreach (IALPResource ts in this.Keys)
    {
        if (!otherState.Keys.Contains(ts) || otherState[ts] != this[ts])
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Clones the given state, decrements (by one, floored at zero) every resource
/// used by any product on the route, then returns the matching canonical state
/// from the state space (or null when no match exists).
/// </summary>
/// <param name="ss">State space to resolve the reduced state in.</param>
/// <param name="s">State to reduce.</param>
/// <param name="route">Route whose products' resources are consumed.</param>
/// <returns>The canonical reduced state, or null if not present in the space.</returns>
public static State MinusOneUnit(this StateSpace ss, IMDPState s, Route route)
{
    IMDPState clone = s.Clone();
    var cloneState = clone as State;
    foreach (Product product in route)
    {
        foreach (Resource resource in product)
        {
            if (cloneState.Keys.Contains(resource) && cloneState[resource] > 0)
            {
                cloneState[resource] -= 1;
            }
        }
    }
    return ss.FirstOrDefault(i => i.Equals(clone)) as State;
}
/// <summary>
/// Recursively converts a state and its successors into their equivalent math
/// forms, appending one example (row/element in each ref accumulator) per
/// unvisited (state, successor) transition.
/// </summary>
/// <param name="state">IMDPState instance to convert; a null state is a no-op.</param>
/// <param name="nodes">Edge ids ("stateId:successorId") already emitted, used to de-duplicate.</param>
/// <param name="states">Matrix accumulating source-state feature vectors.</param>
/// <param name="actions">Vector accumulating action id values.</param>
/// <param name="statesP">Matrix accumulating successor (transition) state feature vectors.</param>
/// <param name="rewards">Vector accumulating reward values.</param>
private static void Convert(
    this IMDPState state,
    ref List<string> nodes,
    ref Matrix states,
    ref Vector actions,
    ref Matrix statesP,
    ref Vector rewards)
{
    if (state != null)
    {
        foreach (IMDPSuccessor successor in state.GetSuccessors())
        {
            // Widen the accumulators once the feature width is known.
            if (state.Features.Length != states.Cols)
            {
                states = Matrix.Reshape(states, states.Rows, state.Features.Length);
            }
            // NOTE(review): this condition checks the CURRENT state's width against
            // statesP, but reshapes using the SUCCESSOR's width — confirm intended.
            if (state.Features.Length != statesP.Cols)
            {
                statesP = Matrix.Reshape(statesP, statesP.Rows, ((IMDPState)successor.State).Features.Length);
            }
            // Unique id for this (state -> successor) transition edge.
            string id = $"{state.Id}:{successor.State.Id}";
            if (!nodes.Contains(id))
            {
                // Append one example: source vector, action id, successor vector, reward.
                states = states.Insert(state.ToVector(), states.Rows - 1, VectorType.Row);
                actions = actions.Insert(actions.Length - 1, successor.Action.Id);
                statesP = statesP.Insert(((IMDPState)successor.State).ToVector(), statesP.Rows - 1, VectorType.Row);
                rewards = rewards.Insert(rewards.Length - 1, successor.Reward);
                nodes.Add(id);
            }
            // Recurse into non-terminal successors; skip self-loops to avoid
            // unbounded recursion.
            if (!successor.State.IsTerminal)
            {
                var successorState = (IMDPState)successor.State;
                if (successorState.Id != state.Id)
                {
                    successorState.Convert(ref nodes, ref states, ref actions, ref statesP, ref rewards);
                }
            }
        }
    }
}
/// <summary>
/// Builds a state from the remaining quantity of each resource in the resource
/// space, then returns the matching canonical state from the set — adding the
/// new state first when no match exists.
/// </summary>
/// <param name="RecDic">Source of remaining counts per resource.</param>
/// <returns>The canonical state for the given resource counts.</returns>
public IALPState CreateOrFind(MetaResouceState RecDic)
{
    var candidate = new State();
    foreach (Resource resource in this.ResSpace)
    {
        // No safety checks performed here. (translated from original comment)
        candidate.Add(resource, RecDic.GetRemainNum(resource));
    }
    IMDPState found = _ss.FirstOrDefault(i => i.Equals(candidate));
    if (found != null)
    {
        return found as IALPState;
    }
    _ss.Add(candidate);
    return candidate;
}
/// <summary>
/// Builds a state from the supplied per-resource counts, then returns the
/// matching canonical state from the set — adding the new state first when
/// no match exists.
/// </summary>
/// <param name="RecDic">Count for every resource in the resource space (a missing key throws).</param>
/// <returns>The canonical state for the given resource counts.</returns>
public IALPState CreateOrFind(IDictionary <IALPResource, int> RecDic)
{
    var candidate = new State();
    foreach (Resource resource in this.ResSpace)
    {
        // No safety checks performed here. (translated from original comment)
        candidate.Add(resource, RecDic[resource]);
    }
    IMDPState found = _ss.FirstOrDefault(i => i.Equals(candidate));
    if (found != null)
    {
        return found as IALPState;
    }
    _ss.Add(candidate);
    return candidate;
}
/// <summary>
/// Transition probability from s1 to s2 at the given time under decision a.
/// When s1 == s2 this is the "stay" probability (one minus the total mass of
/// leaving via any supported route); otherwise it is the probability of the
/// single route that maps s1 onto s2, or zero when no such route exists.
/// </summary>
/// <param name="time">Time period.</param>
/// <param name="s1">Origin state.</param>
/// <param name="s2">Destination state.</param>
/// <param name="a">Decision in effect (expected to be a Decision).</param>
/// <returns>The transition probability.</returns>
public double Prob(int time, IMDPState s1, IMDPState s2, IMDPDecision a)
{
    if (s1.Equals(s2))
    {
        // Staying put: 1 minus the summed probability of leaving via each
        // route supported by the decision's open product set.
        return (1 - suppRoute((a as Decision).OpenProductSet).Sum(p => Ro(time) * P(time, p, a as Decision)));
    }
    else
    {
        //Find a route via which s1 transits to s2.
        Route r = suppRoute((a as Decision).OpenProductSet).
                  FirstOrDefault(i => (GenStateSpace(s1, a) as StateSpace).MinusOneUnit(s1, i).Equals(s2));
        if (r != null)
        {
            return (Ro(time) * P(time, r, a as Decision));
        }
        else
        {
            // s2 is not reachable from s1 under this decision.
            return (0);
        }
    }
}
/// <summary>
/// Generates the reachable state space for a state under a decision: the state
/// itself plus each state obtained by subtracting one product from the
/// decision's open product set. Empty when the state cannot support the decision.
/// </summary>
/// <param name="s">Origin state.</param>
/// <param name="a">Decision whose open products are consumed.</param>
/// <returns>The reachable subset of the state space.</returns>
public IMDPStateSpace GenStateSpace(IMDPState s, IMDPDecision a)
{
    // Current state minus each product in the open product set. (translated)
    IMDPStateSpace reachable = new StateSpace();
    var state = s as State;
    var decision = a as Decision;
    if (state.CanSupportDecision(decision))
    {
        foreach (Product product in decision.OpenProductSet)
        {
            IMDPState reduced = (_ss as StateSpace).MinusOneUnit(s, product);
            if (!reachable.Contains(reduced))
            {
                reachable.Add(reduced);
            }
        }
        // Also include the origin state itself. (translated)
        reachable.Add(s);
    }
    return reachable;
}
/// <summary>
/// Converts a Matrix of states into an array of State objects.
/// </summary>
/// <param name="states">State matrix, one state per row.</param>
/// <param name="properties">(Optional) Feature summary; computed from <paramref name="states"/> when null.</param>
/// <param name="discretizer">Discretization function to apply for reducing states.</param>
/// <returns>One IMDPState per matrix row.</returns>
public static IEnumerable<IMDPState> GetStates(Matrix states, Math.Summary properties, IDiscretizer discretizer)
{
    // Idiomatic null-coalescing instead of the (x != null ? x : y) ternary.
    Math.Summary summary = properties ?? Math.Summary.Summarize(states);
    var slist = new IMDPState[states.Rows];
    for (int i = 0; i < states.Rows; i++)
    {
        slist[i] = MDPConverter.GetState(states[i], summary, discretizer);
    }
    return slist;
}
/// <summary>
/// End-to-end Q-learning test: builds a small "house" MDP (rooms are states,
/// doors are actions, rewards steer the agent outside), trains a QLearnerModel
/// on the converted examples, and checks the learned policy at key states.
/// </summary>
public void Test_QLearning_Path_Finder()
{
    // start — wire up the room graph; rewards favor the routes leading outside.
    var master = new MDPState(2);
    var kitchen = new MDPState(3);
    master.Successors.Add(new MDPSuccessorState(new AI.Action(1, "Goto Kitchen"), 0.1, kitchen, 0));
    var entrance = new MDPState(1);
    var lounge = new MDPState(4);
    kitchen.Successors.Add(new MDPSuccessorState(new AI.Action(2, "Goto Lounge"), 0.1, lounge, -15));
    kitchen.Successors.Add(new MDPSuccessorState(new AI.Action(3, "Goto Entrance Hall"), 0, entrance, -30));
    var spare = new MDPState(0);
    lounge.Successors.Add(new MDPSuccessorState(new AI.Action(4, "Goto Spare Room"), 0.1, spare, -10));
    var outside = new MDPState(5);
    lounge.Successors.Add(new MDPSuccessorState(new AI.Action(5, "Go Outside"), 0.1, outside, 30));
    entrance.Successors.Add(new MDPSuccessorState(new AI.Action(6, "Go Outside"), 0.1, outside, 50));
    outside.Successors.Add(new MDPSuccessorState(new AI.Action(7, "Stay Outside"), 0.2, outside, 50));

    // Seven distinct transitions were added above, so each example component
    // should contain exactly seven rows/elements.
    var examples = MDPConverter.ToExamples(master);
    Assert.Equal(7, examples.Item1.Rows);
    Assert.Equal(7, examples.Item2.Length);
    Assert.Equal(7, examples.Item3.Rows);
    Assert.Equal(7, examples.Item4.Length);

    var generator = new Reinforcement.QLearning.QLearnerGenerator() { Lambda = 0.9 };
    Reinforcement.QLearning.QLearnerModel model = (Reinforcement.QLearning.QLearnerModel)generator.Generate(examples.Item1, examples.Item2, examples.Item3, examples.Item4);

    // The model predicts the action id to take from each state.
    Assert.Equal(3, (int)model.Predict(kitchen.ToVector()) /*, "Expected to move from kitchen to entrance hall"*/);
    Assert.Equal(5, (int)model.Predict(lounge.ToVector()) /*, "Expected to move from lounge to outside"*/);
    Assert.Equal(7, (int)model.Predict(outside.ToVector()) /*, "Expected to stay outside"*/);

    // Walk the learned policy from the start state, capped at ~20 steps in
    // case the policy loops; record the visited path for diagnostics.
    string path = "Start: " + master.Id;
    IMDPState current = master;
    int counter = 0;
    while (current.Id != outside.Id)
    {
        if (counter > 20)
        {
            break;
        }
        double v = model.Predict(current.ToVector());
        var next = current.GetSuccessors().Where(w => w.Action.Id == (int)v).FirstOrDefault() as IMDPSuccessor;
        if (next == null)
        {
            break;
        }
        current = next.State as IMDPState;
        counter++;
        path += $"\n next: { current.Id } ({ next.Reward.ToString("N2") })";
    }
    Console.Write(path);
}
/// <summary>
/// Converts the MDP State into a vector form.
/// </summary>
/// <param name="state">MDP State.</param>
/// <returns>The state's feature vector.</returns>
public static Vector ToVector(this IMDPState state)
{
    return state.Features;
}
/// <summary>
/// Equality comparison with another MDP state.
/// </summary>
/// <param name="other">State to compare against.</param>
/// <returns>Never returns; equality is not implemented for this type.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public bool Equals(IMDPState other)
{
    throw new NotImplementedException();
}
/// <summary>
/// Converts an MDP State (recursively) into its equivalent math form,
/// including all successor states, by delegating to the sequence overload.
/// </summary>
/// <param name="state">The starting state.</param>
/// <returns>Tuple of (states, actions, transition states, rewards).</returns>
public static Tuple <Matrix, Vector, Matrix, Vector> ToExamples(this IMDPState state)
{
    var roots = new[] { state };
    return roots.ToExamples();
}
/// <summary>
/// Expected reward at time t in state s under decision a: the probability-weighted
/// fare of every supported route that the state can satisfy.
/// </summary>
/// <param name="t">Time period.</param>
/// <param name="s">Current state (expected to be a State).</param>
/// <param name="a">Decision in effect (expected to be a Decision).</param>
/// <returns>The expected reward value.</returns>
public double Reward(int t, IMDPState s, IMDPDecision a)
{
    var decision = a as Decision;
    var state = s as State;
    double expected = 0;
    foreach (Route route in suppRoute(decision.OpenProductSet))
    {
        if (state.CanSupportRoute(route))
        {
            expected += Ro(t) * P(t, route, decision) * f(route);
        }
    }
    return expected;
}