// Returns the qTurple with the highest Q value among the operators known
// for state `s`. States that have never been seen are initialized first
// via AddNewState (default Q values for every applicable operator).
// Throws NoOperatorFoundException when no operator is applicable in `s`.
private qTurple GetBestTurple(IState s)
{
    IEnumerable<qTurple> list;
    if (!QValues.TryGetValue(s, out list))
    {
        // Unknown state: seed its Q-value entries, then read them back
        // directly instead of recursing into this method again.
        AddNewState(s);
        list = QValues[s];
    }
    // Any() instead of Count() == 0: stops at the first element rather
    // than enumerating the whole sequence.
    if (!list.Any())
    {
        throw new NoOperatorFoundException();
    }
    // Single pass over the entries (the original enumerated twice via
    // ElementAt(0) plus a full foreach). Strict '>' keeps the earliest
    // element on ties, matching the original comparison.
    return list.Aggregate((best, turple) => turple.Q > best.Q ? turple : best);
}
// Registers a freshly encountered state in QValues: one qTurple per
// operator applicable in `state`, each seeded with DefaultQ(state, op).
private void AddNewState(IState state)
{
    var entries = new List<qTurple>();
    foreach (IOperator op in Env.ApplicableOperators(state))
    {
        entries.Add(new qTurple
        {
            Q = DefaultQ(state, op),
            Op = op
        });
    }
    QValues.Add(state, entries);
}
// Runs one Q-learning training episode from Start.State toward Goal.State,
// updating QValues in place with the one-step TD rule:
//   Q(s,a) += Alpha * (r + Gamma * max_a' Q(s',a') - Q(s,a))
// The episode ends when the goal state is reached, the stopwatch exceeds
// timeAvailable (milliseconds), or `stopped` is set externally.
// Side effects: increments episodeCount, appends to EpisodeRewards and
// TimesList, and sets goalFound once the goal is reached.
private void Train()
{
    episodeCount += 1;
    EpisodeReward = 0;
    IState currentState = Start.State;
    while (!currentState.Equals(Goal.State) && stopwatch.Elapsed.TotalMilliseconds < timeAvailable && !stopped)
    {
        // Choose an action from the (typically exploratory) training policy,
        // apply it in the environment, and accumulate the episode reward.
        IOperator op = trainingPolicy.action(currentState);
        IOutcome outcome = Env.act(currentState, op);
        EpisodeReward += outcome.Reward;
        qTurple turple = getTurple(currentState, op);
        // Shaping bonus: before the goal has ever been reached (goalFound is
        // only set at the end of a successful episode), a transition landing
        // on the goal gets an extra +100 on its Q value to speed up reward
        // propagation. NOTE(review): this fires on every goal-reaching step
        // of every episode until goalFound flips — confirm that is intended.
        if (!goalFound && outcome.State.Equals(Goal.State))
        {
            turple.Q += 100;
        }
        // Standard off-policy Q-learning update; GetBestOp/getTurple supply
        // max_a' Q(s', a') for the successor state. Note this reads turple.Q
        // AFTER the shaping bonus above, so the bonus feeds the TD target.
        turple.Q = turple.Q + Alpha * (outcome.Reward + Gamma * getTurple(outcome.State, GetBestOp(outcome.State)).Q - turple.Q);
        currentState = outcome.State;
    }
    // Record per-episode statistics (reward and elapsed wall-clock time).
    EpisodeRewards.Add(EpisodeReward);
    TimesList.Add(stopwatch.Elapsed.TotalMilliseconds);
    // Remember that the goal has been reached at least once; this disables
    // the shaping bonus in all later episodes.
    if (currentState.Equals(Goal.State))
    {
        goalFound = true;
    }
}