// One generator shared by all calls, so that successive selections draw from a
// single pseudo-random sequence instead of re-seeding on every call.
// NOTE(review): System.Random instances are not thread-safe; add synchronization
// if agents may select actions from several threads at once.
private static readonly System.Random randomActionSource = new System.Random();

/// <summary>
/// Picks one of the actions returned by <c>MDP.GetAllActions()</c> pseudo-randomly.
/// </summary>
/// <returns>The action stored at a randomly chosen index of the action list.</returns>
/// <remarks>
/// Earlier this method always returned the first action in the list, which
/// contradicted its name. For an empty list the chosen index is 0 and the
/// list's own indexer is left to report the error, exactly as before.
/// </remarks>
private TAction SelectRandomAction()
{
    IList<TAction> allActions = MDP.GetAllActions();

    // Random.Next(n) yields a value in [0, n), i.e. always a valid index for a
    // non-empty list; Next(0) yields 0.
    int index = randomActionSource.Next(allActions.Count);
    return allActions[index];
}
/// <summary>
/// Creates an agent for the given MDP and resets all of its learning state.
/// </summary>
/// <param name="mdp">
/// The decision process handed to the base constructor; its action list is
/// also used to build the Q-table.
/// </param>
public QLearningAgent(MDP<TState, TAction> mdp)
    : base(mdp)
{
    // Start with an empty map from (state, action) pairs to double values.
    Q = new Dictionary<Pair<TState, TAction>, double>();

    // The Q-table is constructed from the MDP's full action list.
    var availableActions = mdp.GetAllActions();
    qTable = new QTable<TState, TAction>(availableActions);

    // Fresh counters: nothing has been counted yet.
    stateActionCount = new FrequencyCounter<Pair<TState, TAction>>();
    actionCounter = 0;
}
/// <summary>
/// Scans every action from <c>MDP.GetAllActions()</c> and returns the one whose
/// Q-value in <c>CurrentState</c>, passed through <c>LearningFunction</c>,
/// is the greatest.
/// </summary>
/// <returns>
/// The first action that reaches the highest learning-function value, or
/// <c>null</c> when no action produces a value that compares greater than
/// negative infinity (which includes the case of an empty action list).
/// </returns>
private TAction ActionMaximizingLearningFunction()
{
    TAction bestAction = null;
    double bestScore = double.NegativeInfinity;

    foreach (TAction candidate in MDP.GetAllActions())
    {
        double candidateQ = qTable.GetQValue(CurrentState, candidate);
        var score = this.LearningFunction(candidateQ);

        // Only a strictly greater score replaces the current best, so ties
        // keep the earlier action. The negated form mirrors the original
        // "greater than" test exactly, including for unordered values.
        if (!(score > bestScore))
        {
            continue;
        }

        bestScore = score;
        bestAction = candidate;
    }

    return bestAction;
}