/// <summary>
/// Builds a learner around a freshly constructed <see cref="QModel"/> and
/// installs an <see cref="EpsilonGreedyActionSelectionStrategy"/> as the
/// action selection strategy.
/// </summary>
/// <param name="state_count">Forwarded to the <see cref="QModel"/> constructor as its first argument.</param>
/// <param name="action_count">Forwarded to the <see cref="QModel"/> constructor as its second argument.</param>
/// <param name="alpha">Value assigned to <c>LearningRate</c>; defaults to 0.1.</param>
/// <param name="gamma">Value assigned to <c>DiscountFactor</c>; defaults to 0.7.</param>
/// <param name="initial_Q">Forwarded to the <see cref="QModel"/> constructor as its third argument; defaults to 0.1.</param>
public QLearning(
    int state_count,
    int action_count,
    double alpha = 0.1,
    double gamma = 0.7,
    double initial_Q = 0.1)
{
    // The model is created first; the assignments below keep the original
    // order in case the property setters rely on it.
    mModel = new QModel(state_count, action_count, initial_Q);

    LearningRate = alpha;
    DiscountFactor = gamma;

    mActionSelectionStrategy = new EpsilonGreedyActionSelectionStrategy();
}
/// <summary>
/// Builds a learner around a freshly constructed <see cref="QModel"/>, stores
/// the supplied coefficients, and installs an
/// <see cref="EpsilonGreedyActionSelectionStrategy"/> as the action selection
/// strategy.
/// </summary>
/// <param name="state_count">Forwarded to the <see cref="QModel"/> constructor as its first argument.</param>
/// <param name="action_count">Forwarded to the <see cref="QModel"/> constructor as its second argument.</param>
/// <param name="alpha">Value assigned to <c>LearningRate</c>; defaults to 0.1.</param>
/// <param name="beta">Value stored in <c>mBeta</c>; defaults to 0.1.</param>
/// <param name="rho">Value stored in <c>mRho</c>; defaults to 0.7.</param>
/// <param name="initial_Q">Forwarded to the <see cref="QModel"/> constructor as its third argument; defaults to 0.1.</param>
public RLearning(
    int state_count,
    int action_count,
    double alpha = 0.1,
    double beta = 0.1,
    double rho = 0.7,
    double initial_Q = 0.1)
{
    // The model is created first; the assignments below keep the original
    // order in case any of the targets are properties with setter logic.
    mModel = new QModel(state_count, action_count, initial_Q);

    LearningRate = alpha;
    mRho = rho;
    mBeta = beta;

    mActionSelectionStrategy = new EpsilonGreedyActionSelectionStrategy();
}
/// <summary>
/// Builds a learner around an existing <see cref="SparseQModel"/> and installs
/// an <see cref="EpsilonGreedyActionSelectionStrategy"/> as the action
/// selection strategy. No other members are assigned by this constructor.
/// </summary>
/// <param name="model">The model instance to store in <c>mModel</c>; must not be null.</param>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="model"/> is null.
/// </exception>
public QLearning(SparseQModel model)
{
    // Fail fast: a null model would otherwise be stored silently and only
    // surface later, at the first use of mModel, far from the cause.
    if (object.ReferenceEquals(model, null))
    {
        throw new System.ArgumentNullException("model");
    }

    mModel = model;
    mActionSelectionStrategy = new EpsilonGreedyActionSelectionStrategy();
}