/// <summary>
/// Initializes a <c>QLearning</c> instance with a newly created <c>QModel</c>
/// and an <c>EpsilonGreedyActionSelectionStrategy</c>.
/// </summary>
/// <param name="state_count">Passed as the first argument to the <c>QModel</c> constructor.</param>
/// <param name="action_count">Passed as the second argument to the <c>QModel</c> constructor.</param>
/// <param name="alpha">Value assigned to <c>LearningRate</c>; defaults to 0.1.</param>
/// <param name="gamma">Value assigned to <c>DiscountFactor</c>; defaults to 0.7.</param>
/// <param name="initial_Q">Passed as the third argument to the <c>QModel</c> constructor; defaults to 0.1.</param>
public QLearning(
            int state_count,
            int action_count,
            double alpha = 0.1,
            double gamma = 0.7,
            double initial_Q = 0.1)
        {
            // The model is created before the properties are written.
            // NOTE(review): the property setters are not visible here and may
            // rely on mModel already being set, so this order is kept.
            this.mModel = new QModel(state_count, action_count, initial_Q);

            this.LearningRate = alpha;
            this.DiscountFactor = gamma;

            this.mActionSelectionStrategy = new EpsilonGreedyActionSelectionStrategy();
        }
Beispiel #2
0
        /// <summary>
        /// Initializes an <c>RLearning</c> instance with a newly created <c>QModel</c>
        /// and an <c>EpsilonGreedyActionSelectionStrategy</c>.
        /// </summary>
        /// <param name="state_count">Passed as the first argument to the <c>QModel</c> constructor.</param>
        /// <param name="action_count">Passed as the second argument to the <c>QModel</c> constructor.</param>
        /// <param name="alpha">Value assigned to <c>LearningRate</c>; defaults to 0.1.</param>
        /// <param name="beta">Value stored in <c>mBeta</c>; defaults to 0.1.</param>
        /// <param name="rho">Value stored in <c>mRho</c>; defaults to 0.7.</param>
        /// <param name="initial_Q">Passed as the third argument to the <c>QModel</c> constructor; defaults to 0.1.</param>
        public RLearning(
            int state_count,
            int action_count,
            double alpha = 0.1,
            double beta = 0.1,
            double rho = 0.7,
            double initial_Q = 0.1)
        {
            // The model is created before LearningRate is written.
            // NOTE(review): the setter is not visible here and may rely on
            // mModel already being set, so this order is kept.
            this.mModel = new QModel(state_count, action_count, initial_Q);

            this.LearningRate = alpha;
            this.mRho = rho;
            this.mBeta = beta;

            this.mActionSelectionStrategy = new EpsilonGreedyActionSelectionStrategy();
        }
 /// <summary>
 /// Initializes a <c>QLearning</c> instance around an existing model and
 /// installs an <c>EpsilonGreedyActionSelectionStrategy</c>.
 /// </summary>
 /// <param name="model">
 /// The <c>SparseQModel</c> stored in <c>mModel</c> as-is; it is neither copied nor
 /// validated here.
 /// </param>
 public QLearning(SparseQModel model)
 {
     // This overload assigns only the two fields below.
     this.mModel = model;

     this.mActionSelectionStrategy = new EpsilonGreedyActionSelectionStrategy();
 }