Exemplo n.º 1
0
        /// <summary>
        /// Picks the action (and its associated value) for the current actor using an
        /// epsilon-greedy rule: with probability governed by the epsilon returned from
        /// <c>EpsilonCalcute</c>, a random environment action is returned; otherwise the
        /// result of <c>ChooseAction</c> for <paramref name="state"/> is returned.
        /// </summary>
        /// <param name="step">Current step, forwarded to <c>EpsilonCalcute</c> to obtain epsilon.</param>
        /// <param name="state">State forwarded to <c>ChooseAction</c> when the non-random branch is taken.</param>
        /// <returns>
        /// A tuple of the selected action and its q value. On the random branch the action comes
        /// from <c>_env.RandomAction()</c> and q is fixed at 0; otherwise both values are those
        /// produced by <c>ChooseAction</c>.
        /// </returns>
        public (double[] action, double q) EpsilonGreedy(int step, double[] state)
        {
            // eps_total is 90% of the configured epoch count, converted to an int.
            // NOTE(review): presumably the span over which epsilon is annealed — confirm in EpsilonCalcute.
            var threshold = EpsilonCalcute(step, eps_total: Convert.ToInt32(_epoches * 0.9));

            // Random branch: the draw fell below epsilon, so return a random action with q = 0.
            if (NP.Random() < threshold)
            {
                return (_env.RandomAction(), 0);
            }

            // Otherwise defer to ChooseAction for this state.
            var (chosenAction, chosenQ) = ChooseAction(state);
            return (chosenAction, chosenQ);
        }