/// <summary>
/// Selects an action with an epsilon-greedy policy: with probability epsilon a
/// random action is taken from the environment, otherwise the action proposed by
/// <c>ChooseAction</c> for the given state is used.
/// </summary>
/// <param name="step">Current step, forwarded to <c>EpsilonCalcute</c> to obtain epsilon.</param>
/// <param name="state">Current state, forwarded to <c>ChooseAction</c> on the greedy branch.</param>
/// <returns>
/// The selected action together with its q value. On the random (exploration)
/// branch the q value is always 0.
/// </returns>
public (double[] action, double q) EpsilonGreedy(int step, double[] state)
{
    // Epsilon is computed against 90% of the configured epoch count.
    // NOTE(review): presumably the schedule finishes decaying at that point — confirm in EpsilonCalcute.
    int scheduleLength = Convert.ToInt32(_epoches * 0.9);
    var epsilon = EpsilonCalcute(step, eps_total: scheduleLength);

    // Exploration: draw below epsilon => random action, no q estimate available.
    if (NP.Random() < epsilon)
    {
        return (_env.RandomAction(), 0);
    }

    // Exploitation: defer to ChooseAction for this state.
    var (chosenAction, qValue) = ChooseAction(state);
    return (chosenAction, qValue);
}