public virtual void UpdateQ_SARSA_Lambda(int prevActIndex, int curActIndex)
{
    // Compute the reward for the previous transition, then delegate to the
    // full SARSA(lambda) overload with the stored state pair.
    double reward = EnvironmentModeler.GetReward(m_prevState, m_curState,
        SoccerAction.GetActionTypeFromIndex(prevActIndex, Params.MoveKings));
    UpdateQ_SARSA_Lambda(reward, m_prevState, m_curState, prevActIndex, curActIndex);
}
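// Illustrative only: a minimal sketch of the tabular SARSA(lambda) update the
// wrapper above delegates to. The parameter names below (q, e, alpha, gamma,
// lambda) are assumptions for illustration; the actual overload is implemented
// elsewhere in this class and may differ.
private static void SarsaLambdaUpdateSketch(
    double[,] q, double[,] e, int s, int s2, int a, int a2,
    double reward, double alpha, double gamma, double lambda)
{
    // TD error for the observed (s, a) -> (s2, a2) transition
    double delta = reward + gamma * q[s2, a2] - q[s, a];
    e[s, a] += 1.0; // accumulating eligibility trace for the visited pair
    for (int i = 0; i < q.GetLength(0); i++)
        for (int j = 0; j < q.GetLength(1); j++)
        {
            q[i, j] += alpha * delta * e[i, j]; // credit all eligible pairs
            e[i, j] *= gamma * lambda;          // decay every trace each step
        }
}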
public virtual void UpdateQ_QL_Watkins(int prevActIndex, int curActIndex, bool isNaive)
{
    // Compute the reward for the previous transition, then delegate to the
    // full Watkins Q(lambda) overload; isNaive selects the naive variant,
    // which does not cut eligibility traces on exploratory actions.
    double reward = EnvironmentModeler.GetReward(m_prevState, m_curState,
        SoccerAction.GetActionTypeFromIndex(prevActIndex, Params.MoveKings));
    UpdateQ_QL_Watkins(reward, m_prevState, m_curState, prevActIndex, curActIndex, isNaive);
}
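// Illustrative only: a minimal sketch of Watkins's Q(lambda) and its naive
// variant, which the wrapper above delegates to. All names below are
// hypothetical (including greedyA2, the greedy action in the successor
// state); the real overload is implemented elsewhere in this class.
private static void QLambdaWatkinsSketch(
    double[,] q, double[,] e, int s, int s2, int a, int a2, int greedyA2,
    double reward, double alpha, double gamma, double lambda, bool isNaive)
{
    // Off-policy target: bootstrap on the best successor value, not the
    // action actually chosen.
    double maxQ = q[s2, 0];
    for (int j = 1; j < q.GetLength(1); j++)
        if (q[s2, j] > maxQ)
            maxQ = q[s2, j];
    double delta = reward + gamma * maxQ - q[s, a];
    e[s, a] += 1.0; // accumulating eligibility trace
    for (int i = 0; i < q.GetLength(0); i++)
        for (int j = 0; j < q.GetLength(1); j++)
        {
            q[i, j] += alpha * delta * e[i, j];
            // Watkins's version zeroes all traces after an exploratory
            // action; the naive variant keeps decaying them regardless.
            if (isNaive || a2 == greedyA2)
                e[i, j] *= gamma * lambda;
            else
                e[i, j] = 0.0;
        }
}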
protected virtual int RLThink()
{
    // Select the next action: greedy by default, epsilon-greedy when
    // exploration is enabled.
    int greedyActIndex = m_qTable.GetCurrentGreedyActionIndex();
    int actIndex = greedyActIndex;

    if (Params.Epsillon > 0.0)
    {
        actIndex = ChooseActionEpsilonGreedy(
            Params.Epsillon,
            SoccerAction.GetActionCount(Params.MoveKings, TeammatesCount),
            greedyActIndex);
    }

    //SoccerAction act = SoccerAction.GetActionFromIndex(actIndex, Params.MoveKings, MyUnum);

    if (Cycle > 0) // in cycle 0 there is no previous state, so skip the update
    {
        double reward = EnvironmentModeler.GetReward(m_prevState, m_curState,
            SoccerAction.GetActionTypeFromIndex(m_prevActionIndex, Params.MoveKings));

        // Apply the update rule selected in the parameters.
        switch (Params.RLMethod)
        {
            case Params.RLMethods.Evolutionary:
                m_qTable.UpdateQ_Evolutionary(m_prevActionIndex, reward);
                break;
            case Params.RLMethods.Q_Zero:
                m_qTable.UpdateQ_QLearning(m_prevActionIndex);
                break;
            case Params.RLMethods.SARSA_Zero:
                m_qTable.UpdateQ_SARSA(m_prevActionIndex, actIndex);
                break;
            case Params.RLMethods.SARSA_Lambda:
                m_qTable.UpdateQ_SARSA_Lambda(m_prevActionIndex, actIndex);
                break;
            case Params.RLMethods.Q_Lambda_Watkins:
                m_qTable.UpdateQ_QL_Watkins(m_prevActionIndex, actIndex, false);
                break;
            case Params.RLMethods.Q_Lambda_Naive:
                m_qTable.UpdateQ_QL_Watkins(m_prevActionIndex, actIndex, true);
                break;
            default:
                break;
        }

        if (m_performanceLogger.Enabled)
        {
            m_performanceLogger.Log(Cycle, reward, OurScore, OppScore);
        }
    }

    return actIndex;
}
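// Illustrative only: a minimal sketch of the behavior expected from
// ChooseActionEpsilonGreedy as called in RLThink above. The rnd parameter is
// an assumption for illustration; the real implementation may differ.
private static int EpsilonGreedySketch(
    System.Random rnd, double epsilon, int actionCount, int greedyActIndex)
{
    // With probability epsilon explore a uniformly random action;
    // otherwise exploit the current greedy action.
    if (rnd.NextDouble() < epsilon)
        return rnd.Next(actionCount);
    return greedyActIndex;
}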