Example #1
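An overload of UpdateQ_SARSA_Lambda that computes the immediate reward for the transition from m_prevState to m_curState via EnvironmentModeler.GetReward and then delegates to the full-signature SARSA(λ) update.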
        public virtual void UpdateQ_SARSA_Lambda(int prevActIndex, int curActIndex)
        {
            double reward = EnvironmentModeler.GetReward(m_prevState, m_curState,
                                                         SoccerAction.GetActionTypeFromIndex(prevActIndex, Params.MoveKings));

            UpdateQ_SARSA_Lambda(reward, m_prevState, m_curState, prevActIndex, curActIndex);
        }
Example #2
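The analogous overload for Watkins's Q(λ): the reward is computed the same way and passed on to the full UpdateQ_QL_Watkins overload together with the isNaive flag, which switches between Watkins's and naive Q(λ).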
        public virtual void UpdateQ_QL_Watkins(int prevActIndex, int curActIndex, bool isNaive)
        {
            double reward = EnvironmentModeler.GetReward(m_prevState, m_curState,
                                                         SoccerAction.GetActionTypeFromIndex(prevActIndex, Params.MoveKings));

            UpdateQ_QL_Watkins(reward, m_prevState, m_curState, prevActIndex, curActIndex, isNaive);
        }
Example #3
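RLThink picks the greedy action index from the Q-table, optionally replaces it with an ε-greedy choice when Params.Epsillon is positive, and from cycle 1 onward computes the reward for the previous transition and applies the Q-table update selected by Params.RLMethod, logging performance if the logger is enabled.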
        protected virtual int RLThink()
        {
            int greedyActIndex = m_qTable.GetCurrentGreedyActionIndex();

            int actIndex = greedyActIndex;

            if (Params.Epsillon > 0.0)
            {
                actIndex = ChooseActionEpsilonGreedy(
                    Params.Epsillon,
                    SoccerAction.GetActionCount(Params.MoveKings, TeammatesCount),
                    greedyActIndex);
            }
            //SoccerAction act = SoccerAction.GetActionFromIndex(actIndex, Params.MoveKings, MyUnum);


            if (Cycle > 0) // because in cycle 0 there is no prev state
            {
                double reward = EnvironmentModeler.GetReward(m_prevState, m_curState,
                                                             SoccerAction.GetActionTypeFromIndex(m_prevActionIndex, Params.MoveKings));

                switch (Params.RLMethod)
                {
                case Params.RLMethods.Evolutionary:
                    m_qTable.UpdateQ_Evolutionary(m_prevActionIndex, reward);
                    break;

                case Params.RLMethods.Q_Zero:
                    m_qTable.UpdateQ_QLearning(m_prevActionIndex);
                    break;

                case Params.RLMethods.SARSA_Zero:
                    m_qTable.UpdateQ_SARSA(m_prevActionIndex, actIndex);
                    break;

                case Params.RLMethods.SARSA_Lambda:
                    m_qTable.UpdateQ_SARSA_Lambda(m_prevActionIndex, actIndex);
                    break;

                case Params.RLMethods.Q_Lambda_Watkins:
                    m_qTable.UpdateQ_QL_Watkins(m_prevActionIndex, actIndex, false);
                    break;

                case Params.RLMethods.Q_Lambda_Naive:
                    m_qTable.UpdateQ_QL_Watkins(m_prevActionIndex, actIndex, true);
                    break;

                default:
                    break;
                }

                if (m_performanceLogger.Enabled)
                {
                    m_performanceLogger.Log(Cycle, reward, OurScore, OppScore);
                }
            }

            return actIndex;
        }
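For reference, below is a minimal sketch of an ε-greedy selector with the signature used in Example #3 (exploration rate, number of available actions, greedy action index). The actual ChooseActionEpsilonGreedy implementation is not shown on this page, so this body is an assumption inferred from the call site, not the codebase's method.

        // Hypothetical sketch only: inferred from the call in Example #3, not taken
        // from the original code base.
        private static readonly System.Random s_rand = new System.Random();

        protected virtual int ChooseActionEpsilonGreedy(double epsilon, int actionCount, int greedyActIndex)
        {
            // With probability epsilon pick a uniformly random action index (explore);
            // otherwise keep the greedy action index (exploit).
            if (s_rand.NextDouble() < epsilon)
                return s_rand.Next(actionCount);

            return greedyActIndex;
        }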