/// <summary>
/// Accumulates the reward of greedy actions into the running genome total and,
/// whenever the client cycle is a multiple of <c>NeatExpParams.CyclesPerGenome</c>,
/// publishes the accumulated fitness and goal-difference gain, signals that they
/// are ready, and then waits for the next network.
/// </summary>
/// <param name="s">State argument of the overridden method; not read by this override.</param>
/// <param name="ai">Index of the action whose reward is being accounted for.</param>
/// <param name="newValue">Q-value argument of the overridden method; not read by this override.</param>
protected override void UpdateQValue(State s, int ai, double newValue)
        {
            // Reward for the prev -> cur transition under the action identified by `ai`.
            var actionType = SoccerAction.GetActionTypeFromIndex(ai, Params.MoveKings);
            double stepReward = EnvironmentModeler.GetReward(base.m_prevState, base.m_curState, actionType);

            // Only the returned action index matters here; the out value is discarded.
            double ignoredMaxQ;
            bool wasGreedyAction = GetMaxQ(base.m_prevState, out ignoredMaxQ) == ai;

            // Reward counts towards fitness only when the action taken was the greedy one.
            if (wasGreedyAction)
            {
                m_genomeSumRewards += stepReward;
            }

            // Nothing more to do until the cycle count hits a genome boundary.
            if (this.m_client.Cycle % NeatExpParams.CyclesPerGenome != 0)
            {
                return;
            }

            Console.Write("Gener: {0}, ", m_evoAlg != null ? m_evoAlg.Generation : 0);

            // Publish the results for the network that was just evaluated.
            int goalDiffNow = m_client.OurScore - m_client.OppScore;
            this.GoalDiffGainedByNetworkToEvaluate = goalDiffNow - m_prevGoalDiff;
            this.FitnessNetworkToEvaluate = m_genomeSumRewards;
            m_prevGoalDiff = goalDiffNow;

            // Hand-off: announce the fitness, reset the accumulator, then wait for
            // the next network. Statement order is kept as in the original.
            m_eventNetFitnessReady.Set();
            m_genomeSumRewards = 0.0;
            m_eventNewNetReady.WaitOne();
        }
        /// <summary>
        /// Applies one Q-learning update to the entry for the previous state, the
        /// agent's previous action, and the opponent action guessed from the
        /// observed state transition.
        /// </summary>
        /// <param name="prevActIndex">Index of the action this agent took in the previous state.</param>
        public override void UpdateQ_QLearning(int prevActIndex)
        {
            // Reward for the prev -> cur transition under the agent's own previous action.
            var prevActionType = SoccerAction.GetActionTypeFromIndex(prevActIndex, Params.MoveKings);
            double stepReward = EnvironmentModeler.GetReward(m_prevState, m_curState, prevActionType);

            // Guess what the monitored opponent did, and feed that into the IM update
            // (presumably the opponent model; confirm against UpdateIM_Learning).
            int observedOppAct = EnvironmentModeler.GuessOpponentAction(m_prevOriginalState, m_curOriginalState, m_oppToMonitor);
            UpdateIM_Learning(m_prevState, observedOppAct);

            // Opponent action index with the maximal IM value for the previous state.
            int predictedOppAct = GetMaxIMIndex(m_prevState);

            double currentQ  = GetQValue(m_prevState, prevActIndex, observedOppAct);
            double nextBestQ = GetBestQ(m_curState, predictedOppAct);

            // Temporal-difference update: Q <- Q + alpha * (r + gamma * bestQ - Q).
            double tdTarget = stepReward + Params.Gamma * nextBestQ;
            double updatedQ = currentQ + Params.Alpha * (tdTarget - currentQ);

            UpdateQValue(m_prevState, prevActIndex, observedOppAct, updatedQ);
        }
示例#3
0
        /// <summary>
        /// Convenience overload: obtains the reward for the given transition from
        /// <c>EnvironmentModeler.GetReward</c> and forwards to the overload that
        /// takes the reward explicitly.
        /// </summary>
        /// <param name="prevState">State before the action was taken.</param>
        /// <param name="curState">State observed after the action.</param>
        /// <param name="prevAct">Action taken in <paramref name="prevState"/>; its <c>ActionType</c> is used for the reward lookup.</param>
        public void UpdateQ_QLearning(State prevState, State curState, SoccerAction prevAct)
        {
            // Kept as an explicitly typed double so the reward-taking overload is
            // selected exactly as before.
            double transitionReward = EnvironmentModeler.GetReward(prevState, curState, prevAct.ActionType);

            UpdateQ_QLearning(transitionReward, prevState, curState, prevAct);
        }