protected override void UpdateQValue(State s, int ai, double newValue)
{
    double reward = EnvironmentModeler.GetReward(base.m_prevState, base.m_curState,
        SoccerAction.GetActionTypeFromIndex(ai, Params.MoveKings));

    double dummy;
    int prevStateGreedyAct = GetMaxQ(base.m_prevState, out dummy);

    // Add the reward to the genome's fitness only if the performed action was the greedy one.
    if (prevStateGreedyAct == ai)
    {
        m_genomeSumRewards += reward;
    }

    // At the end of each genome's evaluation window, report the accumulated fitness and the
    // goal difference gained by this network, then wait for the next network to evaluate.
    if (this.m_client.Cycle % NeatExpParams.CyclesPerGenome == 0)
    {
        Console.Write("Gener: {0}, ", m_evoAlg != null ? m_evoAlg.Generation : 0);

        int curGoalDiff = m_client.OurScore - m_client.OppScore;
        this.GoalDiffGainedByNetworkToEvaluate = curGoalDiff - m_prevGoalDiff;
        this.FitnessNetworkToEvaluate = m_genomeSumRewards;
        m_prevGoalDiff = curGoalDiff;

        m_eventNetFitnessReady.Set();    // signal the evaluator that the fitness value is ready
        m_genomeSumRewards = 0.0;
        m_eventNewNetReady.WaitOne();    // block until the next network to evaluate is supplied
    }
}
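// Sketch (assumption, not from the source): the evaluator-side counterpart of the
// handshake above. The evolutionary algorithm thread is expected to install the next
// network to evaluate, signal m_eventNewNetReady, and then block on
// m_eventNetFitnessReady until the agent has played NeatExpParams.CyclesPerGenome
// cycles and reported a fitness. The method name and the commented-out
// SetNetworkToEvaluate helper are hypothetical.
private double EvaluateCurrentGenomeSketch()
{
    // SetNetworkToEvaluate(genome);  // hypothetical: make the decoded genome the active policy
    m_eventNewNetReady.Set();         // let the agent resume with the new network
    m_eventNetFitnessReady.WaitOne(); // wait for UpdateQValue above to report a fitness
    return this.FitnessNetworkToEvaluate;
}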
public override void UpdateQ_QLearning(int prevActIndex)
{
    double reward = EnvironmentModeler.GetReward(m_prevState, m_curState,
        SoccerAction.GetActionTypeFromIndex(prevActIndex, Params.MoveKings));

    // Infer the opponent's last action from the observed state transition and
    // update the internal model (IM) of the monitored opponent.
    int ai_star_other = EnvironmentModeler.GuessOpponentAction(
        m_prevOriginalState, m_curOriginalState, m_oppToMonitor);
    UpdateIM_Learning(m_prevState, ai_star_other);

    // Opponent action predicted by the internal model for the previous state.
    int ai_prime_other = GetMaxIMIndex(m_prevState);

    // Q-learning update over the joint (own action, opponent action) table:
    // Q(s, a, o) <- Q(s, a, o) + alpha * (r + gamma * max_a' Q(s', a', o') - Q(s, a, o))
    double oldQ = GetQValue(m_prevState, prevActIndex, ai_star_other);
    double bestQ = GetBestQ(m_curState, ai_prime_other);
    double newQ = oldQ + Params.Alpha * (reward + Params.Gamma * bestQ - oldQ);

    UpdateQValue(m_prevState, prevActIndex, ai_star_other, newQ);
}
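// Sketch (assumption, not shown in this source): GetBestQ is used above as
// max_a' Q(s', a', o'), i.e. the best own-action value in state s given the
// opponent action oi predicted by the internal model. A minimal version, assuming
// a hypothetical Params.ActionCount that enumerates the agent's own action indices:
private double GetBestQSketch(State s, int oi)
{
    double best = double.MinValue;
    for (int ai = 0; ai < Params.ActionCount; ai++)  // Params.ActionCount is an assumed name
    {
        best = Math.Max(best, GetQValue(s, ai, oi)); // same three-argument GetQValue used above
    }
    return best;
}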
public void UpdateQ_QLearning(State prevState, State curState, SoccerAction prevAct)
{
    double reward = EnvironmentModeler.GetReward(prevState, curState, prevAct.ActionType);
    this.UpdateQ_QLearning(reward, prevState, curState, prevAct);
}
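// Sketch (assumption, not from the source): the four-argument overload delegated to
// above is expected to perform the standard single-agent Q-learning update
// Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)).
// This body is illustrative only: SoccerAction.GetIndexFromAction and the
// two-argument GetQValue/UpdateQValue forms are assumed, not taken from the source.
private void UpdateQ_QLearningSketch(double reward, State prevState, State curState, SoccerAction prevAct)
{
    int ai = SoccerAction.GetIndexFromAction(prevAct, Params.MoveKings); // assumed inverse of GetActionTypeFromIndex
    double bestNextQ;
    GetMaxQ(curState, out bestNextQ);        // greedy value of the next state, as in the first method above
    double oldQ = GetQValue(prevState, ai);  // assumed two-argument, single-agent Q-table lookup
    double newQ = oldQ + Params.Alpha * (reward + Params.Gamma * bestNextQ - oldQ);
    UpdateQValue(prevState, ai, newQ);       // three-argument form shown in the first method above
}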