Example #1
        void Learn(PolarCoord s1, HandleAction a, PolarCoord s2, float reward)
        {
            const float alpha = 0.1f;  // learning rate
            const float gamma = 0.99f; // discount factor

            // Tabular Q-learning update: move Q(s1, a) toward the bootstrapped target
            // reward + gamma * (best Q-value available from s2).
            QTable[s1][a] += alpha * (reward + gamma * QTable[s2].Values.Max() - QTable[s1][a]);
        }
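For reference, the update in Learn above is the standard tabular Q-learning rule with learning rate alpha = 0.1 and discount factor gamma = 0.99:

    Q(s_1, a) \leftarrow Q(s_1, a) + \alpha \, ( r + \gamma \max_{a'} Q(s_2, a') - Q(s_1, a) )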
Example #2
        public override Actions RunStep(States states)
        {
            PolarCoord   nowState = (PolarCoord)StateFactory.FromRawState(states);
            HandleAction decision = (HandleAction)ActionFactory.Random();

            Debug(nowState.DebugStr());

            if (states.episode_i == CurrentEpisode)
            {
                // Same episode as the previous step: store the transition, update the
                // Q-table, and let the learned policy choose the next action.
                Store(PrevState, PrevAction, nowState);
                Learn();

                decision = Policy(nowState);
            }
            else
            {
                // A new episode index was observed; only record it on this step.
                CurrentEpisode = states.episode_i;
            }

            PrevState  = nowState;
            PrevAction = decision;

            // Shoot only while bullet_num2 is non-zero and the absolute angle
            // RawAbsPhi is under 60 degrees.
            bool shoot = states.bullet_num2 != 0 && nowState.RawAbsPhi < 60;

            return(decision.ToRawAction(shoot));
        }
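The Policy method called in RunStep is not shown among these examples. Below is a minimal epsilon-greedy sketch over the same QTable, assuming QTable[s] maps each HandleAction to a float Q-value; the epsilon value, the Rng field, and the method body are assumptions for illustration, not the project's actual code.

        // Hypothetical sketch only: epsilon-greedy selection over QTable.
        // Assumes System.Linq and that QTable[s] is a Dictionary<HandleAction, float>.
        readonly Random Rng = new Random();

        HandleAction Policy(PolarCoord s)
        {
            const float epsilon = 0.1f; // assumed exploration rate

            if (Rng.NextDouble() < epsilon)
            {
                // Explore: pick a random action, as RunStep already does by default.
                return((HandleAction)ActionFactory.Random());
            }

            // Exploit: pick the action with the highest Q-value for this state.
            return(QTable[s].OrderByDescending(kv => kv.Value).First().Key);
        }

With epsilon = 0.1 the agent would explore on roughly one step in ten and otherwise follow the greedy action.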
Example #3
File: main.cs  Project: watabe951/RLFighter
        float Reward(PolarCoord s1, HandleAction a, PolarCoord s2)
        {
            float reward = 0;

            reward += -(s2.RawAbsPhi / 180f * 10f);   // penalty that grows with RawAbsPhi (down to -10)
            reward += s2.RawAbsTheta / 180f * 10f;    // bonus that grows with RawAbsTheta (up to +10)
            reward += s2.RawDistance > 800 ? -20 : 0; // flat penalty when the distance exceeds 800
            return(reward);
        }
Example #4
        void Store(PolarCoord s1, HandleAction a, PolarCoord s2)
        {
            const int bufsize = 1000; // maximum number of stored transitions

            // Append the (s1, a, s2) transition; once the buffer is full, drop the
            // oldest entry so History keeps a fixed size.
            History.Add(new Step <PolarCoord, HandleAction>(s1, a, s2));
            if (History.Count() > bufsize)
            {
                History.RemoveAt(0);
            }
        }
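Example #2 calls a parameterless Learn() that is not included in these examples. One plausible sketch, replaying the History buffer filled by Store and scoring each transition with Reward before applying the four-argument Learn from Example #1, is shown below; the Step property names (State1, Action, State2) are guesses, not the project's actual API.

        // Hypothetical sketch only: replay every stored transition through the
        // four-argument Learn, using Reward to score each step.
        void Learn()
        {
            foreach (var step in History)
            {
                float r = Reward(step.State1, step.Action, step.State2);
                Learn(step.State1, step.Action, step.State2, r);
            }
        }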
Example #5
        float Reward(PolarCoord s1, HandleAction a, PolarCoord s2)
        {
            // Alternative shaping: penalize AbsPhi, reward AbsTheta, and count Distance
            // negatively while RawAbsTheta is under 90 degrees, positively otherwise.
            return(-s2.AbsPhi + s2.AbsTheta + s2.Distance * (s2.RawAbsTheta < 90 ? -1 : 1));
        }