Пример #1
0
        /// <summary>
        /// Performs a single step for this skater: selects an action, attempts the
        /// corresponding move, computes the reward and updates the Q-value for it.
        /// </summary>
        /// <remarks>
        /// Side effects visible in this method: increments
        /// <c>Torus.actionHistogram[action]</c>, adds the reward to
        /// <c>Torus.rewardHistogram[action]</c>, increments <c>Torus.collisionCount</c>
        /// when the collision check reports a hit, and mutates this skater's
        /// coordinates and <c>RecentCollision</c> value.
        /// </remarks>
        /// <returns>
        /// A new <c>Skater</c> constructed from this skater's coordinates,
        /// <c>RecentCollision</c>, <c>qValues</c> and <c>InitialColor</c> as they
        /// stand after the step.
        /// </returns>
        public Skater Move()
        {
            // Ask the currently configured policy for an action and count its use.
            int action = DetermineAction(Parameters.CurrentPolicy);
            Torus.actionHistogram[action]++;

            // Candidate position: one step of Parameters.StepSize along the chosen direction.
            // NOTE(review): Shared.Mod(..., 1) presumably wraps the value back into the
            // unit interval (torus topology) - confirm against Shared.Mod.
            double heading    = Parameters.DirectionList[action];
            double candidateX = Shared.Mod((xCoord + Parameters.StepSize * Math.Cos(heading)), 1);
            double candidateY = Shared.Mod((yCoord + Parameters.StepSize * Math.Sin(heading)), 1);

            bool collided = Torus.CollisionCheck(this, candidateX, candidateY);
            if (!collided)
            {
                // Free path: commit the candidate coordinates.
                xCoord = candidateX;
                yCoord = candidateY;

                // Count the collision marker down, never below zero.
                if (RecentCollision > 0)
                {
                    RecentCollision--;
                }
            }
            else
            {
                // Collision: the move is cancelled, so the candidate coordinates are
                // discarded. Reset the marker and record the collision.
                RecentCollision = Parameters.CollisionFadeSpeed;
                Torus.collisionCount++;
            }

            // The reward is computed only after the skater's state has been updated;
            // the flag tells CalculateReward whether the move was carried out.
            double reward = CalculateReward(action, !collided);

            // Feed the reward into the Q-values and into the per-action reward histogram.
            UpdateQValue(action, reward);
            Torus.rewardHistogram[action] += reward;

            // Hand back a new Skater built from the current state.
            return new Skater(xCoord, yCoord, RecentCollision, qValues, InitialColor);
        }