示例#1
0
        /// <summary>
        /// Builds the learning state for this controller: a grid encoding of the
        /// ball's location, a copy of <c>_vect</c>, the reward and the terminal flag.
        /// </summary>
        /// <remarks>
        /// When the ball's bounds overlap the (slightly widened) controller bounds the
        /// reward is 1 and the state is terminal, unless <c>PongBenchmark.Running</c> is
        /// set, in which case the state is terminal only once <c>_ball.IsTerminal()</c>
        /// reports a winner. Without an overlap, the state is terminal when there is a
        /// winner and the reward is 1 only if that winner equals <c>Side</c>.
        /// </remarks>
        /// <returns>
        /// A <c>QState</c> wrapping the populated grid matrix, a clone of <c>_vect</c>,
        /// the reward and the terminal flag.
        /// </returns>
        public QState GetState()
        {
            var winner = _ball.IsTerminal();

            var ballBounds       = PongGame.BoundsFromTransform(_ball.transform);
            var controllerBounds = PongGame.BoundsFromTransform(transform);

            // Widen the controller's box along x before testing for overlap with the ball.
            controllerBounds.size += new Vector3(0.2f, 0, 0);
            PongGame.DebugDrawBounds(controllerBounds, Color.blue);

            float reward;
            bool  terminal;
            if (ballBounds.Intersects(controllerBounds))
            {
                reward = 1;
                // A hit ends the episode, except while benchmarking, where only a
                // decided game (a winner is present) is terminal.
                terminal = !PongBenchmark.Running || winner.HasValue;
            }
            else
            {
                terminal = winner.HasValue;
                reward   = terminal && winner.Value == Side ? 1 : 0;
            }

            // Grid cell containing the ball, or null when Locate finds no cell for it.
            var ballCell = _grid.Locate(_ball.transform.position);

            _grid.Populate((cellBounds, cell) => {
                // With no ball cell every grid cell falls through to 0.
                var distance = ballCell.HasValue ? HammingDistance(ballCell.Value, cell) : int.MaxValue;

                // 1 on the ball's own cell, 0.5 at distance 1, 0 everywhere else.
                var value = distance <= 0 ? 1f : distance <= 1 ? 0.5f : 0;
                return value;
            });

            return new QState(new[] { _grid.Matrix }, _vect.Clone(), reward, terminal);
        }
示例#2
0
    /// <summary>
    /// Builds the learning state for the current tick: a grid marking sampled track
    /// points, a dense feature vector from the velocity and force bins, the reward
    /// and the terminal flag. Also updates the stand-still tick counter.
    /// </summary>
    /// <returns>
    /// A <c>QState</c> wrapping the grid matrix, a clone of the feature vector,
    /// the reward and the terminal flag.
    /// </returns>
    public QState GetState()
    {
        // Fill the grid with 1 and write 0 into every cell that contains one of the
        // track points sampled at 0.8 spacing around the travelled distance.
        _grid.SetAll(1f);
        for (var step = -2; step < 20; step++)
        {
            var sample = Track.GetPointAtDistance(_distanceTravelled + step * 0.8f);
            var cell   = _grid.Locate(sample);
            if (!cell.HasValue)
            {
                continue;
            }

            _grid[cell.Value] = 0f;
        }

        // Feature vector: velocity bin output followed by force bin output.
        var velocityFeatures = _velocityBin.Get(Velocity / 20f);
        var forceFeatures    = _forceBin.Get(Mathf.Abs(Force));
        var linear           = Vector<float>.Build.Dense(velocityFeatures.Concat(forceFeatures).ToArray());

        // Reward scales with velocity and is reduced by the magnitude of the force;
        // it is zeroed once the stand-still counter exceeds 20 ticks.
        var speedTerm    = Velocity / 20f;
        var forcePenalty = Mathf.Abs(Force) * -0.2f;
        var reward       = _standStillTicks > 20 ? 0 : speedTerm + forcePenalty;

        // The episode ends after more than one track length has been covered or after
        // more than 200 stand-still ticks. Outside testing mode, leaving the track
        // ends it as well.
        var lapCompleted = _distanceTravelled - StartPosition > Track.length;
        var stalled      = _standStillTicks > 200;
        var terminal     = lapCompleted || stalled;
        if (QAIManager.CurrentMode != QAIMode.Testing)
        {
            terminal = terminal || !_onTrack;
        }

        var state = new QState(new[] { _grid.Matrix }, linear.Clone(), reward, terminal);

        // Count consecutive ticks with velocity below 1; any other tick resets the count.
        _standStillTicks = Velocity < 1f ? _standStillTicks + 1 : 0;

        return state;
    }