/// <summary>
/// Builds the current <see cref="QState"/> observation: a grid marking the ball's
/// cell neighbourhood, a copy of <c>_vect</c>, a reward and a terminal flag.
/// </summary>
/// <remarks>
/// Reward and terminal flag:
/// <list type="bullet">
/// <item>If the ball's bounds intersect this object's bounds (widened by 0.2 on x),
/// the reward is 1 and the state is terminal, unless <c>PongBenchmark.Running</c>
/// is set, in which case the state is terminal only when <c>_ball.IsTerminal()</c>
/// returned a value.</item>
/// <item>Otherwise the state is terminal when <c>_ball.IsTerminal()</c> returned a
/// value, and the reward is 1 only if that value equals <c>Side</c> (else 0).</item>
/// </list>
/// </remarks>
/// <returns>The state built from <c>_grid.Matrix</c>, <c>_vect.Clone()</c>, the reward and the terminal flag.</returns>
public QState GetState()
{
    var winner = _ball.IsTerminal();

    var ballBounds = PongGame.BoundsFromTransform(_ball.transform);
    var controller = PongGame.BoundsFromTransform(transform);
    // Widen the hit box slightly on x before testing for contact with the ball.
    controller.size += new Vector3(0.2f, 0, 0);
    PongGame.DebugDrawBounds(controller, Color.blue);

    float reward;
    bool terminal;
    if (ballBounds.Intersects(controller))
    {
        reward = 1;
        // A hit ends the episode, except while benchmarking where only a decided game does.
        terminal = !PongBenchmark.Running || winner.HasValue;
    }
    else
    {
        terminal = winner.HasValue;
        reward = (terminal && winner.Value == Side) ? 1 : 0;
    }

    var ballCell = _grid.Locate(_ball.transform.position);

    // NOTE(review): Populate presumably stores the returned value in each grid cell — confirm.
    _grid.Populate((bo, c) =>
    {
        // Distance from this cell to the ball's cell; int.MaxValue when the ball was not located,
        // which makes every cell evaluate to 0.
        var distance = ballCell.HasValue ? HammingDistance(ballCell.Value, c) : int.MaxValue;
        return distance <= 0 ? 1f : distance <= 1 ? 0.5f : 0;
    });

    var state = _grid.Matrix;
    return new QState(new[] { state }, _vect.Clone(), reward, terminal);
}
/// <summary>
/// Builds the current <see cref="QState"/> observation: a grid with sampled track
/// points marked, a feature vector from the velocity and force bins, a reward and
/// a terminal flag.
/// </summary>
/// <remarks>
/// The reward is <c>Velocity / 20 - 0.2 * |Force|</c>, forced to 0 once
/// <c>_standStillTicks</c> exceeds 20. The state is terminal when the distance
/// travelled since <c>StartPosition</c> exceeds <c>Track.length</c>, when
/// <c>_standStillTicks</c> exceeds 200, or, outside <c>QAIMode.Testing</c>, when
/// <c>_onTrack</c> is false. The stand-still counter is updated after the state is
/// built, so the state reflects the counter as it was on entry.
/// </remarks>
/// <returns>The state built from <c>_grid.Matrix</c>, the feature vector, the reward and the terminal flag.</returns>
public QState GetState()
{
    // Fill the grid with 1, then zero every cell that contains a sampled track point.
    _grid.SetAll(1f);
    for (var step = -2; step < 20; step++)
    {
        // Samples are spaced 0.8 apart, from two steps behind the current distance to 19 ahead.
        var sample = Track.GetPointAtDistance(_distanceTravelled + step * 0.8f);
        var cell = _grid.Locate(sample);
        if (!cell.HasValue)
        {
            continue;
        }

        _grid[cell.Value] = 0f;
    }

    var velocityFeatures = _velocityBin.Get(Velocity / 20f);
    var forceFeatures = _forceBin.Get(Mathf.Abs(Force));
    var linear = Vector<float>.Build.Dense(velocityFeatures.Concat(forceFeatures).ToArray());

    var shapedReward = Velocity / 20f + Mathf.Abs(Force) * -0.2f;
    var reward = _standStillTicks > 20 ? 0 : shapedReward;

    var lapFinished = _distanceTravelled - StartPosition > Track.length;
    var stalled = _standStillTicks > 200;
    var testing = QAIManager.CurrentMode == QAIMode.Testing;
    // Leaving the track ends the episode only outside testing mode.
    var terminal = lapFinished || stalled || (!testing && !_onTrack);

    var state = new QState(new[] { _grid.Matrix }, linear.Clone(), reward, terminal);

    _standStillTicks = Velocity < 1f ? _standStillTicks + 1 : 0;

    return state;
}