Example #1
0
    /// <summary>
    /// Executes the discrete action stored in <c>act[0]</c> on the ship, then adjusts the step
    /// reward according to changes in score and health and to the game-over flag.
    /// </summary>
    /// <param name="act">
    /// Action vector. Only element 0 is read; it is truncated to an int action id:
    /// 0 and 1 push with force (+thrust, 0) / (-thrust, 0), 2 and 3 push with
    /// (0, +thrust) / (0, -thrust), 4 fires. Any other id performs no action.
    /// </param>
    public override void AgentStep(float[] act)
    {
        int actionId = (int)act[0];

        // Per-action shaping: every action except id 2 earns a small bonus; id 2 costs a little.
        if (actionId == 0)
        {
            ship.rb.AddForce(new Vector2(ship.thrust, 0));
            reward += 0.01f;
        }
        else if (actionId == 1)
        {
            ship.rb.AddForce(new Vector2(-ship.thrust, 0));
            reward += 0.01f;
        }
        else if (actionId == 2)
        {
            ship.rb.AddForce(new Vector2(0, ship.thrust));
            reward -= 0.005f;
        }
        else if (actionId == 3)
        {
            ship.rb.AddForce(new Vector2(0, -ship.thrust));
            reward += 0.01f;
        }
        else if (actionId == 4)
        {
            ship.Shoot();
            reward += 0.05f;
        }

        // A higher score than the one we remembered is treated as an enemy kill.
        float latestScore = (float)GameControl.instance.score;
        if (latestScore > currentScore)
        {
            reward               += 1f;
            currentScore          = latestScore;
            currenOnScreenEnemies = Spawner.instance.getOnScreenEnemies();
        }

        // Health going up is treated as a health-kit pickup; health going down is penalised.
        // Either way the remembered health is synchronised with the game state.
        var latestHealth = GameControl.instance.health;
        if (currentHealth < latestHealth)
        {
            reward       += 1f;
            currentHealth = latestHealth;
        }
        else if (currentHealth > latestHealth)
        {
            reward       -= .5f;
            currentHealth = latestHealth;
        }

        if (!GameControl.instance.gameOver)
        {
            // Normal step: keep the reward inside [-1, 1] and refresh the on-screen readout.
            reward          = Mathf.Clamp(reward, -1f, 1f);
            rewardText.text = string.Format("Reward: {0}", CumulativeReward.ToString("0.00"));
        }
        else
        {
            // Game over replaces whatever was accumulated this step and ends the episode;
            // the readout is intentionally left untouched on this path.
            reward = -1;
            done   = true;
        }
    }
Example #2
0
    /// <summary>
    /// Moves the agent using two continuous actions, restricted to a fixed rectangle, and
    /// updates the reward based on the distance to <c>mouseObject</c> and on the step budget.
    /// </summary>
    /// <param name="act">
    /// <c>act[0]</c> is the horizontal rate and <c>act[1]</c> the vertical rate; both are
    /// multiplied by <c>Time.deltaTime</c> before being applied.
    /// </param>
    public override void AgentStep(float[] act)
    {
        float moveX = act[0];
        float moveY = act[1];
        float dt    = Time.deltaTime;

        // Position the agent would reach if both components were applied. Only x and y are
        // inspected below; the z component (current z plus originalZ) is never read.
        Vector3 predicted = transform.position + new Vector3(moveX * dt, moveY * dt, originalZ);

        // Each axis is gated independently: movement along an axis happens only when the
        // predicted coordinate stays strictly inside (-10, 10) for x and (-5, 5) for y.
        bool xInBounds = predicted.x > -10 && predicted.x < 10;
        bool yInBounds = predicted.y > -5 && predicted.y < 5;

        if (xInBounds)
        {
            transform.Translate(Vector3.right * moveX * dt);
        }

        if (yInBounds)
        {
            transform.Translate(Vector3.up * moveY * dt);
        }

        // Closer than one unit to mouseObject: penalty and end of episode.
        // Otherwise a small per-step bonus.
        bool caught = Vector3.Distance(mouseObject.position, transform.position) < 1f;
        reward += caught ? -1f : 0.01f;
        if (caught)
        {
            done = true;
        }

        // Reaching the step limit grants a bonus and also ends the episode.
        if (stepCounter >= maxStep)
        {
            reward += 5f;
            done    = true;
        }

        myText.text = CumulativeReward.ToString("F2");
    }
Example #3
0
    /// <summary>
    /// Advances the stored angle by <paramref name="alphaChange"/>, repositions the five body
    /// parts to match, and (while the episode is not done) recomputes the step reward.
    /// </summary>
    /// <param name="alphaChange">
    /// Increment added to the previous angle; the sum is clamped to [0, PI/2].
    /// </param>
    /// <param name="t">
    /// Split factor for the horizontal displacement: bodyA moves by (t - 1) times the change
    /// in span and bodyB by t times that change.
    /// </param>
    public void DoUpdateSlugBody(float alphaChange, float t)
    {
        string alphaText = alphaChange.ToString();
        string tText     = t.ToString();
        Debug.Log(string.Format("Update Slug Body: {0}, {1}", alphaText, tText));

        float nextAlpha = Mathf.Clamp(_previousAlpha + alphaChange, 0f, Mathf.PI * 0.5f);

        // Change in horizontal span caused by going from the previous angle to the new one.
        float cosineShift = Mathf.Cos(nextAlpha) - Mathf.Cos(_previousAlpha);
        float spanChange  = 2f * bodySeparation * cosineShift;

        // Distribute the span change between the two end bodies according to t.
        float shiftA = (t - 1f) * spanChange;
        float shiftB = t * spanChange;
        bodyA.position += Vector3.right * shiftA;
        bodyB.position += Vector3.right * shiftB;

        // bodyC sits horizontally midway between A and B, raised above A by
        // bodySeparation * sin(angle); its remaining component is left as it was.
        Vector3 middle = bodyC.position;
        middle.x       = (bodyA.position.x + bodyB.position.x) * 0.5f;
        middle.y       = bodyA.position.y + bodySeparation * Mathf.Sin(nextAlpha);
        bodyC.position = middle;

        // The outer bodies trail A and lead B by exactly one bodySeparation along x.
        bodyPreA.position = bodyA.position - Vector3.right * bodySeparation;
        bodyPreB.position = bodyB.position + Vector3.right * bodySeparation;

        if (!done)
        {
            // Horizontal velocity of bodyC relative to the remembered x position.
            // NOTE(review): _previousPositionC is not updated in this method; presumably it
            // is refreshed elsewhere - confirm.
            float velocityC = (bodyC.position.x - _previousPositionC) / Time.fixedDeltaTime;

            // The reward is rebuilt from scratch each call: velocity term, minus a constant
            // step penalty, minus a quadratic penalty on the size of the action.
            // NOTE(review): a previous comment claimed the velocity bonus was meant only for
            // positive displacement; the code applies it for either sign - confirm intent.
            reward = 0f;
            reward += velocityC * multiplierForBodyVelocity;
            reward -= stepPenalty;
            reward -= alphaChange * alphaChange * multiplierForActionSquared;

#if UNITY_EDITOR
            string rewardMessage = "Reward: " + reward.ToString() + " ; Cum: " + CumulativeReward.ToString();
            Debug.Log(rewardMessage);
#endif
        }

        // Remember this call's angle and split factor for the next update.
        _previousT     = t;
        _previousAlpha = nextAlpha;
    }