/// <summary>
/// Applies one action to this agent, publishes the agent state to the
/// crowd object (<c>cr_</c>) and hands out the rewards for the step.
/// </summary>
/// <param name="act">
/// Action vector. In continuous mode <c>act[0]</c> is the forward step
/// (clamped to [0, 1]) and <c>act[1]</c> is the turn amount (clamped to
/// [-1, 1] and scaled to [-90, 90] degrees). In discrete mode
/// <c>act[0]</c> is truncated to an int and interpreted as:
/// 0 = walk forward, 1 = turn right, 2 = turn left, 3 = brake.
/// </param>
/// <param name="textAction">Not read by this method.</param>
public override void AgentAction(float[] act, string textAction)
{
    // A finished agent idles until the crowd reports that everyone is done;
    // on that step it is re-armed and re-activated, but still takes no action.
    if (agentDone)
    {
        if (cr_.allDone_)
        {
            agentDone = false;
            cr_.setAgentActiveStatus(sw_id, true);
        }
        return;
    }

    step++;

    // Snapshot taken before acting so that progress can be rewarded afterwards.
    float prevDistance = (target_.transform.position - agent_.pos).magnitude;
    float prevAlignment = agent_.cosineOrientation();

    bool continuousControl =
        brain.brainParameters.vectorActionSpaceType == SpaceType.continuous;

    if (continuousControl)
    {
        // NOTE(review): this branch moves only the Unity transform; nothing
        // visible here writes the new pose back into agent_ before the
        // distance/orientation are re-read from it below - confirm that the
        // sync happens elsewhere.
        var body = gameObject.transform;
        body.position += body.forward.normalized * Mathf.Clamp(act[0], 0f, 1f);

        // Full deflection of act[1] corresponds to a quarter turn either way.
        float turnDegrees = 90.0f * Mathf.Clamp(act[1], -1f, 1f);
        body.Rotate(0f, turnDegrees, 0f);
    }
    else
    {
        // NOTE(review): an earlier comment in this method listed 1 as "turn
        // left" and 2 as "turn right"; the calls below are what actually
        // runs - confirm which mapping is intended.
        switch ((int)act[0])
        {
            case 0:
                agent_.walkForward();
                break;
            case 1:
                agent_.turnRight();
                break;
            case 2:
                agent_.turnLeft();
                break;
            case 3:
                agent_.Brake();
                break;
        }

        // Mirror the simulated agent's pose onto the scene object.
        gameObject.transform.position = agent_.pos;
        gameObject.transform.forward = agent_.forward;
    }

    cr_.setAgent(ref agent_, sw_id);

    float newDistance = (target_.transform.position - agent_.pos).magnitude;
    float newAlignment = agent_.cosineOrientation();

    // Terminal case 1: the target was reached.
    if (agent_.targetReached())
    {
        AddReward(rewardTargetReached);
        AgentDoneStuff();
        return;
    }

    // Terminal case 2: the agent left the area or exceeded the step budget.
    // Both outcomes receive rewardOutOfBounds.
    bool leftArea = !agent_.withinBounds(cr_.areaMinBound_, cr_.areaMaxBound_);
    if (leftArea || step > maxSteps)
    {
        AddReward(rewardOutOfBounds);
        AgentDoneStuff();
        return;
    }

    // Collisions are rewarded (presumably penalised) but do not end the episode.
    if (cr_.doesCollide(sw_id))
    {
        Debug.Log("Collision!");
        AddReward(rewardCollision);
    }

    // Shaping term: positive when the agent ended up closer to the target.
    float distanceGained = prevDistance - newDistance;
    AddReward(distanceGainedWeight * distanceGained);

    // Shaping term: positive when cosineOrientation() increased this step.
    float alignmentGained = newAlignment - prevAlignment;
    AddReward(orientationGainedWeight * alignmentGained);

    // Constant per-step reward (described as "usually negative" by the original author).
    AddReward(rewardEachStep);
}