示例#1
0
        /// <summary>
        /// Applies the reward for the current step: a small shaping penalty is
        /// always added, then the reward is overwritten with +1 when the
        /// destination is reached, or with -1 (followed by <c>Done()</c>) when
        /// <c>CheckFailure()</c> reports a failure.
        /// </summary>
        private void ApplyReward()
        {
            float delta, distance;
            CalculateDestinationData(out delta, out distance);

            // angleSign is required by the helper's signature but is not used here.
            float angleSign, velocitySign;
            bool  leavingBroomBehind = LeavingBroomBehind(out angleSign, out velocitySign);

            // True when the velocity sign equals the sign of delta.
            // NOTE(review): presumably this means "moving toward the destination";
            // confirm against CalculateDestinationData.
            bool velocityMatchesDelta = velocitySign == MH.Sign(delta);

            // Penalty table (values unchanged):
            //   velocity matches delta,  leaving broom behind  -> -0.0005
            //   velocity matches delta,  not leaving it behind -> -0.00005
            //   velocity opposes delta,  leaving broom behind  -> -0.00005
            //   velocity opposes delta,  not leaving it behind -> -0.005
            float penalty;
            if (velocityMatchesDelta)
            {
                penalty = leavingBroomBehind ? -0.0005f : -0.00005f;
            }
            else
            {
                penalty = leavingBroomBehind ? -0.00005f : -0.005f;
            }

            AddReward(penalty);

            if (distance < DistanceThreshold)
            {
                ResetDestination();
                SetReward(1f);
            }

            if (CheckFailure())
            {
                // Assign the terminal reward BEFORE calling Done(): if Done()
                // resets or flushes the agent's reward state, a reward set
                // afterwards would be lost or credited to the next episode.
                // NOTE(review): Done() is defined outside this view; when it only
                // sets a flag this ordering is equivalent to the previous one.
                SetReward(-1f);
                Done();
            }
        }
示例#2
0
 /// <summary>
 /// Computes the sign of the broom joint angle and the sign of the balancer's
 /// x velocity, and reports whether the two signs are equal.
 /// </summary>
 /// <param name="angleSign">Receives <c>MH.Sign</c> of <c>balancer.Broom.Joint.angle</c>.</param>
 /// <param name="velocitySign">Receives <c>MH.Sign</c> of <c>balancer.Rigidbody.velocity.x</c>.</param>
 /// <returns><c>true</c> when both signs are equal; otherwise <c>false</c>.</returns>
 private bool LeavingBroomBehind(out float angleSign, out float velocitySign)
 {
     var jointAngle = balancer.Broom.Joint.angle;
     angleSign = MH.Sign(jointAngle);

     var horizontalVelocity = balancer.Rigidbody.velocity.x;
     velocitySign = MH.Sign(horizontalVelocity);

     // NOTE(review): per the method name, equal signs are taken to mean the
     // body is moving away from under the broom; confirm the angle convention.
     bool signsMatch = angleSign == velocitySign;
     return signsMatch;
 }