コード例 #1
0
    /// <summary>
    /// Writes the incoming action values into the muscles' normalized target rotations,
    /// computes this step's reward from the master's distance metrics, and ends the
    /// episode when the distance reward is too low or the master reports it is done.
    /// </summary>
    /// <param name="vectorAction">
    /// Action values, consumed sequentially: one per non-locked angular axis
    /// (X, then Y, then Z) for each muscle in <c>_master.Muscles</c> order.
    /// </param>
    /// <param name="textAction">Not read by this method.</param>
    public override void AgentAction(float[] vectorAction, string textAction)
    {
        // The style animator is only notified when it is the local instance.
        if (_styleAnimator == _localStyleAnimator)
        {
            _styleAnimator.OnAgentAction();
        }
        _master.OnAgentAction();

        // Hand out action values to every unlocked rotational axis, in order.
        int actionIndex = 0;
        foreach (var muscle in _master.Muscles)
        {
            var joint = muscle.ConfigurableJoint;
            if (joint.angularXMotion != ConfigurableJointMotion.Locked)
            {
                muscle.TargetNormalizedRotationX = vectorAction[actionIndex];
                actionIndex++;
            }
            if (joint.angularYMotion != ConfigurableJointMotion.Locked)
            {
                muscle.TargetNormalizedRotationY = vectorAction[actionIndex];
                actionIndex++;
            }
            if (joint.angularZMotion != ConfigurableJointMotion.Locked)
            {
                muscle.TargetNormalizedRotationZ = vectorAction[actionIndex];
                actionIndex++;
            }
        }

        // The effort penalty is still computed but is currently not part of the reward.
        float effortPenalty = 0.05f * GetEffort();

        // Upper bound for each distance; the same value is reused as the shaping exponent.
        float rotationCap     = (float)_master.BodyParts.Count;
        float velocityCap     = 3f;
        float endEffectorCap  = 8f;
        float centerOfMassCap = 5f;
        float sensorCap       = 1f;

        float rotationGap     = _master.RotationDistance;
        float velocityGap     = Mathf.Abs(_master.VelocityDistance);
        float endEffectorGap  = _master.EndEffectorDistance;
        float centerOfMassGap = _master.CenterOfMassDistance;
        float sensorGap       = _master.SensorDistance;

        // Relative weights of the individual reward terms.
        const float rotationWeight    = .65f * .9f;
        const float velocityWeight    = .1f * .9f;
        const float endEffectorWeight = .15f * .9f;
        const float centerMassWeight  = .1f * .9f;
        const float sensorWeight      = .1f * .9f;
        const float jointLimitWeight  = .09f;

        float weightedRotation    = ShapeDistanceReward(rotationGap, rotationCap) * rotationWeight;
        float weightedVelocity    = ShapeDistanceReward(velocityGap, velocityCap) * velocityWeight;
        float weightedEndEffector = ShapeDistanceReward(endEffectorGap, endEffectorCap) * endEffectorWeight;
        float weightedCenterMass  = ShapeDistanceReward(centerOfMassGap, centerOfMassCap) * centerMassWeight;
        float weightedSensor      = ShapeDistanceReward(sensorGap, sensorCap) * sensorWeight;
        float weightedJointLimit  = (1f - JointsAtLimit()) * jointLimitWeight;

        float distanceReward =
            weightedRotation +
            weightedVelocity +
            weightedEndEffector +
            weightedCenterMass +
            weightedSensor;
        float reward = distanceReward + weightedJointLimit;

        // HACK _startCount used as Monitor does not like reset
        if (ShowMonitor && _startCount < 2)
        {
            // Total first, then the two sub-totals, then each weighted distance term.
            Monitor.Log("rewardHist", new[]
            {
                reward,
                distanceReward,
                weightedJointLimit,
                weightedRotation,
                weightedVelocity,
                weightedEndEffector,
                weightedCenterMass,
                weightedSensor,
            });
        }

        if (!_master.IgnorRewardUntilObservation)
        {
            AddReward(reward);
        }

        // Early termination: distance reward under the cutoff, unless in inference mode.
        if (distanceReward < 0.334f && _master.IsInferenceMode == false)
        {
            Done();
        }

        // If this agent is not done yet but the master is, finish the episode and
        // step the start animation index back by one (never below zero).
        if (!IsDone() && _master.IsDone())
        {
            Done();
            if (_master.StartAnimationIndex > 0)
            {
                _master.StartAnimationIndex--;
            }
        }

        FrameReward = reward;

        // Guard against dividing by a zero step count.
        var steps = GetStepCount();
        if (!(steps > 0))
        {
            steps = 1;
        }
        AverageReward = GetCumulativeReward() / (float)steps;
    }

    /// <summary>
    /// Converts a distance into a reward: the distance is clamped to [0, scale],
    /// inverted and normalized by <paramref name="scale"/>, then raised to the power
    /// <paramref name="scale"/>.
    /// </summary>
    private static float ShapeDistanceReward(float distance, float scale)
    {
        float clamped    = Mathf.Clamp(distance, 0f, scale);
        float normalized = (scale - clamped) / scale;
        return Mathf.Pow(normalized, scale);
    }
コード例 #2
0
    /// <summary>
    /// Applies <paramref name="vectorAction"/> to the muscles' normalized target rotations
    /// and computes the step reward as a weighted sum of exp(-distance) terms.
    /// Does nothing until lazy initialization has completed.
    /// </summary>
    /// <param name="vectorAction">
    /// Action values, consumed sequentially: one per non-locked angular axis
    /// (X, then Y, then Z) for each muscle in <c>_master.Muscles</c> order.
    /// </param>
    public override void AgentAction(float[] vectorAction)
    {
        if (!_hasLazyInitialized)
        {
            return;
        }

        _isDone = false;

        // The style animator is only notified when it is the local instance.
        if (_styleAnimator == _localStyleAnimator)
        {
            _styleAnimator.OnAgentAction();
        }
        _master.OnAgentAction();

        // Hand out action values to every unlocked rotational axis, in order.
        int actionIndex = 0;
        foreach (var muscle in _master.Muscles)
        {
            var joint = muscle.ConfigurableJoint;
            if (joint.angularXMotion != ConfigurableJointMotion.Locked)
            {
                muscle.TargetNormalizedRotationX = vectorAction[actionIndex];
                actionIndex++;
            }
            if (joint.angularYMotion != ConfigurableJointMotion.Locked)
            {
                muscle.TargetNormalizedRotationY = vectorAction[actionIndex];
                actionIndex++;
            }
            if (joint.angularZMotion != ConfigurableJointMotion.Locked)
            {
                muscle.TargetNormalizedRotationZ = vectorAction[actionIndex];
                actionIndex++;
            }
        }

        // Each term is weight * exp(-distance / scale). The scale factors were picked
        // empirically from the maximum distances reached by an untrained agent.
        // Terms weighted 0.0f are still evaluated but contribute nothing for finite inputs.
        float rotationTerm                  = 0.35f * Mathf.Exp(-(_master.RotationDistance / 16f));
        float centerOfMassVelocityTerm      = 0.1f * Mathf.Exp(-(_master.CenterOfMassVelocityDistance / 6f));
        float endEffectorTerm               = 0.15f * Mathf.Exp(-(_master.EndEffectorDistance / 1f));
        float endEffectorVelocityTerm       = 0.1f * Mathf.Exp(-(_master.EndEffectorVelocityDistance / 170f));
        float jointAngularVelocityTerm      = 0.1f * Mathf.Exp(-(_master.JointAngularVelocityDistance / 7000f));
        float jointAngularVelocityWorldTerm = 0.0f * Mathf.Exp(-(_master.JointAngularVelocityDistanceWorld / 7000f));
        float centerOfMassTerm              = 0.05f * Mathf.Exp(-(_master.CenterOfMassDistance / 0.3f));
        float angularMomentTerm             = 0.15f * Mathf.Exp(-(_master.AngularMomentDistance / 150.0f));
        float sensorTerm                    = 0.0f * Mathf.Exp(-(_master.SensorDistance / 1f));
        float jointsNotAtLimitTerm          = 0.0f * Mathf.Exp(-JointsAtLimit());

        float reward =
            rotationTerm +
            centerOfMassVelocityTerm +
            endEffectorTerm +
            endEffectorVelocityTerm +
            jointAngularVelocityTerm +
            jointAngularVelocityWorldTerm +
            centerOfMassTerm +
            angularMomentTerm +
            sensorTerm +
            jointsNotAtLimitTerm;

        if (!_master.IgnorRewardUntilObservation)
        {
            AddReward(reward);
        }

        // Early termination when the combined reward falls under one half.
        if (reward < 0.5f)
        {
            Done();
        }

        // If the done flag is still clear but the master is done, finish the episode
        // and step the start animation index back by one (never below zero).
        if (!_isDone && _master.IsDone())
        {
            Done();
            if (_master.StartAnimationIndex > 0)
            {
                _master.StartAnimationIndex--;
            }
        }

        FrameReward = reward;

        // Guard against dividing by a zero step count.
        var steps = GetStepCount();
        if (!(steps > 0))
        {
            steps = 1;
        }
        AverageReward = GetCumulativeReward() / (float)steps;
    }