/// <summary>
/// Writes the action vector into the muscles as normalized target rotations,
/// then computes this step's reward from the master's distance measurements
/// and decides whether the episode should end.
/// </summary>
/// <param name="vectorAction">
/// Action values, consumed sequentially: for every muscle, one value per
/// angular axis (X, then Y, then Z) whose joint motion is not locked.
/// </param>
/// <param name="textAction">Not read by this method.</param>
public override void AgentAction(float[] vectorAction, string textAction)
{
    // The style animator is only notified when it is the same object as _localStyleAnimator.
    if (_styleAnimator == _localStyleAnimator)
    {
        _styleAnimator.OnAgentAction();
    }
    _master.OnAgentAction();

    // Hand out action values to every unlocked axis, in muscle order.
    int actionIndex = 0;
    foreach (var muscle in _master.Muscles)
    {
        var joint = muscle.ConfigurableJoint;
        if (joint.angularXMotion != ConfigurableJointMotion.Locked)
        {
            muscle.TargetNormalizedRotationX = vectorAction[actionIndex++];
        }
        if (joint.angularYMotion != ConfigurableJointMotion.Locked)
        {
            muscle.TargetNormalizedRotationY = vectorAction[actionIndex++];
        }
        if (joint.angularZMotion != ConfigurableJointMotion.Locked)
        {
            muscle.TargetNormalizedRotationZ = vectorAction[actionIndex++];
        }
    }

    // The effort penalty is still evaluated here, but it is not part of the reward below.
    float effort = GetEffort();
    float effortPenalty = 0.05f * effort;

    // Weights applied to each normalized reward term.
    const float rotationWeight = .65f * .9f;
    const float velocityWeight = .1f * .9f;
    const float endEffectorWeight = .15f * .9f;
    const float centerOfMassWeight = .1f * .9f;
    const float sensorWeight = .1f * .9f;
    const float jointsNotAtLimitWeight = .09f;

    // Each distance is clamped to [0, scale], mapped to (scale - distance) / scale,
    // and then raised to the power of its scale.
    float rotationScale = (float)_master.BodyParts.Count;
    float rotationError = Mathf.Clamp(_master.RotationDistance, 0f, rotationScale);
    float rotationReward = Mathf.Pow((rotationScale - rotationError) / rotationScale, rotationScale);

    const float velocityScale = 3f;
    float velocityError = Mathf.Clamp(Mathf.Abs(_master.VelocityDistance), 0f, velocityScale);
    float velocityReward = Mathf.Pow((velocityScale - velocityError) / velocityScale, velocityScale);

    const float endEffectorScale = 8f;
    float endEffectorError = Mathf.Clamp(_master.EndEffectorDistance, 0f, endEffectorScale);
    float endEffectorReward = Mathf.Pow((endEffectorScale - endEffectorError) / endEffectorScale, endEffectorScale);

    const float centerOfMassScale = 5f;
    float centerOfMassError = Mathf.Clamp(_master.CenterOfMassDistance, 0f, centerOfMassScale);
    float centerOfMassReward = Mathf.Pow((centerOfMassScale - centerOfMassError) / centerOfMassScale, centerOfMassScale);

    const float sensorScale = 1f;
    float sensorError = Mathf.Clamp(_master.SensorDistance, 0f, sensorScale);
    float sensorReward = Mathf.Pow((sensorScale - sensorError) / sensorScale, sensorScale);

    float jointsNotAtLimitReward = 1f - JointsAtLimit();

    // Weighted contributions; the same values feed the reward and the monitor output.
    float weightedRotation = rotationReward * rotationWeight;
    float weightedVelocity = velocityReward * velocityWeight;
    float weightedEndEffector = endEffectorReward * endEffectorWeight;
    float weightedCenterOfMass = centerOfMassReward * centerOfMassWeight;
    float weightedSensor = sensorReward * sensorWeight;
    float weightedJointsNotAtLimit = jointsNotAtLimitReward * jointsNotAtLimitWeight;

    float distanceReward =
        weightedRotation +
        weightedVelocity +
        weightedEndEffector +
        weightedCenterOfMass +
        weightedSensor;
    float reward = distanceReward + weightedJointsNotAtLimit;

    // HACK: _startCount is used as Monitor does not like reset.
    if (ShowMonitor && _startCount < 2)
    {
        float[] rewardHistogram =
        {
            reward,
            distanceReward,
            weightedJointsNotAtLimit,
            weightedRotation,
            weightedVelocity,
            weightedEndEffector,
            weightedCenterOfMass,
            weightedSensor,
        };
        Monitor.Log("rewardHist", rewardHistogram);
    }

    if (!_master.IgnorRewardUntilObservation)
    {
        AddReward(reward);
    }

    // Outside inference mode, end the episode once the distance reward drops below the cutoff.
    const float distanceRewardCutoff = 0.334f;
    if (distanceReward < distanceRewardCutoff && _master.IsInferenceMode == false)
    {
        Done();
    }

    // If the agent is still running but the master reports done, end the episode
    // and move the start animation index back by one while it is above zero.
    if (!IsDone() && _master.IsDone())
    {
        Done();
        if (_master.StartAnimationIndex > 0)
        {
            _master.StartAnimationIndex--;
        }
    }

    FrameReward = reward;

    // Use at least one step so the average never divides by zero.
    var steps = GetStepCount();
    if (!(steps > 0))
    {
        steps = 1;
    }
    AverageReward = GetCumulativeReward() / (float)steps;
}
/// <summary>
/// Applies the action vector to the muscles and calculates the reward for
/// this step, ending the episode when the reward is too low or when the
/// master reports that it is done.
/// </summary>
/// <param name="vectorAction">
/// Action values, consumed sequentially: for every muscle, one value per
/// angular axis (X, then Y, then Z) whose joint motion is not locked.
/// </param>
public override void AgentAction(float[] vectorAction)
{
    // Skip the step entirely until lazy initialization has happened.
    if (!_hasLazyInitialized)
    {
        return;
    }

    _isDone = false;

    // The style animator is only notified when it is the same object as _localStyleAnimator.
    if (_styleAnimator == _localStyleAnimator)
    {
        _styleAnimator.OnAgentAction();
    }
    _master.OnAgentAction();

    // Hand out action values to every unlocked axis, in muscle order.
    int actionIndex = 0;
    foreach (var muscle in _master.Muscles)
    {
        var joint = muscle.ConfigurableJoint;
        if (joint.angularXMotion != ConfigurableJointMotion.Locked)
        {
            muscle.TargetNormalizedRotationX = vectorAction[actionIndex++];
        }
        if (joint.angularYMotion != ConfigurableJointMotion.Locked)
        {
            muscle.TargetNormalizedRotationY = vectorAction[actionIndex++];
        }
        if (joint.angularZMotion != ConfigurableJointMotion.Locked)
        {
            muscle.TargetNormalizedRotationZ = vectorAction[actionIndex++];
        }
    }

    // Every term has the form weight * exp(-distance / divisor). The divisors were
    // picked empirically from the maximum distances (e.g. MaxRotationDistance,
    // MaxVelocityDistance) achieved by an untrained agent.
    float rotationReward = 0.35f * Mathf.Exp(-(_master.RotationDistance / 16f));
    float centerOfMassVelocityReward = 0.1f * Mathf.Exp(-(_master.CenterOfMassVelocityDistance / 6f));
    float endEffectorReward = 0.15f * Mathf.Exp(-(_master.EndEffectorDistance / 1f));
    float endEffectorVelocityReward = 0.1f * Mathf.Exp(-(_master.EndEffectorVelocityDistance / 170f));
    float jointAngularVelocityReward = 0.1f * Mathf.Exp(-(_master.JointAngularVelocityDistance / 7000f));
    // Weighted by 0.0: evaluated, but multiplied by zero.
    float jointAngularVelocityRewardWorld = 0.0f * Mathf.Exp(-(_master.JointAngularVelocityDistanceWorld / 7000f));
    float centerMassReward = 0.05f * Mathf.Exp(-(_master.CenterOfMassDistance / 0.3f));
    float angularMomentReward = 0.15f * Mathf.Exp(-(_master.AngularMomentDistance / 150.0f));
    // Weighted by 0.0: evaluated, but multiplied by zero.
    float sensorReward = 0.0f * Mathf.Exp(-(_master.SensorDistance / 1f));
    float jointsNotAtLimitReward = 0.0f * Mathf.Exp(-JointsAtLimit());

    float reward =
        rotationReward
        + centerOfMassVelocityReward
        + endEffectorReward
        + endEffectorVelocityReward
        + jointAngularVelocityReward
        + jointAngularVelocityRewardWorld
        + centerMassReward
        + angularMomentReward
        + sensorReward
        + jointsNotAtLimitReward;

    if (!_master.IgnorRewardUntilObservation)
    {
        AddReward(reward);
    }

    // End the episode when the step reward falls below the cutoff.
    const float rewardCutoff = 0.5f;
    if (reward < rewardCutoff)
    {
        Done();
    }

    // NOTE(review): _isDone is presumably updated by Done(); confirm against its definition.
    // If still running but the master reports done, end the episode and move the
    // start animation index back by one while it is above zero.
    if (!_isDone && _master.IsDone())
    {
        Done();
        if (_master.StartAnimationIndex > 0)
        {
            _master.StartAnimationIndex--;
        }
    }

    FrameReward = reward;

    // Use at least one step so the average never divides by zero.
    var steps = GetStepCount();
    if (!(steps > 0))
    {
        steps = 1;
    }
    AverageReward = GetCumulativeReward() / (float)steps;
}