예제 #1
0
        /// <summary>
        /// Advances the stored <c>state</c> by one integration step of size <c>tau</c>
        /// using the given action, then reports the reward and termination flag.
        /// </summary>
        /// <param name="action">
        /// Action selector: <c>1</c> applies <c>+force_mag</c>, any other value applies
        /// <c>-force_mag</c>. Membership in <c>ActionSpace</c> is checked with
        /// <see cref="Debug.Assert(bool, string)"/> only.
        /// </param>
        /// <returns>
        /// A <c>Step</c> built from the updated state, the reward, the done flag and
        /// <c>null</c> as the fourth constructor argument. The reward is <c>1</c> while
        /// the episode is running and on the first step that reports done; it is
        /// <c>0</c> on every later call made without the counter being reset to <c>-1</c>.
        /// </returns>
        public override Step Step(int action)
        {
            Debug.Assert(ActionSpace.Contains(action), $"{action} ({action.GetType().Name}) invalid action for {GetType().Name} environment");

            // Unpack the four components of the previous state vector.
            var pos    = state.GetDouble(0);
            var vel    = state.GetDouble(1);
            var angle  = state.GetDouble(2);
            var angVel = state.GetDouble(3);

            // Signed push selected by the action.
            var push = action != 1 ? -force_mag : force_mag;

            var cosAngle = Math.Cos(angle);
            var sinAngle = Math.Sin(angle);

            // Shared term used by both acceleration formulas.
            var common = (push + polemass_length * angVel * angVel * sinAngle) / total_mass;
            var angAcc = (gravity * sinAngle - cosAngle * common) / (length * (4.0 / 3.0 - masspole * cosAngle * cosAngle / total_mass));
            var linAcc = common - polemass_length * angAcc * cosAngle / total_mass;

            // ReSharper disable once ConditionIsAlwaysTrueOrFalse
            if (kinematics_integrator == "euler")
            {
                // Explicit Euler: positions advance with the OLD velocities.
                pos    += tau * vel;
                vel    += tau * linAcc;
                angle  += tau * angVel;
                angVel += tau * angAcc;
            }
            else
            {
                // Semi-implicit Euler: velocities are updated first and the
                // positions then advance with the NEW velocities.
                vel    += tau * linAcc;
                pos    += tau * vel;
                angVel += tau * angAcc;
                angle  += tau * angVel;
            }

            state = np.array(pos, vel, angle, angVel);

            // The episode ends once either coordinate leaves its allowed band.
            var posOutOfRange   = pos < -x_threshold || pos > x_threshold;
            var angleOutOfRange = angle < -theta_threshold_radians || angle > theta_threshold_radians;
            var done            = posOutOfRange || angleOutOfRange;

            // Default reward covers both "still running" and "just finished".
            float reward = 1.0f;

            if (done)
            {
                if (steps_beyond_done == -1)
                {
                    // First step that reports done: start counting extra steps.
                    steps_beyond_done = 0;
                }
                else
                {
                    if (steps_beyond_done == 0)
                    {
                        // Warn once, on the first call made after done was already returned.
                        Console.WriteLine("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.");
                        // TODO logging: route this warning through a logger (logger.warn) instead of the console.
                    }

                    steps_beyond_done += 1;
                    reward             = 0.0f;
                }
            }

            return new Step(state, reward, done, null);
        }
예제 #2
0
 /// <summary>
 /// Chooses the next action by drawing a sample from <c>ActionSpace</c>.
 /// </summary>
 /// <param name="state">Current observation; not consulted by this implementation.</param>
 /// <returns>The value returned by <c>ActionSpace.Sample()</c>.</returns>
 public override Tensor GetNextAction(Tensor state) => ActionSpace.Sample();