/// <summary>
/// Applies <paramref name="action"/> to the stored state, integrates the four state values
/// forward by one <c>tau</c> increment, and reports the new state together with the reward
/// and the termination flag.
/// </summary>
/// <param name="action">
/// Action to apply. A value of <c>1</c> selects <c>+force_mag</c>; any other value selects
/// <c>-force_mag</c>. Membership in <c>ActionSpace</c> is checked with a debug assertion only.
/// </param>
/// <returns>
/// A <c>Step</c> constructed from the updated state, the reward (1.0 while the episode is
/// running and on the step that terminates it, 0.0 on any later step) and the done flag;
/// the last constructor argument is passed as <c>null</c>.
/// </returns>
public override Step Step(int action)
{
    Debug.Assert(ActionSpace.Contains(action), $"{action} ({action.GetType().Name}) invalid action for {GetType().Name} environment");

    // Unpack the values stored by the previous step.
    var position = state.GetDouble(0);
    var velocity = state.GetDouble(1);
    var angle = state.GetDouble(2);
    var angularVelocity = state.GetDouble(3);

    var push = action == 1 ? force_mag : -force_mag;
    var cosAngle = Math.Cos(angle);
    var sinAngle = Math.Sin(angle);

    // Accelerations derived from the current (pre-update) state. Operand order is kept
    // as-is so floating-point results stay bit-identical.
    var shared = (push + polemass_length * angularVelocity * angularVelocity * sinAngle) / total_mass;
    var angularAcc = (gravity * sinAngle - cosAngle * shared) / (length * (4.0 / 3.0 - masspole * cosAngle * cosAngle / total_mass));
    var linearAcc = shared - polemass_length * angularAcc * cosAngle / total_mass;

    // ReSharper disable once ConditionIsAlwaysTrueOrFalse
    if (kinematics_integrator == "euler")
    {
        // Explicit Euler: each value advances using the rate from before this step.
        position += tau * velocity;
        velocity += tau * linearAcc;
        angle += tau * angularVelocity;
        angularVelocity += tau * angularAcc;
    }
    else
    {
        // Semi-implicit Euler: rates are advanced first, then used to advance the values.
        velocity += tau * linearAcc;
        position += tau * velocity;
        angularVelocity += tau * angularAcc;
        angle += tau * angularVelocity;
    }

    state = np.array(position, velocity, angle, angularVelocity);

    // Written as "outside the band" comparisons so a NaN value never triggers termination.
    var positionOutOfRange = position < -x_threshold || position > x_threshold;
    var angleOutOfRange = angle < -theta_threshold_radians || angle > theta_threshold_radians;
    var done = positionOutOfRange || angleOutOfRange;

    // Reward is 1 unless we are stepping past an episode that already reported done.
    var reward = 1.0f;
    if (done)
    {
        if (steps_beyond_done == -1)
        {
            // First step on which termination is reported: start counting, keep the reward.
            steps_beyond_done = 0;
        }
        else
        {
            if (steps_beyond_done == 0)
            {
                Console.WriteLine("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.");
                //todo logging: logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.");
            }

            steps_beyond_done++;
            reward = 0.0f;
        }
    }

    return new Step(state, reward, done, null);
}
/// <summary>
/// Chooses the next action by drawing a sample from <c>ActionSpace</c>.
/// </summary>
/// <param name="state">Not used by this implementation; the sample does not depend on it.</param>
/// <returns>The value returned by <c>ActionSpace.Sample()</c>.</returns>
public override Tensor GetNextAction(Tensor state) => ActionSpace.Sample();