/// <summary>
/// Copy constructor: initializes this state with the field values of another state.
/// </summary>
/// <param name="s">Specifies the state whose position, velocity, angle, angular velocity, force magnitude and step count are copied.</param>
public CartPoleState(CartPoleState s)
{
    // Cart position and velocity.
    this.m_dfX = s.m_dfX;
    this.m_dfXDot = s.m_dfXDot;

    // Pole angle and angular velocity.
    this.m_dfTheta = s.m_dfTheta;
    this.m_dfThetaDot = s.m_dfThetaDot;

    // Force magnitude and step counter carried by the state.
    this.m_dfForceMag = s.m_dfForceMag;
    this.m_nSteps = s.m_nSteps;
}
/// <summary>
/// Reset the gym: a new state is created with each of its four values drawn
/// via randomUniform(-0.05, 0.05), and the step counters are cleared.
/// </summary>
/// <param name="bGetLabel">Not used.</param>
/// <returns>A tuple containing a clone of the new state, a reward of 1, and a done flag of false is returned.</returns>
public Tuple<State, double, bool> Reset(bool bGetLabel)
{
    const double dfSpread = 0.05;

    // The draw order (x, x-dot, theta, theta-dot) is kept fixed so that the
    // sequence of random values maps onto the same fields every time.
    double dfStartX = randomUniform(-dfSpread, dfSpread);
    double dfStartXDot = randomUniform(-dfSpread, dfSpread);
    double dfStartTheta = randomUniform(-dfSpread, dfSpread);
    double dfStartThetaDot = randomUniform(-dfSpread, dfSpread);

    // Clear the bookkeeping used by Step().
    m_nStepsBeyondDone = null;
    m_nSteps = 0;

    m_state = new CartPoleState(dfStartX, dfStartXDot, dfStartTheta, dfStartThetaDot);

    State stateCopy = m_state.Clone();
    return new Tuple<State, double, bool>(stateCopy, 1.0, false);
}
/// <summary>
/// Step the gym one step in its simulation: the action is processed, the cart and pole
/// values are advanced by one forward-Euler step of size m_dfTau, and the reward and
/// done flag are computed from the advanced position and angle.
/// </summary>
/// <remarks>
/// The state returned in the tuple is a clone of the object that m_state referred to
/// just before the newly integrated state was stored (with its Steps property updated),
/// not the newly integrated state itself.
/// </remarks>
/// <param name="nAction">Specifies the action to run on the gym (cast to ACTION and passed to processAction).</param>
/// <param name="bGetLabel">Not used.</param>
/// <param name="propExtra">Optionally, specifies extra parameters (not read by this method).</param>
/// <returns>A tuple containing state data, the reward, and the done state is returned.</returns>
public Tuple<State, double, bool> Step(int nAction, bool bGetLabel, PropertySet propExtra = null)
{
    // Copy the state before the action is processed; the kinematic values
    // used below are read from this copy.
    CartPoleState statePrev = new CartPoleState(m_state);

    processAction((ACTION)nAction);

    double dfPos = statePrev.X;
    double dfVel = statePrev.XDot;
    double dfAngle = statePrev.Theta;
    double dfAngVel = statePrev.ThetaDot;

    // The force is read from m_state after processAction has run.
    double dfForce = m_state.ForceMag;

    double dfCos = Math.Cos(dfAngle);
    double dfSin = Math.Sin(dfAngle);

    // Accelerations of the pole angle and of the cart.
    double dfTemp = (dfForce + m_dfPoleMassLength * dfAngVel * dfAngVel * dfSin) / m_dfTotalMass;
    double dfAngAcc = (m_dfGravity * dfSin - dfCos * dfTemp) / (m_dfLength * ((4.0 / 3.0) - m_dfMassPole * dfCos * dfCos / m_dfTotalMass));
    double dfAcc = dfTemp - m_dfPoleMassLength * dfAngAcc * dfCos / m_dfTotalMass;

    // Forward-Euler update: every new value is built from the pre-step values.
    double dfNextPos = dfPos + m_dfTau * dfVel;
    double dfNextVel = dfVel + m_dfTau * dfAcc;
    double dfNextAngle = dfAngle + m_dfTau * dfAngVel;
    double dfNextAngVel = dfAngVel + m_dfTau * dfAngAcc;

    // Keep a reference to the outgoing state object before replacing it.
    CartPoleState stateOut = m_state;
    m_state = new CartPoleState(dfNextPos, dfNextVel, dfNextAngle, dfNextAngVel);

    // The episode is done once the cart position or the pole angle leaves its threshold band.
    bool bDone = dfNextPos < -m_dfXThreshold
              || dfNextPos > m_dfXThreshold
              || dfNextAngle < -m_dfThetaThreshold
              || dfNextAngle > m_dfThetaThreshold;

    double dfReward;

    if (bDone && m_nStepsBeyondDone.HasValue)
    {
        // Stepping again after done was already reported: warn on the first
        // such call only, count it, and give no reward.
        if (m_nStepsBeyondDone.Value == 0)
        {
            m_log.WriteLine("WARNING: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()'");
        }

        m_nStepsBeyondDone++;
        dfReward = 0.0;
    }
    else
    {
        // Either still running, or the pole just fell on this very step;
        // both cases earn the full reward.
        if (bDone)
        {
            m_nStepsBeyondDone = 0;
        }

        dfReward = 1.0;
    }

    m_nSteps++;
    m_nMaxSteps = Math.Max(m_nMaxSteps, m_nSteps);
    stateOut.Steps = m_nSteps;

    return new Tuple<State, double, bool>(stateOut.Clone(), dfReward, bDone);
}