예제 #1
0
 /// <summary>
 /// Copy constructor: initializes this state from the field values of another state.
 /// </summary>
 /// <param name="s">Specifies the state whose field values are copied.</param>
 public CartPoleState(CartPoleState s)
 {
     // Each assignment is an independent field-to-field copy, so ordering does not matter.
     this.m_nSteps     = s.m_nSteps;
     this.m_dfForceMag = s.m_dfForceMag;

     this.m_dfX     = s.m_dfX;
     this.m_dfXDot  = s.m_dfXDot;

     this.m_dfTheta    = s.m_dfTheta;
     this.m_dfThetaDot = s.m_dfThetaDot;
 }
예제 #2
0
        /// <summary>
        /// Reset the gym: draw a new random starting state and clear the step counters.
        /// </summary>
        /// <param name="bGetLabel">Not used.</param>
        /// <returns>A tuple containing a clone of the new state, a reward of 1, and a done flag of false is returned.</returns>
        public Tuple <State, double, bool> Reset(bool bGetLabel)
        {
            // Every starting component is drawn from the same symmetric range.
            const double dfRange = 0.05;

            // The draws are made in the order x, x-dot, theta, theta-dot so the random sequence is consumed consistently.
            double dfStartX        = randomUniform(-dfRange, dfRange);
            double dfStartXDot     = randomUniform(-dfRange, dfRange);
            double dfStartTheta    = randomUniform(-dfRange, dfRange);
            double dfStartThetaDot = randomUniform(-dfRange, dfRange);

            // Clear the bookkeeping carried over from any previous episode.
            m_nStepsBeyondDone = null;
            m_nSteps           = 0;

            m_state = new CartPoleState(dfStartX, dfStartXDot, dfStartTheta, dfStartThetaDot);

            // Hand back a clone so the caller does not hold a reference to the gym's own state object.
            State stateClone = m_state.Clone();

            return new Tuple <State, double, bool>(stateClone, 1.0, false);
        }
예제 #3
0
        /// <summary>
        /// Advance the gym by one simulation step.
        /// </summary>
        /// <param name="nAction">Specifies the action to run on the gym; it is cast to ACTION and passed to processAction.</param>
        /// <param name="bGetLabel">Not used.</param>
        /// <param name="propExtra">Optionally, specifies extra parameters (not read by this method).</param>
        /// <returns>A tuple containing a clone of the state object that was current when the step began (stamped with the updated step count), the reward, and the done state is returned.</returns>
        public Tuple <State, double, bool> Step(int nAction, bool bGetLabel, PropertySet propExtra = null)
        {
            // Snapshot the state before the action is processed; the kinematic values below come from this copy.
            CartPoleState stateBefore = new CartPoleState(m_state);

            processAction((ACTION)nAction);

            double dfX0        = stateBefore.X;
            double dfXDot0     = stateBefore.XDot;
            double dfTheta0    = stateBefore.Theta;
            double dfThetaDot0 = stateBefore.ThetaDot;

            // The force is read from the live state, after the action has been processed.
            double dfForce = m_state.ForceMag;
            double dfCos   = Math.Cos(dfTheta0);
            double dfSin   = Math.Sin(dfTheta0);

            // Accelerations for this step (same arithmetic grouping as a single-expression form).
            double dfTemp     = (dfForce + m_dfPoleMassLength * dfThetaDot0 * dfThetaDot0 * dfSin) / m_dfTotalMass;
            double dfDenom    = m_dfLength * ((4.0 / 3.0) - m_dfMassPole * dfCos * dfCos / m_dfTotalMass);
            double dfThetaAcc = (m_dfGravity * dfSin - dfCos * dfTemp) / dfDenom;
            double dfXAcc     = dfTemp - m_dfPoleMassLength * dfThetaAcc * dfCos / m_dfTotalMass;

            // Explicit Euler update: every new value is derived from the pre-step values.
            double dfX1        = dfX0 + m_dfTau * dfXDot0;
            double dfXDot1     = dfXDot0 + m_dfTau * dfXAcc;
            double dfTheta1    = dfTheta0 + m_dfTau * dfThetaDot0;
            double dfThetaDot1 = dfThetaDot0 + m_dfTau * dfThetaAcc;

            // Keep a reference to the outgoing state object (this is what gets returned), then install the new state.
            CartPoleState stateOut = m_state;
            m_state = new CartPoleState(dfX1, dfXDot1, dfTheta1, dfThetaDot1);

            // The episode is done once the updated position or angle leaves its threshold band.
            bool bDone = dfX1 < -m_dfXThreshold || dfX1 > m_dfXThreshold ||
                         dfTheta1 < -m_dfThetaThreshold || dfTheta1 > m_dfThetaThreshold;

            double dfReward;

            if (!bDone)
            {
                dfReward = 1.0;
            }
            else if (m_nStepsBeyondDone.HasValue)
            {
                // Stepping again after done was already reported: warn once, then count and give no reward.
                if (m_nStepsBeyondDone.Value == 0)
                {
                    m_log.WriteLine("WARNING: You are calling 'step()' even though this environment has already returned done = True.  You should always call 'reset()'");
                }

                m_nStepsBeyondDone++;
                dfReward = 0.0;
            }
            else
            {
                // Pole just fell! The terminating step itself is still rewarded.
                m_nStepsBeyondDone = 0;
                dfReward           = 1.0;
            }

            m_nSteps++;
            m_nMaxSteps = Math.Max(m_nMaxSteps, m_nSteps);

            stateOut.Steps = m_nSteps;

            return new Tuple <State, double, bool>(stateOut.Clone(), dfReward, bDone);
        }