/// <summary>
        /// Steps the gym one or more times with the given action and returns the state produced by the final step.
        /// </summary>
        /// <remarks>
        /// The same action is applied on every step; only the result of the last step is rendered, stored in
        /// <c>m_state</c> and returned.  A value of <paramref name="nSteps"/> below 1 still runs a single step.
        /// </remarks>
        /// <param name="nAction">Specifies the action to run.</param>
        /// <param name="nSteps">Specifies the number of steps to run the action.</param>
        /// <returns>A CurrentState built from the data (as double[]), the reward and the terminal flag of the final step.</returns>
        /// <exception cref="InvalidOperationException">Thrown when <c>m_igym</c> is null.</exception>
        public CurrentState Step(int nAction, int nSteps = 1)
        {
            if (m_igym == null)
            {
                throw new InvalidOperationException("You must call 'Initialize' first!");
            }

            // Run all but the last step; the intermediate results are intentionally discarded.
            for (int nStep = 1; nStep < nSteps; nStep++)
            {
                m_igym.Step(nAction);
            }

            // The final step supplies the state, reward and terminal flag that are reported.
            Tuple<State, double, bool> state = m_igym.Step(nAction);
            double dfReward = state.Item2;
            bool bDone = state.Item3;

            // A non-negative UI id is treated as 'the UI is open'.
            bool bIsOpen = (m_nUiId >= 0);

            // NOTE(review): 512, 512 are presumably the render width and height - confirm against IXMyCaffeGym.Render.
            Tuple<Bitmap, SimpleDatum> data = m_igym.Render(bIsOpen, 512, 512, true);

            int nDataLen = 0;
            SimpleDatum sd = state.Item1.GetData(false, out nDataLen);
            Observation obs = new Observation(data.Item1, ImageData.GetImage(data.Item2), m_igym.RequiresDisplayImage, sd.GetData<double>(), dfReward, bDone);

            if (bIsOpen)
            {
                // Draw the action distributions onto the display image when they are available.
                if (m_rgrgActionDistributions != null)
                {
                    overlay(obs.ImageDisplay, m_rgrgActionDistributions);
                }

                m_gymui.Render(m_nUiId, obs);
                Thread.Sleep(m_igym.UiDelay);
            }

            if (m_igym.SelectedDataType == DATA_TYPE.BLOB)
            {
                // For BLOB data the rendered datum replaces the raw state datum.
                sd = data.Item2;
            }
            else
            {
                // NOTE(review): presumably trims the datum to the nDataLen values reported by GetData - confirm.
                sd.Clip(nDataLen, null, nDataLen, null);
            }

            m_state = new Tuple<SimpleDatum, double, bool>(sd, dfReward, bDone);

            return new CurrentState(m_state.Item1.GetData<double>(), m_state.Item2, m_state.Item3);
        }
Пример #2
0
        /// <summary>
        /// Fills <c>e.State</c> from the gym, resetting and/or stepping the gym as requested by <paramref name="e"/>.
        /// </summary>
        /// <param name="e">Specifies the request (<c>Reset</c>, <c>Action</c>) and receives the resulting <c>State</c> and log output.</param>
        /// <returns>Always returns <c>true</c>.</returns>
        /// <exception cref="InvalidOperationException">Thrown when neither a reset nor a non-negative action was requested, so no state is available.</exception>
        protected override bool getData(GetDataArgs e)
        {
            Tuple<State, double, bool> state = null;

            if (e.Reset)
            {
                if (m_firststate != null)
                {
                    // Use the pending first state once, then clear it so later resets go to the gym.
                    state = m_firststate;
                    m_firststate = null;
                }
                else
                {
                    state = m_igym.Reset();
                }
            }

            // A non-negative action steps the gym; its result replaces any state obtained from the reset above.
            if (e.Action >= 0)
            {
                state = m_igym.Step(e.Action);
            }

            // Without a reset or a valid action there is nothing to report; fail with a clear message
            // instead of a NullReferenceException on the dereference below.
            if (state == null)
            {
                throw new InvalidOperationException("getData requires either a reset or a non-negative action.");
            }

            int nDataLen = 0;
            SimpleDatum stateData = state.Item1.GetData(false, out nDataLen);

            e.State = new StateBase(m_igym.GetActionSpace().Count());
            // The reward returned by the gym (state.Item2) is not forwarded; 0 is always reported here.
            // NOTE(review): confirm this is intentional.
            e.State.Reward = 0;
            e.State.Data = stateData;
            e.State.Done = state.Item3;
            e.State.IsValid = true;

            // Report progress only when more than one second has elapsed on m_sw since its last restart.
            if (m_sw.Elapsed.TotalMilliseconds > 1000)
            {
                int nMax = (int)GetProperty("GlobalMaxIterations");
                int nIteration = (int)GetProperty("GlobalIteration");
                // Avoid dividing by zero when no maximum iteration count is set.
                double dfPct = (nMax == 0) ? 0 : (double)nIteration / (double)nMax;

                e.OutputLog.Progress = dfPct;
                e.OutputLog.WriteLine("(" + dfPct.ToString("P") + ") Global Iteration #" + nIteration.ToString());
                m_sw.Restart();
            }

            return true;
        }
Пример #3
0
        /// <summary>
        /// Fills <c>e.State</c> from the gym, resetting and/or stepping the gym as requested by <paramref name="e"/>,
        /// and renders the resulting observation to the gym user interface when one is available.
        /// </summary>
        /// <param name="e">Specifies the request (<c>Reset</c>, <c>Action</c>) and receives the resulting <c>State</c> and log output.</param>
        /// <returns>Always returns <c>true</c>.</returns>
        /// <exception cref="InvalidOperationException">Thrown when neither a reset nor a non-negative action was requested, so no state is available.</exception>
        protected override bool getData(GetDataArgs e)
        {
            Tuple<State, double, bool> state = null;

            if (e.Reset)
            {
                state = m_igym.Reset();
            }

            // A non-negative action steps the gym; its result replaces any state obtained from the reset above.
            if (e.Action >= 0)
            {
                state = m_igym.Step(e.Action);
            }

            // Without a reset or a valid action there is nothing to report; fail with a clear message
            // before rendering instead of hitting a NullReferenceException on the dereference below.
            if (state == null)
            {
                throw new InvalidOperationException("getData requires either a reset or a non-negative action.");
            }

            // A non-negative UI id is treated as 'the UI is open'.
            bool bIsOpen = (m_nUiId >= 0);

            // NOTE(review): 512, 512 are presumably the render width and height - confirm against the gym's Render.
            Tuple<Bitmap, SimpleDatum> data = m_igym.Render(bIsOpen, 512, 512, true);

            int nDataLen = 0;
            SimpleDatum stateData = state.Item1.GetData(false, out nDataLen);
            Observation obs = new Observation(data.Item1, ImageData.GetImage(data.Item2), m_igym.RequiresDisplayImage, stateData.RealData, state.Item2, state.Item3);

            e.State = new StateBase(m_igym.GetActionSpace().Count());
            e.State.Reward = obs.Reward;
            // The rendered datum, not the raw state datum, is handed back as the state data.
            e.State.Data = data.Item2;
            e.State.Done = obs.Done;
            e.State.IsValid = true;

            if (m_gymui != null && m_nUiId >= 0)
            {
                m_gymui.Render(m_nUiId, obs);
                Thread.Sleep(m_igym.UiDelay);
            }

            // Report progress only when more than one second has elapsed on m_sw since its last restart.
            if (m_sw.Elapsed.TotalMilliseconds > 1000)
            {
                // Avoid dividing by zero when no maximum episode count is set.
                double dfPct = (GlobalEpisodeMax == 0) ? 0 : (double)GlobalEpisodeCount / (double)GlobalEpisodeMax;

                e.OutputLog.Progress = dfPct;
                e.OutputLog.WriteLine("(" + dfPct.ToString("P") + ") Global Episode #" + GlobalEpisodeCount.ToString() + "  Global Reward = " + GlobalRewards.ToString() + " Exploration Rate = " + ExplorationRate.ToString("P") + " Optimal Selection Rate = " + OptimalSelectionRate.ToString("P"));
                m_sw.Restart();
            }

            return true;
        }