/// <summary>
/// Steps the gym one or more steps with a given action.
/// </summary>
/// <remarks>
/// The same action is applied on every step. The results of all but the final step are
/// discarded, and a single step is still run when <paramref name="nSteps"/> is less than 1.
/// The final step is rendered, optionally displayed in the gym UI, and cached in m_state.
/// </remarks>
/// <param name="nAction">Specifies the action to run.</param>
/// <param name="nSteps">Specifies the number of steps to run the action.</param>
/// <returns>A CurrentState built from the final step's data (as a double[]), its reward and its terminal flag is returned.</returns>
/// <exception cref="InvalidOperationException">Thrown when the underlying gym is null, i.e. 'Initialize' has not been called.</exception>
public CurrentState Step(int nAction, int nSteps = 1)
{
    if (m_igym == null)
    {
        throw new InvalidOperationException("You must call 'Initialize' first!");
    }

    // Run the intermediate steps; only the outcome of the last step below is kept.
    for (int i = 0; i < nSteps - 1; i++)
    {
        m_igym.Step(nAction);
    }

    Tuple<State, double, bool> state = m_igym.Step(nAction);

    // The UI is considered open whenever the UI id is non-negative.
    bool bIsOpen = (m_nUiId >= 0);
    Tuple<Bitmap, SimpleDatum> data = m_igym.Render(bIsOpen, 512, 512, true);

    int nDataLen = 0;
    SimpleDatum sd = state.Item1.GetData(false, out nDataLen);

    Observation obs = new Observation(data.Item1, ImageData.GetImage(data.Item2), m_igym.RequiresDisplayImage, sd.GetData<double>(), state.Item2, state.Item3);

    if (bIsOpen)
    {
        if (m_rgrgActionDistributions != null)
        {
            overlay(obs.ImageDisplay, m_rgrgActionDistributions);
        }

        // Skip the display (and its delay) rather than fail with a null reference
        // when the UI proxy is not available.
        if (m_gymui != null)
        {
            m_gymui.Render(m_nUiId, obs);
            Thread.Sleep(m_igym.UiDelay);
        }
    }

    // BLOB gyms return the rendered datum; all other data types return the
    // state datum after clipping it with the length reported by GetData.
    if (m_igym.SelectedDataType == DATA_TYPE.BLOB)
    {
        sd = data.Item2;
    }
    else
    {
        sd.Clip(nDataLen, null, nDataLen, null);
    }

    m_state = new Tuple<SimpleDatum, double, bool>(sd, state.Item2, state.Item3);

    return new CurrentState(m_state.Item1.GetData<double>(), m_state.Item2, m_state.Item3);
}
/// <summary>
/// Fills the state of the GetDataArgs by resetting and/or stepping the gym.
/// </summary>
/// <remarks>
/// On a reset, a cached first state (m_firststate) is consumed when present; otherwise the gym is reset.
/// When the action is non-negative the gym is then stepped, and the step result replaces any reset state.
/// Progress is written to the output log at most once per 1000 ms of elapsed time on m_sw.
/// </remarks>
/// <param name="e">Specifies the arguments: 'Reset' and 'Action' are read, 'State' and 'OutputLog' are written.</param>
/// <returns>Always returns <i>true</i>.</returns>
/// <exception cref="InvalidOperationException">Thrown when neither a reset nor a non-negative action is requested, so no state is available.</exception>
protected override bool getData(GetDataArgs e)
{
    Tuple<State, double, bool> state = null;

    if (e.Reset)
    {
        if (m_firststate != null)
        {
            // Use the cached first state once, then clear it so later resets hit the gym.
            state = m_firststate;
            m_firststate = null;
        }
        else
        {
            state = m_igym.Reset();
        }
    }

    if (e.Action >= 0)
    {
        state = m_igym.Step(e.Action);
    }

    // Without a reset or a valid action there is nothing to report; fail with a
    // descriptive error instead of a null reference on 'state' below.
    if (state == null)
    {
        throw new InvalidOperationException("No gym state is available: getData requires either a reset or a non-negative action.");
    }

    int nDataLen = 0;
    SimpleDatum stateData = state.Item1.GetData(false, out nDataLen);

    e.State = new StateBase(m_igym.GetActionSpace().Count());
    // NOTE(review): the reward from the gym (state.Item2) is not forwarded here; the
    // reward is always reported as 0 - confirm this is intended.
    e.State.Reward = 0;
    e.State.Data = stateData;
    e.State.Done = state.Item3;
    e.State.IsValid = true;

    if (m_sw.Elapsed.TotalMilliseconds > 1000)
    {
        int nMax = (int)GetProperty("GlobalMaxIterations");
        int nIteration = (int)GetProperty("GlobalIteration");
        // Guard the division when no maximum iteration count is set.
        double dfPct = (nMax == 0) ? 0 : (double)nIteration / (double)nMax;

        e.OutputLog.Progress = dfPct;
        e.OutputLog.WriteLine("(" + dfPct.ToString("P") + ") Global Iteration #" + nIteration.ToString());
        m_sw.Restart();
    }

    return true;
}
/// <summary>
/// Fills the state of the GetDataArgs by resetting and/or stepping the gym, rendering the
/// result and optionally displaying it in the gym UI.
/// </summary>
/// <remarks>
/// When the action is non-negative the gym is stepped, and the step result replaces any reset state.
/// The returned state data is the rendered datum (not the raw state datum), and the reward and
/// done flag are taken from the gym result.
/// Progress is written to the output log at most once per 1000 ms of elapsed time on m_sw.
/// </remarks>
/// <param name="e">Specifies the arguments: 'Reset' and 'Action' are read, 'State' and 'OutputLog' are written.</param>
/// <returns>Always returns <i>true</i>.</returns>
/// <exception cref="InvalidOperationException">Thrown when neither a reset nor a non-negative action is requested, so no state is available.</exception>
protected override bool getData(GetDataArgs e)
{
    Tuple<State, double, bool> state = null;

    if (e.Reset)
    {
        state = m_igym.Reset();
    }

    if (e.Action >= 0)
    {
        state = m_igym.Step(e.Action);
    }

    // Without a reset or a valid action there is nothing to report; fail before
    // rendering with a descriptive error instead of a null reference on 'state' below.
    if (state == null)
    {
        throw new InvalidOperationException("No gym state is available: getData requires either a reset or a non-negative action.");
    }

    // The UI is considered open whenever the UI id is non-negative.
    bool bIsOpen = (m_nUiId >= 0);
    Tuple<Bitmap, SimpleDatum> data = m_igym.Render(bIsOpen, 512, 512, true);

    int nDataLen = 0;
    SimpleDatum stateData = state.Item1.GetData(false, out nDataLen);

    Observation obs = new Observation(data.Item1, ImageData.GetImage(data.Item2), m_igym.RequiresDisplayImage, stateData.RealData, state.Item2, state.Item3);

    e.State = new StateBase(m_igym.GetActionSpace().Count());
    e.State.Reward = obs.Reward;
    e.State.Data = data.Item2;
    e.State.Done = obs.Done;
    e.State.IsValid = true;

    if (m_gymui != null && bIsOpen)
    {
        m_gymui.Render(m_nUiId, obs);
        Thread.Sleep(m_igym.UiDelay);
    }

    if (m_sw.Elapsed.TotalMilliseconds > 1000)
    {
        // Guard the division when no maximum episode count is set.
        double dfPct = (GlobalEpisodeMax == 0) ? 0 : (double)GlobalEpisodeCount / (double)GlobalEpisodeMax;

        e.OutputLog.Progress = dfPct;
        e.OutputLog.WriteLine("(" + dfPct.ToString("P") + ") Global Episode #" + GlobalEpisodeCount.ToString() + " Global Reward = " + GlobalRewards.ToString() + " Exploration Rate = " + ExplorationRate.ToString("P") + " Optimal Selection Rate = " + OptimalSelectionRate.ToString("P"));
        m_sw.Restart();
    }

    return true;
}