/// <summary> /// Initializes the agent. Can be safely called multiple times. /// </summary> public void LazyInitialize() { if (m_Initialized) { return; } m_Initialized = true; // Grab the "static" properties for the Agent. m_EpisodeId = EpisodeIdCounter.GetEpisodeId(); m_PolicyFactory = GetComponent <BehaviorParameters>(); m_Info = new AgentInfo(); m_Action = new AgentAction(); sensors = new List <ISensor>(); Academy.Instance.AgentIncrementStep += AgentIncrementStep; Academy.Instance.AgentSendState += SendInfo; Academy.Instance.DecideAction += DecideAction; Academy.Instance.AgentAct += AgentStep; Academy.Instance.AgentForceReset += _AgentReset; m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic); ResetData(); Initialize(); InitializeSensors(); // The first time the Academy resets, all Agents in the scene will be // forced to reset through the <see cref="AgentForceReset"/> event. // To avoid the Agent resetting twice, the Agents will not begin their // episode when initializing until after the Academy had its first reset. if (Academy.Instance.TotalStepCount != 0) { OnEpisodeBegin(); } }
void NotifyAgentDone(DoneReason doneReason) { m_Info.episodeId = m_EpisodeId; m_Info.reward = m_Reward; m_Info.done = true; m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached; // Request the last decision with no callbacks // We request a decision so Python knows the Agent is done immediately m_Brain?.RequestDecision(m_Info, sensors); // We also have to write any to any DemonstationStores so that they get the "done" flag. foreach (var demoWriter in DemonstrationWriters) { demoWriter.Record(m_Info, sensors); } if (doneReason != DoneReason.Disabled) { // We don't want to udpate the reward stats when the Agent is disabled, because this will make // the rewards look lower than they actually are during shutdown. UpdateRewardStats(); } // The Agent is done, so we give it a new episode Id m_EpisodeId = EpisodeIdCounter.GetEpisodeId(); m_Reward = 0f; m_CumulativeReward = 0f; m_RequestAction = false; m_RequestDecision = false; }
/// <summary> /// Initializes the agent. Can be safely called multiple times. /// </summary> public void LazyInitialize() { if (m_Initialized) { return; } m_Initialized = true; // Grab the "static" properties for the Agent. m_EpisodeId = EpisodeIdCounter.GetEpisodeId(); m_PolicyFactory = GetComponent <BehaviorParameters>(); m_Info = new AgentInfo(); m_Action = new AgentAction(); sensors = new List <ISensor>(); Academy.Instance.AgentIncrementStep += AgentIncrementStep; Academy.Instance.AgentSendState += SendInfo; Academy.Instance.DecideAction += DecideAction; Academy.Instance.AgentAct += AgentStep; Academy.Instance.AgentForceReset += _AgentReset; m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic); ResetData(); InitializeAgent(); InitializeSensors(); }
/// MonoBehaviour function that is called when the attached GameObject /// becomes enabled or active. void OnEnable() { m_EpisodeId = EpisodeIdCounter.GetEpisodeId(); OnEnableHelper(); m_Recorder = GetComponent <DemonstrationRecorder>(); }
void NotifyAgentDone(bool maxStepReached = false) { m_Info.reward = m_Reward; m_Info.done = true; m_Info.maxStepReached = maxStepReached; // Request the last decision with no callbacks // We request a decision so Python knows the Agent is done immediately m_Brain?.RequestDecision(m_Info, sensors, (a) => {}); // The Agent is done, so we give it a new episode Id m_EpisodeId = EpisodeIdCounter.GetEpisodeId(); m_Reward = 0f; m_CumulativeReward = 0f; m_RequestAction = false; m_RequestDecision = false; }
void NotifyAgentDone(bool maxStepReached = false) { m_Info.reward = m_Reward; m_Info.done = true; m_Info.maxStepReached = maxStepReached; // Request the last decision with no callbacks // We request a decision so Python knows the Agent is done immediately m_Brain?.RequestDecision(m_Info, sensors); if (m_Recorder != null && m_Recorder.record && Application.isEditor) { m_Recorder.WriteExperience(m_Info, sensors); } UpdateRewardStats(); // The Agent is done, so we give it a new episode Id m_EpisodeId = EpisodeIdCounter.GetEpisodeId(); m_Reward = 0f; m_CumulativeReward = 0f; m_RequestAction = false; m_RequestDecision = false; }
void NotifyAgentDone(bool maxStepReached = false) { m_Info.reward = m_Reward; m_Info.done = true; m_Info.maxStepReached = maxStepReached; // Request the last decision with no callbacks // We request a decision so Python knows the Agent is done immediately m_Brain?.RequestDecision(m_Info, sensors); // We also have to write any to any DemonstationStores so that they get the "done" flag. foreach (var demoWriter in DemonstrationWriters) { demoWriter.Record(m_Info, sensors); } UpdateRewardStats(); // The Agent is done, so we give it a new episode Id m_EpisodeId = EpisodeIdCounter.GetEpisodeId(); m_Reward = 0f; m_CumulativeReward = 0f; m_RequestAction = false; m_RequestDecision = false; }