/// <summary>
/// Sends one final, terminal AgentInfo to the Brain and any demonstration
/// writers, then resets the per-episode bookkeeping and assigns a new episode id.
/// </summary>
/// <param name="doneReason">Why the episode ended (Done, MaxStepReached, or Disabled).</param>
void NotifyAgentDone(DoneReason doneReason)
{
    m_Info.episodeId = m_EpisodeId;
    m_Info.reward = m_Reward;
    m_Info.done = true;
    m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
    // Request the last decision with no callbacks
    // We request a decision so Python knows the Agent is done immediately
    m_Brain?.RequestDecision(m_Info, sensors);

    // We also have to write to any DemonstrationWriters so that they get the "done" flag.
    foreach (var demoWriter in DemonstrationWriters)
    {
        demoWriter.Record(m_Info, sensors);
    }

    if (doneReason != DoneReason.Disabled)
    {
        // We don't want to update the reward stats when the Agent is disabled, because this will make
        // the rewards look lower than they actually are during shutdown.
        UpdateRewardStats();
    }

    // The Agent is done, so we give it a new episode Id
    m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
    m_Reward = 0f;
    m_CumulativeReward = 0f;
    m_RequestAction = false;
    m_RequestDecision = false;
}
/// <summary>
/// Collects the current observations, snapshots the Agent's episode state into
/// m_Info, and forwards everything to the linked Brain to request a decision.
/// Optionally records the experience when a Recorder is active in the editor.
/// </summary>
void SendInfoToBrain()
{
    if (m_Brain == null)
    {
        return;
    }

    m_Info.storedVectorActions = m_Action.vectorActions;
    m_ActionMasker.ResetMask();
    UpdateSensors();
    using (TimerStack.Instance.Scoped("CollectObservations"))
    {
        CollectObservations();
    }
    m_Info.actionMasks = m_ActionMasker.GetMask();

    // Snapshot the episode state that accompanies this decision request.
    m_Info.id = m_Id;
    m_Info.maxStepReached = m_MaxStepReached;
    m_Info.done = m_Done;
    m_Info.reward = m_Reward;

    m_Brain.RequestDecision(m_Info, sensors, UpdateAgentAction);

    var shouldRecord = m_Recorder != null && m_Recorder.record && Application.isEditor;
    if (shouldRecord)
    {
        m_Recorder.WriteExperience(m_Info, sensors);
    }
}
/// <summary>
/// Gathers observations, fills in the mid-episode AgentInfo, and asks the
/// linked Brain for the next decision. The experience is also mirrored to any
/// attached demonstration writers.
/// </summary>
void SendInfoToBrain()
{
    if (m_Brain == null)
    {
        return;
    }

    m_Info.storedVectorActions = m_Action.vectorActions;
    m_ActionMasker.ResetMask();
    UpdateSensors();
    using (TimerStack.Instance.Scoped("CollectObservations"))
    {
        CollectObservations(collectObservationsSensor, m_ActionMasker);
    }
    m_Info.actionMasks = m_ActionMasker.GetMask();

    // This is a mid-episode request: done/maxStepReached are always cleared here.
    m_Info.episodeId = m_EpisodeId;
    m_Info.maxStepReached = false;
    m_Info.done = false;
    m_Info.reward = m_Reward;

    m_Brain.RequestDecision(m_Info, sensors);

    // Mirror the AgentInfo and sensors to every attached demonstration writer.
    foreach (var writer in DemonstrationWriters)
    {
        writer.Record(m_Info, sensors);
    }
}
/// <summary>
/// Pushes one last "done" experience to the Brain so the Python side learns
/// the Agent is disabled.
/// </summary>
void NotifyAgentDone()
{
    m_Info.done = true;
    // Fire a final decision request with a no-op handler; no action from this
    // request will be consumed, the trainer only needs to see the done flag.
    m_Brain?.RequestDecision(m_Info, sensors, _ => { });
}
/// <summary>
/// Sends the Agent info to the linked Brain: copies the previous step's
/// actions/memories, collects fresh vector observations, validates their size
/// against the BrainParameters, shifts them into the stacked-observation
/// buffer, and requests a decision. Also writes the experience to the
/// Recorder when one is active in the editor.
/// </summary>
/// <exception cref="UnityAgentsException">
/// Thrown when CollectObservations produced a different number of floats than
/// the configured vectorObservationSize.
/// </exception>
void SendInfoToBrain()
{
    if (m_Brain == null)
    {
        return;
    }

    // Carry over the previous step's outputs so the trainer can pair them
    // with the new observation.
    m_Info.memories = m_Action.memories;
    m_Info.storedVectorActions = m_Action.vectorActions;
    m_Info.storedTextActions = m_Action.textActions;
    // Clear before CollectObservations() so the user callback appends into
    // empty buffers.
    m_Info.vectorObservation.Clear();
    m_Info.compressedObservations.Clear();
    m_ActionMasker.ResetMask();
    using (TimerStack.Instance.Scoped("CollectObservations"))
    {
        CollectObservations();
    }
    m_Info.actionMasks = m_ActionMasker.GetMask();

    var param = m_PolicyFactory.brainParameters;
    if (m_Info.vectorObservation.Count != param.vectorObservationSize)
    {
        throw new UnityAgentsException(string.Format(
            "Vector Observation size mismatch in continuous " +
            "agent {0}. " +
            "Was Expecting {1} but received {2}. ",
            gameObject.name,
            param.vectorObservationSize,
            m_Info.vectorObservation.Count));
    }

    // Shift the stacked buffer left, then overwrite its tail with the newest
    // observation (order matters: shift before replace).
    Utilities.ShiftLeft(m_Info.stackedVectorObservation, param.vectorObservationSize);
    Utilities.ReplaceRange(m_Info.stackedVectorObservation, m_Info.vectorObservation,
        m_Info.stackedVectorObservation.Count - m_Info.vectorObservation.Count);

    m_Info.reward = m_Reward;
    m_Info.done = m_Done;
    m_Info.maxStepReached = m_MaxStepReached;
    m_Info.id = m_Id;

    m_Brain.RequestDecision(this);

    if (m_Recorder != null && m_Recorder.record && Application.isEditor)
    {
        // This is a bit of a hack - if we're in inference mode, compressed observations won't be generated
        // But we need these to be generated for the recorder. So generate them here.
        if (m_Info.compressedObservations.Count == 0)
        {
            GenerateSensorData();
        }

        m_Recorder.WriteExperience(m_Info);
    }

    m_Info.textObservation = "";
}
/// <summary>
/// Sends the Agent info to the linked Brain: stores the previous step's
/// actions (or zeros them when the last step was terminal), collects
/// observations and discrete action masks, and requests a decision. The
/// experience is also mirrored to any attached demonstration writers.
/// </summary>
/// <exception cref="UnityAgentsException">
/// Thrown when the Agent has not been initialized (base.OnEnable not called).
/// </exception>
void SendInfoToBrain()
{
    if (!m_Initialized)
    {
        // Fixed message: the original concatenation was missing a space,
        // producing "...initialized.Please ensure...".
        throw new UnityAgentsException("Call to SendInfoToBrain when Agent hasn't been initialized. " +
            "Please ensure that you are calling 'base.OnEnable()' if you have overridden OnEnable.");
    }

    if (m_Brain == null)
    {
        return;
    }

    // After a terminal step the previous actions are meaningless, so zero
    // them; otherwise carry the last actions over for the trainer.
    if (m_Info.done)
    {
        Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
    }
    else
    {
        Array.Copy(m_Action.vectorActions, m_Info.storedVectorActions, m_Action.vectorActions.Length);
    }
    m_ActionMasker.ResetMask();
    UpdateSensors();
    using (TimerStack.Instance.Scoped("CollectObservations"))
    {
        CollectObservations(collectObservationsSensor);
    }
    using (TimerStack.Instance.Scoped("CollectDiscreteActionMasks"))
    {
        // Masks only apply to discrete action spaces.
        if (m_PolicyFactory.brainParameters.vectorActionSpaceType == SpaceType.Discrete)
        {
            CollectDiscreteActionMasks(m_ActionMasker);
        }
    }
    m_Info.discreteActionMasks = m_ActionMasker.GetMask();

    // Mid-episode request: done/maxStepReached are always false here.
    m_Info.reward = m_Reward;
    m_Info.done = false;
    m_Info.maxStepReached = false;
    m_Info.episodeId = m_EpisodeId;

    m_Brain.RequestDecision(m_Info, sensors);

    // If we have any DemonstrationWriters, write the AgentInfo and sensors to them.
    foreach (var demoWriter in DemonstrationWriters)
    {
        demoWriter.Record(m_Info, sensors);
    }
}
/// <summary>
/// Sends a final terminal AgentInfo to the Brain, then resets the
/// per-episode bookkeeping and starts a fresh episode id.
/// </summary>
/// <param name="maxStepReached">True when the episode ended by hitting the step limit.</param>
void NotifyAgentDone(bool maxStepReached = false)
{
    // Final snapshot of the episode for the trainer.
    m_Info.done = true;
    m_Info.maxStepReached = maxStepReached;
    m_Info.reward = m_Reward;

    // One last decision request with a no-op callback so Python sees the
    // done flag immediately; its action is never consumed.
    m_Brain?.RequestDecision(m_Info, sensors, _ => { });

    // Begin a new episode with cleared per-episode state.
    m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
    m_CumulativeReward = 0f;
    m_Reward = 0f;
    m_RequestDecision = false;
    m_RequestAction = false;
}
/// <summary>
/// Sends one final, terminal AgentInfo (with a fresh observation) to the
/// Brain and any demonstration writers, updates episode statistics unless the
/// Agent was disabled, and clears the per-episode state. Safe to call more
/// than once; repeat calls are ignored via the done-flag guard.
/// </summary>
/// <param name="doneReason">Why the episode ended (Done, MaxStepReached, or Disabled).</param>
void NotifyAgentDone(DoneReason doneReason)
{
    if (m_Info.done)
    {
        // The Agent was already marked as Done and should not be notified again
        return;
    }

    m_Info.episodeId = m_EpisodeId;
    m_Info.reward = m_Reward;
    m_Info.done = true;
    m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
    if (collectObservationsSensor != null)
    {
        // Make sure the latest observations are being passed to training.
        collectObservationsSensor.Reset();
        CollectObservations(collectObservationsSensor);
    }
    // Request the last decision with no callbacks
    // We request a decision so Python knows the Agent is done immediately
    m_Brain?.RequestDecision(m_Info, sensors);
    ResetSensors();

    // We also have to write to any DemonstrationWriters so that they get the "done" flag.
    foreach (var demoWriter in DemonstrationWriters)
    {
        demoWriter.Record(m_Info, sensors);
    }

    if (doneReason != DoneReason.Disabled)
    {
        // We don't want to update the reward stats when the Agent is disabled, because this will make
        // the rewards look lower than they actually are during shutdown.
        m_CompletedEpisodes++;
        UpdateRewardStats();
    }

    m_Reward = 0f;
    m_CumulativeReward = 0f;
    m_RequestAction = false;
    m_RequestDecision = false;
    // Zero the stored actions so the next episode does not see stale values.
    Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
}
/// <summary>
/// Collects observations and action masks, snapshots the episode state into
/// m_Info, and requests a decision from the linked Brain. When a Recorder is
/// active in the editor, sensor data is generated on demand and recorded.
/// </summary>
void SendInfoToBrain()
{
    if (m_Brain == null)
    {
        return;
    }

    m_Info.storedVectorActions = m_Action.vectorActions;
    m_ActionMasker.ResetMask();
    UpdateSensors();
    using (TimerStack.Instance.Scoped("CollectObservations"))
    {
        CollectObservations();
    }
    m_Info.actionMasks = m_ActionMasker.GetMask();

    m_Info.id = m_Id;
    m_Info.maxStepReached = m_MaxStepReached;
    m_Info.done = m_Done;
    m_Info.reward = m_Reward;

    m_Brain.RequestDecision(m_Info, sensors, UpdateAgentAction);

    if (m_Recorder == null || !m_Recorder.record || !Application.isEditor)
    {
        return;
    }

    if (m_VectorSensorBuffer == null)
    {
        // Lazily allocate a buffer for writing uncompressed (float) sensor data.
        m_VectorSensorBuffer = new float[sensors.GetSensorFloatObservationSize()];
    }

    // This is a bit of a hack - in inference mode observations won't be
    // generated, but the recorder needs them, so generate them here.
    var observations = new List<Observation>();
    GenerateSensorData(sensors, m_VectorSensorBuffer, m_WriteAdapter, observations);
    m_Recorder.WriteExperience(m_Info, observations);
}
/// <summary>
/// Collects observations and action masks, snapshots the episode state into
/// m_Info, and requests a decision from the linked Brain. When a Recorder is
/// active in the editor, missing observations are generated and recorded.
/// </summary>
void SendInfoToBrain()
{
    if (m_Brain == null)
    {
        return;
    }

    m_Info.storedVectorActions = m_Action.vectorActions;
    // Start from empty observations so CollectObservations appends cleanly.
    m_Info.observations.Clear();
    m_ActionMasker.ResetMask();
    UpdateSensors();
    using (TimerStack.Instance.Scoped("CollectObservations"))
    {
        CollectObservations();
    }
    m_Info.actionMasks = m_ActionMasker.GetMask();

    m_Info.id = m_Id;
    m_Info.maxStepReached = m_MaxStepReached;
    m_Info.done = m_Done;
    m_Info.reward = m_Reward;

    m_Brain.RequestDecision(this);

    var recording = m_Recorder != null && m_Recorder.record && Application.isEditor;
    if (!recording)
    {
        return;
    }

    // This is a bit of a hack - in inference mode observations won't be
    // generated, but the recorder needs them, so generate them here.
    if (m_Info.observations.Count == 0)
    {
        GenerateSensorData();
    }
    m_Recorder.WriteExperience(m_Info);
}
/// <summary>
/// Sends a final terminal AgentInfo to the Brain (and the Recorder when one
/// is active in the editor), updates reward statistics, then resets the
/// per-episode bookkeeping under a fresh episode id.
/// </summary>
/// <param name="maxStepReached">True when the episode ended by hitting the step limit.</param>
void NotifyAgentDone(bool maxStepReached = false)
{
    // Final snapshot of the episode for the trainer.
    m_Info.done = true;
    m_Info.maxStepReached = maxStepReached;
    m_Info.reward = m_Reward;

    // Last decision request so Python observes the terminal state immediately.
    m_Brain?.RequestDecision(m_Info, sensors);

    var recording = m_Recorder != null && m_Recorder.record && Application.isEditor;
    if (recording)
    {
        m_Recorder.WriteExperience(m_Info, sensors);
    }

    UpdateRewardStats();

    // Begin a new episode with cleared per-episode state.
    m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
    m_CumulativeReward = 0f;
    m_Reward = 0f;
    m_RequestDecision = false;
    m_RequestAction = false;
}
/// <summary>
/// Sends a final terminal AgentInfo to the Brain and every attached
/// demonstration writer, updates reward statistics, then resets the
/// per-episode bookkeeping under a fresh episode id.
/// </summary>
/// <param name="maxStepReached">True when the episode ended by hitting the step limit.</param>
void NotifyAgentDone(bool maxStepReached = false)
{
    // Final snapshot of the episode for the trainer.
    m_Info.done = true;
    m_Info.maxStepReached = maxStepReached;
    m_Info.reward = m_Reward;

    // Last decision request so Python observes the terminal state immediately.
    m_Brain?.RequestDecision(m_Info, sensors);

    // Mirror the terminal experience so demonstration writers see the "done" flag.
    foreach (var writer in DemonstrationWriters)
    {
        writer.Record(m_Info, sensors);
    }

    UpdateRewardStats();

    // Begin a new episode with cleared per-episode state.
    m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
    m_CumulativeReward = 0f;
    m_Reward = 0f;
    m_RequestDecision = false;
    m_RequestAction = false;
}