Example no. 1
        void NotifyAgentDone(DoneReason doneReason)
        {
            m_Info.episodeId      = m_EpisodeId;
            m_Info.reward         = m_Reward;
            m_Info.done           = true;
            m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
            // Request the last decision with no callbacks
            // We request a decision so Python knows the Agent is done immediately
            m_Brain?.RequestDecision(m_Info, sensors);

            // We also have to write to any DemonstrationWriters so that they get the "done" flag.
            foreach (var demoWriter in DemonstrationWriters)
            {
                demoWriter.Record(m_Info, sensors);
            }

            if (doneReason != DoneReason.Disabled)
            {
                // We don't want to update the reward stats when the Agent is disabled, because this will make
                // the rewards look lower than they actually are during shutdown.
                UpdateRewardStats();
            }

            // The Agent is done, so we give it a new episode Id
            m_EpisodeId        = EpisodeIdCounter.GetEpisodeId();
            m_Reward           = 0f;
            m_CumulativeReward = 0f;
            m_RequestAction    = false;
            m_RequestDecision  = false;
        }
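
For orientation, here is a minimal user-side sketch of how an episode typically ends. It is not taken from the examples above: the class name, the target field, the reward value, and the assumption that EndEpisode() in recent ML-Agents releases routes into a NotifyAgentDone-style method like the one shown are all illustrative.

using Unity.MLAgents;
using UnityEngine;

// Hypothetical agent; field names and reward values are assumptions for illustration.
public class ExampleAgent : Agent
{
    public Transform target;

    public override void OnEpisodeBegin()
    {
        // Runs when a new episode starts, after the previous one was marked done.
        transform.localPosition = Vector3.zero;
    }

    void FixedUpdate()
    {
        if (Vector3.Distance(transform.localPosition, target.localPosition) < 1.0f)
        {
            AddReward(1.0f);   // accumulated into the agent's reward for this step
            EndEpisode();      // marks the agent done; the trainer is notified immediately
        }
    }
}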
Example no. 2
        /// <summary>
        /// Sends the Agent info to the linked Brain.
        /// </summary>
        void SendInfoToBrain()
        {
            if (m_Brain == null)
            {
                return;
            }

            m_Info.storedVectorActions = m_Action.vectorActions;
            m_ActionMasker.ResetMask();
            UpdateSensors();
            using (TimerStack.Instance.Scoped("CollectObservations"))
            {
                CollectObservations();
            }
            m_Info.actionMasks = m_ActionMasker.GetMask();

            m_Info.reward         = m_Reward;
            m_Info.done           = m_Done;
            m_Info.maxStepReached = m_MaxStepReached;
            m_Info.id             = m_Id;

            m_Brain.RequestDecision(m_Info, sensors, UpdateAgentAction);

            if (m_Recorder != null && m_Recorder.record && Application.isEditor)
            {
                m_Recorder.WriteExperience(m_Info, sensors);
            }
        }
Example no. 3
        /// <summary>
        /// Sends the Agent info to the linked Brain.
        /// </summary>
        void SendInfoToBrain()
        {
            if (m_Brain == null)
            {
                return;
            }

            m_Info.storedVectorActions = m_Action.vectorActions;
            m_ActionMasker.ResetMask();
            UpdateSensors();
            using (TimerStack.Instance.Scoped("CollectObservations"))
            {
                CollectObservations(collectObservationsSensor, m_ActionMasker);
            }
            m_Info.actionMasks = m_ActionMasker.GetMask();

            m_Info.reward         = m_Reward;
            m_Info.done           = false;
            m_Info.maxStepReached = false;
            m_Info.episodeId      = m_EpisodeId;

            m_Brain.RequestDecision(m_Info, sensors);

            // If we have any DemonstrationWriters, write the AgentInfo and sensors to them.
            foreach (var demoWriter in DemonstrationWriters)
            {
                demoWriter.Record(m_Info, sensors);
            }
        }
Example no. 4
        void NotifyAgentDone()
        {
            m_Info.done = true;
            // Request the last decision with no callbacks
            // We request a decision so Python knows the Agent is disabled
            m_Brain?.RequestDecision(m_Info, sensors, (a) => { });
        }

        /// <summary>
        /// Sends the Agent info to the linked Brain.
        /// </summary>
        void SendInfoToBrain()
        {
            if (m_Brain == null)
            {
                return;
            }

            m_Info.memories            = m_Action.memories;
            m_Info.storedVectorActions = m_Action.vectorActions;
            m_Info.storedTextActions   = m_Action.textActions;
            m_Info.vectorObservation.Clear();
            m_Info.compressedObservations.Clear();
            m_ActionMasker.ResetMask();
            using (TimerStack.Instance.Scoped("CollectObservations"))
            {
                CollectObservations();
            }
            m_Info.actionMasks = m_ActionMasker.GetMask();

            var param = m_PolicyFactory.brainParameters;

            if (m_Info.vectorObservation.Count != param.vectorObservationSize)
            {
                throw new UnityAgentsException(string.Format(
                                                   "Vector Observation size mismatch in continuous " +
                                                   "agent {0}. " +
                                                   "Was Expecting {1} but received {2}. ",
                                                   gameObject.name,
                                                   param.vectorObservationSize,
                                                   m_Info.vectorObservation.Count));
            }

            Utilities.ShiftLeft(m_Info.stackedVectorObservation, param.vectorObservationSize);
            Utilities.ReplaceRange(m_Info.stackedVectorObservation, m_Info.vectorObservation,
                                   m_Info.stackedVectorObservation.Count - m_Info.vectorObservation.Count);

            m_Info.reward         = m_Reward;
            m_Info.done           = m_Done;
            m_Info.maxStepReached = m_MaxStepReached;
            m_Info.id             = m_Id;

            m_Brain.RequestDecision(this);

            if (m_Recorder != null && m_Recorder.record && Application.isEditor)
            {
                // This is a bit of a hack - if we're in inference mode, compressed observations won't be generated
                // But we need these to be generated for the recorder. So generate them here.
                if (m_Info.compressedObservations.Count == 0)
                {
                    GenerateSensorData();
                }

                m_Recorder.WriteExperience(m_Info);
            }

            m_Info.textObservation = "";
        }
Example no. 6
        /// <summary>
        /// Sends the Agent info to the linked Brain.
        /// </summary>
        void SendInfoToBrain()
        {
            if (!m_Initialized)
            {
                throw new UnityAgentsException("Call to SendInfoToBrain when Agent hasn't been initialized." +
                                               "Please ensure that you are calling 'base.OnEnable()' if you have overridden OnEnable.");
            }

            if (m_Brain == null)
            {
                return;
            }

            if (m_Info.done)
            {
                Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
            }
            else
            {
                Array.Copy(m_Action.vectorActions, m_Info.storedVectorActions, m_Action.vectorActions.Length);
            }
            m_ActionMasker.ResetMask();
            UpdateSensors();
            using (TimerStack.Instance.Scoped("CollectObservations"))
            {
                CollectObservations(collectObservationsSensor);
            }
            using (TimerStack.Instance.Scoped("CollectDiscreteActionMasks"))
            {
                if (m_PolicyFactory.brainParameters.vectorActionSpaceType == SpaceType.Discrete)
                {
                    CollectDiscreteActionMasks(m_ActionMasker);
                }
            }
            m_Info.discreteActionMasks = m_ActionMasker.GetMask();

            m_Info.reward         = m_Reward;
            m_Info.done           = false;
            m_Info.maxStepReached = false;
            m_Info.episodeId      = m_EpisodeId;

            m_Brain.RequestDecision(m_Info, sensors);

            // If we have any DemonstrationWriters, write the AgentInfo and sensors to them.
            foreach (var demoWriter in DemonstrationWriters)
            {
                demoWriter.Record(m_Info, sensors);
            }
        }
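
As a companion sketch for the collection path, the following shows roughly what the user-side overrides invoked from SendInfoToBrain look like in this API generation. It is an assumption-laden illustration: the class, the Rigidbody field, the observation choices, and the masked action index are made up, and the override names and SetMask signature belong to the era of this example (they changed in later releases).

using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using UnityEngine;

// Illustrative only; observation values and the masked action index are placeholders.
public class MaskedAgent : Agent
{
    Rigidbody m_Body;

    public override void Initialize()
    {
        m_Body = GetComponent<Rigidbody>();
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        // Whatever is added here is what SendInfoToBrain packages up for the Brain.
        sensor.AddObservation(transform.localPosition);
        sensor.AddObservation(m_Body.velocity);
    }

    public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
    {
        // Only invoked for discrete action spaces (see the SpaceType.Discrete check above).
        // Hypothetical rule: forbid action 2 on branch 0 for this decision.
        actionMasker.SetMask(0, new[] { 2 });
    }
}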
Example no. 7
        void NotifyAgentDone(bool maxStepReached = false)
        {
            m_Info.reward         = m_Reward;
            m_Info.done           = true;
            m_Info.maxStepReached = maxStepReached;
            // Request the last decision with no callbacks
            // We request a decision so Python knows the Agent is done immediately
            m_Brain?.RequestDecision(m_Info, sensors, (a) => {});
            // The Agent is done, so we give it a new episode Id
            m_EpisodeId        = EpisodeIdCounter.GetEpisodeId();
            m_Reward           = 0f;
            m_CumulativeReward = 0f;
            m_RequestAction    = false;
            m_RequestDecision  = false;
        }
Example no. 8
        void NotifyAgentDone(DoneReason doneReason)
        {
            if (m_Info.done)
            {
                // The Agent was already marked as Done and should not be notified again
                return;
            }
            m_Info.episodeId      = m_EpisodeId;
            m_Info.reward         = m_Reward;
            m_Info.done           = true;
            m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
            if (collectObservationsSensor != null)
            {
                // Make sure the latest observations are being passed to training.
                collectObservationsSensor.Reset();
                CollectObservations(collectObservationsSensor);
            }
            // Request the last decision with no callbacks
            // We request a decision so Python knows the Agent is done immediately
            m_Brain?.RequestDecision(m_Info, sensors);
            ResetSensors();

            // We also have to write to any DemonstrationWriters so that they get the "done" flag.
            foreach (var demoWriter in DemonstrationWriters)
            {
                demoWriter.Record(m_Info, sensors);
            }

            if (doneReason != DoneReason.Disabled)
            {
                // We don't want to update the reward stats when the Agent is disabled, because this will make
                // the rewards look lower than they actually are during shutdown.
                m_CompletedEpisodes++;
                UpdateRewardStats();
            }

            m_Reward           = 0f;
            m_CumulativeReward = 0f;
            m_RequestAction    = false;
            m_RequestDecision  = false;
            Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
        }
Example no. 9
        /// <summary>
        /// Sends the Agent info to the linked Brain.
        /// </summary>
        void SendInfoToBrain()
        {
            if (m_Brain == null)
            {
                return;
            }

            m_Info.storedVectorActions = m_Action.vectorActions;
            m_ActionMasker.ResetMask();
            UpdateSensors();
            using (TimerStack.Instance.Scoped("CollectObservations"))
            {
                CollectObservations();
            }
            m_Info.actionMasks = m_ActionMasker.GetMask();

            // var param = m_PolicyFactory.brainParameters; // look, no brain params!

            m_Info.reward         = m_Reward;
            m_Info.done           = m_Done;
            m_Info.maxStepReached = m_MaxStepReached;
            m_Info.id             = m_Id;

            m_Brain.RequestDecision(m_Info, sensors, UpdateAgentAction);

            if (m_Recorder != null && m_Recorder.record && Application.isEditor)
            {
                if (m_VectorSensorBuffer == null)
                {
                    // Create a buffer for writing uncompressed (i.e. float) sensor data to
                    m_VectorSensorBuffer = new float[sensors.GetSensorFloatObservationSize()];
                }

                // This is a bit of a hack - if we're in inference mode, observations won't be generated
                // But we need these to be generated for the recorder. So generate them here.
                var observations = new List<Observation>();
                GenerateSensorData(sensors, m_VectorSensorBuffer, m_WriteAdapter, observations);

                m_Recorder.WriteExperience(m_Info, observations);
            }
        }
Example no. 10
        /// <summary>
        /// Sends the Agent info to the linked Brain.
        /// </summary>
        void SendInfoToBrain()
        {
            if (m_Brain == null)
            {
                return;
            }

            m_Info.storedVectorActions = m_Action.vectorActions;
            m_Info.observations.Clear();
            m_ActionMasker.ResetMask();
            UpdateSensors();
            using (TimerStack.Instance.Scoped("CollectObservations"))
            {
                CollectObservations();
            }
            m_Info.actionMasks = m_ActionMasker.GetMask();

            // var param = m_PolicyFactory.brainParameters; // look, no brain params!

            m_Info.reward         = m_Reward;
            m_Info.done           = m_Done;
            m_Info.maxStepReached = m_MaxStepReached;
            m_Info.id             = m_Id;

            m_Brain.RequestDecision(this);

            if (m_Recorder != null && m_Recorder.record && Application.isEditor)
            {
                // This is a bit of a hack - if we're in inference mode, observations won't be generated
                // But we need these to be generated for the recorder. So generate them here.
                if (m_Info.observations.Count == 0)
                {
                    GenerateSensorData();
                }

                m_Recorder.WriteExperience(m_Info);
            }
        }
Example no. 11
        void NotifyAgentDone(bool maxStepReached = false)
        {
            m_Info.reward         = m_Reward;
            m_Info.done           = true;
            m_Info.maxStepReached = maxStepReached;
            // Request the last decision with no callbacks
            // We request a decision so Python knows the Agent is done immediately
            m_Brain?.RequestDecision(m_Info, sensors);

            if (m_Recorder != null && m_Recorder.record && Application.isEditor)
            {
                m_Recorder.WriteExperience(m_Info, sensors);
            }

            UpdateRewardStats();

            // The Agent is done, so we give it a new episode Id
            m_EpisodeId        = EpisodeIdCounter.GetEpisodeId();
            m_Reward           = 0f;
            m_CumulativeReward = 0f;
            m_RequestAction    = false;
            m_RequestDecision  = false;
        }
Example no. 12
        void NotifyAgentDone(bool maxStepReached = false)
        {
            m_Info.reward         = m_Reward;
            m_Info.done           = true;
            m_Info.maxStepReached = maxStepReached;
            // Request the last decision with no callbacks
            // We request a decision so Python knows the Agent is done immediately
            m_Brain?.RequestDecision(m_Info, sensors);

            // We also have to write to any DemonstrationWriters so that they get the "done" flag.
            foreach (var demoWriter in DemonstrationWriters)
            {
                demoWriter.Record(m_Info, sensors);
            }

            UpdateRewardStats();

            // The Agent is done, so we give it a new episode Id
            m_EpisodeId        = EpisodeIdCounter.GetEpisodeId();
            m_Reward           = 0f;
            m_CumulativeReward = 0f;
            m_RequestAction    = false;
            m_RequestDecision  = false;
        }