示例#1
0
        /// <summary>
        /// Performs the agent's one-time setup. Repeated calls are harmless:
        /// every call after the first returns immediately.
        /// </summary>
        public void LazyInitialize()
        {
            if (m_Initialized)
            {
                return;
            }

            // Raise the flag before any other work so that later calls take the early return above.
            m_Initialized = true;

            // Fresh per-agent containers.
            sensors = new List<ISensor>();
            m_Action = new AgentAction();
            m_Info = new AgentInfo();

            // Values fetched once for this Agent.
            m_PolicyFactory = GetComponent<BehaviorParameters>();
            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();

            // Hook this Agent's handlers onto the Academy's events.
            Academy.Instance.AgentIncrementStep += AgentIncrementStep;
            Academy.Instance.AgentSendState += SendInfo;
            Academy.Instance.DecideAction += DecideAction;
            Academy.Instance.AgentAct += AgentStep;
            Academy.Instance.AgentForceReset += _AgentReset;

            // Policy creation uses the BehaviorParameters fetched above; the
            // remaining setup calls run in this fixed order.
            m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
            ResetData();
            Initialize();
            InitializeSensors();

            // OnEpisodeBegin is skipped while the Academy's total step count is
            // still zero. At that point the Academy has not performed its first
            // reset yet, and that reset will force every Agent in the scene to
            // reset through the AgentForceReset event; starting the episode here
            // as well would make the Agent reset twice.
            var academyHasStepped = Academy.Instance.TotalStepCount != 0;
            if (academyHasStepped)
            {
                OnEpisodeBegin();
            }
        }
示例#2
0
        /// <summary>
        /// Marks the current episode as finished: publishes a final "done"
        /// AgentInfo to the policy and to every demonstration writer, then
        /// resets the per-episode bookkeeping and takes a new episode id.
        /// </summary>
        /// <param name="doneReason">
        /// Why the episode ended. <c>MaxStepReached</c> sets the max-step flag on
        /// the published info; <c>Disabled</c> skips the reward statistics update.
        /// </param>
        void NotifyAgentDone(DoneReason doneReason)
        {
            var hitMaxStep = doneReason == DoneReason.MaxStepReached;
            var agentDisabled = doneReason == DoneReason.Disabled;

            // Fill in the final info for the episode that is ending.
            m_Info.done = true;
            m_Info.maxStepReached = hitMaxStep;
            m_Info.reward = m_Reward;
            m_Info.episodeId = m_EpisodeId;

            // Ask for one last decision, without callbacks, so that Python
            // learns right away that the Agent is done.
            m_Brain?.RequestDecision(m_Info, sensors);

            // Every demonstration writer must also receive the record carrying the "done" flag.
            foreach (var writer in DemonstrationWriters)
            {
                writer.Record(m_Info, sensors);
            }

            // Reward stats are not updated when the Agent is being disabled:
            // doing so during shutdown would make the rewards look lower than
            // they actually are.
            if (!agentDisabled)
            {
                UpdateRewardStats();
            }

            // The episode is over: clear pending requests and rewards, and move
            // on to a new episode id.
            m_RequestDecision = false;
            m_RequestAction = false;
            m_CumulativeReward = 0f;
            m_Reward = 0f;
            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
        }
示例#3
0
        /// <summary>
        /// Sets the agent up exactly once. Calling this again after the first
        /// call has no effect, so it is safe to invoke from several places.
        /// </summary>
        public void LazyInitialize()
        {
            if (m_Initialized)
            {
                return;
            }

            // Flip the flag first; any later call then exits through the guard above.
            m_Initialized = true;

            // Empty containers for this Agent's info, action and sensors.
            sensors = new List<ISensor>();
            m_Action = new AgentAction();
            m_Info = new AgentInfo();

            // Values looked up once per Agent.
            m_PolicyFactory = GetComponent<BehaviorParameters>();
            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();

            // Register this Agent's handlers with the Academy's events.
            Academy.Instance.AgentIncrementStep += AgentIncrementStep;
            Academy.Instance.AgentSendState += SendInfo;
            Academy.Instance.DecideAction += DecideAction;
            Academy.Instance.AgentAct += AgentStep;
            Academy.Instance.AgentForceReset += _AgentReset;

            // Build the policy from the BehaviorParameters fetched above, then
            // run the remaining setup steps in this fixed order.
            m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
            ResetData();
            InitializeAgent();
            InitializeSensors();
        }
示例#4
0
        /// <summary>
        /// MonoBehaviour function that is called when the attached GameObject
        /// becomes enabled or active. Takes a new episode id, runs
        /// <c>OnEnableHelper</c>, and stores the result of looking up a
        /// DemonstrationRecorder component.
        /// </summary>
        void OnEnable()
        {
            // A new id is requested from EpisodeIdCounter on every enable.
            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
            OnEnableHelper();

            // NOTE(review): GetComponent yields null when no DemonstrationRecorder
            // is attached, so users of m_Recorder presumably need a null check - confirm.
            m_Recorder = GetComponent <DemonstrationRecorder>();
        }
示例#5
0
 /// <summary>
 /// Marks the current episode as finished: sends a final "done" AgentInfo
 /// to the policy, then resets the per-episode bookkeeping and takes a new
 /// episode id.
 /// </summary>
 /// <param name="maxStepReached">
 /// Value copied into the max-step flag of the published info. Defaults to false.
 /// </param>
 void NotifyAgentDone(bool maxStepReached = false)
 {
     // Fill in the final info for the episode that is ending.
     m_Info.done = true;
     m_Info.maxStepReached = maxStepReached;
     m_Info.reward = m_Reward;

     // Ask for one last decision so that Python learns right away that the
     // Agent is done. The callback is deliberately a no-op.
     m_Brain?.RequestDecision(m_Info, sensors, ignoredAction => { });

     // The episode is over: clear pending requests and rewards, and move on
     // to a new episode id.
     m_RequestDecision = false;
     m_RequestAction = false;
     m_CumulativeReward = 0f;
     m_Reward = 0f;
     m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
 }
示例#6
0
        /// <summary>
        /// Marks the current episode as finished: sends a final "done" AgentInfo
        /// to the policy and, when recording is active in the editor, to the
        /// demonstration recorder; then updates the reward statistics, resets the
        /// per-episode bookkeeping and takes a new episode id.
        /// </summary>
        /// <param name="maxStepReached">
        /// Value copied into the max-step flag of the published info. Defaults to false.
        /// </param>
        void NotifyAgentDone(bool maxStepReached = false)
        {
            // Fill in the final info for the episode that is ending.
            m_Info.done = true;
            m_Info.maxStepReached = maxStepReached;
            m_Info.reward = m_Reward;

            // Ask for one last decision, without callbacks, so that Python
            // learns right away that the Agent is done.
            m_Brain?.RequestDecision(m_Info, sensors);

            // The final experience is written only when a recorder exists, its
            // record flag is set, and the application runs inside the editor.
            var recorderIsActive = m_Recorder != null && m_Recorder.record;
            if (recorderIsActive && Application.isEditor)
            {
                m_Recorder.WriteExperience(m_Info, sensors);
            }

            UpdateRewardStats();

            // The episode is over: clear pending requests and rewards, and move
            // on to a new episode id.
            m_RequestDecision = false;
            m_RequestAction = false;
            m_CumulativeReward = 0f;
            m_Reward = 0f;
            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
        }
示例#7
0
        /// <summary>
        /// Marks the current episode as finished: publishes a final "done"
        /// AgentInfo to the policy and to every demonstration writer, updates the
        /// reward statistics, then resets the per-episode bookkeeping and takes a
        /// new episode id.
        /// </summary>
        /// <param name="maxStepReached">
        /// Value copied into the max-step flag of the published info. Defaults to false.
        /// </param>
        void NotifyAgentDone(bool maxStepReached = false)
        {
            // Fill in the final info for the episode that is ending.
            m_Info.done = true;
            m_Info.maxStepReached = maxStepReached;
            m_Info.reward = m_Reward;

            // Ask for one last decision, without callbacks, so that Python
            // learns right away that the Agent is done.
            m_Brain?.RequestDecision(m_Info, sensors);

            // Every demonstration writer must also receive the record carrying the "done" flag.
            foreach (var writer in DemonstrationWriters)
            {
                writer.Record(m_Info, sensors);
            }

            UpdateRewardStats();

            // The episode is over: clear pending requests and rewards, and move
            // on to a new episode id.
            m_RequestDecision = false;
            m_RequestAction = false;
            m_CumulativeReward = 0f;
            m_Reward = 0f;
            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
        }