Пример #1
0
        /// <summary>
        /// Unity coroutine that runs the agent's control loop forever. On every iteration it
        /// observes the offset from this object to <c>center</c>, asks the agent for one of four
        /// axis-aligned moves, passes +1.0 or -1.0 to <c>agent.Learn</c> depending on whether the
        /// move brings the object closer to <c>center</c>, and then writes the move to
        /// <c>rgb.velocity</c>.
        /// </summary>
        /// <returns>An enumerator that yields once per fixed update and never completes.</returns>
        private IEnumerator Start()
        {
            var settings = new DQNInitSettings();

            // Observation layout is [dx, dy]; actions 0..3 map to +x, -x, +y, -y below.
            const int stateSize   = 2;
            const int actionCount = 4;
            var agent = new DQNAgent(settings, stateSize, actionCount);

            // Reused on every iteration instead of allocating a new yield instruction per step.
            var waitForFixedUpdate = new WaitForFixedUpdate();

            while (true)
            {
                Vector2 pos      = new Vector2(transform.position.x, transform.position.y);
                Vector2 toCenter = center - pos;

                // The observation must be re-sampled every step, otherwise the agent keeps acting
                // on the initial offset. A fresh array is allocated because it is not visible here
                // whether DQNAgent keeps a reference to the array passed to Act.
                double[] stateArr = new double[] { toCenter.x, toCenter.y };
                int      action   = agent.Act(stateArr);

                Vector2 direction = default;
                switch (action)
                {
                case 0:
                    direction = new Vector2(1f, 0f);
                    break;

                case 1:
                    direction = new Vector2(-1f, 0f);
                    break;

                case 2:
                    direction = new Vector2(0f, 1f);
                    break;

                case 3:
                    direction = new Vector2(0f, -1f);
                    break;
                }

                // Any other action index leaves the direction (and therefore the move) at zero.
                Vector2 movementVec = direction * Time.deltaTime * speed;

                // Offset to the target once the move is applied: center - (pos + movementVec).
                Vector2 toCenterAfterMove = toCenter - movementVec;
                if (toCenterAfterMove.sqrMagnitude < toCenter.sqrMagnitude)
                {
                    Debug.Log("positive");
                    agent.Learn(1.0);
                }
                else
                {
                    // The move did not reduce the distance to the target.
                    Debug.Log("negative");
                    agent.Learn(-1.0);
                }

                // NOTE(review): movementVec is already scaled by Time.deltaTime, yet it is assigned
                // as a velocity; presumably the actual displacement per step is therefore smaller
                // than movementVec. Kept as in the original - confirm this is intended.
                rgb.velocity = movementVec;
                yield return waitForFixedUpdate;
            }
        }
Пример #2
0
        /// <summary>
        /// Creates an agent by copying its hyper-parameters from <paramref name="opt"/> into the
        /// corresponding fields and then calling <c>Reset()</c>.
        /// </summary>
        /// <param name="opt">Settings object the hyper-parameters are read from; must not be null.</param>
        /// <param name="numStates">Stored in <c>numStates</c>; presumably the length of the state vector passed to the agent.</param>
        /// <param name="numActions">Stored in <c>numActions</c>; presumably the number of discrete actions the agent can choose from.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="opt"/> is null.</exception>
        public DQNAgent(DQNInitSettings opt, int numStates, int numActions)
        {
            // Fail with a descriptive error instead of a NullReferenceException on the first field read.
            if (opt == null)
            {
                throw new System.ArgumentNullException(nameof(opt));
            }

            this.numStates  = numStates;
            this.numActions = numActions;

            gamma   = opt.gamma;
            epsilon = opt.epsilon;
            alpha   = opt.alpha;

            experience_add_every         = opt.experience_add_every;
            experience_size              = opt.experience_size;
            learning_steps_per_iteration = opt.learning_steps_per_iteration;
            tderror_clamp                = opt.tderror_clamp;

            num_hidden_units = opt.num_hidden_units;

            // Reset runs last so that it sees every field assigned above.
            Reset();
        }