// Coroutine that trains a DQN agent to steer this object's Rigidbody2D
// toward `center`. Each physics step: observe the offset to the target,
// pick one of 4 cardinal moves, reward movement toward the target, and
// apply the movement as the body's velocity.
private IEnumerator Start()
{
    // NOTE(review): all hyperparameters left at their defaults — confirm intended.
    var settings = new DQNInitSettings() { };

    // State = 2D offset from the current position to the target point.
    double[] stateArr = new double[]
    {
        center.x - transform.position.x,
        center.y - transform.position.y,
    };
    var agent = new DQNAgent(settings, stateArr.Length, 4);

    while (true)
    {
        // BUG FIX: the state was computed once before the loop, so the agent
        // acted on the same stale initial observation forever. Refresh it
        // from the current position every step.
        stateArr[0] = center.x - transform.position.x;
        stateArr[1] = center.y - transform.position.y;

        int action = agent.Act(stateArr);

        // Map the discrete action index (0..3) to a cardinal movement vector.
        // NOTE(review): this loop resumes on WaitForFixedUpdate, so
        // Time.fixedDeltaTime would be the conventional scale factor here;
        // Time.deltaTime is kept to preserve the existing speed tuning —
        // confirm which clock is intended.
        Vector2 movementVec = default;
        switch (action)
        {
            case 0: movementVec = new Vector2(1f, 0f) * Time.deltaTime * speed; break;
            case 1: movementVec = new Vector2(-1f, 0f) * Time.deltaTime * speed; break;
            case 2: movementVec = new Vector2(0f, 1f) * Time.deltaTime * speed; break;
            case 3: movementVec = new Vector2(0f, -1f) * Time.deltaTime * speed; break;
        }

        // Reward +1 if the chosen movement shortens the distance to the
        // target, -1 otherwise. (The original compared against
        // `center - pos + movementVec` — a flipped sign that happened to
        // produce the right polarity via vector cancellation; this is the
        // explicit before/after form of the same intent.)
        Vector2 pos = new Vector2(transform.position.x, transform.position.y);
        float sqrDistBefore = (center - pos).sqrMagnitude;
        float sqrDistAfter = (center - (pos + movementVec)).sqrMagnitude;
        if (sqrDistAfter < sqrDistBefore)
        {
            Debug.Log("positive");
            agent.Learn(1.0);
        }
        else
        {
            Debug.Log("negative");
            agent.Learn(-1.0);
        }

        rgb.velocity = movementVec;
        yield return new WaitForFixedUpdate();
    }
}
/// <summary>
/// Builds a DQN agent for a problem with <paramref name="numStates"/> state
/// dimensions and <paramref name="numActions"/> discrete actions, copying
/// every hyperparameter out of <paramref name="opt"/> and then calling
/// <c>Reset()</c> to initialize the agent's internal state.
/// </summary>
/// <param name="opt">Source of all hyperparameter values.</param>
/// <param name="numStates">Dimensionality of the state vector.</param>
/// <param name="numActions">Number of discrete actions.</param>
public DQNAgent(DQNInitSettings opt, int numStates, int numActions)
{
    // Problem dimensions.
    this.numStates = numStates;
    this.numActions = numActions;

    // Core learning hyperparameters (names follow the settings object;
    // presumably discount / exploration / learning rate — see DQNInitSettings).
    gamma = opt.gamma;
    epsilon = opt.epsilon;
    alpha = opt.alpha;

    // Experience-replay and training-schedule configuration.
    experience_add_every = opt.experience_add_every;
    experience_size = opt.experience_size;
    learning_steps_per_iteration = opt.learning_steps_per_iteration;
    tderror_clamp = opt.tderror_clamp;

    // Network sizing.
    num_hidden_units = opt.num_hidden_units;

    // Initialize networks / replay buffer now that all settings are in place.
    Reset();
}