// Update is called once per frame.
// Performs one learning step: observes the current state, asks QL for an action
// (passing the reward accumulated during the previous step), applies that action,
// and then builds up the reward that will be reported on the next call.
void Update()
{
    // The state is the zombie's position expressed in the player's local space,
    // with x and z rounded to whole numbers.
    Vector3 offsetFromPlayer = player.transform.InverseTransformPoint(transform.position);
    float stateX = (float)Math.Round(offsetFromPlayer.x);
    float stateZ = (float)Math.Round(offsetFromPlayer.z);
    float[] state = { stateX, stateZ };
    //Debug.Log("qState " + state[0] + ", " + state[1]);

    // QL receives the reward gathered since the last step and returns the action to take.
    int chosenAction = QL.getAction(state, currentReward);

    // Start a fresh reward tally before acting.
    currentReward = 0f;

    Move(chosenAction);

    // Every step is charged the time penalty.
    currentReward += timePunishment;

    // positiveTrigger is not set in this method; per the original note it marks a hit on the player.
    if (positiveTrigger)
    {
        currentReward += positiveReward;
    }

    // Uncomment when there's an object that produces negative rewards.
    //if (negativeTrigger)
    //{
    //    currentReward += negativeReward;
    //}

    // Distance to the player, rounded to a whole number so small movements do not count.
    float separation = Vector3.Distance(transform.position, player.transform.position);
    float roundedDistance = (float)Math.Round(separation);
    Debug.Log("before " + lastDistance + " after: " + roundedDistance);

    // Reward the agent when the rounded distance shrank compared to the previous step.
    bool movedCloser = roundedDistance < lastDistance;
    if (movedCloser)
    {
        Debug.Log("closer");
        currentReward += distanceReward;
    }

    lastDistance = roundedDistance;

    // Clear the rigidbody's velocity at the end of each step.
    rb.velocity = Vector3.zero;

    // Nothing more to do unless a reset was requested (used for training).
    if (!restartPostion)
    {
        return;
    }

    // Put the zombie back on its start position and drop the player on a random
    // x/z spot within [-xPlayer, xPlayer) and [-zPlayer, zPlayer).
    transform.position = new Vector3(xStartPosition, yStartPosition, zStartPosition);
    var playerX = random.Next(-xPlayer, xPlayer);
    var playerZ = random.Next(-zPlayer, zPlayer);
    player.transform.position = new Vector3(playerX, yStartPosition, playerZ);
    restartPostion = false;
    //Debug.Log("restarting position");
}