private void displayUI()
{
    if (scoreText) { scoreText.text = "Score: " + GetCumulativeReward().ToString("0.00"); }
    if (stepText) { stepText.text = "Steps: " + StepCount.ToString() + " / " + maxEpisodeSteps; }
    if (lifeText) { lifeText.text = "Lives: " + controller.getLife() + " / 5"; }
}
// GUI stuff for connecting
private void OnGUI()
{
    GUILayout.BeginArea(new Rect(Screen.width / 2 - 100, 0, 200, 150));

    // If the player hasn't connected yet, allow the player to connect
    if (!DarkRiftAPI.isConnected)
    {
        _IP = GUILayout.TextField(_IP);
        if (GUILayout.Button("Connect"))
        {
            DarkRiftAPI.Connect(_IP); // connect to the DarkRift server hosted at the _IP address
        }
    }

    if (!DoVisualize)
    {
        Texture2D tex = new Texture2D(1, 1);
        tex.SetPixel(0, 0, new Color32(255, 255, 255, 255)); // a 1x1 texture's only pixel is at (0, 0)
        tex.Apply(); // push the pixel change to the GPU so DrawTexture sees it
        GUI.DrawTexture(new Rect(0, 0, Screen.width, Screen.height), tex);
        GUILayout.Label("Catching up", Style);
    }

    GUILayout.Label("Step Count: " + StepCount.ToString(), Style);
    GUILayout.EndArea();
}
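// NOTE: allocating a new Texture2D on every OnGUI call (OnGUI can run several times
// per frame) creates garbage for the collector. Below is a minimal sketch of caching
// the overlay texture instead; the _overlayTex field and GetOverlayTex method are
// assumptions for illustration, not part of the original class.
private Texture2D _overlayTex;

private Texture2D GetOverlayTex()
{
    if (_overlayTex == null)
    {
        _overlayTex = new Texture2D(1, 1);
        _overlayTex.SetPixel(0, 0, new Color32(255, 255, 255, 255));
        _overlayTex.Apply();
    }
    return _overlayTex;
}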
/// <summary>
/// This is an ML-Agents framework method called when the agent observes its environment.
/// These observations are passed to the model as inputs.
/// They make up the agent's STATE (S) at the current step, or time (t).
/// The model will use them to decide which action to perform.
///
/// NOTE: Aside from these manually created observations, we are also using
/// some others built in to ML-Agents, such as the Ray Perception Sensor,
/// and stacking previous observations.
/// </summary>
/// <param name="sensor">part of the ML-Agents API</param>
public override void CollectObservations(VectorSensor sensor)
{
    if (!target)
    {
        // if there's no target, we're in a bad state, so just pass all-zero observations
        sensor.AddObservation(0f);
        sensor.AddObservation(0f);
        sensor.AddObservation(0f);
        sensor.AddObservation(0f);
        sensor.AddObservation(0f);
        sensor.AddObservation(0f);
        return;
    }

    // Observations used by SimpleTankAgent (you can use whatever you want for your own agents):
    // 1. Relative angle to the enemy, where 0 is dead-on, -1 is negative 180, +1 is positive 180
    // 2. Distance to the opponent, normalized
    // 3. Relative angle the enemy is pointing, where 0 is dead-on, -1 is negative 180, +1 is positive 180
    // 4. Our current health, normalized from 0 to 1
    // 5. The enemy's current health, normalized from 0 to 1
    // 6. Whether the gun is cooling down, true/false

    /* 1 */
    // calc the relative angle between our forward vector and the vector from self to target, rotating about the Y axis
    float relativeAngle = Vector3.SignedAngle(transform.forward, target.transform.position - transform.position, Vector3.up);
    float relativeAngleObs = Mathf.Clamp(relativeAngle / 180f, -1f, 1f);
    sensor.AddObservation(relativeAngleObs);

    // we're adding a reward penalty here for when we're facing away from the enemy,
    // hoping the agent learns to generally face the enemy to move toward them or fire at them
    if (relativeAngleObs < -0.5f || relativeAngleObs > 0.5f)
    {
        AddReward(-0.001f);
    }

    /* 2 */
    // calc distance
    float distance = Vector3.Distance(target.transform.position, transform.position);
    float normalizedDistance = distance / 70f; // the map hypotenuse is roughly 70; use it for normalizing
    float distanceObs = Mathf.Clamp(normalizedDistance, 0f, 1.0f);
    sensor.AddObservation(distanceObs);

    /* 3 */
    // calc the opponent's relative angle the same way
    float enemyRelativeAngle = Vector3.SignedAngle(target.transform.forward, transform.position - target.transform.position, Vector3.up);
    float enemyRelativeAngleObs = Mathf.Clamp(enemyRelativeAngle / 180f, -1f, 1f);
    sensor.AddObservation(enemyRelativeAngleObs);

    /* 4 */
    // our own health, normalized from 0-1
    if (health) { sensor.AddObservation(health.NormalizedHealth); }
    else { sensor.AddObservation(0f); }

    /* 5 */
    // the target's health, normalized from 0-1
    if (targetHealth) { sensor.AddObservation(targetHealth.NormalizedHealth); }
    else { sensor.AddObservation(0f); }

    /* 6 */
    // observe whether our gun is cooling down and can't fire
    // this might not be useful, but the agent might learn the difference between weak/strong shots,
    // and it might want to fire faster/slower depending on the situation
    if (shooting) { sensor.AddObservation(shooting.cooldown); }
    else { sensor.AddObservation(0f); }

    AddReward(-0.0001f); // tiny negative reward over time to incentivize the agent to hurry up

    // do some debug output here (guard against missing components so the debug output can't throw)
    if (debug && textOutput)
    {
        textOutput.output = "<b>Agent" + playerNumber.ToString() + "</b>\n";
        textOutput.output += "<b>Relative Angle: </b>" + relativeAngleObs.ToString() + "\n";
        textOutput.output += "<b>Distance: </b>" + distanceObs.ToString() + "\n";
        textOutput.output += "<b>Enemy Relative Heading: </b>" + enemyRelativeAngleObs.ToString() + "\n";
        textOutput.output += "<b>Health: </b>" + (health ? health.NormalizedHealth.ToString() : "n/a") + "\n";
        textOutput.output += "<b>Enemy Health: </b>" + (targetHealth ? targetHealth.NormalizedHealth.ToString() : "n/a") + "\n";
        textOutput.output += "<b>Cannon Cooldown: </b>" + (shooting ? shooting.cooldown.ToString() : "n/a") + "\n";
        textOutput.output += "<b>Total Reward: </b>" + GetCumulativeReward().ToString() + "\n";
    }

    // exact float comparison: flags the (unlikely) case where the cumulative reward is exactly zero
    if (GetCumulativeReward() == 0.0f)
    {
        Debug.unityLogger.Log(LOGTAG, "AGENT at value 0.0 rewards at step " + StepCount.ToString());
    }

    // round-trip through a string: flags the looser case where the reward merely *prints* as zero
    if (float.Parse(GetCumulativeReward().ToString()) == 0.0f)
    {
        Debug.unityLogger.Log(LOGTAG, "AGENT at string value 0.0 rewards at step " + StepCount.ToString());
    }
}
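// NOTE: comparing floats with == is fragile; Unity's Mathf.Approximately is the usual
// idiom for "close enough to equal". A minimal sketch of an equivalent zero-reward
// check as a suggested alternative (this helper is not part of the original code):
private bool RewardIsApproximatelyZero()
{
    return Mathf.Approximately(GetCumulativeReward(), 0f);
}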
public string GetRowKey()
{
    // row key format: "<iteration>_<step>", e.g. "12_345"
    return IterationCount.ToString() + "_" + StepCount.ToString();
}
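// A minimal sketch of splitting a row key back into its parts, assuming the
// "<iteration>_<step>" format produced by GetRowKey above; this helper is an
// illustration, not part of the original class:
public static (int iteration, int step) ParseRowKey(string rowKey)
{
    string[] parts = rowKey.Split('_');
    return (int.Parse(parts[0]), int.Parse(parts[1]));
}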