// Unity coroutine: trains a DQN agent to steer this object toward `center`.
// Each physics step the agent picks one of four cardinal directions; it is
// rewarded +1 when the step reduces the squared distance to the target and
// -1 otherwise, then the chosen displacement is applied via the rigidbody.
private IEnumerator Start()
{
    var settings = new DQNInitSettings() { };
    // State is the 2-D offset from this object to the target (2 inputs, 4 actions).
    var agent = new DQNAgent(settings, 2, 4);

    while (true)
    {
        // BUG FIX: recompute the state every step — the original captured it once
        // before the loop, so the agent always acted on the same stale observation.
        double[] stateArr = new double[]
        {
            center.x - transform.position.x,
            center.y - transform.position.y
        };

        int action = agent.Act(stateArr);

        // Map the discrete action to a per-step displacement.
        Vector2 movementVec = default;
        switch (action)
        {
            case 0: movementVec = new Vector2(1f, 0f) * Time.deltaTime * speed; break;
            case 1: movementVec = new Vector2(-1f, 0f) * Time.deltaTime * speed; break;
            case 2: movementVec = new Vector2(0f, 1f) * Time.deltaTime * speed; break;
            case 3: movementVec = new Vector2(0f, -1f) * Time.deltaTime * speed; break;
        }

        Vector2 pos = new Vector2(transform.position.x, transform.position.y);

        // BUG FIX: the next position is pos + movementVec, so the post-move offset
        // from the target is (center - pos) - movementVec; the original ADDED the
        // movement, measuring the distance of a step in the opposite direction.
        // BUG FIX: reward is +1 when the step moves CLOSER to the target; the
        // original gave -1 for getting closer, training the agent to flee.
        if ((center - pos).sqrMagnitude > (center - pos - movementVec).sqrMagnitude)
        {
            Debug.Log("positive");
            agent.Learn(1.0);
        }
        else
        {
            Debug.Log("negative");
            agent.Learn(-1.0);
        }

        // NOTE(review): movementVec already includes Time.deltaTime, so assigning
        // it to velocity scales the motion by deltaTime twice — confirm the
        // intended units (velocity expects units/second, not a displacement).
        rgb.velocity = movementVec;
        yield return new WaitForFixedUpdate();
    }
}
// Console demo: trains a DQN agent on a toy classification task — given four
// random digits, choose action 1 if their average exceeds 5 and action 0
// otherwise. The agent is rewarded +1 for a correct choice and -1 otherwise.
// Prints a running score every 1000 epochs, then appends the trained agent as
// JSON next to the executable.
static void Main(string[] args)
{
    var rnd = new Random();
    int max = 10;
    int min = 1;
    int nextPrint = 0, act1 = 0, act0 = 0;
    double total = 0, correct = 0;

    // BUG FIX: Random.Next's upper bound is EXCLUSIVE; the original produced
    // values 1..9 despite the stated intent of "random numbers between 1 and 10".
    int[] NextState() => new[]
    {
        rnd.Next(min, max + 1), rnd.Next(min, max + 1),
        rnd.Next(min, max + 1), rnd.Next(min, max + 1)
    };

    var state = NextState();
    var opt = new TrainingOptions
    {
        Alpha = 0.001,
        Epsilon = 0,
        ErrorClamp = 0.002,
        ExperienceAddEvery = 10,
        ExperienceSize = 1000,
        ExperienceStart = 0,
        HiddenUnits = 5,
        LearningSteps = 400
    };

    // 4 inputs (the random digits), 2 actions (1 = average > 5, 0 = otherwise).
    var agent = new DQNAgent(opt, state.Length, 2);

    while (total < 50000)
    {
        state = NextState();
        var action = agent.Act(state);
        if (action == 1) { act1++; } else { act0++; }

        // Hoisted: the original evaluated state.Average() twice per iteration.
        double avg = state.Average();
        bool isCorrect = (avg > 5 && action == 1) || (avg <= 5 && action == 0);
        if (isCorrect)
        {
            agent.Learn(1);
            correct++;
        }
        else
        {
            agent.Learn(-1);
        }
        total++;

        if (total >= nextPrint)
        {
            // BUG FIX: the original concatenation ran the score and the epoch
            // label together ("...%Epoch: ...") — insert the missing separator.
            Console.WriteLine("Score: " + (correct / total).ToString("P") + " Epoch: " + nextPrint);
            Console.WriteLine("Action 1: " + act1 + " Action 0: " + act0);
            nextPrint += 1000;
        }
    }

    Console.WriteLine("End");
    // NOTE(review): "DNQ.trr" looks like a typo for "DQN" but is kept as-is —
    // changing the path would break anything that already reads this file.
    File.AppendAllText(AppDomain.CurrentDomain.BaseDirectory + "DNQ.trr", agent.AgentToJson());
    Console.ReadKey();
}