/// <summary>
/// Runs one learning pass: periodically copies the eval network's weights into the
/// target network, updates the eval network from a batch of stored transitions,
/// then advances epsilon and the learn-step counter.
/// </summary>
/// <remarks>
/// Returns early, without touching epsilon or <c>learn_step</c>, when
/// <c>CreateMemoryBatch</c> yields an empty batch.
/// </remarks>
private void Learn()
{
    // Copy eval net to target net every `replace_target_iteration` learn steps.
    if (learn_step % replace_target_iteration == 0)
    {
        network_target.SetWeightsData(network_eval.GetWeightsData());
        log.Add("Replacing `target net` with `eval net`");
    }

    // Get the indices of the memory entries to train on.
    int[] batch_index = CreateMemoryBatch();
    if (batch_index.Length == 0)
    {
        return;
    }

    // Compute the network error for every sampled transition.
    for (int i = 0; i < batch_index.Length; i++)
    {
        int memory_index = batch_index[i];

        // Target net evaluates the next state, eval net evaluates the current state.
        float[] q_target = network_target.Compute(network_memory[memory_index].next_state);
        float[] q_eval = network_eval.Compute(network_memory[memory_index].current_state);
        float reward = network_memory[memory_index].reward;

        // Find the index of the largest target Q-value (first one wins on ties).
        int max_q_target = 0;
        for (int j = 0; j < q_target.Length; j++)
        {
            if (q_target[max_q_target] < q_target[j])
            {
                max_q_target = j;
            }
        }

        // Replace the best target value with reward + discounted best value.
        q_target[max_q_target] = reward + reward_decay * q_target[max_q_target];

        // Per-action error between target and eval outputs.
        float[] error = new float[actions.Length];
        for (int j = 0; j < actions.Length; j++)
        {
            error[j] = q_target[j] - q_eval[j];
        }

        // Update weights (RMS-PROP according to the original comment; the
        // optimizer itself lives in UpdateWeights, not visible here).
        network_eval.UpdateWeights(error);
    }

    // Advance epsilon, then clamp so it can never overshoot `max_epsilon`
    // (previously it could exceed the maximum until the next call).
    if (epsilon < max_epsilon)
    {
        epsilon += epsilon_increment;
    }
    if (epsilon > max_epsilon)
    {
        epsilon = max_epsilon;
    }

    // Update learn step
    learn_step++;
}
/// <summary>
/// Loads comma-separated network weights from 'Weights.txt' and applies them to both
/// the eval and the target network.
/// </summary>
/// <remarks>
/// The file is expected to end with a trailing comma, so the last split entry is
/// ignored. On any failure (missing file, weight-count mismatch, unparsable value)
/// a message is added to the log and the networks are left untouched.
/// </remarks>
private void LoadWeights()
{
    // Log instead of throwing when the file is absent, like the other failure paths.
    if (!System.IO.File.Exists("Weights.txt"))
    {
        log.Add("'Weights.txt' file could not be found");
        return;
    }

    // `using` guarantees the reader is closed even if ReadToEnd throws.
    string[] str_weights;
    using (TextReader read = new StreamReader("Weights.txt"))
    {
        str_weights = read.ReadToEnd().Split(',');
    }

    // The final entry is whatever follows the trailing comma, so it is not a weight.
    int weight_count = str_weights.Length - 1;
    if (weight_count != network_target.GetWeightsLength())
    {
        log.Add("'Weights.txt' file does not contain matching network weights");
        return;
    }

    float[] weights = new float[weight_count];
    for (int i = 0; i < weights.Length; i++)
    {
        // Parse with the invariant culture: the file is machine-readable and uses
        // ',' as the separator, so a culture with ',' as decimal mark must not apply.
        bool parsed = float.TryParse(
            str_weights[i],
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out weights[i]);
        if (!parsed)
        {
            log.Add("Could not convert weight " + i + "(" + str_weights[i] + ") into a float");
            return;
        }
    }

    network_eval.SetWeightsData(weights);
    network_target.SetWeightsData(weights);
    log.Add("Weights successfully loaded from 'Weights.txt'");
}
/// <summary>
/// Coroutine performing one control step for the currently evaluated particle:
/// picks an action from the current state, waits 0.1 s, scores the particle, and
/// handles car reset, particle rotation and the PSO update.
/// </summary>
/// <returns>
/// An enumerator for the coroutine scheduler. Unless <c>abort_learning</c> is set,
/// the method starts itself again as a new coroutine at the end of the step.
/// </returns>
private IEnumerator TakeStep()
{
    // Get current car state
    float[] current_state = car_camera.GetRays();

    // Get action from current state using the particle under evaluation.
    float[] q_values = particles[working_particle].Compute(current_state);
    last_q_values = q_values;
    action_index = SelectAction(q_values);

    // Wait for action to complete
    yield return new WaitForSeconds(0.1f);

    // Get next state
    float[] next_state = car_camera.GetRays();

    // Find the index of the largest target Q-value for the next state.
    // The loop bound used to be `max_q_target` (always 0), so the search never
    // ran and index 0 was always used; it must scan the whole array.
    float[] q_target = network_target.Compute(next_state);
    int max_q_target = 0;
    for (int i = 1; i < q_target.Length; i++)
    {
        if (q_target[max_q_target] < q_target[i])
        {
            max_q_target = i;
        }
    }

    // Get reward for action: local-space z velocity plus the discounted best
    // target Q-value.
    float velocity = car_body.gameObject.transform.InverseTransformDirection(car_body.velocity).z;
    current_reward = velocity + reward_decay * q_target[max_q_target];
    particles[working_particle].SetNetworkScore(current_reward);

    // Reset the car to the spawner if it is nearly stationary and more than
    // 100 steps have passed since the last reset.
    if (car_body.velocity.magnitude < 0.3f && current_step - reset_step > 100)
    {
        reset_step = current_step;
        car_body.transform.position = car_spawner.transform.position;
        car_body.transform.rotation = car_spawner.transform.rotation;
        car_body.velocity = Vector3.zero;
        car_body.angularVelocity = Vector3.zero;
    }

    // After more than `next_particle_wait` steps go to the next particle.
    if (current_step - particle_step > next_particle_wait)
    {
        working_particle++;

        // Reset reward
        current_reward = 0;
        particle_step = current_step;
    }

    // Do a PSO update once every particle has been evaluated.
    if (working_particle == max_particles)
    {
        network_target.SetWeightsData(particle_swarm.GetBestWeights());

        // PSO update step
        particle_swarm.ComputeEpoch();
        particle_swarm.UpdateWeights();
        working_particle = 0;
    }

    current_step++;

    // Schedule the next step unless learning was aborted.
    if (!abort_learning)
    {
        StartCoroutine(TakeStep());
    }
}