Exemple #1
0
    /// <summary>
    /// Runs one learning pass over a batch drawn from replay memory.
    /// Periodically copies the eval network weights into the target network,
    /// updates the eval network once per sampled memory entry, then advances
    /// <c>epsilon</c> and <c>learn_step</c>.
    /// Returns early (without touching <c>epsilon</c> or <c>learn_step</c>)
    /// when <c>CreateMemoryBatch</c> yields an empty batch.
    /// </summary>
    private void Learn()
    {
        //Copy eval net to target net every `replace_target_iteration` learn steps
        if (learn_step % replace_target_iteration == 0)
        {
            network_target.SetWeightsData(network_eval.GetWeightsData());
            log.Add("Replacing `target net` with `eval net`");
        }

        //Get batch from memory
        int[] batch_index = CreateMemoryBatch();
        if (batch_index.Length == 0)
        {
            return;
        }

        //Compute network error
        for (int i = 0; i < batch_index.Length; i++)
        {
            //Index of the sampled memory entry (looked up once per iteration)
            int memory_index = batch_index[i];

            //Compute `q_eval` and `q_target`
            float[] q_target = network_target.Compute(network_memory[memory_index].next_state);
            float[] q_eval   = network_eval.Compute(network_memory[memory_index].current_state);

            //Reward stored with this memory entry
            float reward = network_memory[memory_index].reward;

            //Find the index of the largest target Q-value (index 0 is the initial candidate)
            int max_q_target = 0;
            for (int j = 1; j < q_target.Length; j++)
            {
                if (q_target[max_q_target] < q_target[j])
                {
                    max_q_target = j;
                }
            }
            //Replace that entry with reward + discounted max Q
            q_target[max_q_target] = reward + reward_decay * q_target[max_q_target];

            //Compute error for every action.
            //NOTE(review): only the arg-max entry of `q_target` carries the reward; the other
            //entries are raw target-net outputs for the next state. Standard DQN updates only
            //the action actually taken - confirm this is intended.
            //Assumes both networks output at least `actions.Length` values - TODO confirm.
            float[] error = new float[actions.Length];
            for (int j = 0; j < actions.Length; j++)
            {
                error[j] = q_target[j] - q_eval[j];
            }
            //Update weights (RMS-PROP according to the original author's note)
            network_eval.UpdateWeights(error);
        }

        //Update epsilon; clamp immediately so it can never overshoot `max_epsilon`
        if (epsilon < max_epsilon)
        {
            epsilon += epsilon_increment;
        }
        if (epsilon > max_epsilon)
        {
            epsilon = max_epsilon;
        }
        //Update learn step
        learn_step++;
    }
Exemple #2
0
    /// <summary>
    /// Loads network weights from 'Weights.txt' and applies them to both the
    /// eval and the target network.
    /// The file is expected to hold comma-separated floats followed by a trailing
    /// comma (the element after the last comma is ignored). On any problem
    /// (missing file, wrong weight count, unparsable value) a message is added to
    /// <c>log</c> and the networks are left unchanged.
    /// </summary>
    private void LoadWeights()
    {
        //Follow the method's log-and-return convention instead of throwing on a missing file
        if (!System.IO.File.Exists("Weights.txt"))
        {
            log.Add("'Weights.txt' file not found");
            return;
        }

        string[] str_weights;
        //`using` guarantees the reader is closed even if reading throws
        using (TextReader read = new StreamReader("Weights.txt"))
        {
            str_weights = read.ReadToEnd().Split(',');
        }

        //The last split element (after the trailing comma) is not a weight
        if (str_weights.Length - 1 != network_target.GetWeightsLength())
        {
            log.Add("'Weights.txt' file does not contain matching network weights");
            return;
        }
        float[] weights = new float[str_weights.Length - 1];
        for (int i = 0; i < weights.Length; i++)
        {
            //Parse with the invariant culture: values are comma-delimited, so the decimal
            //separator must be '.' regardless of the machine's regional settings.
            //The NumberStyles match the defaults of the single-argument float.TryParse.
            if (!float.TryParse(str_weights[i],
                                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                System.Globalization.CultureInfo.InvariantCulture,
                                out weights[i]))
            {
                log.Add("Could not convert weight " + i + "(" + str_weights[i] + ") into a float");
                return;
            }
        }
        network_eval.SetWeightsData(weights);
        network_target.SetWeightsData(weights);
        log.Add("Weights successfully loaded from 'Weights.txt'");
    }
    /// <summary>
    /// Coroutine performing one simulation step for the current particle:
    /// reads the car state, selects an action from the particle's Q-values,
    /// waits 0.1 s, scores the particle from the car's local z-axis velocity plus
    /// the discounted maximum target Q-value of the next state, resets a stuck car,
    /// advances to the next particle / PSO epoch when due, and finally schedules
    /// the next step unless <c>abort_learning</c> is set.
    /// </summary>
    private IEnumerator TakeStep()
    {
        //Get current car state
        float[] current_state = car_camera.GetRays();

        //Get action from current state
        float[] q_values = particles[working_particle].Compute(current_state);
        last_q_values = q_values;
        action_index  = SelectAction(q_values);

        //Wait for action to complete
        yield return(new WaitForSeconds(0.1f));

        //Get next state
        float[] next_state = car_camera.GetRays();
        //Get max `a` of `q_target`
        float[] q_target     = network_target.Compute(next_state);
        int     max_q_target = 0;

        //Scan every output; the bound must be the array length (the previous bound,
        //`max_q_target`, was 0 so the loop never ran and index 0 was always used)
        for (int i = 1; i < q_target.Length; i++)
        {
            if (q_target[max_q_target] < q_target[i])
            {
                max_q_target = i;
            }
        }
        //Get reward for action: velocity along the car's local z axis
        float velocity = car_body.gameObject.transform.InverseTransformDirection(car_body.velocity).z;

        current_reward = velocity + reward_decay * q_target[max_q_target];
        particles[working_particle].SetNetworkScore(current_reward);

        //Reset car if it is nearly stationary and more than 100 steps passed since the last reset
        if (car_body.velocity.magnitude < 0.3f && current_step - reset_step > 100)
        {
            reset_step = current_step;
            car_body.transform.position = car_spawner.transform.position;
            car_body.transform.rotation = car_spawner.transform.rotation;
            car_body.velocity           = Vector3.zero;
            car_body.angularVelocity    = Vector3.zero;
        }
        //Once more than `next_particle_wait` steps have passed, go to next particle
        if (current_step - particle_step > next_particle_wait)
        {
            working_particle++;
            //reset reward
            current_reward = 0;
            particle_step  = current_step;
        }
        //Do a pso update after all particles (also wraps `working_particle` back to 0
        //before it is used as an index again)
        if (working_particle == max_particles)
        {
            network_target.SetWeightsData(particle_swarm.GetBestWeights());
            //PSO Update Step
            particle_swarm.ComputeEpoch();
            particle_swarm.UpdateWeights();
            working_particle = 0;
        }
        current_step++;
        //Chain the next step as a fresh coroutine until learning is aborted
        if (!abort_learning)
        {
            StartCoroutine(TakeStep());
        }
    }