Example No. 1
    void FixedUpdate()
    {
        if (counter % reflexModulus == 0)
        {
            List<double> current_dist = MeasureDistances();             // raw sensor distances

            int mid_sensor = (int)nsensor / 2;                          // index of the centre sensor

            // average the three centre readings and the overall minimum distance
            double avg_dist_ahead = (current_dist[mid_sensor - 1] + current_dist[mid_sensor] + current_dist[mid_sensor + 1] + current_dist.Min()) / 4;

            double current_speed = this.Velocity / MAX_VEL;             // normalised speed

            current_dist.Add(current_speed);                            // append speed to the observation

            // slide the observation window: drop the oldest frame, push the newest,
            // then flatten the stacked frames into a single state vector
            d_queue.Dequeue();
            d_queue.Enqueue(current_dist);
            List<double> my_state = new List<double>();
            foreach (var e in d_queue.ToArray())
            {
                my_state.AddRange(e);
            }

            // current_dist.Add(transform.rotation[1]); current_dist.Add(transform.rotation[3]);

            Matrix<double> state = Matrix<double>.Build.DenseOfColumnArrays(my_state.ToArray()); // single-column state matrix

            Vector<double> Qhat = Qnetwork.Forward(state).Column(0);    // Q-value predictions for the current state

            if (display_qvalues)
            {
                ImageManipulation.MakeImage(qvaluesimg, Qhat);
                qvaluesimg.Apply();
            }

            int action = EpsGreedy(Qhat);   // epsilon-greedy action selection

            takeAction(action);             // apply the chosen control

            T += 1;                         // global step counter

            if (training)
            {
                // store the transition in replay memory and track the running return of the current episode
                memory.Enqueue(state, action, current_speed, avg_dist_ahead);
                returnValues[returnValues.Count - 1] += memory.LastReward();
                returnCounts[returnCounts.Count - 1] += 1;

                // decay the exploration rate once learning has started
                if (T > learn_start && T % epsilonDecayInterval == 0)
                {
                    epsilon = epsilon > epsilon_min ? epsilon * epsilon_decay : epsilon_min;
                }

                // periodically sync the target network with the online network
                if (T > learn_start && T % targetUpdateInterval == 0)
                {
                    Qnetwork.copyTo(QtargetNetwork);
                    if (report)
                    {
                        Debug.Log("TargetUpdate Info: Episode: " + episodes + " Epsilon: " + epsilon + " T: " + T + " lr: " + Qnetwork.lr);
                    }
                }
                // learning step: sample a minibatch from replay memory and accumulate gradients
                if (T > learn_start && T % feedbackModulus == 0)
                {
                    //epsilon = epsilon > epsilon_min ? epsilon * epsilon_decay : epsilon_min;
                    Matrix<double>[] dw = Qnetwork.similar();    // gradient accumulators shaped like the network weights
                    foreach (Entry e in memory.Sample(batch))
                    {
                        Vector<double> qhat  = Qnetwork.Forward(e.state).Column(0);           // online Q-values
                        Vector<double> qphat = QtargetNetwork.Forward(e.nextState).Column(0); // target-network Q-values for the next state
                        // TD target: fixed penalty on a crash, otherwise reward plus the discounted bootstrap value
                        double qtarget = e.crash ? PENALTY : e.reward + gamma * qphat[qhat.MaximumIndex()];
                        Matrix<double>[] gs = Qnetwork.Gradients(e.state, e.action, qtarget);
                        MLP.gclip(gs);          // clip gradients
                        MLP.cumulate(dw, gs);   // accumulate into dw
                    }
                    Qnetwork.update(dw);        // apply the accumulated update
                    if (report)
                    {
                        Debug.Log("Update Info: Episode: " + episodes + " Epsilon: " + epsilon + " T: " + T + " lr: " + Qnetwork.lr);
                    }
                }
            }
        }

        ApplyRotation();          /* Updates Rotation */

        ApplyVelocity();          /* Updates position */

        ApplyFriction();          /* Applies friction */

        counter++;
    }
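
The helper EpsGreedy(Qhat) called above is not shown in this example. A minimal epsilon-greedy selector over the predicted Q-values could look like the sketch below; the method name EpsGreedySketch and the epsilon/rng parameters are illustrative assumptions, not members of the original class, and Vector<double> is the same MathNet.Numerics type used in the listing.

using MathNet.Numerics.LinearAlgebra;   // for Vector<double>, as used above

// Minimal epsilon-greedy sketch (illustrative only): with probability epsilon pick a
// random action index, otherwise pick the action with the highest predicted Q-value.
int EpsGreedySketch(Vector<double> qValues, double epsilon, System.Random rng)
{
    if (rng.NextDouble() < epsilon)
    {
        return rng.Next(qValues.Count);    // explore: uniform random action
    }
    return qValues.MaximumIndex();         // exploit: greedy action
}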