/// <summary>
/// Per-tick update: every <c>reflexModulus</c>-th call it reads the distance
/// sensors, builds the network input from the queued observations, picks an
/// action with <c>EpsGreedy</c> and, when <c>training</c> is set, records the
/// transition and periodically updates the Q-network. Rotation, velocity and
/// friction are applied on every call.
/// </summary>
/// <remarks>
/// Presumably invoked by Unity as the fixed-timestep callback (the name and the
/// use of <c>Debug.Log</c> suggest a MonoBehaviour) — confirm against the
/// enclosing class.
/// </remarks>
void FixedUpdate() {
    if (counter % reflexModulus == 0) {
        // --- Observation -------------------------------------------------
        List<double> current_dist = MeasureDistances();
        int mid_sensor = (int)nsensor / 2;

        // Mean of the three central sensor readings and the overall minimum reading.
        double avg_dist_ahead = (current_dist[mid_sensor - 1]
                                 + current_dist[mid_sensor]
                                 + current_dist[mid_sensor + 1]
                                 + current_dist.Min()) / 4;

        // Velocity expressed as a fraction of MAX_VEL.
        double current_speed = this.Velocity / MAX_VEL;

        // The normalised speed is appended to the sensor readings, so each
        // queued observation is (distances..., speed).
        current_dist.Add(current_speed);

        // Drop one queued observation and append the newest one.
        d_queue.Dequeue();
        d_queue.Enqueue(current_dist);

        // Concatenate every queued observation into one flat vector.
        List<double> my_state = new List<double>();
        foreach (var observation in d_queue.ToArray()) {
            my_state.AddRange(observation);
        }
        // current_dist.Add (transform.rotation [1]); current_dist.Add (transform.rotation [3]);

        // Single-column matrix holding the flattened state.
        Matrix<double> state = Matrix<double>.Build.DenseOfColumnArrays(my_state.ToArray());

        // --- Action selection --------------------------------------------
        // Q-value predictions for the current state.
        Vector<double> Qhat = Qnetwork.Forward(state).Column(0);
        if (display_qvalues) {
            ImageManipulation.MakeImage(qvaluesimg, Qhat);
            qvaluesimg.Apply();
        }

        int action = EpsGreedy(Qhat);
        takeAction(action);
        T += 1;

        // --- Learning ----------------------------------------------------
        if (training) {
            memory.Enqueue(state, action, current_speed, avg_dist_ahead);

            // Accumulate the latest reward and the step count into the last
            // entries of the return bookkeeping lists.
            returnValues[returnValues.Count - 1] += memory.LastReward();
            returnCounts[returnCounts.Count - 1] += 1;

            if (T > learn_start && T % epsilonDecayInterval == 0) {
                // Decay first, then clamp: testing the floor before multiplying
                // let epsilon fall below epsilon_min until the next decay tick.
                var decayed = epsilon * epsilon_decay;
                epsilon = decayed > epsilon_min ? decayed : epsilon_min;
            }

            if (T > learn_start && T % targetUpdateInterval == 0) {
                Qnetwork.copyTo(QtargetNetwork);
                if (report) {
                    Debug.Log("TargetUpdate Info: Episode: " + episodes + " Epsilon: " + epsilon + " T: " + T + " lr: " + Qnetwork.lr);
                }
            }

            if (T > learn_start && T % feedbackModulus == 0) {
                // Accumulator for the per-sample gradients of this batch.
                Matrix<double>[] dw = Qnetwork.similar();

                foreach (Entry e in memory.Sample(batch)) {
                    Vector<double> qhat = Qnetwork.Forward(e.state).Column(0);
                    Vector<double> qphat = QtargetNetwork.Forward(e.nextState).Column(0);

                    // NOTE(review): the greedy index comes from Qnetwork's output on
                    // e.state but is used to index QtargetNetwork's output on
                    // e.nextState. Double DQN normally takes the argmax from the online
                    // network evaluated on the next state — confirm this is intentional.
                    double qtarget = e.crash
                        ? PENALTY
                        : e.reward + gamma * qphat[qhat.MaximumIndex()];

                    Matrix<double>[] gs = Qnetwork.Gradients(e.state, e.action, qtarget);
                    MLP.gclip(gs);
                    MLP.cumulate(dw, gs);
                }

                Qnetwork.update(dw);
                if (report) {
                    Debug.Log("Update Info: Episode: " + episodes + " Epsilon: " + epsilon + " T: " + T + " lr: " + Qnetwork.lr);
                }
            }
        }
    }

    ApplyRotation(); /* Updates Rotation */
    ApplyVelocity(); /* Updates position */
    ApplyFriction();
    counter++;
}