Example No. 1
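
The FixedUpdate loop below drives the agent: every reflexModulus physics steps it reads the distance sensors, stacks a sliding window of recent observations into the state, selects an action with an epsilon-greedy policy over the Q-network's predictions, stores the transition in replay memory, and every feedbackModulus steps decays epsilon and trains the network on a sampled minibatch against a separate target network. Rotation, velocity and friction are applied on every physics step.
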
    void FixedUpdate()
    {
        if (counter % reflexModulus == 0)
        {
            List<double> current_dist = MeasureDistances();            // current sensor distance readings

            int mid_sensor = (int)nsensor / 2;            // index of the central distance sensor

            double dist_surroundings = (current_dist[mid_sensor - 1] + current_dist[mid_sensor] + current_dist[mid_sensor + 1]) / 3;            // average distance over the three central sensors

            double current_speed = this.Velocity / MAX_VEL;           // speed normalized by MAX_VEL

            current_dist.Add(current_speed);             // append the normalized speed to the observation vector

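            // d_queue holds a sliding window of the most recent observation vectors:
            // drop the oldest reading and append the newest one.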
            d_queue.Dequeue();
            d_queue.Enqueue(current_dist);

            List<double> my_state = new List<double>();
            foreach (var e in d_queue.ToArray())
            {
                my_state.AddRange(e);
            }

            Matrix<double> state = Matrix<double>.Build.DenseOfColumnArrays(my_state.ToArray());            // single-column state matrix

//			Debug.Log ("state:" + state);

            Vector<double> Qhat = Qnetwork.Forward(state).Column(0);            // Q-value predictions for the current state

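            // Epsilon-greedy action selection: with probability epsilon explore a random action,
            // otherwise exploit the action with the highest predicted Q-value.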
            int action = EpsGreedy(Qhat);

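            // Record the transition in replay memory together with the normalized speed and the
            // surrounding distance (presumably combined into the reward by the memory class).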
            memory.Enqueue(state, action, current_speed, dist_surroundings, 0.0);

            takeAction(action);

            T += 1;
            //			Debug.Log ("t: " + T);

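            // Every feedbackModulus steps: decay the exploration rate and, outside test mode,
            // replay a minibatch of stored transitions to update the Q-network.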
            if (T % feedbackModulus == 0)
            {
                epsilon = epsilon > epsilon_min ? epsilon * epsilon_decay : epsilon_min;             // decay exploration rate down to epsilon_min
                if (!test_mode)
                {
                    foreach (Entry2 e in memory.Sample(batch))
                    {
                        // Double DQN target: the online network picks the greedy action for the next
                        // state and the target network evaluates it; crashes receive a fixed penalty.
                        Vector<double> qhat    = Qnetwork.Forward(e.nextState).Column(0);
                        Vector<double> qphat   = QtargetNetwork.Forward(e.nextState).Column(0);
                        double         qtarget = e.crash ? PENALTY : e.reward + gamma * qphat[qhat.MaximumIndex()];

                        Matrix<double>[] gs = Qnetwork.Gradients(e.state, e.action, qtarget);
                        Qnetwork.update(gs);
                        if (Qnetwork.t % targetUpdateInterval == 0)
                        {
                            Qnetwork.copyTo(QtargetNetwork);            // periodically sync the target network
                        }
                    }
                }
            }

        }

        ApplyRotation();          /* updates rotation */

        ApplyVelocity();          /* updates position */

        ApplyFriction();

        counter++;            // physics-step counter used by the reflexModulus gate above
    }
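
For reference, a minimal sketch of what the EpsGreedy helper called above could look like. The real implementation is not shown in this listing, so treat this as an illustrative assumption: it reuses the epsilon field from the listing, a System.Random instance named rnd, and MathNet's MaximumIndex().

    int EpsGreedy(Vector<double> Qhat)
    {
        // Explore: with probability epsilon choose a uniformly random action index.
        if (rnd.NextDouble() < epsilon)
        {
            return rnd.Next(Qhat.Count);
        }

        // Exploit: otherwise choose the action with the highest predicted Q-value.
        return Qhat.MaximumIndex();
    }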