Example #1
0
        /// <summary>
        /// Performs one Q-learning update for the transition
        /// (<paramref name="p_state0"/>, <paramref name="p_action0"/>) -> <paramref name="p_state1"/>
        /// and then advances the asynchronous-update unit.
        /// </summary>
        /// <param name="p_state0">State in which the action was taken; the network is trained on it.</param>
        /// <param name="p_action0">Index of the taken action; only this entry of the target vector is replaced.</param>
        /// <param name="p_state1">Successor state, evaluated through <c>CalcMaxQa</c>.</param>
        /// <param name="p_reward">Reward received for the transition.</param>
        /// <param name="p_final">When true, the target for the taken action is the reward alone.</param>
        /// <returns>
        /// The value returned by <c>_optimizer.Train</c> (presumably the training error of this
        /// update; confirm against the optimizer implementation).
        /// </returns>
        public double Train(Vector p_state0, int p_action0, Vector p_state1, float p_reward, bool p_final = false)
        {
            // The successor state is evaluated first, before _networkQ is activated on p_state0.
            float bestNextQ = CalcMaxQa(p_state1);

            _networkQ.Activate(p_state0);

            // Work on a copy so the network's own output vector is left untouched.
            Vector qTarget = Vector.Copy(_networkQ.Output);

            // Every entry except the taken action keeps the network's current output.
            qTarget[p_action0] = p_final ? p_reward : p_reward + _gamma * bestNextQ;

            float loss = _optimizer.Train(p_state0, qTarget);
            Vector.Release(qTarget);

            _asyncUnit.Update(p_final);
            if (_asyncUnit.IsAsyncReady)
            {
                _optimizer.AsyncUpdate();
            }

            return loss;
        }
Example #2
0
        /// <summary>
        /// Builds the training target for a single transition: a copy of the network's output
        /// for <paramref name="p_s0"/> in which the entry of the taken action is replaced.
        /// </summary>
        /// <param name="p_s0">State in which the action was taken.</param>
        /// <param name="p_s1">Successor state, evaluated through <c>CalcMaxQa</c>.</param>
        /// <param name="p_action">Index of the taken action within the output vector.</param>
        /// <param name="p_reward">Reward received for the transition.</param>
        /// <param name="p_final">When true, the replaced entry is the reward alone.</param>
        /// <returns>
        /// A vector created with <c>Vector.Copy</c>; this method does not release it.
        /// </returns>
        virtual protected Vector CalcTarget(Vector p_s0, Vector p_s1, int p_action, float p_reward, bool p_final)
        {
            // Successor value is computed before the network is activated on p_s0.
            float bestNextQ = CalcMaxQa(p_s1);

            _Qnetwork.Activate(p_s0);
            Vector result = Vector.Copy(_Qnetwork.Output);

            // Non-final transitions add the discounted successor value to the reward.
            result[p_action] = !p_final
                ? p_reward + _gamma * bestNextQ
                : p_reward;

            return result;
        }
Example #3
0
        /// <summary>
        /// Buffers the transition on <c>_stack</c> and, once <c>_asyncUnit</c> reports that it is
        /// ready, unwinds the buffer from the newest to the oldest transition, training
        /// <c>_thNetwork</c> on a discounted return accumulated along the way.
        /// </summary>
        /// <param name="p_state0">State in which the action was taken; a copy is stored on the stack.</param>
        /// <param name="p_action">Index of the taken action; stored with the transition.</param>
        /// <param name="p_state1">Successor state; its critic output seeds the return when the step is not final.</param>
        /// <param name="p_reward">Reward received for the transition.</param>
        /// <param name="p_final">When true, the return is seeded with 0 instead of the critic output.</param>
        /// <returns>
        /// The sum of the values returned by <c>_thOptimizer.Train</c> over the unwound
        /// transitions, or 0 when no update was performed in this call.
        /// </returns>
        override public double Train(Vector p_state0, int p_action, Vector p_state1, float p_reward, bool p_final)
        {
            double mse = 0;

            // The state is copied because it is read again later, when the stack is unwound.
            _stack.Push(new StackItem {
                State = Vector.Copy(p_state0), Action = p_action, Reward = p_reward
            });

            _asyncUnit.Update(p_final);

            if (_asyncUnit.IsAsyncReady)
            {
                // The critic value is the last element of the network output.
                int   criticOutput = _thNetwork.Output.Size - 1;
                float value1       = _thNetwork.Activate(p_state1)[criticOutput];

                float R = p_final ? 0f : value1;

                while (_stack.Count > 0)
                {
                    StackItem item = (StackItem)_stack.Pop();
                    R = item.Reward + _gamma * R;

                    Vector target = Vector.Copy(_thNetwork.Activate(item.State));

                    // The baseline must be the critic output for the stored state itself,
                    // not for the state passed to the current call.
                    float value0 = target[criticOutput];

                    // Likewise the entry to update is the action stored with this transition.
                    target[item.Action]  = R - value0;
                    target[criticOutput] = R;

                    mse += _thOptimizer.Train(item.State, target);

                    Vector.Release(item.State);
                    Vector.Release(target);
                }

                _optimizer.AsyncUpdate(_thOptimizer);
                _thNetwork.OverrideParams(_network);
            }

            return(mse);
        }
Example #4
0
        /// <summary>
        /// Performs a one-step update of the combined network whose last output element is
        /// read as the critic value and whose element <paramref name="p_action"/> receives the
        /// advantage-style target.
        /// </summary>
        /// <param name="p_state0">State in which the action was taken; the network is trained on it.</param>
        /// <param name="p_action">Index of the taken action within the output vector.</param>
        /// <param name="p_state1">Successor state; its critic output is used for bootstrapping.</param>
        /// <param name="p_reward">Reward received for the transition.</param>
        /// <param name="p_final">
        /// When true, the successor value is taken as 0, so the targets are built from the
        /// reward alone and <paramref name="p_state1"/> is not evaluated.
        /// </param>
        /// <returns>The value returned by <c>_optimizer.Train</c>.</returns>
        virtual public double Train(Vector p_state0, int p_action, Vector p_state1, float p_reward, bool p_final)
        {
            int criticOutput = _network.Output.Size - 1;

            // A final transition has no successor to bootstrap from.
            float value1 = 0f;

            if (!p_final)
            {
                _network.Activate(p_state1);
                value1 = _network.Output[criticOutput];
            }

            // Activate on p_state0 last so that Output holds the values for the trained state.
            _network.Activate(p_state0);
            float value0 = _network.Output[criticOutput];

            Vector target = Vector.Copy(_network.Output);

            target[p_action]     = p_reward + _gamma * value1 - value0;
            target[criticOutput] = p_reward + _gamma * value1;

            double mse = _optimizer.Train(p_state0, target);

            Vector.Release(target);

            return(mse);
        }
Example #5
0
        /// <summary>
        /// Performs one Q-learning update for the transition
        /// (<paramref name="p_state0"/>, <paramref name="p_action0"/>) -> <paramref name="p_state1"/>.
        /// </summary>
        /// <param name="p_state0">State in which the action was taken; the network is trained on it.</param>
        /// <param name="p_action0">Index of the taken action; only this entry of the target vector is replaced.</param>
        /// <param name="p_state1">Successor state, evaluated through <c>CalcMaxQa</c>.</param>
        /// <param name="p_reward">Reward received for the transition.</param>
        /// <param name="final">When true, the target for the taken action is the reward alone.</param>
        /// <returns>The value returned by <c>_optimizer.Train</c>.</returns>
        public double Train(Vector p_state0, int p_action0, Vector p_state1, float p_reward, bool final = false)
        {
            float maxQs1a = CalcMaxQa(p_state1);

            // updating phase for Q(s,a)
            _network.Activate(p_state0);

            // Copy the output instead of aliasing it: writing the target into the network's
            // own output vector would modify the network state that the optimizer reads.
            Vector target = Vector.Copy(_network.Output);

            if (final)
            {
                target[p_action0] = p_reward;
            }
            else
            {
                target[p_action0] = p_reward + _gamma * maxQs1a;
            }

            float mse = _optimizer.Train(p_state0, target);

            // Release the vector obtained from Vector.Copy once training is done.
            Vector.Release(target);

            return(mse);
        }
Example #6
0
        /// <summary>
        /// Activates <c>_networkQt</c> on <paramref name="p_state"/> and returns the largest
        /// element of its output.
        /// </summary>
        /// <param name="p_state">State to evaluate.</param>
        /// <returns>The maximum value found in <c>_networkQt.Output</c> after activation.</returns>
        virtual protected float CalcMaxQa(Vector p_state)
        {
            _networkQt.Activate(p_state);

            var qValues = _networkQt.Output;
            int count   = qValues.Size;

            // Seed with the first element, then keep any strictly larger value.
            float best = qValues[0];

            for (int idx = 0; idx < count; idx++)
            {
                float candidate = qValues[idx];

                if (candidate > best)
                {
                    best = candidate;
                }
            }

            return best;
        }