Exemple #1
0
        /// <summary>
        /// Performs one training step for the transition
        /// (<paramref name="p_state0"/>, <paramref name="p_action0"/>) -> <paramref name="p_state1"/>.
        /// </summary>
        /// <param name="p_state0">State the action was taken in; fed to <c>_networkQ</c> and the optimizer.</param>
        /// <param name="p_action0">Index of the output whose target value is overwritten.</param>
        /// <param name="p_state1">Successor state, passed to <c>CalcMaxQa</c>.</param>
        /// <param name="p_reward">Reward observed for the transition.</param>
        /// <param name="p_final">
        /// When true the target for the chosen action is the bare reward; otherwise it is
        /// <c>p_reward + _gamma * CalcMaxQa(p_state1)</c>.
        /// </param>
        /// <returns>The value returned by <c>_optimizer.Train</c> (presumably the training error; confirm against the optimizer).</returns>
        public double Train(Vector p_state0, int p_action0, Vector p_state1, float p_reward, bool p_final = false)
        {
            // Evaluated up front, before _networkQ is activated on p_state0, to keep the
            // original call order (CalcMaxQa may itself touch network state).
            float bestNextQ = CalcMaxQa(p_state1);

            // Start from the network's current output for p_state0 so that only the entry
            // belonging to the taken action differs from the prediction.
            _networkQ.Activate(p_state0);
            Vector qTarget = Vector.Copy(_networkQ.Output);

            qTarget[p_action0] = p_final
                ? p_reward
                : p_reward + _gamma * bestNextQ;

            float error = _optimizer.Train(p_state0, qTarget);

            // The copy is handed back via Vector.Release once training is done.
            Vector.Release(qTarget);

            _asyncUnit.Update(p_final);
            if (_asyncUnit.IsAsyncReady)
            {
                _optimizer.AsyncUpdate();
            }

            return error;
        }
Exemple #2
0
        /// <summary>
        /// Records the transition on the internal stack and, once <c>_asyncUnit</c> reports it is
        /// ready, unrolls the stack from newest to oldest, training <c>_thNetwork</c> on the
        /// discounted return of every stored step.
        /// </summary>
        /// <param name="p_state0">State the action was taken in (a copy is stored on the stack).</param>
        /// <param name="p_action">Index of the action taken in <paramref name="p_state0"/>.</param>
        /// <param name="p_state1">Successor state; used to bootstrap the return when the episode is not final.</param>
        /// <param name="p_reward">Reward observed for the transition.</param>
        /// <param name="p_final">When true the bootstrap value is 0 instead of the network's estimate for <paramref name="p_state1"/>.</param>
        /// <returns>
        /// Sum of the values returned by <c>_thOptimizer.Train</c> over all unrolled steps,
        /// or 0 when no update was performed in this call.
        /// </returns>
        public override double Train(Vector p_state0, int p_action, Vector p_state1, float p_reward, bool p_final)
        {
            double mse = 0;

            // The state is copied because it is kept beyond this call; the copy is released
            // when the item is consumed in the loop below.
            _stack.Push(new StackItem {
                State = Vector.Copy(p_state0), Action = p_action, Reward = p_reward
            });

            _asyncUnit.Update(p_final);

            if (_asyncUnit.IsAsyncReady)
            {
                // The last network output is read as the state-value estimate.
                int   criticOutput = _thNetwork.Output.Size - 1;
                float value1       = _thNetwork.Activate(p_state1)[criticOutput];

                // Bootstrap from the successor's value unless the episode ended.
                float R = p_final ? 0f : value1;

                while (_stack.Count > 0)
                {
                    StackItem item = (StackItem)_stack.Pop();
                    R = item.Reward + _gamma * R;

                    // Activate on the state of THIS stored step; its current outputs form
                    // the base of the target vector.
                    Vector target = Vector.Copy(_thNetwork.Activate(item.State));

                    // Fix: the baseline and the action index must come from the popped item,
                    // not from the arguments of the current call. Previously every stored
                    // step was trained with p_state0's value and p_action's output slot,
                    // which is only correct for the most recent transition.
                    float value0 = target[criticOutput];

                    target[item.Action]  = R - value0;
                    target[criticOutput] = R;

                    mse += _thOptimizer.Train(item.State, target);

                    Vector.Release(item.State);
                    Vector.Release(target);
                }

                _optimizer.AsyncUpdate(_thOptimizer);
                _thNetwork.OverrideParams(_network);
            }

            return mse;
        }