コード例 #1
0
ファイル: QLearningCNN.cs プロジェクト: nkf/deep-unity
        /// <summary>
        /// Trains the network on a batch of transitions. Every regression target
        /// is computed first, and only afterwards is one <c>_net.SGD</c> step run
        /// per sample.
        /// </summary>
        /// <param name="batch">Transitions to train on; each supplies a state, an action, a reward and a next state.</param>
        public override void TrainModel(List <SARS> batch)
        {
            int count = batch.Count;
            var states = new StatePair[count];
            var targets = new TargetIndexPair[count];

            // Pass 1: build the (input, target) pairs. This is kept separate from the
            // update pass so that every QMax call happens before any SGD step.
            // NOTE(review): presumably QMax evaluates the same network that SGD
            // updates - confirm before merging the two loops.
            for (int k = 0; k < count; k++)
            {
                var sample = batch[k];
                states[k] = sample.State.Features;

                float target;
                if (sample.NextState.IsTerminal)
                {
                    // Terminal next state: the target is the reward alone.
                    target = sample.Reward;
                }
                else
                {
                    // Non-terminal: reward plus the discounted QMax of the next state.
                    var bestNext = QMax(sample.NextState);
                    target = sample.Reward + Discount * bestNext;
                }

                // _amap translates the action id into the value paired with the target.
                targets[k] = new TargetIndexPair(target, _amap[sample.Action.ActionId]);
            }

            // Pass 2: one SGD step per sample, in batch order.
            for (int k = 0; k < count; k++)
            {
                _net.SGD(states[k], targets[k]);
            }
        }
コード例 #2
0
        /// <summary>
        /// Runs one training step for a single sample: writes the difference between
        /// the target and the computed output into the loss at the target's index,
        /// then passes that loss through the output, split and backpropagation visitors.
        /// </summary>
        /// <param name="input">Input passed to <c>Compute</c>.</param>
        /// <param name="p">Supplies the target value and the index of the output entry it applies to.</param>
        public void SGD(StatePair input, TargetIndexPair p)
        {
            // Reset the loss so that only the entry at p.Index is populated below.
            _loss.Clear();

            // NOTE(review): the 'true' argument presumably selects training mode - confirm against Compute.
            var error = p.Target - Compute(input, true)[p.Index];
            _loss.At(p.Index, error);

            // Pass the loss through the output visitor, then split the result in two.
            var outputResult = _outback.Visit(_loss, _params);
            var split = _split.Visit(outputResult, _params);

            // Left part: unflattened, then handed to the main backpropagation.
            var unflattened = _unflatten.Visit(split.left, _params);
            _backprop.BackPropagation(unflattened, _params);

            // Right part: handed to the hidden backpropagation visitor.
            _hiddenBackprop.Visit(split.right, _params);

            IsOutputFromTraining = true;
        }