Example #1
        void Start()
        {
            // Q-network: a simple 4-layer net with 1 input, 5 outputs
            // (one Q-value per action), and 30 hidden units.
            model = new MotionGenerator.Algorithm.Reinforcement.Models.Simple4Layer(
                inputDimention: 1, outputDimention: 5, hiddenDimention: 30);

            // TD Q-learning trainer: epsilon-greedy exploration at 0.3, discount
            // factor 0.9, a replay history of 100,000 transitions sampled in
            // minibatches of 32, and a single reward channel with weight 1.
            trainer = new MotionGenerator.Algorithm.Reinforcement.TemporalDifferenceQTrainer(
                epsilon: 0.3f, qNetwork: model, historySize: 100000, discountRatio: 0.9f, actionDimention: 5,
                replaySize: 32, rewardWeights: new[] { 1f });

            // Initial state: a 1x1 zero matrix (MathNet.Numerics builder).
            state = Matrix<float>.Build.DenseDiagonal(1, 0f);

            lastReward = 0f;
        }
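
This snippet only constructs the trainer; its update logic is not shown. The parameter names (discountRatio here, plus alpha in the later examples) match the textbook temporal-difference Q-learning update, so the sketch below shows that update in tabular form for reference. The TdSketch class and its Update method are hypothetical illustrations of the algorithm the class name implies, not the project's actual code.

    using System;

    static class TdSketch
    {
        // Textbook TD Q-learning update suggested by the trainer's parameters:
        // Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)).
        // Illustrative tabular form only; the trainer itself updates a network.
        public static void Update(float[,] q, int s, int a, float reward, int sNext,
                                  float alpha, float gamma)
        {
            // Greedy bootstrap target over the next state's actions.
            float maxNext = float.MinValue;
            for (int a2 = 0; a2 < q.GetLength(1); a2++)
                maxNext = Math.Max(maxNext, q[sNext, a2]);

            q[s, a] += alpha * (reward + gamma * maxNext - q[s, a]);
        }
    }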
Example #2
        private void CreateTrainer(int inputDimention)
        {
            // Resize the network's input layer only when the dimension actually changes.
            if (_inputDimention != inputDimention)
            {
                _inputDimention = inputDimention;
                _model.AlterInputDimention(inputDimention);
            }

            _trainer = new MotionGenerator.Algorithm.Reinforcement.TemporalDifferenceQTrainer(
                epsilon: _randomActionProbability, qNetwork: _model,
                historySize: _historySize, discountRatio: _discountRatio, actionDimention: Actions.Count,
                replaySize: 32, alpha: _optimizerAlpha, rewardWeights: _soulWeights, optimizerType: _optimizerType,
                enableRandomForgetting: _enableRandomForgetting,
                // Restore the replay history from saved data when it is available.
                initialHistory: _historySaveData != null
                    ? _historySaveData.Select(x => x.Instantiate()).ToList()
                    : null
                );

            _historySaveData = null; // FIXME(kogaki): avoid keeping _historySaveData as an instance variable
        }
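
This variant threads every hyperparameter through instance fields and rebuilds the replay history from serialized data. The epsilon argument (here _randomActionProbability) is the standard epsilon-greedy exploration rate: with probability epsilon the agent takes a uniformly random action, otherwise the greedy one. A minimal sketch of that rule follows; the SelectAction helper is hypothetical and assumes nothing about the trainer's internals.

    using System;

    static class EpsilonGreedySketch
    {
        // qValues holds one entry per action (actionDimention entries).
        public static int SelectAction(float[] qValues, float epsilon, Random rng)
        {
            // Explore: pick any action uniformly at random.
            if (rng.NextDouble() < epsilon)
                return rng.Next(qValues.Length);

            // Exploit: pick the action with the highest Q-value.
            int best = 0;
            for (int a = 1; a < qValues.Length; a++)
                if (qValues[a] > qValues[best]) best = a;
            return best;
        }
    }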
Example #3
        void Start()
        {
            _logger = new TSVLogger();

            if (IsMultiSoulModel)
            {
                // Multi-soul model: the output dimension scales with the number
                // of reward weights (5 outputs per weight), and the hidden layer
                // is doubled to 60 units.
                model = new MotionGenerator.Algorithm.Reinforcement.Models.Simple4Layer(
                    inputDimention: 1, outputDimention: 5 * _soulWeights.Length, hiddenDimention: 30 * 2);
                trainer = new MotionGenerator.Algorithm.Reinforcement.TemporalDifferenceQTrainer(
                    epsilon: 0.3f, qNetwork: model, historySize: 100000, discountRatio: 0.9f, actionDimention: 5,
                    replaySize: 32, rewardWeights: _soulWeights, alpha: 0.01f);
            }
            else
            {
                // Single-reward model: 5 Q-outputs and a unit reward weight.
                model = new MotionGenerator.Algorithm.Reinforcement.Models.Simple4Layer(
                    inputDimention: 1, outputDimention: 5, hiddenDimention: 30 * 2);
                trainer = new MotionGenerator.Algorithm.Reinforcement.TemporalDifferenceQTrainer(
                    epsilon: 0.3f, qNetwork: model, historySize: 100000, discountRatio: 0.9f, actionDimention: 5,
                    replaySize: 32, rewardWeights: new[] { 1f }, alpha: 0.01f);
            }

            // Initial state: a 1x1 zero matrix (MathNet.Numerics builder).
            state = Matrix<float>.Build.DenseDiagonal(1, 0f);
        }
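
The multi-soul branch sizes the network output as 5 actions times one block per entry in _soulWeights, which suggests the trainer keeps separate Q-values per reward channel and folds them together with rewardWeights. How the trainer actually combines them is not visible in this snippet; the sketch below shows one plausible weighted combination, where both the Combine helper and the per-channel block layout are assumptions.

    static class MultiRewardSketch
    {
        // Hypothetical: fold per-channel Q-values (one block of actionCount
        // values per reward weight) into a single weighted score per action.
        public static float[] Combine(float[] qPerChannel, float[] weights, int actionCount)
        {
            var combined = new float[actionCount];
            for (int c = 0; c < weights.Length; c++)
                for (int a = 0; a < actionCount; a++)
                    combined[a] += weights[c] * qPerChannel[c * actionCount + a];
            return combined;
        }
    }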