示例#1
0
        /// <summary>
        /// Chooses the action to take for <paramref name="state"/> while learning,
        /// recording the transition from the previous call and periodically
        /// kicking off a training coroutine.
        /// </summary>
        /// <param name="state">The state observed on this call.</param>
        /// <returns>
        /// The selected action; the previously returned action when the state is
        /// unchanged and repeating is allowed; or <c>null</c> when
        /// <paramref name="state"/> is terminal on a non-first turn.
        /// </returns>
        public Action GetLearningAction(QState state)
        {
            // Lazily set up the model from the dimensions reported by the state.
            if (!ModelReady())
            {
                Iteration = 1;
                Initialize(state.GridSize, state.VectorSize, state.Depth);
            }

            if (!_isFirstTurn)
            {
                if (state.IsTerminal)
                {
                    // Record the final transition and make the next call behave as a first turn.
                    StoreSARS(new SARS(_prevState, _prevAction, state));
                    _isFirstTurn = true;
                    return null;
                }

                // Unchanged state: repeat the previous action. Without Discretize the
                // repeat is only allowed while the counter is at most 10.
                if (state.Equals(_prevState) && (Discretize || _stateCounter <= 10))
                {
                    _stateCounter++;
                    return _prevAction.Action;
                }

                StoreSARS(new SARS(_prevState, _prevAction, state));
            }

            var chosen = EpsilonGreedy(Epsilon(Iteration), state);

            // Remember this decision so the next call can build its transition.
            _prevAction = chosen;
            _prevState = state;
            _stateCounter = 0;
            _isFirstTurn = false;

            _trainingCounter++;
            if (_trainingCounter >= TrainingInterval && Time.timeScale != 0)
            {
                // Set the time scale to zero while training runs; the previous value is
                // handed to the coroutine (presumably so it can restore it - confirm
                // against RunTraining / RunPriotizedTraining).
                var savedTimeScale = Time.timeScale;
                _trainingCounter = 0;
                Time.timeScale = 0;

                if (PrioritySweeping)
                {
                    QAIManager.RunCoroutine(RunPriotizedTraining(savedTimeScale));
                }
                else
                {
                    QAIManager.RunCoroutine(RunTraining(savedTimeScale));
                }
            }

            return chosen.Action;
        }
示例#2
0
 /// <summary>
 /// Determines whether <paramref name="obj"/> is a <c>QStateActionPair</c>
 /// with an equal <c>state</c> and the same <c>action</c>.
 /// </summary>
 /// <param name="obj">The object to compare with this pair; may be null.</param>
 /// <returns>
 /// <c>true</c> when <paramref name="obj"/> is a <c>QStateActionPair</c> whose
 /// state equals this state and whose action compares equal with <c>==</c>;
 /// <c>false</c> otherwise, including for null or objects of another type.
 /// </returns>
 /// <remarks>
 /// NOTE(review): a matching GetHashCode override is not visible in this
 /// excerpt - confirm one exists and uses the same fields.
 /// </remarks>
 public override bool Equals(object obj)
 {
     // `is` is false for null and for unrelated types, so the cast below can
     // never throw; Equals is expected to return false in those cases.
     if (!(obj is QStateActionPair))
     {
         return false;
     }

     var other = (QStateActionPair)obj;
     return state.Equals(other.state) && action == other.action;
 }
示例#3
0
 /// <summary>
 /// Compares this transition with <paramref name="other"/> member by member.
 /// </summary>
 /// <param name="other">The transition to compare against.</param>
 /// <returns>
 /// <c>true</c> when <c>State</c>, <c>NextState</c> and <c>Action</c> are all
 /// equal according to their own <c>Equals</c>; otherwise <c>false</c>.
 /// </returns>
 /// <remarks>
 /// NOTE(review): <paramref name="other"/> is dereferenced without a null
 /// check; if SARS is a reference type, passing null throws - confirm callers.
 /// </remarks>
 public bool Equals(SARS other)
 {
     // Checked in the same order as before: State, then NextState, then Action.
     if (!State.Equals(other.State))
     {
         return false;
     }

     if (!NextState.Equals(other.NextState))
     {
         return false;
     }

     return Action.Equals(other.Action);
 }