/// <summary>
/// Chooses the action to perform for <paramref name="state"/> while learning,
/// recording the observed transition and periodically starting a training run.
/// </summary>
/// <param name="state">The state observed on this call.</param>
/// <returns>
/// The action selected for <paramref name="state"/>; the previously selected action
/// when the state is unchanged and the repeat condition holds; or <c>null</c> when
/// <paramref name="state"/> is terminal and this is not the first turn.
/// </returns>
public Action GetLearningAction(QState state)
{
    // Set up the model on demand, using the dimensions reported by the state.
    if (!ModelReady())
    {
        Iteration = 1;
        Initialize(state.GridSize, state.VectorSize, state.Depth);
    }

    if (!_isFirstTurn)
    {
        if (state.IsTerminal)
        {
            // Record the final transition and flag the next call as a first turn.
            StoreSARS(new SARS(_prevState, _prevAction, state));
            _isFirstTurn = true;
            return null;
        }

        // Unchanged state: repeat the previous action. When Discretize is off,
        // this is only allowed while the repeat counter is at most 10.
        var mayRepeat = Discretize || _stateCounter <= 10;
        if (state.Equals(_prevState) && mayRepeat)
        {
            _stateCounter++;
            return _prevAction.Action;
        }

        StoreSARS(new SARS(_prevState, _prevAction, state));
    }

    // Select a new action and remember it together with the state it was chosen for.
    var chosen = EpsilonGreedy(Epsilon(Iteration), state);
    _prevAction = chosen;
    _prevState = state;
    _stateCounter = 0;
    _isFirstTurn = false;
    _trainingCounter++;

    // Start training once enough actions have been selected, unless the time scale is already 0.
    if (_trainingCounter >= TrainingInterval && Time.timeScale != 0)
    {
        // The current time scale is handed to the training coroutine before the clock is zeroed;
        // presumably the coroutine restores it when it finishes (not visible here).
        var savedTimeScale = Time.timeScale;
        _trainingCounter = 0;
        Time.timeScale = 0;
        var trainingRoutine = PrioritySweeping
            ? RunPriotizedTraining(savedTimeScale)
            : RunTraining(savedTimeScale);
        QAIManager.RunCoroutine(trainingRoutine);
    }

    return chosen.Action;
}
/// <summary>
/// Determines whether <paramref name="obj"/> is a <c>QStateActionPair</c> with an equal
/// state and the same action.
/// </summary>
/// <param name="obj">The object to compare against; may be <c>null</c> or of another type.</param>
/// <returns>
/// <c>true</c> when <paramref name="obj"/> is a <c>QStateActionPair</c> whose <c>state</c>
/// equals this instance's <c>state</c> and whose <c>action</c> compares equal with <c>==</c>;
/// otherwise <c>false</c>.
/// </returns>
public override bool Equals(object obj)
{
    // Equals must not throw: a null reference or a foreign type is simply "not equal".
    if (!(obj is QStateActionPair))
    {
        return false;
    }

    var other = (QStateActionPair)obj;
    return state.Equals(other.state) && action == other.action;
}
/// <summary>
/// Compares this transition with <paramref name="other"/> by state, next state and action.
/// </summary>
/// <param name="other">The transition to compare against.</param>
/// <returns>
/// <c>true</c> when <c>State</c>, <c>NextState</c> and <c>Action</c> all report equality
/// with the corresponding members of <paramref name="other"/>; otherwise <c>false</c>.
/// </returns>
public bool Equals(SARS other)
{
    // Members are checked in the order State, NextState, Action, stopping at the first mismatch.
    if (!State.Equals(other.State))
    {
        return false;
    }

    if (!NextState.Equals(other.NextState))
    {
        return false;
    }

    return Action.Equals(other.Action);
}