/// <summary>
/// Samples the state reached from <paramref name="initialState"/> after executing
/// <paramref name="action"/>, using the entries of the state-transition probability table.
/// </summary>
/// <param name="initialState">State the transition starts from; used to index the transition table.</param>
/// <param name="action">Action executed in that state; used to index the table entry of the state.</param>
/// <returns>
/// The first listed target state whose accumulated probability reaches the drawn threshold,
/// the last listed target state when none does, or <c>null</c> when no transitions are listed.
/// </returns>
protected virtual PartiallyObservableState GetRandomNextState(PartiallyObservableState initialState, IAction action)
{
    var candidates = stateTransitionProbabilities[initialState][action];

    // threshold compared against the running probability sum
    // (presumably a value in [0, 1] -- confirm against GetSmallValue)
    var threshold = GetSmallValue();

    var cumulative = 0d;
    PartiallyObservableState lastSeen = null;
    foreach (var candidate in candidates)
    {
        cumulative += candidate.Value;
        if (threshold <= cumulative)
        {
            return candidate.Key;
        }

        // remembered so the final entry is used when the threshold is never reached
        lastSeen = candidate.Key;
    }

    // last listed target, or null when there were no transitions at all
    return lastSeen;
}
/// <summary>
/// Resets the base class and forgets the current state.
/// </summary>
public override void Reset()
{
    base.Reset();

    // this override leaves the transition probabilities and the reward table untouched;
    // only the current state is cleared
    currentState = null;
}
/// <summary>
/// Looks up the reward stored in the reward table for a state-action pair.
/// </summary>
/// <param name="state">State to look up; may be <c>null</c>.</param>
/// <param name="action">Action to look up; may be <c>null</c>.</param>
/// <returns>
/// The stored reward, or 0 when either argument is <c>null</c> or the table has no entry for the pair.
/// </returns>
public virtual double GetStateActionReward(PartiallyObservableState state, IAction action)
{
    // nothing can be stored for a missing state or action
    if ((state == null) || (action == null))
    {
        return 0;
    }

    // no rewards registered for this state
    if (!rewardTable.ContainsKey(state))
    {
        return 0;
    }

    var rewardsForState = rewardTable[state];
    if (!rewardsForState.ContainsKey(action))
    {
        return 0;
    }

    return rewardsForState[action];
}
/// <summary>
/// Disposes the base class, empties every table and state collection held by this instance
/// and forgets the current state.
/// </summary>
public override void Dispose()
{
    base.Dispose();

    // reward and transition data
    rewardTable.Clear();
    poStates.Clear();
    stateTransitionProbabilities.Clear();

    // state components
    observableStates.Clear();
    nonObservableStates.Clear();

    currentState = null;
}
/// <summary>
/// Advances the current state and returns the part of it that is observable.
/// </summary>
/// <returns>The <c>ObservablePart</c> of the new current state.</returns>
public override IState GetUpdatedCurrentState()
{
    // the action held by the short-term memory is read before branching
    var chosenAction = ShortTermMemory.CurrentAction;

    if (currentState == null)
    {
        // no current state yet: start from a random one
        currentState = GetRandomState();
    }
    else
    {
        // otherwise sample the successor from the transition table
        currentState = GetRandomNextState(currentState, chosenAction);
    }

    // only the observable part of the state is returned to the caller
    return currentState.ObservablePart;
}
/// <summary>
/// Picks a random target state for a new transition out of <paramref name="initialState"/>
/// under <paramref name="action"/>. The target is never <c>null</c>, never the initial state itself,
/// and never a state that is already a transition target for this state-action pair.
/// </summary>
/// <param name="initialState">State the new transition starts from; used to index the transition table.</param>
/// <param name="action">Action the new transition belongs to; used to index the table entry of the state.</param>
/// <returns>A randomly drawn state from <c>poStates</c> that satisfies the constraints above.</returns>
/// <exception cref="System.InvalidOperationException">
/// Thrown when no state in <c>poStates</c> satisfies the constraints; the random search
/// would otherwise never terminate.
/// </exception>
protected virtual PartiallyObservableState GetNewTransition(PartiallyObservableState initialState, IAction action)
{
    // the existing transitions do not change while searching, so look them up once
    var existingTransitions = this.stateTransitionProbabilities[initialState][action];

    // the rejection sampling below only terminates if at least one state qualifies,
    // so verify that up front instead of looping forever
    var candidateExists = false;
    for (var i = 0; i < this.poStates.Count; i++)
    {
        var candidate = this.poStates[i];
        if ((candidate == null) || (candidate == initialState) || existingTransitions.ContainsKey(candidate))
        {
            continue;
        }

        candidateExists = true;
        break;
    }

    if (!candidateExists)
    {
        throw new System.InvalidOperationException(
            "No state is available as a new transition target for the given state and action.");
    }

    //draws random states until one differs from the initial state and from all existing targets
    PartiallyObservableState finalState = null;
    while ((finalState == null) ||
           (finalState == initialState) ||
           existingTransitions.ContainsKey(finalState))
    {
        finalState = this.poStates[Rand.Next(this.poStates.Count)];
    }

    return finalState;
}