/// <summary>
/// Records one observed transition and, once enough samples have been gathered for the
/// (FromState, NextAction) pair, attempts a batched update of its Q-value.
/// </summary>
/// <remarks>
/// NOTE(review): the structure resembles the "Delayed Q-learning" scheme — confirm against the class documentation.
/// </remarks>
/// <param name="FromState">State the action was taken in.</param>
/// <param name="NextAction">Action that was taken.</param>
/// <param name="Reward">Reward observed for the transition.</param>
/// <param name="NextState">State reached after the action.</param>
protected override void Transaction(IQState FromState, IQAction NextAction, Decimal Reward, IQState NextState)
{
    // Learning is switched off for this pair: re-enable it only if some Q-value
    // changed after this pair's last attempted update.
    if (!this.LearnFlags[FromState][NextAction])
    {
        if (this.TimeOfLastAttemptedUpdate[FromState][NextAction] < this.TimeOfMostRecentQChange)
        {
            this.LearnFlags[FromState][NextAction] = true;
        }

        return;
    }

    // Accumulate this sample's target: reward plus the discounted best Q-value of the successor.
    this.AttemptedUpdates[FromState][NextAction] += Reward + (this.Gamma * GetMaxQ(NextState));
    this.NumSamplesInNextUpdate[FromState][NextAction] += 1;

    // Not enough samples collected yet; nothing more to do on this step.
    if (this.NumSamplesInNextUpdate[FromState][NextAction] < this.NumSamplesNeeded)
    {
        return;
    }

    var averagedTarget = this.AttemptedUpdates[FromState][NextAction] / this.NumSamplesNeeded;

    if ((this.QValues[FromState][NextAction] - averagedTarget) >= (2.0M * this.Epsilon1))
    {
        // The current estimate exceeds the averaged target by at least 2 * Epsilon1: lower it.
        this.QValues[FromState][NextAction] = averagedTarget + this.Epsilon1;
        this.TimeOfMostRecentQChange = this.Time;
    }
    else if (this.TimeOfLastAttemptedUpdate[FromState][NextAction] >= this.TimeOfMostRecentQChange)
    {
        // The update was rejected and no Q-value has changed since the previous attempt: stop learning.
        this.LearnFlags[FromState][NextAction] = false;
    }

    // Start a fresh batch for this pair.
    this.TimeOfLastAttemptedUpdate[FromState][NextAction] = this.Time;
    this.AttemptedUpdates[FromState][NextAction] = 0.0M;
    this.NumSamplesInNextUpdate[FromState][NextAction] = 0;
}
/// <summary>
/// Scans every action for <paramref name="State"/> and reports the largest Q-value
/// together with the action to take next.
/// </summary>
/// <param name="State">State whose Q-values are inspected.</param>
/// <param name="V">Receives the largest Q-value found (Decimal.MinValue if no action exceeds it).</param>
/// <param name="NextAction">
/// Receives the action holding the largest Q-value, unless that value equals InitialQ,
/// in which case the action at index <c>Time % Actions.Count</c> is returned instead.
/// </param>
protected void GetMax(IQState State, out Decimal V, out IQAction NextAction)
{
    Decimal bestValue = Decimal.MinValue;
    IQAction bestAction = null;

    // TODO: there is a potential optimisation using a priority queue
    foreach (IQAction candidate in this.Actions)
    {
        Decimal candidateValue = this.QValues[State][candidate];

        // Strict comparison: on ties the earliest action in enumeration order wins.
        if (candidateValue > bestValue)
        {
            bestValue = candidateValue;
            bestAction = candidate;
        }
    }

    V = bestValue;

    if (bestValue == this.InitialQ)
    {
        // Best value is still the initial one: cycle through the actions by time step.
        // NOTE(review): presumably intended to spread exploration over untried actions — confirm.
        NextAction = this.Actions[this.Time % this.Actions.Count];
    }
    else
    {
        NextAction = bestAction;
    }
}
/// <summary>
/// Returns the value reported by GetMax for <paramref name="State"/>, discarding the action.
/// </summary>
/// <param name="State">State whose Q-values are inspected.</param>
/// <returns>The largest Q-value found for the state.</returns>
protected decimal GetMaxQ(IQState State)
{
    decimal bestValue;
    IQAction unusedAction;

    GetMax(State, out bestValue, out unusedAction);

    return bestValue;
}
/// <summary>
/// Returns the action reported by GetMax for <paramref name="State"/>, discarding the value.
/// </summary>
/// <param name="State">State whose Q-values are inspected.</param>
/// <returns>The action selected by GetMax.</returns>
protected IQAction GetMaxAction(IQState State)
{
    decimal unusedValue;
    IQAction chosenAction;

    GetMax(State, out unusedValue, out chosenAction);

    return chosenAction;
}
/// <summary>
/// Applies a single learning-rate-weighted update to the Q-value of
/// (<paramref name="FromState"/>, <paramref name="NextAction"/>):
/// <c>Q = (1 - alpha) * Q + alpha * (Reward + Gamma * maxQ(NextState))</c>.
/// </summary>
/// <param name="FromState">State the action was taken in.</param>
/// <param name="NextAction">Action that was taken.</param>
/// <param name="Reward">Reward observed for the transition.</param>
/// <param name="NextState">State reached after the action.</param>
protected override void Transaction(IQState FromState, IQAction NextAction, Decimal Reward, IQState NextState)
{
    // Target for this sample. Computed once; GetMaxQ scans every action, so it is not repeated.
    decimal target = Reward + (this.Gamma * GetMaxQ(NextState));

    // The visit counter is bumped first so the learning rate reflects this visit.
    ++this.NumQ[FromState][NextAction];
    decimal alphaQ = GetAlphaQ(this.NumQ[FromState][NextAction]);

    // Blend the previous estimate with the new target using the learning rate.
    this.QValues[FromState][NextAction] =
        ((1.0M - alphaQ) * this.QValues[FromState][NextAction]) + (alphaQ * target);
}
/// <summary>
/// Advances the learner by one step: increments the time counter, picks an action for
/// <paramref name="FromState"/> via GetMaxAction, asks the model for the resulting reward and
/// successor state, and hands the observed transition to Transaction.
/// </summary>
/// <param name="FromState">State from which the step is taken.</param>
public void PerformTransition(IQState FromState)
{
    // Time is advanced before action selection, so GetMax sees the new value.
    this.Time++;

    IQAction chosenAction = GetMaxAction(FromState);

    Decimal observedReward;
    IQState successorState;
    this.Model.ProcessStateAction(FromState, chosenAction, out observedReward, out successorState);

    Transaction(FromState, chosenAction, observedReward, successorState);
}
/// <summary>
/// Processes one observed transition. Implemented by derived classes; invoked by
/// PerformTransition after the model has produced the reward and the successor state.
/// </summary>
/// <param name="FromState">State the action was taken in.</param>
/// <param name="NextAction">Action that was taken.</param>
/// <param name="Reward">Reward reported by the model for the transition.</param>
/// <param name="NextState">State reported by the model as the result of the action.</param>
protected abstract void Transaction(IQState FromState, IQAction NextAction, Decimal Reward, IQState NextState);