/// <summary>
/// Runs one trial starting at <paramref name="currentState"/>. Each step picks an action,
/// moves to the resulting state, hands the reward to <c>QUpdate</c>, and then does the same for
/// every transition returned by <c>GetObservedStates</c>. The trial stops when the state reports
/// <c>IsEnd()</c>, when <c>GetOutcomes</c> returns an empty collection, or when <c>isRunning</c>
/// is false.
/// </summary>
/// <param name="currentState">State the trial starts from.</param>
/// <param name="trialNum">Trial number; used only in the output messages.</param>
/// <param name="learn">Value stored in <c>this.learn</c> before the trial begins.</param>
/// <param name="discount">Value stored in <c>this.discount</c> before the trial begins.</param>
/// <param name="explore">
/// Value stored in <c>this.explore</c>; also the threshold compared against a random draw to
/// decide whether a step uses <c>GetRandomAction</c> instead of <c>GetBestAction</c>.
/// </param>
/// <returns>The state the trial was in when the loop stopped.</returns>
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    this.learn = learn;
    this.discount = discount;
    this.explore = explore;

    decimal totalReward = 0;
    int stepCount = 0;

    while (!currentState.IsEnd() && GetOutcomes(currentState).Count > 0 && isRunning)
    {
        stepCount++;

        // Short-circuit: no random number is drawn at all when explore is not positive.
        bool exploring = explore > 0 && (decimal)random.NextDouble() <= explore;
        QAction chosen = exploring ? GetRandomAction(currentState) : GetBestAction(currentState);

        QState nextState = currentState.GetNewState(chosen);

        // The mode-dependent prefix is re-evaluated before every message rather than cached,
        // so each message reflects CurrentMode at the moment it is written.
        string stepPrefix = CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";
        string stepLabel = exploring ? "Explore" : "Action";
        WriteOutput(stepPrefix + "#" + stepCount + " " + stepLabel + ": '" + chosen + "' @ " + currentState.ToString());

        nextState.Inherit(currentState);
        nextState.Step();

        decimal reward = GetReward(currentState, nextState);
        totalReward += reward;
        QUpdate(stepCount, currentState, chosen, nextState, reward);

        string gainPrefix = CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";
        WriteOutput(gainPrefix + "#" + stepCount + " Gain " + Math.Round(reward, 4) + ", Total " + Math.Round(totalReward, 4));

        // Apply the same update to each additional transition reported by the new state.
        foreach (KeyValuePair<QStateActionPair, QState> observation in nextState.GetObservedStates(currentState, chosen))
        {
            QState priorState = observation.Key.state;
            QAction priorAction = observation.Key.action;
            QState resultState = observation.Value;

            decimal observedReward = GetReward(priorState, resultState);
            QUpdate(stepCount, priorState, priorAction, resultState, observedReward);

            string observedPrefix = CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";
            WriteOutput(observedPrefix + "#" + stepCount + " Observed: '" + priorAction + "' @ " + priorState.ToString() + " | Gain " + Math.Round(observedReward, 4));
        }

        currentState = nextState;
    }

    // The summary line is written only if the run is still flagged as running.
    if (!isRunning)
    {
        return currentState;
    }

    string pluralSuffix = stepCount == 1 ? "" : "s";
    WriteOutput("Trial " + trialNum + ": " + Math.Round(totalReward, 4) + " in " + stepCount + " step" + pluralSuffix + ".");
    return currentState;
}