// Plans a solution from the given state with QSearch.Depth_First and replays the
// planned actions one by one. If the plan ends without reaching an end state, a new
// plan is computed from wherever the replay stopped.
//
// currentState: state to start from.
// trialNum, learn, discount, explore: part of the Run signature; not read by this implementation.
//
// Returns the last state reached: the end state, the state at which no plan could be
// found, or the state at which the run was stopped (isRunning became false).
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    // Re-planning is done in a loop rather than by calling Run recursively, so
    // repeated re-solves cannot grow the call stack.
    while (true)
    {
        QSearch qsearch = new QSearch(this);
        QSearchResult actions = qsearch.Depth_First(currentState, true);
        if (actions == null)
        {
            WriteOutput("No solution found.", true);
            return currentState;
        }

        foreach (QAction action in actions.actionsList)
        {
            // Nothing further in the plan can be applied once the run was stopped
            // or the end state has been reached.
            if (!isRunning || currentState.IsEnd())
            {
                break;
            }

            // A planned action that is not currently available is skipped; later
            // actions of the plan are still tried.
            if (!currentState.GetActions().Contains(action))
            {
                continue;
            }

            WriteOutput(currentState + ": " + action);
            QState newState = currentState.GetNewState(action);
            newState.Inherit(currentState);
            newState.Step();
            currentState = newState;
        }

        if (currentState.IsEnd())
        {
            WriteOutput(currentState + ": End");
            return currentState;
        }

        // When the run has been stopped, every planned action is skipped and the end
        // state is never reached; re-solving here would repeat forever.
        if (!isRunning)
        {
            return currentState;
        }

        WriteOutput("Existing solution no longer applicable. Re-solving...");
    }
}
// Runs a single trial/instance of the QState problem by repeatedly taking an action
// chosen with r.Next from the actions the current state offers.
// The walk stops when the state reports IsEnd(), when no actions are available, or
// when isRunning is false. learn, discount and explore are not read here; trialNum is
// only used in the log line while CurrentMode == LEARN.
// Returns the final state reached.
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    for (;;)
    {
        bool canContinue = !currentState.IsEnd()
            && currentState.GetActions().Length > 0
            && isRunning;
        if (!canContinue)
        {
            return currentState;
        }

        // Pick one of the currently available actions by random index.
        var available = currentState.GetActions();
        int pick = r.Next(currentState.GetActions().Length);
        QAction action = available.ElementAt(pick);

        // Build the successor, let it inherit from the current state, then advance it.
        QState newState = currentState.GetNewState(action);
        newState.Inherit(currentState);
        newState.Step();

        // The log line describes the state the action was taken from.
        string trialLabel = CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";
        WriteOutput(trialLabel + ": '" + action + "' @ " + currentState.ToString());

        currentState = newState;
    }
}
// Runs one trial from the given state, updating learned values after every step.
//
// The learn, discount and explore arguments are stored in the matching fields first.
// Each step then:
//   1. picks GetRandomAction when explore > 0 and random.NextDouble() <= explore,
//      otherwise GetBestAction;
//   2. creates the successor state, logs the choice, then calls Inherit and Step on it;
//   3. obtains the reward with GetReward, adds it to the running total and calls QUpdate;
//   4. does the same reward/QUpdate for every entry returned by
//      newState.GetObservedStates(currentState, action).
// The loop ends when the state reports IsEnd(), GetOutcomes returns nothing for it,
// or isRunning is false. A trial summary is written only if isRunning is still true.
// Returns the final state reached.
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    this.learn = learn;
    this.discount = discount;
    this.explore = explore;

    decimal totalReward = 0;
    int stepCount = 0;

    while (true)
    {
        if (currentState.IsEnd() || GetOutcomes(currentState).Count <= 0 || !isRunning)
        {
            break;
        }

        stepCount++;

        // Explore/exploit decision; the random draw only happens when explore > 0.
        bool explored = explore > 0 && (decimal)random.NextDouble() <= explore;
        QAction chosen;
        if (explored)
        {
            chosen = GetRandomAction(currentState);
        }
        else
        {
            chosen = GetBestAction(currentState);
        }

        QState newState = currentState.GetNewState(chosen);

        // Logged before Inherit/Step so the message shows the state as it was when the action was chosen.
        string choicePrefix = CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";
        string choiceKind = explored ? "Explore" : "Action";
        WriteOutput(choicePrefix + "#" + stepCount + " " + choiceKind + ": '" + chosen + "' @ " + currentState.ToString());

        newState.Inherit(currentState);
        newState.Step();

        decimal reward = GetReward(currentState, newState);
        totalReward += reward;
        QUpdate(stepCount, currentState, chosen, newState, reward);

        string gainPrefix = CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";
        WriteOutput(gainPrefix + "#" + stepCount + " Gain " + Math.Round(reward, 4) + ", Total " + Math.Round(totalReward, 4));

        // Additional transitions reported by the new state are learned from as well.
        foreach (KeyValuePair<QStateActionPair, QState> observation in newState.GetObservedStates(currentState, chosen))
        {
            QState priorState = observation.Key.state;
            QAction priorAction = observation.Key.action;
            QState resultingState = observation.Value;

            decimal observedReward = GetReward(priorState, resultingState);
            QUpdate(stepCount, priorState, priorAction, resultingState, observedReward);

            string observedPrefix = CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";
            WriteOutput(observedPrefix + "#" + stepCount + " Observed: '" + priorAction + "' @ " + priorState.ToString() + " | Gain " + Math.Round(observedReward, 4));
        }

        currentState = newState;
    }

    if (isRunning)
    {
        string plural = stepCount == 1 ? "" : "s";
        WriteOutput("Trial " + trialNum + ": " + Math.Round(totalReward, 4) + " in " + stepCount + " step" + plural + ".");
    }

    return currentState;
}