// This runs a single trial/instance of the QState problem. QLearner will automatically
// run many times for learning or once to apply what has been learned.
// Actions are chosen uniformly at random (no learned policy is consulted here).
// Must return the final state.
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    while (!currentState.IsEnd() && isRunning)
    {
        // Fetch the action list once per iteration; the original called GetActions()
        // three times per step (loop condition + twice for the random pick).
        var actions = currentState.GetActions();
        if (actions.Length == 0)
        {
            break;  // no moves available — treat like a terminal state, as the original loop condition did
        }

        QAction action = actions.ElementAt(r.Next(actions.Length));
        QState newState = currentState.GetNewState(action);
        newState.Inherit(currentState);
        newState.Step();
        WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + ": '" + action + "' @ " + currentState.ToString());
        currentState = newState;
    }
    return currentState;
}
// Executes one trial of the QState problem: epsilon-greedy action selection,
// Q-value updates for the taken action plus any extra transitions the new state
// reports as observed, and per-step progress logging.
// Returns the final state reached, as the QLearner contract requires.
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    // Cache the hyper-parameters so QUpdate and the helper methods can see them.
    this.learn = learn;
    this.discount = discount;
    this.explore = explore;

    decimal totalGain = 0;  // cumulative reward over the whole trial
    int stepCount = 0;      // number of actions taken so far

    // Stop on a terminal state, when no outcomes remain, or when stopped externally.
    while (!currentState.IsEnd() && GetOutcomes(currentState).Count > 0 && isRunning)
    {
        stepCount++;

        // Epsilon-greedy: roll once, then take either a random or the best-known action.
        bool didExplore = explore > 0 && (decimal)random.NextDouble() <= explore;
        QAction chosen = didExplore ? GetRandomAction(currentState) : GetBestAction(currentState);

        QState nextState = currentState.GetNewState(chosen);
        WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + stepCount + " " + (didExplore ? "Explore" : "Action") + ": '" + chosen + "' @ " + currentState.ToString());
        nextState.Inherit(currentState);
        nextState.Step();

        decimal gain = GetReward(currentState, nextState);
        totalGain += gain;
        QUpdate(stepCount, currentState, chosen, nextState, gain);
        WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + stepCount + " Gain " + Math.Round(gain, 4) + ", Total " + Math.Round(totalGain, 4));

        // Apply Q-updates for every additional (state, action) -> state transition
        // the new state reports as observed alongside the chosen action.
        foreach (KeyValuePair<QStateActionPair, QState> observed in nextState.GetObservedStates(currentState, chosen))
        {
            QState priorState = observed.Key.state;
            QAction priorAction = observed.Key.action;
            QState resultState = observed.Value;
            decimal observedGain = GetReward(priorState, resultState);
            QUpdate(stepCount, priorState, priorAction, resultState, observedGain);
            WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + stepCount + " Observed: '" + priorAction + "' @ " + priorState.ToString() + " | Gain " + Math.Round(observedGain, 4));
        }

        currentState = nextState;
    }

    // Only print the trial summary when the run finished normally (was not aborted).
    if (isRunning)
    {
        WriteOutput("Trial " + trialNum + ": " + Math.Round(totalGain, 4) + " in " + stepCount + " step" + (stepCount == 1 ? "" : "s") + ".");
    }

    return currentState;
}
// Adds or refreshes the grid row showing the Q-value for the given state/action pair.
// Marshals onto the UI thread when called from a background (learning) thread,
// since WinForms controls must only be touched by their owning thread.
// Fix: removed a stray empty statement (double semicolon) after the row assignment.
public void UpdateLearningTable(int n, QState s, QAction a, decimal qv)
{
    if (InvokeRequired)
    {
        // Re-dispatch this exact call on the UI thread.
        Invoke(new UpdateLearningTableD(UpdateLearningTable), n, s, a, qv);
    }
    else
    {
        QStateActionPair p = new QStateActionPair(s, a);
        if (LearningTableQStateKeys.ContainsKey(p))
        {
            // Row already exists for this pair: update its cells in place.
            LearningTableQStateKeys[p].Cells["Num"].Value = n;
            LearningTableQStateKeys[p].Cells["QValue"].Value = qv;
        }
        else
        {
            // First sighting of this pair: append a new row and remember it for fast updates.
            LearningTable.Rows.Add(n, s.ToString(), a.ToString(), qv);
            LearningTableQStateKeys[p] = LearningTable.Rows[LearningTable.Rows.Count - 1];
        }
    }
}
// Adds or refreshes the grid row showing the Q-value for the given state/action pair.
// Marshals onto the UI thread when called from a background (learning) thread,
// since WinForms controls must only be touched by their owning thread.
// Fix: removed a stray empty statement ("; ;") after the row assignment.
public void UpdateLearningTable(int n, QState s, QAction a, decimal qv)
{
    if (InvokeRequired)
    {
        // Re-dispatch this exact call on the UI thread.
        Invoke(new UpdateLearningTableD(UpdateLearningTable), n, s, a, qv);
    }
    else
    {
        QStateActionPair p = new QStateActionPair(s, a);
        if (LearningTableQStateKeys.ContainsKey(p))
        {
            // Row already exists for this pair: update its cells in place.
            LearningTableQStateKeys[p].Cells["Num"].Value = n;
            LearningTableQStateKeys[p].Cells["QValue"].Value = qv;
        }
        else
        {
            // First sighting of this pair: append a new row and remember it for fast updates.
            LearningTable.Rows.Add(n, s.ToString(), a.ToString(), qv);
            LearningTableQStateKeys[p] = LearningTable.Rows[LearningTable.Rows.Count - 1];
        }
    }
}
// Runs one trial: repeatedly chooses an action (epsilon-greedy), applies it, collects
// the reward, and performs a Q-value update, until a terminal state is reached, no
// outcomes remain, or the run is stopped externally. Returns the final state.
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    // Cache the hyper-parameters for use by QUpdate and the helper methods.
    this.learn = learn;
    this.discount = discount;
    this.explore = explore;
    decimal score = 0;   // cumulative reward over the whole trial
    int actionsTaken = 0; // step counter, also used in the log output
    while (!currentState.IsEnd() && GetOutcomes(currentState).Count > 0 && isRunning)
    {
        actionsTaken++;
        QAction a;
        bool exp;
        // Epsilon-greedy: with probability `explore` take a random action,
        // otherwise take the best-known action for the current state.
        if (explore > 0 && (decimal)random.NextDouble() <= explore)
        {
            a = GetRandomAction(currentState);
            exp = true;
        }
        else
        {
            a = GetBestAction(currentState);
            exp = false;
        }
        QState newState = currentState.GetNewState(a);
        WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + actionsTaken + " " + (exp ? "Explore" : "Action") + ": '" + a + "' @ " + currentState.ToString());
        newState.Inherit(currentState);
        newState.Step();
        decimal r = GetReward(currentState, newState);
        score += r;
        QUpdate(actionsTaken, currentState, a, newState, r);
        WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + actionsTaken + " Gain " + Math.Round(r, 4) + ", Total " + Math.Round(score, 4));
        // Also apply Q-updates for every additional (state, action) -> state transition
        // that the new state reports as observed alongside the chosen action.
        foreach (KeyValuePair <QStateActionPair, QState> kv in newState.GetObservedStates(currentState, a))
        {
            QState observedPriorState = kv.Key.state;
            QAction observedAction = kv.Key.action;
            QState observedState = kv.Value;
            decimal observedR = GetReward(observedPriorState, observedState);
            QUpdate(actionsTaken, observedPriorState, observedAction, observedState, observedR);
            WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + actionsTaken + " Observed: '" + observedAction + "' @ " + observedPriorState.ToString() + " | Gain " + Math.Round(observedR, 4));
        }
        currentState = newState;
    }
    // Only report the trial summary when the run completed normally (was not aborted).
    if (isRunning)
    {
        WriteOutput("Trial " + trialNum + ": " + Math.Round(score, 4) + " in " + actionsTaken + " step" + (actionsTaken == 1 ? "" : "s") + ".");
    }
    return(currentState);
}
// Intercepts state transitions so that, in step mode, the process is paused before
// entering a "[StateRun]" nested state; then always delegates to the base transition.
protected override void TransitionTo(QState targetState)
{
    // Pause only when step mode is on, we have not already toggled for this step,
    // the process is a normal idle-cycle process, it is not initializing, and the
    // current nested state is a run state. Short-circuit order preserved.
    bool pauseForStep =
        _workcell.Process.IsStepMode
        && !_stepModeToggle
        && !_isNonIdleProcess
        && !_workcell.Process.IsInit
        && this.CurrentNestedStateName.Contains("[StateRun]");

    if (!pauseForStep)
    {
        // Clear the toggle so the next qualifying transition pauses again.
        _stepModeToggle = false;
        base.TransitionTo(targetState);
        return;
    }

    if (HSTMachine.Workcell.HSTSettings.Install.EnableDebugLog)
    {
        Log.Info(this, "{0}, ProcessName:{1}, StateName:TransitionTo State:{2}, Step mode at:{3}",
            LoggerCategory.StateTransition, _processName, targetState.ToString(), targetState.Method.Name);
    }

    _workcell.Process.Stop();
    _stepModeToggle = true;  // remember we paused so we do not stop again immediately

    base.TransitionTo(targetState);
}
// Renders the pair as "<state>_<action>", matching string.Join("_", state, action).
public override string ToString() => state.ToString() + "_" + action.ToString();
// Runs a single trial/instance of the QState problem by taking uniformly random
// actions until a terminal state is reached, no actions remain, or the run is
// stopped externally. QLearner calls this many times for learning or once to
// apply what has been learned. Returns the final state.
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    while (!currentState.IsEnd() && currentState.GetActions().Length > 0 && isRunning)
    {
        // Pick one of the currently available actions at random.
        QAction chosen = currentState.GetActions().ElementAt(r.Next(currentState.GetActions().Length));

        // Derive the successor state, carry over inherited data, and advance it one step.
        QState next = currentState.GetNewState(chosen);
        next.Inherit(currentState);
        next.Step();

        WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + ": '" + chosen + "' @ " + currentState.ToString());

        currentState = next;
    }

    return currentState;
}