// Solves the problem from currentState with QSearch.Breadth_First and replays the returned
// action list, stepping the state after every applied action. If the replay finishes on a
// state that is not an end state, the search is repeated from that state.
// trialNum, learn, discount and explore are not used by this method.
// Returns the last state reached: either an end state, or the state at which no solution
// could be found.
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    // Re-solving is done in a loop instead of by calling Run recursively, so a plan that
    // keeps becoming stale cannot grow the call stack without bound.
    while (true)
    {
        QSearch qsearch = new QSearch(this);
        QSearchResult actions = qsearch.Breadth_First(currentState, true);
        if (actions == null)
        {
            WriteOutput("No solution found.", true);
            return currentState;
        }

        foreach (QAction action in actions.actionsList)
        {
            // Planned actions that are not currently available are skipped, as is everything
            // after the end state is reached or the run is stopped.
            if (!currentState.IsEnd() && isRunning && currentState.GetActions().Contains(action))
            {
                WriteOutput(currentState + ": " + action);
                QState newState = currentState.GetNewState(action);
                newState.Inherit(currentState);
                newState.Step();
                currentState = newState;
            }
        }

        if (currentState.IsEnd())
        {
            WriteOutput(currentState + ": End");
            return currentState;
        }

        WriteOutput("Existing solution no longer applicable. Re-solving...");
    }
}
// Solves the problem from currentState with QSearch.Depth_First and replays the returned
// action list, stepping the state after every applied action. If the replay finishes on a
// state that is not an end state, the search is repeated from that state.
// trialNum, learn, discount and explore are not used by this method.
// Returns the last state reached: either an end state, or the state at which no solution
// could be found.
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    // Iterate rather than recurse when the plan has to be rebuilt, so repeated re-solving
    // does not deepen the call stack.
    for (;;)
    {
        QSearch qsearch = new QSearch(this);
        QSearchResult actions = qsearch.Depth_First(currentState, true);
        if (actions == null)
        {
            WriteOutput("No solution found.", true);
            break;
        }

        foreach (QAction action in actions.actionsList)
        {
            // Only apply a planned action while the run is live, the goal has not been
            // reached, and the action is actually offered by the current state.
            bool applicable = !currentState.IsEnd() && isRunning && currentState.GetActions().Contains(action);
            if (!applicable)
            {
                continue;
            }

            WriteOutput(currentState + ": " + action);
            QState newState = currentState.GetNewState(action);
            newState.Inherit(currentState);
            newState.Step();
            currentState = newState;
        }

        if (currentState.IsEnd())
        {
            WriteOutput(currentState + ": End");
            break;
        }

        WriteOutput("Existing solution no longer applicable. Re-solving...");
    }

    return currentState;
}
// Searches outward from startState using a FIFO queue and returns the QSearchResult
// (actions and states) recorded for the first end state that is dequeued.
// A successor is (re-)queued whenever it has not been expanded yet and the accumulated cost
// of reaching it (parent's cost minus the parent's GetValue()) is lower than any cost
// recorded for it so far.
// When output is true, progress messages are written through WriteOutput.
// Returns null when the queue runs empty or isRunning becomes false before an end state
// is dequeued.
public QSearchResult Breadth_First(QState startState, bool output = false)
{
    HashSet<QState> expanded = new HashSet<QState>();
    Dictionary<QState, decimal> cheapest = new Dictionary<QState, decimal>();
    cheapest.Add(startState, 0);
    Queue<QState> frontier = new Queue<QState>();
    frontier.Enqueue(startState);
    Dictionary<QState, QSearchResult> routes = new Dictionary<QState, QSearchResult>();
    routes.Add(startState, new QSearchResult());

    if (output)
    {
        WriteOutput("Searching for shortest path via Breadth-First Search...");
    }

    int iterations = 0;
    while (frontier.Count > 0 && isRunning)
    {
        iterations++;
        QState node = frontier.Dequeue();

        if (node.IsEnd())
        {
            if (output)
            {
                WriteOutput("Shortest path of " + routes[node].Count + " step(s) found after " + iterations + " iteration(s).");
            }
            return routes[node];
        }

        expanded.Add(node);
        foreach (QAction action in node.GetActions())
        {
            QState successor = node.GetNewState(action);
            if (expanded.Contains(successor))
            {
                continue;
            }

            decimal cost = cheapest[node] - node.GetValue();
            decimal known;
            if (cheapest.TryGetValue(successor, out known) && cost >= known)
            {
                // An equal or cheaper way to this successor is already recorded.
                continue;
            }

            // Extend a copy of the parent's route by this action and resulting state.
            QSearchResult route = new QSearchResult(routes[node]);
            routes[successor] = route;
            route.actionsList.Add(action);
            route.QStatesList.Add(successor);
            cheapest[successor] = cost;
            frontier.Enqueue(successor);
        }
    }

    if (output)
    {
        WriteOutput("No path found after " + iterations + " iteration(s).");
    }
    return null;
}
// Searches from startState using a priority queue and returns the QSearchResult (actions
// and states) recorded for the first end state that is dequeued.
// Each queued state is prioritised by its accumulated cost (parent's cost minus the
// parent's GetValue()) minus the state's own GetValueHeuristic().
// When output is true, progress messages are written through WriteOutput.
// maxQueue is passed to the HeapPriorityQueue constructor.
// NOTE(review): presumably maxQueue is a hard capacity; confirm what HeapPriorityQueue
// does when more entries than that are enqueued.
// Returns null when the queue runs empty or isRunning becomes false before an end state
// is dequeued.
public QSearchResult AStar(QState startState, bool output = false, int maxQueue = 1000)
{
    HashSet<QState> expanded = new HashSet<QState>();
    Dictionary<QState, decimal> cheapest = new Dictionary<QState, decimal>();
    cheapest.Add(startState, 0);
    HeapPriorityQueue<QStateContainer> frontier = new HeapPriorityQueue<QStateContainer>(maxQueue);
    frontier.Enqueue(new QStateContainer(startState), 0);
    Dictionary<QState, QSearchResult> routes = new Dictionary<QState, QSearchResult>();
    routes.Add(startState, new QSearchResult());

    if (output)
    {
        WriteOutput("Searching for shortest path via A-Star Search...");
    }

    int iterations = 0;
    while (frontier.Count > 0 && isRunning)
    {
        iterations++;
        QState node = frontier.Dequeue().qstate;

        if (node.IsEnd())
        {
            if (output)
            {
                WriteOutput("Shortest path of " + routes[node].Count + " step(s) found after " + iterations + " iteration(s).");
            }
            return routes[node];
        }

        expanded.Add(node);
        foreach (QAction action in node.GetActions())
        {
            QState successor = node.GetNewState(action);
            if (expanded.Contains(successor))
            {
                continue;
            }

            decimal cost = cheapest[node] - node.GetValue();
            decimal known;
            if (cheapest.TryGetValue(successor, out known) && cost >= known)
            {
                // An equal or cheaper way to this successor is already recorded.
                continue;
            }

            // Extend a copy of the parent's route by this action and resulting state.
            QSearchResult route = new QSearchResult(routes[node]);
            routes[successor] = route;
            route.actionsList.Add(action);
            route.QStatesList.Add(successor);
            cheapest[successor] = cost;
            frontier.Enqueue(new QStateContainer(successor), cost - successor.GetValueHeuristic());
        }
    }

    if (output)
    {
        WriteOutput("No path found after " + iterations + " iteration(s).");
    }
    return null;
}
// Runs a single trial/instance of the QState problem. QLearner will automatically run many
// times for learning or once to apply what has been learned.
// Every step takes the action found at a random index (r.Next) of the current state's
// actions, until an end state is reached, no actions are available, or isRunning is false.
// learn, discount and explore are not used by this method.
// Must return the final state.
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    while (!currentState.IsEnd() && currentState.GetActions().Length > 0 && isRunning)
    {
        var available = currentState.GetActions();
        int pick = r.Next(currentState.GetActions().Length);
        QAction action = available.ElementAt(pick);

        QState newState = currentState.GetNewState(action);
        newState.Inherit(currentState);
        newState.Step();

        // The message reports the state the action was taken from, not the resulting one.
        string trialTag = CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";
        WriteOutput(trialTag + ": '" + action + "' @ " + currentState.ToString());

        currentState = newState;
    }

    return currentState;
}
// Runs a single trial/instance of the QState problem. QLearner will automatically run many
// times for learning or once to apply what has been learned.
// While the state is not an end state, has at least one action, and isRunning is true, an
// action is taken from a random index (r.Next) of the state's actions and applied.
// learn, discount and explore are not used by this method.
// Must return the final state.
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    while (!currentState.IsEnd() && currentState.GetActions().Length > 0 && isRunning)
    {
        var choices = currentState.GetActions();
        QAction action = choices.ElementAt(r.Next(currentState.GetActions().Length));

        QState successor = currentState.GetNewState(action);
        successor.Inherit(currentState);
        successor.Step();

        // Log against the state the action was chosen in, before moving on.
        string message = (CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + ": '" + action + "' @ " + currentState.ToString();
        WriteOutput(message);

        currentState = successor;
    }

    return currentState;
}
// Runs one trial starting at currentState and returns the final state.
// The learn, discount and explore arguments are first stored on this instance. Then, while
// the state is not an end state, GetOutcomes reports at least one outcome, and isRunning is
// true, each step:
//   - takes GetRandomAction when explore > 0 and a random draw is <= explore, otherwise
//     GetBestAction;
//   - applies the action, obtains the reward from GetReward and passes it to QUpdate;
//   - passes every transition returned by GetObservedStates to QUpdate as well.
// Progress is written through WriteOutput; a per-trial summary is written only if isRunning
// is still true when the loop ends.
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    this.learn = learn;
    this.discount = discount;
    this.explore = explore;

    // Prefix for per-step messages; evaluated at each use so CurrentMode is read every time.
    Func<string> trialTag = () => CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";

    decimal total = 0;
    int stepCount = 0;

    while (!currentState.IsEnd() && GetOutcomes(currentState).Count > 0 && isRunning)
    {
        stepCount++;

        bool exploring = explore > 0 && (decimal)random.NextDouble() <= explore;
        QAction chosen;
        if (exploring)
        {
            chosen = GetRandomAction(currentState);
        }
        else
        {
            chosen = GetBestAction(currentState);
        }

        QState next = currentState.GetNewState(chosen);
        WriteOutput(trialTag() + "#" + stepCount + " " + (exploring ? "Explore" : "Action") + ": '" + chosen + "' @ " + currentState.ToString());
        next.Inherit(currentState);
        next.Step();

        decimal reward = GetReward(currentState, next);
        total += reward;
        QUpdate(stepCount, currentState, chosen, next, reward);
        WriteOutput(trialTag() + "#" + stepCount + " Gain " + Math.Round(reward, 4) + ", Total " + Math.Round(total, 4));

        // Transitions reported by the new state are fed to QUpdate with their own reward.
        foreach (KeyValuePair<QStateActionPair, QState> observation in next.GetObservedStates(currentState, chosen))
        {
            QState fromState = observation.Key.state;
            QAction viaAction = observation.Key.action;
            QState toState = observation.Value;
            decimal observedReward = GetReward(fromState, toState);
            QUpdate(stepCount, fromState, viaAction, toState, observedReward);
            WriteOutput(trialTag() + "#" + stepCount + " Observed: '" + viaAction + "' @ " + fromState.ToString() + " | Gain " + Math.Round(observedReward, 4));
        }

        currentState = next;
    }

    if (isRunning)
    {
        string plural = stepCount == 1 ? "" : "s";
        WriteOutput("Trial " + trialNum + ": " + Math.Round(total, 4) + " in " + stepCount + " step" + plural + ".");
    }

    return currentState;
}
// Runs one trial starting at currentState and returns the final state.
// Stores learn, discount and explore on this instance, then loops while the state is not an
// end state, GetOutcomes reports at least one outcome, and isRunning is true.
// Each step takes either GetRandomAction (when explore > 0 and a random draw is <= explore)
// or GetBestAction, applies it, and passes the GetReward result to QUpdate. Transitions
// returned by GetObservedStates are passed to QUpdate in the same way.
// Progress is written through WriteOutput; the closing per-trial summary is written only
// if isRunning is still true.
public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
{
    this.learn = learn;
    this.discount = discount;
    this.explore = explore;

    decimal runningScore = 0;
    int steps = 0;

    while (!currentState.IsEnd() && GetOutcomes(currentState).Count > 0 && isRunning)
    {
        steps++;

        bool explored = explore > 0 && (decimal)random.NextDouble() <= explore;
        QAction picked;
        if (explored)
        {
            picked = GetRandomAction(currentState);
        }
        else
        {
            picked = GetBestAction(currentState);
        }

        QState successor = currentState.GetNewState(picked);

        string actionLine = (CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + steps + " " + (explored ? "Explore" : "Action") + ": '" + picked + "' @ " + currentState.ToString();
        WriteOutput(actionLine);

        successor.Inherit(currentState);
        successor.Step();

        decimal gain = GetReward(currentState, successor);
        runningScore += gain;
        QUpdate(steps, currentState, picked, successor, gain);

        string gainLine = (CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + steps + " Gain " + Math.Round(gain, 4) + ", Total " + Math.Round(runningScore, 4);
        WriteOutput(gainLine);

        // Each observed (state, action) -> state transition gets its own reward and update.
        foreach (KeyValuePair<QStateActionPair, QState> seen in successor.GetObservedStates(currentState, picked))
        {
            QState priorState = seen.Key.state;
            QAction priorAction = seen.Key.action;
            QState resultState = seen.Value;
            decimal seenGain = GetReward(priorState, resultState);
            QUpdate(steps, priorState, priorAction, resultState, seenGain);

            string seenLine = (CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + steps + " Observed: '" + priorAction + "' @ " + priorState.ToString() + " | Gain " + Math.Round(seenGain, 4);
            WriteOutput(seenLine);
        }

        currentState = successor;
    }

    if (isRunning)
    {
        WriteOutput("Trial " + trialNum + ": " + Math.Round(runningScore, 4) + " in " + steps + " step" + (steps == 1 ? "" : "s") + ".");
    }

    return currentState;
}