Exemplo n.º 1
0
        // Runs one trial by planning with a depth-first QSearch from the current state and
        // then replaying the planned actions one at a time.
        //
        // A planned action is applied only while the state is not an end state, the run has
        // not been stopped (isRunning) and the action is still among the state's available
        // actions.  If the replay finishes without reaching an end state, a new plan is
        // computed from wherever the replay stopped.
        //
        // Re-planning is done in a loop instead of by recursion so repeated re-solves do not
        // deepen the call stack, and it is skipped once isRunning is false: a stopped run
        // cannot apply any action, so re-solving for it would never make progress.
        //
        // trialNum, learn, discount and explore are part of the overridden signature but are
        // not used by this implementation.
        //
        // Returns the last state reached.
        public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
        {
            while (true)
            {
                // A fresh search object per pass, as each re-solve previously created its own.
                QSearch       qsearch = new QSearch(this);
                QSearchResult actions = qsearch.Depth_First(currentState, true);

                if (actions == null)
                {
                    WriteOutput("No solution found.", true);
                    return(currentState);
                }

                foreach (QAction action in actions.actionsList)
                {
                    if (!currentState.IsEnd() && isRunning && currentState.GetActions().Contains(action))
                    {
                        WriteOutput(currentState + ": " + action);
                        QState newState = currentState.GetNewState(action);
                        newState.Inherit(currentState);
                        newState.Step();
                        currentState = newState;
                    }
                }

                if (currentState.IsEnd())
                {
                    WriteOutput(currentState + ": End");
                    return(currentState);
                }

                if (!isRunning)
                {
                    // The run was stopped; hand back the state reached so far rather than re-solving.
                    return(currentState);
                }

                WriteOutput("Existing solution no longer applicable.  Re-solving...");
            }
        }
Exemplo n.º 2
0
 // This runs a single trial/instance of the QState problem by repeatedly applying a randomly
 // chosen action from those the current state offers.  QLearner will automatically run many
 // times for learning or once to apply what has been learned.
 // The loop ends when the state is an end state, when no actions are available, or when
 // isRunning is false.
 // Must return the final state
 public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
 {
     while (!currentState.IsEnd())
     {
         // Query the available actions once per step so the emptiness check, the random
         // index bound and the selected element all refer to the same result.
         var available = currentState.GetActions();
         if (available.Length == 0 || !isRunning)
         {
             break;
         }

         QAction action   = available.ElementAt(r.Next(available.Length));
         QState  newState = currentState.GetNewState(action);
         newState.Inherit(currentState);
         newState.Step();
         // Logged after stepping, but describing the state the action was taken from.
         WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + ": '" + action + "' @ " + currentState.ToString());
         currentState = newState;
     }
     return(currentState);
 }
Exemplo n.º 3
0
        // Runs a single trial starting at currentState and returns the last state reached.
        //
        // Each step picks an action (a random one when the explore roll succeeds, otherwise
        // the best one), moves to the resulting state, computes the reward for the transition
        // and passes it to QUpdate.  Transitions reported by GetObservedStates on the new
        // state are passed to QUpdate in the same way.  The loop ends on an end state, when
        // GetOutcomes yields nothing for the current state, or when isRunning is false.
        public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
        {
            // Store the trial parameters on the instance.
            this.learn    = learn;
            this.discount = discount;
            this.explore  = explore;

            decimal totalReward = 0;
            int     stepCount   = 0;

            while (!currentState.IsEnd() && GetOutcomes(currentState).Count > 0 && isRunning)
            {
                stepCount++;

                // Roll for exploration only when an exploration rate is set.
                bool    exploring = explore > 0 && (decimal)random.NextDouble() <= explore;
                QAction chosen    = exploring ? GetRandomAction(currentState) : GetBestAction(currentState);

                QState nextState = currentState.GetNewState(chosen);

                // Common prefix for every message written during this step.
                string prefix = (CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + stepCount;

                WriteOutput(prefix + " " + (exploring ? "Explore" : "Action") + ": '" + chosen + "' @ " + currentState.ToString());

                nextState.Inherit(currentState);
                nextState.Step();

                decimal reward = GetReward(currentState, nextState);
                totalReward += reward;
                QUpdate(stepCount, currentState, chosen, nextState, reward);
                WriteOutput(prefix + " Gain " + Math.Round(reward, 4) + ",  Total " + Math.Round(totalReward, 4));

                // Feed every additionally observed transition to QUpdate as well.
                foreach (KeyValuePair <QStateActionPair, QState> observation in nextState.GetObservedStates(currentState, chosen))
                {
                    QState  fromState      = observation.Key.state;
                    QAction viaAction      = observation.Key.action;
                    QState  toState        = observation.Value;
                    decimal observedReward = GetReward(fromState, toState);

                    QUpdate(stepCount, fromState, viaAction, toState, observedReward);
                    WriteOutput(prefix + " Observed: '" + viaAction + "' @ " + fromState.ToString() + " | Gain " + Math.Round(observedReward, 4));
                }

                currentState = nextState;
            }

            // The trial summary is written only when the run was not stopped.
            if (isRunning)
            {
                string plural = stepCount == 1 ? "" : "s";
                WriteOutput("Trial " + trialNum + ": " + Math.Round(totalReward, 4) + " in " + stepCount + " step" + plural + ".");
            }

            return(currentState);
        }