Exemplo n.º 1
0
        public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
        {
            this.learn = learn; this.discount = discount; this.explore = explore;
            decimal score        = 0;
            int     actionsTaken = 0;

            while (!currentState.IsEnd() && GetOutcomes(currentState).Count > 0 && isRunning)
            {
                actionsTaken++;
                QAction a;
                bool    exp;
                if (explore > 0 && (decimal)random.NextDouble() <= explore)
                {
                    a   = GetRandomAction(currentState);
                    exp = true;
                }
                else
                {
                    a   = GetBestAction(currentState);
                    exp = false;
                }
                QState newState = currentState.GetNewState(a);
                WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + actionsTaken + " " + (exp ? "Explore" : "Action") + ": '" + a + "' @ " + currentState.ToString());
                newState.Inherit(currentState);
                newState.Step();
                decimal r = GetReward(currentState, newState);
                score += r;
                QUpdate(actionsTaken, currentState, a, newState, r);
                WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + actionsTaken + " Gain " + Math.Round(r, 4) + ",  Total " + Math.Round(score, 4));

                foreach (KeyValuePair <QStateActionPair, QState> kv in newState.GetObservedStates(currentState, a))
                {
                    QState  observedPriorState = kv.Key.state;
                    QAction observedAction     = kv.Key.action;
                    QState  observedState      = kv.Value;
                    decimal observedR          = GetReward(observedPriorState, observedState);
                    QUpdate(actionsTaken, observedPriorState, observedAction, observedState, observedR);
                    WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + actionsTaken + " Observed: '" + observedAction + "' @ " + observedPriorState.ToString() + " | Gain " + Math.Round(observedR, 4));
                }

                currentState = newState;
            }
            if (isRunning)
            {
                WriteOutput("Trial " + trialNum + ": " + Math.Round(score, 4) + " in " + actionsTaken + " step" + (actionsTaken == 1 ? "" : "s") + ".");
            }
            return(currentState);
        }