Exemplo n.º 1
0
 // Executes one trial/instance of the QState problem, choosing each action by an index drawn from r.Next.
 // QLearner calls this many times while learning, or a single time to apply what has been learned.
 // The state at which the loop stops is returned to the caller.
 public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
 {
     while (true)
     {
         // Leave the loop on an end state, when no actions are offered, or once isRunning is false.
         if (currentState.IsEnd() || currentState.GetActions().Length <= 0 || !isRunning)
         {
             break;
         }

         // Select an action: the index is drawn from r.Next, bounded by the number of actions.
         var available = currentState.GetActions();
         int pick = r.Next(currentState.GetActions().Length);
         QAction chosen = available.ElementAt(pick);

         // Create the successor, let it inherit from the current state, then step it.
         QState successor = currentState.GetNewState(chosen);
         successor.Inherit(currentState);
         successor.Step();

         // The "Trial N, " prefix is only written in LEARN mode.
         string prefix = CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";
         WriteOutput(prefix + ": '" + chosen + "' @ " + currentState.ToString());

         currentState = successor;
     }
     return currentState;
 }
Exemplo n.º 2
0
        // Runs one trial. Each iteration picks an action (GetRandomAction when exploring, otherwise
        // GetBestAction), creates and steps the new state, passes the reward to QUpdate, and then
        // calls QUpdate again for every transition returned by GetObservedStates on the new state.
        // Returns the state at which the loop stopped.
        public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
        {
            // Keep the supplied parameters on the instance.
            this.learn = learn;
            this.discount = discount;
            this.explore = explore;

            decimal total = 0;
            int steps = 0;

            while (true)
            {
                // Stop on an end state, when there are no outcomes, or once isRunning is false.
                if (currentState.IsEnd() || GetOutcomes(currentState).Count <= 0 || !isRunning)
                {
                    break;
                }

                steps++;

                // Explore only when explore is positive and the random draw is at or below it.
                bool exploring = explore > 0 && (decimal)random.NextDouble() <= explore;
                QAction chosen = exploring ? GetRandomAction(currentState) : GetBestAction(currentState);

                QState successor = currentState.GetNewState(chosen);

                // The "Trial N, " prefix is only written in LEARN mode.
                string actionPrefix = CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";
                string label = exploring ? "Explore" : "Action";
                WriteOutput(actionPrefix + "#" + steps + " " + label + ": '" + chosen + "' @ " + currentState.ToString());

                successor.Inherit(currentState);
                successor.Step();

                decimal reward = GetReward(currentState, successor);
                total += reward;
                QUpdate(steps, currentState, chosen, successor, reward);

                string gainPrefix = CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";
                WriteOutput(gainPrefix + "#" + steps + " Gain " + Math.Round(reward, 4) + ",  Total " + Math.Round(total, 4));

                // Apply the same update to each additional transition the new state reports.
                foreach (KeyValuePair<QStateActionPair, QState> observation in successor.GetObservedStates(currentState, chosen))
                {
                    QState priorState = observation.Key.state;
                    QAction priorAction = observation.Key.action;
                    QState seenState = observation.Value;
                    decimal seenReward = GetReward(priorState, seenState);
                    QUpdate(steps, priorState, priorAction, seenState, seenReward);

                    string observedPrefix = CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";
                    WriteOutput(observedPrefix + "#" + steps + " Observed: '" + priorAction + "' @ " + priorState.ToString() + " | Gain " + Math.Round(seenReward, 4));
                }

                currentState = successor;
            }

            // The summary line is only written while isRunning is still true.
            if (isRunning)
            {
                string plural = steps == 1 ? "" : "s";
                WriteOutput("Trial " + trialNum + ": " + Math.Round(total, 4) + " in " + steps + " step" + plural + ".");
            }
            return currentState;
        }
Exemplo n.º 3
0
 // Adds or refreshes the learning-table row that belongs to the (s, a) pair.
 // When InvokeRequired is true the call is re-dispatched through Invoke and nothing else happens here.
 //   n  - value written to the "Num" cell (first column of a newly added row)
 //   s  - state; part of the lookup key, and its ToString() fills a newly added row
 //   a  - action; part of the lookup key, and its ToString() fills a newly added row
 //   qv - value written to the "QValue" cell (last column of a newly added row)
 public void UpdateLearningTable(int n, QState s, QAction a, decimal qv)
 {
     if (InvokeRequired)
     {
         Invoke(new UpdateLearningTableD(UpdateLearningTable), n, s, a, qv);
         return;
     }

     QStateActionPair p = new QStateActionPair(s, a);
     if (LearningTableQStateKeys.ContainsKey(p))
     {
         // Fetch the cached row once instead of indexing the dictionary again for every cell.
         var row = LearningTableQStateKeys[p];
         row.Cells["Num"].Value    = n;
         row.Cells["QValue"].Value = qv;
     }
     else
     {
         // Use the index returned by Rows.Add rather than assuming the new row is the last one:
         // "Rows.Count - 1" points at the wrong row whenever the grid keeps a trailing
         // new-record placeholder row.
         // NOTE(review): assumes LearningTable is a DataGridView, whose Rows.Add returns the new row's index - confirm.
         int rowIndex = LearningTable.Rows.Add(n, s.ToString(), a.ToString(), qv);
         LearningTableQStateKeys[p] = LearningTable.Rows[rowIndex];
     }
 }
Exemplo n.º 4
0
 // Inserts a row for the (s, a) pair into the learning table, or updates the row already cached for it.
 // If InvokeRequired is true, the same call is forwarded through Invoke and this invocation returns.
 //   n  - written to the "Num" cell; also the first value of a newly added row
 //   s  - state; used in the lookup key, shown via ToString() in a newly added row
 //   a  - action; used in the lookup key, shown via ToString() in a newly added row
 //   qv - written to the "QValue" cell; also the last value of a newly added row
 public void UpdateLearningTable(int n, QState s, QAction a, decimal qv)
 {
     if (InvokeRequired)
     {
         Invoke(new UpdateLearningTableD(UpdateLearningTable), n, s, a, qv);
         return;
     }

     QStateActionPair p = new QStateActionPair(s, a);
     if (LearningTableQStateKeys.ContainsKey(p))
     {
         // One dictionary read for the row, then update both cells on it.
         var existingRow = LearningTableQStateKeys[p];
         existingRow.Cells["Num"].Value = n;
         existingRow.Cells["QValue"].Value = qv;
     }
     else
     {
         // Cache the row at the index Rows.Add reports. Looking it up as "Rows.Count - 1"
         // would pick the trailing new-record placeholder row if the grid shows one.
         // NOTE(review): assumes LearningTable is a DataGridView, whose Rows.Add returns the new row's index - confirm.
         int addedIndex = LearningTable.Rows.Add(n, s.ToString(), a.ToString(), qv);
         LearningTableQStateKeys[p] = LearningTable.Rows[addedIndex];
     }
 }
Exemplo n.º 5
0
        // Performs a single trial. Every step selects an action (GetRandomAction if the exploration
        // draw succeeds, GetBestAction otherwise), advances to the resulting state, calls QUpdate with
        // the reward, and repeats QUpdate for each entry of GetObservedStates on the new state.
        // The state where the loop ended is returned.
        public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
        {
            // Copy the supplied parameters onto the instance.
            this.learn    = learn;
            this.discount = discount;
            this.explore  = explore;

            decimal runningScore = 0;
            int     stepCount    = 0;

            while (!currentState.IsEnd() && GetOutcomes(currentState).Count > 0 && isRunning)
            {
                stepCount++;

                // Exploration needs a positive explore value and a random draw at or below it.
                bool    explored = explore > 0 && (decimal)random.NextDouble() <= explore;
                QAction picked   = explored ? GetRandomAction(currentState) : GetBestAction(currentState);

                QState nextState = currentState.GetNewState(picked);

                // Log the chosen action; "Trial N, " is only prepended in LEARN mode.
                string actionLine = (CurrentMode == LEARN ? "Trial " + trialNum + ", " : "")
                                    + "#" + stepCount + " " + (explored ? "Explore" : "Action")
                                    + ": '" + picked + "' @ " + currentState.ToString();
                WriteOutput(actionLine);

                nextState.Inherit(currentState);
                nextState.Step();

                decimal gain = GetReward(currentState, nextState);
                runningScore += gain;
                QUpdate(stepCount, currentState, picked, nextState, gain);

                string gainLine = (CurrentMode == LEARN ? "Trial " + trialNum + ", " : "")
                                  + "#" + stepCount + " Gain " + Math.Round(gain, 4)
                                  + ",  Total " + Math.Round(runningScore, 4);
                WriteOutput(gainLine);

                // Run the same update for every extra transition reported by the new state.
                foreach (KeyValuePair<QStateActionPair, QState> entry in nextState.GetObservedStates(currentState, picked))
                {
                    QState  fromState  = entry.Key.state;
                    QAction viaAction  = entry.Key.action;
                    QState  toState    = entry.Value;
                    decimal entryGain  = GetReward(fromState, toState);
                    QUpdate(stepCount, fromState, viaAction, toState, entryGain);

                    string observedLine = (CurrentMode == LEARN ? "Trial " + trialNum + ", " : "")
                                          + "#" + stepCount + " Observed: '" + viaAction + "' @ "
                                          + fromState.ToString() + " | Gain " + Math.Round(entryGain, 4);
                    WriteOutput(observedLine);
                }

                currentState = nextState;
            }

            // Only report the trial summary while isRunning is still true.
            if (isRunning)
            {
                string suffix = stepCount == 1 ? "" : "s";
                WriteOutput("Trial " + trialNum + ": " + Math.Round(runningScore, 4) + " in " + stepCount + " step" + suffix + ".");
            }
            return currentState;
        }
Exemplo n.º 6
0
 // Performs the transition to targetState through the base implementation. Before doing so it
 // either stops the workcell process and sets _stepModeToggle (when every step-mode condition
 // below holds) or clears _stepModeToggle.
 protected override void TransitionTo(QState targetState)
 {
     // All conditions must hold; they short-circuit in the order written.
     bool stopForStep = _workcell.Process.IsStepMode
                        && !_stepModeToggle
                        && !_isNonIdleProcess
                        && !_workcell.Process.IsInit
                        && this.CurrentNestedStateName.Contains("[StateRun]");

     if (!stopForStep)
     {
         _stepModeToggle = false;
     }
     else
     {
         // The log entry is only written when debug logging is enabled in the install settings.
         if (HSTMachine.Workcell.HSTSettings.Install.EnableDebugLog)
         {
             Log.Info(this,
                      "{0}, ProcessName:{1}, StateName:TransitionTo State:{2}, Step mode at:{3}",
                      LoggerCategory.StateTransition,
                      _processName,
                      targetState.ToString(),
                      targetState.Method.Name);
         }

         _workcell.Process.Stop();
         _stepModeToggle = true;
     }

     base.TransitionTo(targetState);
 }
Exemplo n.º 7
0
 // Returns the state's text and the action's text separated by a single underscore.
 public override string ToString()
 {
     string statePart  = state.ToString();
     string actionPart = action.ToString();
     return statePart + "_" + actionPart;
 }
Exemplo n.º 8
0
 // Runs a single trial/instance of the QState problem; each action is selected by an index from r.Next.
 // QLearner runs this many times for learning, or once to apply what has been learned.
 // Returns the final state, i.e. the state held when the loop condition fails.
 public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
 {
     for (;;)
     {
         // Finish on an end state, when the state offers no actions, or once isRunning is false.
         bool finished = currentState.IsEnd() || currentState.GetActions().Length <= 0 || !isRunning;
         if (finished)
         {
             return currentState;
         }

         // Draw the action index from r.Next, bounded by the number of offered actions.
         var options = currentState.GetActions();
         int index = r.Next(currentState.GetActions().Length);
         QAction selected = options.ElementAt(index);

         // Derive the next state, have it inherit from the current one, then step it.
         QState next = currentState.GetNewState(selected);
         next.Inherit(currentState);
         next.Step();

         // "Trial N, " is only included in LEARN mode.
         string trialTag = "";
         if (CurrentMode == LEARN)
         {
             trialTag = "Trial " + trialNum + ", ";
         }
         WriteOutput(trialTag + ": '" + selected + "' @ " + currentState.ToString());

         currentState = next;
     }
 }