Beispiel #1
        /// <summary>
        /// Finds the largest <c>QEstimated</c> among every action result of every action
        /// attached to the state registered under <paramref name="stateName"/>.
        /// </summary>
        /// <param name="stateName">Key used to look the state up in <c>StateLookup</c>.</param>
        /// <returns>
        /// The largest estimate found, or 0 when the state is not in <c>StateLookup</c>
        /// or none of its actions has any action result.
        /// </returns>
        double MaxQ(string stateName)
        {
            const double defaultValue = 0;

            // One dictionary probe instead of ContainsKey followed by the indexer.
            // NOTE(review): the body of this guard was not visible in the excerpt;
            // returning defaultValue is inferred from the constant above and the final
            // return statement - confirm against the full source.
            QState state;
            if (!StateLookup.TryGetValue(stateName, out state))
            {
                return defaultValue;
            }

            // Stays null until the first estimate is seen, which lets "no estimates at all"
            // be told apart from "the best estimate happens to be 0".
            double? maxValue = null;

            foreach (var action in state.Actions)
            {
                foreach (var actionResult in action.ActionsResult)
                {
                    double value = actionResult.QEstimated;
                    if (!maxValue.HasValue || value > maxValue.Value)
                    {
                        maxValue = value;
                    }
                }
            }

            // No estimate was found: optionally warn, then fall back to the default.
            // NOTE(review): the format argument was cut off in the excerpt; stateName is
            // inferred from the single {0} placeholder and the message text - confirm.
            if (!maxValue.HasValue && ShowWarning)
            {
                QMethod.Log(string.Format("Warning: No MaxQ value for stateName {0}", stateName));
            }

            return maxValue ?? defaultValue;
        }
Beispiel #2
 // Returns the name that QMethod.ActionNameFromTo produces for this action's From and To.
 public string GetActionName()
     // NOTE(review): this null check and the return below sit at the same indent, and
     // brace-only lines appear to be missing from this excerpt, so whatever the check
     // originally guarded is presumably not shown here - confirm against the full source.
     if (To == null)
     // From and To are handed to the helper unchanged.
     return(QMethod.ActionNameFromTo(From, To));
Beispiel #3
        // Runs Episodes training episodes. Each episode starts from a randomly chosen entry
        // of States, then repeatedly picks a random action of the current state, samples one
        // of its outcomes via PickActionByProbability, writes an updated value to that
        // outcome's QValue using Alpha, Gamma and MaxQ of the outcome's state, and moves on
        // to that state.
        public void RunTraining()

             // NOTE(review): the next six lines read like the inside of a block comment whose
             // opening and closing delimiters are not visible in this excerpt - confirm.
             * For each episode: Select random initial state
             * Do while not reach goal state
             *  Select one among all possible actions for the current state
             *  Using this possible action, consider to go to the next state
             *  Get maximum Q value of this next state based on all possible actions
             *  Set the next state as the current state

            // For each episode
            // One Random instance is shared by all episodes; maxloopEventCount is incremented
            // each time the step-limit warning text below is formatted.
            var  rand = new Random();
            long maxloopEventCount = 0;

            // Train episodes
            for (long i = 0; i < Episodes; i++)
                // Step counter for the current episode; reset for every episode.
                long maxloop = 0;
                // Select random initial state
                int     stateIndex = rand.Next(States.Count);
                QState  state      = States[stateIndex];
                // Starts as null; assigned once an action is picked further down. It is also
                // passed to the warning text below, where it can still be null.
                QAction action     = null;
                    // NOTE(review): the lines from here on are indented like a loop body and
                    // the block ends with "} while (true);", so a "do {" opener presumably
                    // belongs above this line but is not visible in this excerpt - confirm.
                    // Step limit: counts this iteration and compares against
                    // MaxExploreStepsWithinOneEpisode.
                    if (++maxloop > MaxExploreStepsWithinOneEpisode)
                        if (ShowWarning)
                            // NOTE(review): no use of msg is visible in this excerpt, nor is
                            // whatever ends the episode once the limit is hit - presumably
                            // the message is logged and the loop is left; confirm.
                            string msg = string.Format(
                                "{0} !! MAXLOOP state: {1} action: {2}, {3} endstate is to difficult to reach?",
                                ++maxloopEventCount, state, action, "maybe your path setup is wrong or the ");


                    // no actions, skip this state
                    // NOTE(review): the statement guarded by this check is not visible in this
                    // excerpt - presumably it leaves the loop; confirm.
                    if (state.Actions.Count == 0)

                    // Selection strategy is random based on probability
                    // rand.Next(Count) yields an index in [0, Count), so each action of the
                    // current state is equally likely to be picked here.
                    int index = rand.Next(state.Actions.Count);
                    action = state.Actions[index];

                    // Using this possible action, consider to go to the next state
                    // Pick random Action outcome
                    QActionResult nextStateResult = action.PickActionByProbability();
                    string        nextStateName   = nextStateResult.StateName;

                    // Inputs of the update: the outcome's current estimate, its reward, and
                    // the best estimate reachable from the outcome's state.
                    double q    = nextStateResult.QEstimated;
                    double r    = nextStateResult.Reward;
                    double maxQ = MaxQ(nextStateName);

                    // Q(s,a)= Q(s,a) + alpha * (R(s,a) + gamma * Max(next state, all actions) - Q(s,a))
                    // NOTE(review): q is read from QEstimated but the result is written to
                    // QValue; how the two members relate is not visible here - confirm.
                    double value = q + Alpha * (r + Gamma * maxQ - q); // q-learning
                    nextStateResult.QValue = value;                    // update

                    // is end state go to next episode
                    // NOTE(review): the statement guarded by this check is not visible in this
                    // excerpt - presumably it leaves the loop; confirm.
                    if (EndStates.Contains(nextStateResult.StateName))

                    // Set the next state as the current state
                    state = StateLookup[nextStateResult.StateName];
                // The loop condition is always true, so the loop can only be left from inside
                // its body.
                } while (true);