Example #1
        public void RunTraining()
        {
            QMethod.Validate(this);

            /*
             * For each episode: select a random initial state.
             * Do while the goal state has not been reached:
             *  Select one among all possible actions for the current state.
             *  Using this action, consider going to the next state.
             *  Get the maximum Q value of that next state over all of its possible actions.
             *  Update Q(state, action) with the Q-learning rule.
             *  Set the next state as the current state.
             */

            // For each episode
            var  rand = new Random();
            long maxloopEventCount = 0;

            // Train episodes
            for (long i = 0; i < Episodes; i++)
            {
                long maxloop = 0;
                // Select random initial state
                int     stateIndex = rand.Next(States.Count);
                QState  state      = States[stateIndex];
                QAction action     = null;
                do
                {
                    if (++maxloop > MaxExploreStepsWithinOneEpisode)
                    {
                        if (ShowWarning)
                        {
                            string msg = string.Format(
                                "{0} !! MAXLOOP state: {1} action: {2}, maybe your path setup is wrong or the end state is too difficult to reach?",
                                ++maxloopEventCount, state, action);
                            QMethod.Log(msg);
                        }

                        break;
                    }

                    // No actions from this state, end the episode
                    if (state.Actions.Count == 0)
                    {
                        break;
                    }

                    // Selection strategy: choose an action uniformly at random (pure exploration)
                    int index = rand.Next(state.Actions.Count);
                    action = state.Actions[index];

                    // Using this action, consider going to the next state:
                    // pick a random outcome of the action, weighted by its probability
                    QActionResult nextStateResult = action.PickActionByProbability();
                    string        nextStateName   = nextStateResult.StateName;

                    double q    = nextStateResult.QEstimated;
                    double r    = nextStateResult.Reward;
                    double maxQ = MaxQ(nextStateName);

                    // Q(s,a) = Q(s,a) + alpha * (R(s,a) + gamma * max(Q(s', a')) - Q(s,a))
                    double value = q + Alpha * (r + Gamma * maxQ - q); // q-learning
                    nextStateResult.QValue = value;                    // update

                    // If the next state is an end state, go to the next episode
                    if (EndStates.Contains(nextStateResult.StateName))
                    {
                        break;
                    }

                    // Set the next state as the current state
                    state = StateLookup[nextStateResult.StateName];
                } while (true);
            }
        }
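The core of the loop above is the tabular Q-learning update from the comment, Q(s,a) = Q(s,a) + alpha * (R(s,a) + gamma * max(Q(s',a')) - Q(s,a)), applied to the picked outcome. Below is a minimal, self-contained C# sketch of that update on a dictionary-based Q-table; the names used here (QTable, BestValue, Update) are illustrative only and are not part of the library shown above.

using System.Collections.Generic;
using System.Linq;

// Illustrative sketch of the tabular Q-learning update; not the library's API.
public class QTable
{
    // Q values keyed by (state, action) pair.
    private readonly Dictionary<(string State, string Action), double> _q =
        new Dictionary<(string State, string Action), double>();

    public double Alpha { get; set; } = 0.1; // learning rate
    public double Gamma { get; set; } = 0.9; // discount factor

    // Maximum Q value over all recorded actions of a state (0 for an unseen state),
    // the role MaxQ(nextStateName) plays in RunTraining above.
    public double BestValue(string state) =>
        _q.Where(kv => kv.Key.State == state)
          .Select(kv => kv.Value)
          .DefaultIfEmpty(0.0)
          .Max();

    // Q(s,a) = Q(s,a) + alpha * (r + gamma * max(Q(s',a')) - Q(s,a))
    public void Update(string state, string action, double reward, string nextState)
    {
        _q.TryGetValue((state, action), out double q);
        _q[(state, action)] = q + Alpha * (reward + Gamma * BestValue(nextState) - q);
    }
}

RunTraining performs the same per-step update, but stores the result directly on the outcome object via nextStateResult.QValue instead of in a separate table.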
Example #2
 // Adds a possible outcome (QActionResult) to this action's list of results.
 public void AddActionResult(QActionResult actionResult)
 {
     ActionsResult.Add(actionResult);
 }
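AddActionResult simply appends one possible outcome to the action's ActionsResult list. The following is a hedged usage sketch of how a stochastic action might be wired up with it; only StateName and Reward appear in Example #1, so the Probability property and the object-initializer style below are assumptions, not confirmed API.

// Hypothetical setup of a stochastic action with two outcomes; QActionResult's
// Probability property and its parameterless constructor are assumed here.
var moveNorth = new QAction();
moveNorth.AddActionResult(new QActionResult { StateName = "RoomA", Reward = 0.0, Probability = 0.8 });
moveNorth.AddActionResult(new QActionResult { StateName = "Goal",  Reward = 1.0, Probability = 0.2 });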