示例#1
0
        /// <summary>
        /// Returns the key whose associated value is the largest in <paramref name="values"/>.
        /// </summary>
        /// <param name="values">Map from state to score; must contain at least one entry.</param>
        /// <returns>
        /// The key of the highest-valued entry. On ties, the entry encountered first
        /// during enumeration wins because the comparison is strict.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="values"/> is null.</exception>
        /// <exception cref="System.InvalidOperationException"><paramref name="values"/> is empty.</exception>
        static QState ArgMax(Dictionary <QState, double> values)
        {
            // Seed with the first entry; First() is what raises on null/empty input.
            var best = values.First();

            // Compare against the cached pair instead of re-hashing values[max]
            // on every iteration.
            foreach (var candidate in values)
            {
                if (candidate.Value > best.Value)
                {
                    best = candidate;
                }
            }

            return best.Key;
        }
示例#2
0
        /// <summary>
        /// Runs <paramref name="iterations"/> training episodes, updating the <c>Q</c> table
        /// (and the <c>Nsa</c> table that scales the step size) along a greedy walk.
        /// </summary>
        /// <remarks>
        /// Each episode starts at <c>initialStateProvider[0]</c>, takes its first step via
        /// <c>GetRandNextState(null)</c>, and afterwards always moves to the successor with the
        /// highest current Q value. An episode ends when the current state equals
        /// <paramref name="goal"/>, or when the walk reaches a state with no usable successor.
        /// There is no step limit, so an episode that never reaches either condition will not end.
        /// </remarks>
        /// <param name="goal">State that terminates an episode once it has been processed.</param>
        /// <param name="iterations">Number of episodes to run.</param>
        /// <param name="learningRate">Base step size; divided by <c>Nsa[state][nextState]</c> on each update.</param>
        /// <param name="discountRate">Weight applied to the best Q value reachable from the next state.</param>
        public void Train(QState goal, int iterations, double learningRate, double discountRate)
        {
            for (int epoch = 0; epoch < iterations; ++epoch)
            {
                // Rewards are (re)initialised on every state at the start of each episode.
                foreach (var item in initialStateProvider)
                {
                    item.SetReward();
                }

                var    state     = initialStateProvider[0];
                QState nextState = state.GetRandNextState(null);

                while (true)
                {
                    // Look one step ahead: find the best-valued successor of nextState.
                    // Reset per step so a stale choice from the previous step can never leak through.
                    List <QState> successors    = nextState.GetAvailableStates();
                    QState        bestSuccessor = null;
                    double        maxQ          = double.MinValue;

                    for (int j = 0; j < successors.Count; ++j)
                    {
                        QState candidate = successors[j];
                        double q         = Q[nextState][candidate];
                        if (q > maxQ)
                        {
                            maxQ          = q;
                            bestSuccessor = candidate;
                        }
                    }

                    // A state without a usable successor contributes no future value. Feeding the
                    // double.MinValue sentinel into the update would poison the Q table.
                    bool   deadEnd     = object.ReferenceEquals(bestSuccessor, null);
                    double futureValue = deadEnd ? 0.0 : maxQ;

                    // NOTE(review): 0.004 is the per-visit increment used by the original code;
                    // its derivation is not visible here — confirm before tuning.
                    Nsa[state][nextState] += 0.004;

                    Q[state][nextState] = Q[state][nextState] + learningRate / Nsa[state][nextState] * (state.GetReward().ComputeReward() + discountRate * futureValue - 1 * Q[state][nextState]);

                    if (state.Equals(goal))
                    {
                        // The goal's self-transition is pinned to its own reward.
                        Q[state][state] = state.GetReward().ComputeReward();
                        break;
                    }

                    if (deadEnd)
                    {
                        // Nowhere left to go: end this episode instead of dereferencing null
                        // or looping on a stale state.
                        break;
                    }

                    state     = nextState;
                    nextState = bestSuccessor;
                }
            }
        }
示例#3
0
        /// <summary>
        /// Follows the highest-valued entry of <c>Q</c> from <paramref name="start"/> until the
        /// current state equals <paramref name="goal"/> or the path holds 300 states.
        /// Every visited state is written to the console followed by "->".
        /// </summary>
        /// <param name="start">First state of the path; always included in the result.</param>
        /// <param name="goal">State at which the walk stops.</param>
        /// <returns>The visited states in order, beginning with <paramref name="start"/>.</returns>
        public List <QState> Walk(QState start, QState goal)
        {
            const int stepLimit = 300;
            var       path      = new List <QState> { start };

            Console.Write(start.ToString() + "->");

            for (QState position = start; !position.Equals(goal);)
            {
                // Greedy move: take the successor with the largest Q value.
                QState best = ArgMax(Q[position]);

                Console.Write(best + "->");
                path.Add(best);
                position = best;

                // Safety cap so a policy that never reaches the goal still terminates.
                if (path.Count == stepLimit)
                {
                    break;
                }
            }

            return path;
        }
示例#4
0
 /// <summary>
 /// Supplies a successor state for this state; the selection logic is left to concrete subclasses.
 /// NOTE(review): the name suggests the successor is picked at random — confirm against the implementations.
 /// </summary>
 /// <param name="prevState">
 /// A previous state handed in by the caller. NOTE(review): callers may pass null
 /// (e.g. for the first step of a walk) — verify that implementations tolerate it.
 /// </param>
 /// <returns>The chosen next state.</returns>
 public abstract QState GetRandNextState(QState prevState);