Ejemplo n.º 1
0
        /// <summary>
        /// Creates a new state-action value table, installs it as <c>m_valueTable</c>, and
        /// repeatedly sweeps every (state, action) entry, replacing it with the result of
        /// <c>EstimateNewQValue</c>, until the largest change seen in a sweep is no longer
        /// greater than <c>Theta</c>.
        /// </summary>
        private void QValueIteration()
        {
            StateActionValueTable table = new StateActionValueTable(
                m_initMode, m_initValue,
                5,                          // actions
                this.EnvRows, this.EnvCols, // my pos
                this.EnvRows, this.EnvCols, // opp pos
                2);                         // ball ownership status

            // Publish the table before iterating; EstimateNewQValue is not visible here and
            // presumably reads it through m_valueTable (TODO confirm).
            m_valueTable = table;

            long numStates  = table.NumStates;
            int  numActions = table.NumActions;

            double largestChange;

            do
            {
                // Restart the convergence measure for this sweep.
                largestChange = 0.0;

                for (int state = 0; state < numStates; state++)
                {
                    for (int action = 0; action < numActions; action++)
                    {
                        // Read the old value, estimate the new one, then write it back
                        // immediately, so the update is in place and order matters.
                        double previous = table.GetValueLinear(action, state);
                        double updated  = EstimateNewQValue(state, action);
                        table.SetValueLinear(updated, action, state);

                        double change = Math.Abs(previous - updated);
                        largestChange = Math.Max(largestChange, change);
                    }
                }
            }
            while (largestChange > Theta);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Scans the Q-values stored for one state and reports the largest one together
        /// with the action that holds it.
        /// </summary>
        /// <param name="linStateInd">
        /// Linear index of the state. The table is addressed with an <c>int</c>, so the
        /// value must fit in that range.
        /// </param>
        /// <param name="maxA">
        /// Receives the index of the first action whose Q-value is the largest, or -1 when
        /// no action's value is greater than <c>Double.MinValue</c> (for example when
        /// <c>m_numActions</c> is 0 or every stored value is NaN).
        /// </param>
        /// <returns>
        /// The largest Q-value found, or <c>Double.MinValue</c> when <paramref name="maxA"/>
        /// is -1.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// <c>m_valueTable</c> is not a <c>StateActionValueTable</c>.
        /// </exception>
        /// <exception cref="OverflowException">
        /// <paramref name="linStateInd"/> does not fit in an <c>int</c>.
        /// </exception>
        private double GetMaxQForState(long linStateInd, out int maxA)
        {
            StateActionValueTable qTable = m_valueTable as StateActionValueTable;

            if (qTable == null)
            {
                // A specific exception type instead of the base Exception; it still derives
                // from Exception, so existing catch blocks keep working.
                throw new InvalidOperationException("value table is not a Q-Table");
            }

            // Narrow once, with overflow checking: an unchecked cast would silently wrap an
            // out-of-range index and read the Q-values of an unrelated state.
            int stateIndex = checked((int)linStateInd);

            maxA = -1;
            double maxQ = Double.MinValue;

            for (int a = 0; a < m_numActions; a++)
            {
                double curQ = qTable.GetValueLinear(a, stateIndex);

                // Strict comparison: on ties the earliest action wins.
                if (curQ > maxQ)
                {
                    maxQ = curQ;
                    maxA = a;
                }
            }

            return maxQ;
        }