/// <summary>
/// Epsilon-greedy action selection. One random number is drawn; if it falls
/// below <c>1 - mEpsilon</c> the action with the largest Q-value at the given
/// state is returned (exploit), otherwise one of the available actions is
/// picked at random (explore).
/// </summary>
/// <param name="state_id">Identifier of the state for which an action is chosen.</param>
/// <param name="Q">Model queried for the actions available at the state and their Q-values.</param>
/// <returns>
/// The selected action id, or -1 when no action is available at the state.
/// Ties between equal Q-values are resolved in favour of the action that
/// appears first in the list returned by <c>FindAllActionsAtState</c>.
/// </returns>
public override int SelectAction(int state_id, QModel Q)
        {
            // NOTE(review): assumes RandomEngine.NextDouble() is uniform on [0, 1) — confirm.
            double r = RandomEngine.NextDouble();

            List <int> action_list  = Q.FindAllActionsAtState(state_id);
            int        action_count = action_list.Count;

            // Nothing to choose from: report "no action" instead of dividing by zero below.
            if (action_count == 0)
            {
                return(-1);
            }

            if (r < 1 - mEpsilon)
            {
                // Exploit: scan for the largest Q-value. Starting from negative
                // infinity guarantees that any finite Q-value replaces the seed.
                double maxQ             = double.NegativeInfinity;
                int    action_with_maxQ = -1;

                foreach (int action_id in action_list)
                {
                    double Qval = Q[state_id, action_id];
                    if (Qval > maxQ)
                    {
                        maxQ             = Qval;
                        action_with_maxQ = action_id;
                    }
                }

                return(action_with_maxQ);
            }

            // Explore: r is confined to [1 - mEpsilon, 1) on this path, so reusing it
            // as the index source would favour the tail of the list. Draw a fresh number.
            double pick = RandomEngine.NextDouble();
            return(action_list[(int)(pick * action_count) % action_count]);
        }
예제 #2
0
        /// <summary>
        /// Roulette-wheel action selection: an action is chosen with a chance
        /// proportional to its Q-value at the given state. A random number is
        /// compared against the normalised running total of the Q-values and the
        /// first action whose cumulative share reaches it is returned.
        /// </summary>
        /// <param name="state_id">Identifier of the state for which an action is chosen.</param>
        /// <param name="Q">Model queried for the actions available at the state and their Q-values.</param>
        /// <returns>
        /// The selected action id, or -1 when no action is available at the state.
        /// When the Q-values cannot be turned into a usable distribution (for
        /// example they sum to zero), an action is picked uniformly at random.
        /// </returns>
        public override int SelectAction(int state_id, QModel Q)
        {
            // NOTE(review): assumes RandomEngine.NextDouble() is uniform on [0, 1) — confirm.
            double r = RandomEngine.NextDouble();

            List <int> actions      = Q.FindAllActionsAtState(state_id);
            int        action_count = actions.Count;

            if (action_count == 0)
            {
                return(-1);
            }

            // Running totals are stored by position rather than keyed by action id,
            // so repeated ids in the list cannot overwrite each other.
            double[] acc_weights = new double[action_count];
            double   sum         = 0;

            for (int i = 0; i < action_count; ++i)
            {
                sum           += Q[state_id, actions[i]];
                acc_weights[i] = sum;
            }

            for (int i = 0; i < action_count; ++i)
            {
                // With sum == 0 this division yields NaN and the comparison is
                // false for every action; the fallback below handles that case.
                if (r <= acc_weights[i] / sum)
                {
                    return(actions[i]);
                }
            }

            // No cumulative share reached r (zero or otherwise degenerate total):
            // choose uniformly instead of reporting "no action" for a non-empty list.
            double pick = RandomEngine.NextDouble();
            return(actions[(int)(pick * action_count) % action_count]);
        }
        /// <summary>
        /// Greedy action selection: returns the action with the largest Q-value
        /// at the given state.
        /// </summary>
        /// <param name="state_id">Identifier of the state for which an action is chosen.</param>
        /// <param name="Q">Model queried for the actions available at the state and their Q-values.</param>
        /// <returns>
        /// The id of the action with the highest Q-value, or -1 when no action is
        /// available at the state. Ties are resolved in favour of the action that
        /// appears first in the list returned by <c>FindAllActionsAtState</c>.
        /// </returns>
        public override int SelectAction(int state_id, QModel Q)
        {
            // Starting from negative infinity guarantees that any finite Q-value
            // replaces the seed on the first comparison.
            double maxQ             = double.NegativeInfinity;
            int    action_with_maxQ = -1;

            List <int> action_list = Q.FindAllActionsAtState(state_id);

            foreach (int action_id in action_list)
            {
                double Qval = Q[state_id, action_id];
                if (Qval > maxQ)
                {
                    maxQ             = Qval;
                    action_with_maxQ = action_id;
                }
            }

            return(action_with_maxQ);
        }