コード例 #1
0
        /// <summary>
        /// Given a start state, samples random play-outs for every available action and picks the action whose samples scored best.
        /// </summary>
        /// <param name="state">The state to start in. It is only ever passed to <paramref name="action_enumerator"/> and <paramref name="cloner"/> here.</param>
        /// <param name="samples">The number of samples to take for each action.</param>
        /// <param name="cloner">The means by which duplicate states may be generated.</param>
        /// <param name="action_enumerator">Enumerates the actions available in any given state.</param>
        /// <param name="updater">Takes a state and an action and applies one to the other, returning the resulting state.</param>
        /// <param name="state_hueristic">Determines how good a terminal state is.</param>
        /// <returns>
        /// The action with the highest summed sample value; ties are broken by a random pick among the tied actions.
        /// Returns default(A) when no actions are available, and the sole action (without sampling) when there is exactly one.
        /// </returns>
        public static A Search(S state, int samples, StateCloner <S> cloner, ActionEnumerator <A, S> action_enumerator, ActionApplier <A, S> updater, StateEvaluator <S> state_hueristic)
        {
            List <A> actions = new List <A>();

            // Get all the actions. IEnumerator<T> is IDisposable, so the using block
            // guarantees the enumerator is released even if enumeration throws.
            using (IEnumerator <A> acts = action_enumerator(state))
            {
                while (acts.MoveNext())
                {
                    actions.Add(acts.Current);
                }
            }

            // If we have no actions, we can't do anything
            if (actions.Count == 0)
            {
                return(default(A));
            }

            // If we have one action, don't waste time
            if (actions.Count == 1)
            {
                return(actions[0]);
            }

            // Score each action and keep track of the best ones as we go.
            // Every action is explored, in enumeration order, before the final random pick is made.
            int      max  = 0;
            List <A> best = new List <A>();

            foreach (A a in actions)
            {
                // Apply the action to a clone so the caller's state is not handed to the updater directly.
                int value = Explore(updater(cloner(state), a), samples, cloner, action_enumerator, updater, state_hueristic);

                if (best.Count == 0 || value > max)
                {
                    // First action seen, or a strictly better one: it replaces all previous candidates.
                    max = value;

                    best.Clear();
                    best.Add(a);
                }
                else if (value == max)
                {
                    // Tied with the current best: remember it for the random tie-break.
                    best.Add(a);
                }
            }

            // Pick among the equally good actions
            return(best[rand.Next(0, best.Count)]);
        }
コード例 #2
0
        /// <summary>
        /// Plays a state to completion repeatedly and returns the sum of the results.
        /// The sum is not divided by the number of samples, so it is an expected value only up to that division.
        /// </summary>
        /// <param name="state">The state every play-out starts from; each play-out receives its own clone.</param>
        /// <param name="samples">The number of play-outs to run. Zero or a negative number runs none.</param>
        /// <param name="cloner">The means by which duplicate states may be generated.</param>
        /// <param name="action_enumerator">Enumerates the actions available in any given state.</param>
        /// <param name="updater">Takes a state and an action and applies one to the other.</param>
        /// <param name="state_hueristic">Determines how good a terminal state is.</param>
        /// <returns>Returns the summed (undivided) value of all play-outs, or 0 when none were run.</returns>
        private static int Explore(S state, int samples, StateCloner <S> cloner, ActionEnumerator <A, S> action_enumerator, ActionApplier <A, S> updater, StateEvaluator <S> state_hueristic)
        {
            int total     = 0;
            int remaining = samples;

            // Count down the requested play-outs, accumulating each outcome.
            while (remaining > 0)
            {
                // Each play-out gets a fresh clone of the starting state.
                S playout_start = cloner(state);

                total += PlayToCompletion(playout_start, action_enumerator, updater, state_hueristic);
                remaining--;
            }

            return(total);
        }