/// <summary>
/// Given a start state, scores every available action by random sampling and returns the action with the highest score.
/// Each action is applied to a clone of the start state and the result is scored by <c>Explore</c>.
/// When several actions tie for the highest score, one of them is picked at random.
/// </summary>
/// <param name="state">The state to start in. Only clones of it are passed to <paramref name="updater"/>.</param>
/// <param name="samples">The number of samples to take for each action.</param>
/// <param name="cloner">The means by which duplicate states may be generated.</param>
/// <param name="action_enumerator">Enumerates the actions available in any given state.</param>
/// <param name="updater">Takes a state and an action and applies one to the other.</param>
/// <param name="state_hueristic">Determines how good a terminal state is.</param>
/// <returns>
/// The best action to take; <c>default(A)</c> when no actions are available, or the only action
/// (without any sampling) when exactly one is available.
/// </returns>
public static A Search(S state, int samples, StateCloner<S> cloner, ActionEnumerator<A, S> action_enumerator, ActionApplier<A, S> updater, StateEvaluator<S> state_hueristic)
{
    List<A> actions = new List<A>();

    // Collect every available action. IEnumerator<T> is IDisposable, so it is
    // wrapped in a using block to make sure any enumerator cleanup actually runs.
    using (IEnumerator<A> acts = action_enumerator(state))
    {
        while (acts.MoveNext())
        {
            actions.Add(acts.Current);
        }
    }

    // If we have no actions, we can't do anything
    if (actions.Count == 0)
    {
        return default(A);
    }

    // If we have one action, don't waste time sampling
    if (actions.Count == 1)
    {
        return actions[0];
    }

    // Score each action in enumeration order, keeping every action that shares the best score so far.
    int max = 0;
    List<A> best = new List<A>();

    foreach (A a in actions)
    {
        int value = Explore(updater(cloner(state), a), samples, cloner, action_enumerator, updater, state_hueristic);

        if (best.Count == 0 || value > max)
        {
            // First action seen, or a strictly better one: restart the candidate list.
            max = value;
            best.Clear();
            best.Add(a);
        }
        else if (value == max)
        {
            best.Add(a);
        }
    }

    // Break ties randomly among the top-scoring actions.
    return best[rand.Next(0, best.Count)];
}
/// <summary>
/// Runs <paramref name="samples"/> playouts from the given state and adds up their results.
/// Every playout receives its own clone of the state. The total is not divided by the sample count.
/// </summary>
/// <param name="state">The state each playout starts from.</param>
/// <param name="samples">How many playouts to run; a value of zero or less yields a total of 0.</param>
/// <param name="cloner">The means by which duplicate states may be generated.</param>
/// <param name="action_enumerator">Enumerates the actions available in any given state.</param>
/// <param name="updater">Takes a state and an action and applies one to the other.</param>
/// <param name="state_hueristic">Determines how good a terminal state is.</param>
/// <returns>The undivided sum of the playout results.</returns>
private static int Explore(S state, int samples, StateCloner<S> cloner, ActionEnumerator<A, S> action_enumerator, ActionApplier<A, S> updater, StateEvaluator<S> state_hueristic)
{
    int total = 0;
    int remaining = samples;

    // Count down the requested number of playouts, accumulating each result.
    while (remaining > 0)
    {
        total += PlayToCompletion(cloner(state), action_enumerator, updater, state_hueristic);
        remaining--;
    }

    return total;
}