/// <summary>
/// Selects an action using the e-greedy algorithm.
/// When a random draw falls below <c>EGreedyThreshold</c> a random action is returned.
/// Otherwise an action is built task by task, each time picking (at random among ties)
/// an available task whose <c>MASTTable</c> entry has the highest average value.
/// </summary>
/// <param name="state">The game state.</param>
/// <returns><see cref="SabberStoneAction"/>.</returns>
private SabberStoneAction SelectEGreedy(SabberStoneState state)
{
    // Determine whether or not to be greedy (chance is 1-e to use best action)
    if (Util.RNG.NextDouble() < EGreedyThreshold)
    {
        // Explore a random action
        return RandomPlayoutBot.CreateRandomAction(state);
    }

    var action = new SabberStoneAction();
    // Tasks are processed on a clone of the state's game
    var stateClone = state.Game.Clone();

    // Repeatedly exploit the highest (average) reward task that is available in this state
    do
    {
        SabberStonePlayerTask selectedTask;

        // Get the tasks currently available in this state
        var availableTasks = stateClone.Game.CurrentPlayer.Options()
            .Where(i => i.ZonePosition <= 0)
            .Select(i => (SabberStonePlayerTask)i)
            .ToList();

        // A set gives constant-time membership checks while scanning the table
        var availableTaskHashes = new System.Collections.Generic.HashSet<int>(
            availableTasks.Select(i => i.GetHashCode()));

        // Look up the statistics of the available tasks and score every entry exactly once
        var scoredStatistics = MASTTable
            .Where(i => availableTaskHashes.Contains(i.Key))
            .Select(i => new { Entry = i, Score = i.Value.AverageValue() })
            .ToList();

        if (scoredStatistics.Count == 0)
        {
            // No statistics for any available task, randomly choose an available task
            var randomTask = availableTasks.RandomElementOrDefault();

            // If we also can't randomly find a task, stop
            if (randomTask == null)
            {
                break;
            }

            selectedTask = randomTask;
        }
        else
        {
            // Find the highest average value
            var bestValue = scoredStatistics.Max(i => i.Score);

            // Find all entries that have an average value similar to the best.
            // The equality test keeps infinite scores comparable (inf - inf is NaN,
            // which never passes the tolerance test).
            var compTasks = scoredStatistics
                .Where(i => i.Score == bestValue
                            || Math.Abs(i.Score - bestValue) < AVThesis.Constants.DOUBLE_EQUALITY_TOLERANCE)
                .Select(i => i.Entry)
                .ToList();

            // Select one of the tasks; if no score is comparable (e.g. all NaN),
            // fall back to the first entry instead of dereferencing a default pair.
            // NOTE(review): the task stored in the table entry is used here, not the
            // matching instance from availableTasks - confirm this is intended.
            selectedTask = compTasks.Count > 0
                ? compTasks.RandomElementOrDefault().Value.Task
                : scoredStatistics[0].Entry.Value.Task;
        }

        // Add the task to the action we are building
        action.AddTask(selectedTask);
        // Process the task
        stateClone.Process(selectedTask.Task);

        // Continue until we have created a complete action, or the game has completed
    } while (!action.IsComplete() && stateClone.Game.State != State.COMPLETE);

    // Return the action we've created
    return action;
}
/// <summary>
/// Selects an action using the UCB1 algorithm.
/// An action is built task by task, each time picking (at random among ties) an
/// available task whose <c>MASTTable</c> entry has the highest UCB value. The visit
/// total passed to the UCB calculation is the sum of visits over the entries of the
/// currently available tasks.
/// </summary>
/// <param name="state">The game state.</param>
/// <returns><see cref="SabberStoneAction"/>.</returns>
private SabberStoneAction SelectUCB(SabberStoneState state)
{
    var action = new SabberStoneAction();
    // Tasks are processed on a clone of the state's game
    var stateClone = state.Game.Clone();

    // Repeatedly exploit the highest UCB-value task that is available in this state
    do
    {
        SabberStonePlayerTask selectedTask;

        // Get the tasks currently available in this state
        var availableTasks = stateClone.Game.CurrentPlayer.Options()
            .Where(i => i.ZonePosition <= 0)
            .Select(i => (SabberStonePlayerTask)i)
            .ToList();

        // A set gives constant-time membership checks while scanning the table
        var availableTaskHashes = new System.Collections.Generic.HashSet<int>(
            availableTasks.Select(i => i.GetHashCode()));

        // Get the stats of the tasks currently available in this state
        var availableStatistics = MASTTable
            .Where(i => availableTaskHashes.Contains(i.Key))
            .ToList();
        var totalVisits = availableStatistics.Sum(i => i.Value.Visits);

        // Compute the UCB value of every entry exactly once
        var scoredStatistics = availableStatistics
            .Select(i => new { Entry = i, Score = i.Value.UCB(totalVisits, UCBConstantC) })
            .ToList();

        if (scoredStatistics.Count == 0)
        {
            // No statistics for any available task, randomly choose an available task
            var randomTask = availableTasks.RandomElementOrDefault();

            // If we also can't randomly find a task, stop
            if (randomTask == null)
            {
                break;
            }

            selectedTask = randomTask;
        }
        else
        {
            // Find the highest UCB value
            var bestValue = scoredStatistics.Max(i => i.Score);

            // Find all entries that have an UCB value similar to the best.
            // The equality test keeps infinite scores comparable (inf - inf is NaN,
            // which never passes the tolerance test).
            var compTasks = scoredStatistics
                .Where(i => i.Score == bestValue
                            || Math.Abs(i.Score - bestValue) < AVThesis.Constants.DOUBLE_EQUALITY_TOLERANCE)
                .Select(i => i.Entry)
                .ToList();

            // Select one of the tasks; if no score is comparable (e.g. all NaN),
            // fall back to the first entry instead of dereferencing a default pair.
            // NOTE(review): the task stored in the table entry is used here, not the
            // matching instance from availableTasks - confirm this is intended.
            selectedTask = compTasks.Count > 0
                ? compTasks.RandomElementOrDefault().Value.Task
                : scoredStatistics[0].Entry.Value.Task;
        }

        // Add the task to the action we are building
        action.AddTask(selectedTask);
        // Process the task
        stateClone.Process(selectedTask.Task);

        // Continue until we have created a complete action, or the game has completed
    } while (!action.IsComplete() && stateClone.Game.State != State.COMPLETE);

    // Return the action we've created
    return action;
}