/// <summary>
/// Chooses the next <see cref="SabberStoneAction"/> for the supplied game state,
/// dispatching on the configured selection policy, and records the choice.
/// </summary>
/// <param name="state">The current game state.</param>
/// <returns>
/// The chosen action, or <c>null</c> when the player to act in <paramref name="state"/>
/// is not this bot's player.
/// </returns>
/// <exception cref="InvalidEnumArgumentException">
/// Thrown when <c>Selection</c> is not one of the supported selection types.
/// </exception>
public SabberStoneAction Act(SabberStoneState state)
{
    // Nothing to decide when the player to act in this state is not ours.
    if (state.CurrentPlayer() != Player.Id)
    {
        return null;
    }

    // Read the configured policy once and dispatch on it.
    var policy = Selection;
    SabberStoneAction chosen;
    if (policy == SelectionType.UCB)
    {
        chosen = SelectUCB(state);
    }
    else if (policy == SelectionType.EGreedy)
    {
        chosen = SelectEGreedy(state);
    }
    else if (policy == SelectionType.Random)
    {
        chosen = RandomPlayoutBot.CreateRandomAction(state);
    }
    else
    {
        throw new InvalidEnumArgumentException($"SelectionType `{Selection}' is not supported.");
    }

    // Keep a record of every action this bot has selected.
    ActionsTaken.Add(chosen);
    return chosen;
}
/// <summary>
/// Selects an action using the e-greedy algorithm.
/// With a probability given by <c>EGreedyThreshold</c> a random action is returned; otherwise an
/// action is built task by task, each time picking (one of) the available task(s) with the highest
/// average value recorded in <c>MASTTable</c>.
/// </summary>
/// <param name="state">The game state.</param>
/// <returns>
/// The <see cref="SabberStoneAction"/> that was built. It may contain fewer tasks than a complete
/// action when no task is available or the cloned game reaches its completed state.
/// </returns>
private SabberStoneAction SelectEGreedy(SabberStoneState state)
{
    // Determine whether or not to be greedy (chance is 1-e to use the best action).
    if (Util.RNG.NextDouble() < EGreedyThreshold)
    {
        // Explore a random action.
        return RandomPlayoutBot.CreateRandomAction(state);
    }

    var action = new SabberStoneAction();
    // Tasks are processed on a clone of the state's game, not on the game held by `state`.
    var stateClone = state.Game.Clone();

    // Repeatedly exploit the highest (average) reward task that is available in this state.
    do
    {
        SabberStonePlayerTask selectedTask;

        // Collect the tasks currently available; only options with a non-positive ZonePosition are considered.
        var availableTasks = stateClone.Game.CurrentPlayer.Options()
            .Where(i => i.ZonePosition <= 0)
            .Select(i => (SabberStonePlayerTask)i)
            .ToList();

        // A hash set keeps the membership test constant-time while the whole MAST table is filtered below.
        var availableTaskHashes = new System.Collections.Generic.HashSet<int>(availableTasks.Select(i => i.GetHashCode()));
        var availableStatistics = MASTTable.Where(i => availableTaskHashes.Contains(i.Key)).ToList();

        // Find the task with the highest average value.
        var bestTask = availableStatistics.OrderByDescending(i => i.Value.AverageValue()).FirstOrDefault();

        if (bestTask.IsDefault())
        {
            // No statistics for any available task: randomly choose an available task.
            var randomTask = availableTasks.RandomElementOrDefault();
            // If we also can't randomly find a task, stop.
            if (randomTask == null)
            {
                break;
            }

            selectedTask = randomTask;
        }
        else
        {
            // Find all available tasks that have an average value similar to the best.
            var bestValue = bestTask.Value.AverageValue();
            var compTasks = availableStatistics
                .Where(i => Math.Abs(i.Value.AverageValue() - bestValue) < AVThesis.Constants.DOUBLE_EQUALITY_TOLERANCE)
                .ToList();

            // A NaN or infinite best value fails the tolerance comparison for every entry (including
            // the best one itself), which leaves the candidate list empty; use the best entry then.
            var chosen = compTasks.Count > 0 ? compTasks.RandomElementOrDefault() : bestTask;

            // NOTE(review): the task is taken from the MAST table entry rather than from availableTasks;
            // presumably tasks with an equal hash are interchangeable for the cloned game - confirm.
            selectedTask = chosen.Value.Task;
        }

        // Add the task to the action we are building.
        action.AddTask(selectedTask);
        // Process the task on the cloned game so the next iteration sees the resulting options.
        stateClone.Process(selectedTask.Task);

        // Continue until we have created a complete action, or the game has completed.
    } while (!action.IsComplete() && stateClone.Game.State != State.COMPLETE);

    // Return the action we've created.
    return action;
}