/// <summary>
/// Performs one complete MCTS pass: the tree policy picks a node, the default
/// policy evaluates that node's state, and the result is backed up the tree.
/// </summary>
/// <param name="root">Node the tree policy starts descending from.</param>
/// <param name="startingTeam">Team forwarded to both the tree policy and the default policy.</param>
/// <returns>The delta produced by the default policy (the same value that is backed up).</returns>
public float OneIteration(UctNode root, TeamColor startingTeam) {
    UctNode selected = TreePolicy(root, startingTeam);
    float reward = DefaultPolicy(selected.State, startingTeam);

    Backup(selected, reward);

    return reward;
}
/// <summary>
/// Runs the MCTS search starting from a given initial state.
/// </summary>
/// <param name="initialState">
/// State to search from. The root node works on a copy obtained through
/// <c>CopyStateOnly()</c>.
/// </param>
/// <returns>
/// The actions chosen by <c>SelectBestActions</c> together with the average
/// wall-clock milliseconds spent per iteration.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when <c>initialState.CurrentTeam</c> has no value.
/// </exception>
public UctSearchResult UctSearch(GameInstance initialState) {
    // Reject a finished game up front, before paying for the state copy below.
    if (!initialState.CurrentTeam.HasValue) {
        throw new ArgumentException("Trying to do UCT search on a finished game.");
    }

    var startingTeam = initialState.CurrentTeam.Value;
    var root = new UctNode(0, 0, UctAction.NullAction(), initialState.CopyStateOnly());

    var stopwatch = Stopwatch.StartNew();
    var iterationStopwatch = new Stopwatch();
    int iterations = 0;

    // The loop body always runs at least once, so the average computed at the
    // end never divides by zero.
    bool budgetSpent;
    do {
        iterations++;
        TotalIterationCount++;

        iterationStopwatch.Restart();
        OneIteration(root, startingTeam);
        iterationStopwatch.Stop();

        MillisecondsPerIterationAverage.Add(iterationStopwatch.Elapsed.TotalMilliseconds);

        // _thinkTime is compared against the iteration count or against the
        // elapsed milliseconds, depending on _iterationsOverTime.
        budgetSpent = _iterationsOverTime
            ? iterations >= _thinkTime
            : stopwatch.ElapsedMilliseconds >= _thinkTime;
    } while (!budgetSpent);

    stopwatch.Stop();

#if DOTGRAPH
    UctDebug.PrintTreeRepresentation(root);
#endif

    Interlocked.Increment(ref SearchCount);

    var actions = SelectBestActions(root);
    var millisecondsPerIteration = (double) stopwatch.ElapsedMilliseconds / iterations;

    return new UctSearchResult(actions, millisecondsPerIteration);
}
/// <summary>
/// Picks the child of <paramref name="node"/> with the highest UCB-1 value.
/// </summary>
/// <param name="node">Parent whose children are compared.</param>
/// <param name="startingTeam">Team forwarded to <c>UcbValue</c>.</param>
/// <param name="k">Exploration factor forwarded to <c>UcbValue</c>.</param>
/// <returns>The highest-scoring child, or <c>null</c> when the node has no children.</returns>
public static UctNode BestChild(UctNode node, TeamColor startingTeam, double k = 2) {
    bool hasChildren = node.Children.Count != 0;

    return hasChildren
        ? node.Children.FastMax(child => UcbValue(node, child, k, startingTeam))
        : null;
}
/// <summary>
/// Propagates a reward from a node up through all of its ancestors.
/// </summary>
/// <param name="node">Node to start from; may be <c>null</c>, in which case nothing happens.</param>
/// <param name="delta">Reward added to <c>Q</c> of every visited node.</param>
public static void Backup(UctNode node, float delta) {
    // Walk the Parent chain until it runs out, counting one visit per node.
    for (UctNode current = node; current != null; current = current.Parent) {
        current.N++;
        current.Q += delta;
    }
}
/// <summary>
/// Computes the UCB-1 score of <paramref name="node"/> as seen from its parent.
/// </summary>
/// <param name="parent">Parent node; its <c>N</c> feeds the logarithm of the exploration term.</param>
/// <param name="node">Child node being scored.</param>
/// <param name="k">Multiplier applied inside the square root of the exploration term.</param>
/// <param name="startingTeam">
/// Team the value is measured for; when the node's state has a different
/// current team, the mean is flipped to <c>1 - mean</c>.
/// </param>
/// <returns><c>mean + sqrt(k * ln(parent.N) / node.N)</c>, narrowed to <c>float</c>.</returns>
public static float UcbValue(UctNode parent, UctNode node, double k, TeamColor startingTeam) {
    float mean = node.Q / node.N;

    bool sameTeam = !(node.State.CurrentTeam != startingTeam);
    float exploitation = sameTeam ? mean : 1 - mean;

    double exploration = Math.Sqrt(k * Math.Log(parent.N) / node.N);

    return (float) (exploitation + exploration);
}
/// <summary>
/// Builds the list of actions to play, following the child with the best mean
/// reward (<c>Q / N</c>) from the root until an end-turn action or a leaf is reached.
/// </summary>
/// <remarks>
/// When the best child's mean drops below the threshold, the remaining actions
/// are produced by <c>ActionGenerator.RuleBasedAction</c> instead, starting from
/// a copy of the current node's state, until that generator ends the turn or
/// emits a defensive move.
/// </remarks>
/// <param name="root">Root of the searched tree.</param>
/// <returns>The selected actions; the end-turn action itself is never included.</returns>
private List<UctAction> SelectBestActions(UctNode root) {
    // Mean reward below which the tree's suggestion is not trusted.
    const double minimumTrustedMean = 0.2;

    var result = new List<UctAction>();
    UctNode current = root;

    do {
        if (current.Children.Count == 0) {
            break;
        }

        UctNode max = current.Children.FastMax(c => c.Q / c.N);

        if (max.Q / max.N < minimumTrustedMean) {
            // Fall back to the rule-based generator for the rest of the turn.
            var state = current.State.CopyStateOnly();

            while (true) {
                var action = ActionGenerator.RuleBasedAction(state);
                state = ActionEvaluator.F(state, action);

                if (action.Type == UctActionType.EndTurn) {
                    return result;
                }

                result.Add(action);

                // A defensive move is recorded and then ends the sequence.
                if (action.Type == UctActionType.DefensiveMove) {
                    return result;
                }
            }
        }

        if (max.Action.Type != UctActionType.EndTurn) {
            result.Add(max.Action);
        }

        current = max;
    } while (current.Action.Type != UctActionType.EndTurn);

    return result;
}
/// <summary>
/// Generates the (truncated) list of actions MCTS considers in a given state.
/// </summary>
/// <param name="game">State to generate actions for; it must have a current mob.</param>
/// <param name="parent">
/// Node the actions are generated for, or <c>null</c>. Defensive moves are
/// skipped when its action is already a defensive move.
/// </param>
/// <param name="allowMove">Whether attack-move and defensive-move actions may be generated.</param>
/// <param name="allowEndTurn">Whether the end-turn action may be added.</param>
/// <returns>The generated actions, after <c>GameInvariants.AssertValidActions</c> has checked them.</returns>
/// <exception cref="InvalidOperationException">Thrown when the game has no current mob.</exception>
public static List<UctAction> PossibleActions(GameInstance game, UctNode parent, bool allowMove, bool allowEndTurn) {
    var actions = new List<UctAction>(10);

    var currentMob = game.CurrentMob;
    if (!currentMob.HasValue) {
        Utils.Log(LogSeverity.Warning, nameof(UctNode),
                  "Final state reached while trying to compute possible actions.");
        throw new InvalidOperationException();
    }

    var mob = game.CachedMob(currentMob.Value);
    GameInvariants.AssertMobPlayable(game, mob);

    bool foundAbilityUse = GenerateDirectAbilityUse(game, mob, actions);

    if (allowMove) {
        // Attack moves are dropped when an ability can be used directly,
        // unless Constants.AlwaysAttackMove forces them.
        if (Constants.AlwaysAttackMove || !foundAbilityUse) {
            GenerateAttackMoveActions(game, game.CachedMob(mob.MobId), actions);
        }

        // Never chain a defensive move after another defensive move.
        bool parentWasDefensive = parent != null
                                  && parent.Action.Type == UctActionType.DefensiveMove;
        if (!parentWasDefensive) {
            GenerateDefensiveMoveActions(game, mob, actions);
        }
    }

    // With Constants.EndTurnAsLastResort set, ending the turn is only offered
    // when at most one other action was generated.
    if (allowEndTurn && (!Constants.EndTurnAsLastResort || actions.Count <= 1)) {
        actions.Add(UctAction.EndTurnAction());
    }

    GameInvariants.AssertValidActions(game, actions);

    return actions;
}
/// <summary>
/// Expands a given node by adding one new child for its next untried action.
/// </summary>
/// <param name="node">
/// Node to expand. It must still have an untried action, i.e. fewer children
/// than entries in <c>PossibleActions</c>; otherwise the indexer below throws.
/// </param>
/// <returns>The newly created child, already linked to <paramref name="node"/>.</returns>
public static UctNode Expand(UctNode node) {
    var type = node.Action.Type;

    // Moving is only allowed when the node itself was not reached by a move.
    var allowMove = type != UctActionType.Move && type != UctActionType.DefensiveMove;

    // The second flag used to be spelled `true || type != UctActionType.EndTurn`,
    // which is always true; pass the constant directly.
    node.PrecomputePossibleActions(allowMove, true);

    // Children are appended one per call, so the child count is the index of
    // the next action that has not been tried yet.
    var action = node.PossibleActions[node.Children.Count];

    var child = new UctNode(0, 0, action, ActionEvaluator.F(node.State, action));
    child.Parent = node;
    node.Children.Add(child);

    return child;
}
/// <summary>
/// Descends the tree from <paramref name="node"/>: fully expanded nodes are
/// left via <c>BestChild</c>, and the first node that is not fully expanded is
/// expanded and its new child returned.
/// </summary>
/// <param name="node">Node to start the descent from.</param>
/// <param name="startingTeam">Team forwarded to <c>BestChild</c>.</param>
/// <returns>
/// The freshly expanded node, or the terminal node the descent ended in.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the descent selects a defensive move directly after another defensive move.
/// </exception>
public UctNode TreePolicy(UctNode node, TeamColor startingTeam) {
    bool previousWasDefense = node.Action.Type == UctActionType.DefensiveMove;

    while (!node.IsTerminal) {
        if (node.IsFullyExpanded) {
            // Selection step: move on to the best child.
            Interlocked.Increment(ref BestChildCount);
            node = BestChild(node, startingTeam);

            bool isDefense = node.Action.Type == UctActionType.DefensiveMove;
            if (isDefense && previousWasDefense) {
                throw new InvalidOperationException();
            }

            previousWasDefense = isDefense;
            continue;
        }

        // Expansion step: add one child and stop descending.
        Interlocked.Increment(ref ExpandCount);
        UctNode fresh = Expand(node);

        var freshType = fresh.Action.Type;
        bool freshMayMove = freshType != UctActionType.Move
                            && freshType != UctActionType.DefensiveMove;

        // A new node with exactly one possible action is expanded once more
        // right away, to reduce the number of needed simulations.
        if (!fresh.IsTerminal) {
            fresh.PrecomputePossibleActions(freshMayMove, true);

            if (fresh.PossibleActions.Count == 1) {
                fresh = Expand(fresh);
            }
        }

        return fresh;
    }

    return node;
}
/// <summary>
/// Converts a node, and its descendants down to a limited depth, into an
/// <c>action</c> XML element.
/// </summary>
/// <param name="node">Node to convert.</param>
/// <param name="budget">
/// Remaining depth; children are only emitted while this is greater than zero,
/// and each level passes <c>budget - 1</c> to its children.
/// </param>
/// <returns>
/// An element created by <c>_doc</c> with the <c>Q</c>, <c>N</c>, <c>Action</c>
/// and <c>IsTerminal</c> attributes of the node.
/// </returns>
private XmlElement PrintNode(UctNode node, int budget) {
    // Numbers go into a machine-readable file, so they must not depend on the
    // current culture (e.g. a comma as the decimal separator).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var element = _doc.CreateElement("action");

    if (budget > 0) {
        foreach (var child in node.Children) {
            element.AppendChild(PrintNode(child, budget - 1));
        }
    }

    element.SetAttribute("Q", node.Q.ToString(invariant));
    element.SetAttribute("N", node.N.ToString(invariant));
    element.SetAttribute("Action", node.Action.ToString());
    element.SetAttribute("IsTerminal", node.IsTerminal.ToString());

    return element;
}
/// <summary>
/// Creates a printer for the tree rooted at <paramref name="root"/>, backed by
/// a new, empty <see cref="XmlDocument"/>.
/// </summary>
/// <param name="root">Root node stored for later use by the printer.</param>
public XmlTreePrinter(UctNode root) {
    this._doc = new XmlDocument();
    this._root = root;
}