/*
 * Scores a child of a decision node with the UCB formula:
 *
 *   child.AverageValue + EXPLORATION_RATE * sqrt(ln(parent.VisitCount) / child.VisitCount)
 *
 * child.VisitCount is used as a divisor, so the caller is expected to pass a
 * child that has been visited at least once.
 */
private double CalculateUCB(DecisionNode parent, ChanceNode child)
{
    // Exploration bonus: shrinks as the child gets visited more often
    // relative to its parent.
    double explorationTerm = Math.Sqrt(Math.Log(parent.VisitCount) / child.VisitCount);

    return child.AverageValue + EXPLORATION_RATE * explorationTerm;
}
/*
 * Uses the UCB for Trees algorithm to decide which child of a decision node
 * to explore next.
 *
 * While at least one child has never been visited, one of the unvisited
 * children is picked at random through RandomProvider.Select. Once every
 * child has been visited, the child with the largest UCB score is returned;
 * on equal scores the child enumerated first wins.
 *
 * Returns null if the node has no children, or if no child scores strictly
 * above double.MinValue.
 */
private ChanceNode SelectChildToVisit(DecisionNode node)
{
    // Pass 1: unexplored children always take priority over UCB ranking.
    var neverVisited = new List<ChanceNode>();
    foreach (var candidate in node.Children.Values)
    {
        if (candidate.VisitCount == 0)
        {
            neverVisited.Add(candidate);
        }
    }

    if (neverVisited.Count != 0)
    {
        return RandomProvider.Select(neverVisited);
    }

    // Pass 2: every child has been visited, so rank all of them by UCB.
    ChanceNode winner = null;
    double winnerScore = double.MinValue;
    foreach (var candidate in node.Children.Values)
    {
        double score = CalculateUCB(node, candidate);
        if (score > winnerScore)
        {
            winnerScore = score;
            winner = candidate;
        }
    }

    return winner;
}
/// <summary>
/// Samples one random tile placement on a new <c>GameState</c> built from this
/// node's <c>State</c> and returns the child decision node registered for the
/// cell that received the tile. The child is created and stored in
/// <c>Children</c> the first time a given cell is drawn; later draws of the
/// same cell return the stored child.
/// </summary>
/// <returns>the child node stored under the cell of the randomly added tile</returns>
public DecisionNode GenerateChild()
{
    // The tile is added to a state constructed from State, not to State itself.
    var successor = new GameState(State);
    var cell = successor.AddRandomTile().Cell;

    // NOTE(review): children are keyed by cell only. If AddRandomTile can place
    // tiles of different values on the same cell, those outcomes share the one
    // child created for the first draw - confirm this is intended.
    if (Children.TryGetValue(cell, out DecisionNode existing))
    {
        return existing;
    }

    var created = new DecisionNode(successor);
    Children.Add(cell, created);
    return created;
}
/*
 * Plays randomly from the node's state until a terminal state is reached,
 * i.e. a state with no legal actions, and returns GetValueFor of that
 * final state.
 *
 * The simulation runs on a GameState constructed from node.State. Each step
 * applies one action chosen by RandomProvider.Select and then adds a random
 * tile.
 */
private double DoRollout(DecisionNode node)
{
    var simulation = new GameState(node.State);

    while (true)
    {
        // Legal actions are re-queried after every move and tile placement.
        var moves = new List<Action>(simulation.GetLegalActions());
        if (moves.Count <= 0)
        {
            return GetValueFor(simulation);
        }

        simulation.ApplyAction(RandomProvider.Select(moves));
        simulation.AddRandomTile();
    }
}
/// <summary>
/// Returns the set of legal actions in the given state with their corresponding
/// expected values. A new search tree is rooted at <paramref name="state"/>
/// (replacing <c>TreeRoot</c>) and sampled repeatedly until the search limit
/// reports that it is done.
/// </summary>
/// <param name="state">the game state</param>
/// <param name="searchLimit">
/// a search limit for the algorithm; its <c>Done()</c> method is polled before
/// every sample of the tree
/// </param>
/// <returns>
/// one <c>ActionValue</c> per child of the root, in shuffled order. An action
/// whose child was never visited is reported with a value of 0; this includes
/// every action when the search limit is already done on entry.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="searchLimit"/> is null
/// </exception>
public IEnumerable<ActionValue> GetPolicies(GameState state, ISearchLimit searchLimit)
{
    // Fail fast, before TreeRoot is replaced, instead of hitting a
    // NullReferenceException on the first Done() call.
    if (searchLimit == null)
    {
        throw new ArgumentNullException(nameof(searchLimit));
    }

    TreeRoot = new DecisionNode(state);

    // Expand the root up front so that its actions are reported even when the
    // search limit allows zero samples. Sampling the root performs this same
    // call on every visit, so doing it once more here is harmless.
    TreeRoot.ExpandChildren();

    while (!searchLimit.Done())
    {
        SampleSearchTree(TreeRoot);
    }

    var results = TreeRoot.Children
        .Select(pair => new ActionValue()
        {
            Action = pair.Key,
            // AverageValue is only read for children that were actually sampled.
            Value = pair.Value.VisitCount > 0 ? pair.Value.AverageValue : 0
        })
        .ToList();

    RandomProvider.Shuffle(results); // break ties randomly
    return results;
}
/*
 * Produces one sampled value for a decision node.
 *
 *  - A node other than TreeRoot that has never been visited is evaluated
 *    with a random rollout.
 *  - Otherwise the node's children are expanded. If it has none, the value
 *    of its own state is returned; if it has some, one child is selected
 *    and the search continues through SampleSearchTree.
 */
private double GetDecisionNodeValue(DecisionNode node)
{
    // The root is never rolled out, even on its first visit.
    if (node != TreeRoot && node.VisitCount == 0)
    {
        return DoRollout(node);
    }

    node.ExpandChildren();

    bool hasNoChildren = node.Children.Count == 0;
    return hasNoChildren
        ? GetValueFor(node.State)
        : SampleSearchTree(SelectChildToVisit(node));
}