/// <summary>
/// Returns the operator that leads to the child of <paramref name="s"/> with the
/// highest average value (Q / N) recorded in the search tree.
/// </summary>
/// <param name="s">State whose children are looked up in <c>uct.childrenDic</c>.</param>
/// <returns>The <c>UsedOperator</c> of the child with the largest Q / N; the first such child wins ties.</returns>
/// <exception cref="NoChildrenException">
/// Thrown when the tree has no entry for the state, or the entry holds no children.
/// </exception>
public IOperator action(IState s)
{
    // Probe node used only as a dictionary key.
    // NOTE(review): presumably UCTNode's equality members match nodes by state - confirm.
    UCTNode key = new UCTNode();
    key.State = s;
    key.Parent = null;

    IEnumerable<UCTNode> children;
    if (!uct.childrenDic.TryGetValue(key, out children) || children == null)
    {
        throw new NoChildrenException();
    }

    // Single pass: the sequence is enumerated once and Q / N is computed once per child.
    // NOTE(review): a child whose Q is still double.MaxValue (the value isTried treats as
    // "untried") yields a huge or infinite average and wins this comparison - confirm intended.
    bool found = false;
    UCTNode bestChild = null;
    double maxQ = 0;
    foreach (UCTNode child in children)
    {
        double average = child.Q / child.N;
        if (!found || maxQ < average)
        {
            found = true;
            maxQ = average;
            bestChild = child;
        }
    }

    if (!found)
    {
        // An expanded state without successors has no action to offer; report it the same
        // way as a state that was never expanded instead of failing on an index lookup.
        throw new NoChildrenException();
    }

    return bestChild.UsedOperator;
}
/// <summary>
/// Returns the child of <paramref name="node"/> with the highest UCT score.
/// </summary>
/// <param name="node">Node whose children are looked up in <c>childrenDic</c>.</param>
/// <returns>
/// The best-scoring child, or <paramref name="node"/> itself when its child list is empty.
/// </returns>
/// <exception cref="NoChildrenException">Thrown when <c>childrenDic</c> has no entry for the node.</exception>
private UCTNode BestChild(UCTNode node)
{
    IEnumerable<UCTNode> children;
    if (!childrenDic.TryGetValue(node, out children))
    {
        throw new NoChildrenException("no children");
    }

    if (!children.Any())
    {
        return node;
    }

    // UCT selects the child that MAXIMISES the score. Picking the minimum would favour the
    // most-visited, lowest-valued child and defeat the exploration term, so selection is
    // delegated to the sequence overload, which keeps the largest UCTFunc value.
    return BestChild(children);
}
/// <summary>
/// Descends the tree from <paramref name="node"/> until it finds a node to simulate from.
/// </summary>
/// <remarks>
/// While the current state is not terminal, the time budget has not elapsed and the search
/// has not been stopped: an unexpanded node is expanded; otherwise the first child that
/// <c>isTried</c> rejects is returned; otherwise the descent continues into the child chosen
/// by <c>BestChild</c>, whose visit count is incremented. If the node reached is terminal it
/// is recorded in <c>FoundGoal</c>.
/// </remarks>
/// <param name="node">Node at which the descent starts.</param>
/// <returns>The untried child, terminal node, dead-end node, or node reached when the loop stopped.</returns>
private UCTNode TreePolicy(UCTNode node)
{
    UCTNode current = node;

    while (!isTerminal(current.State) && stopwatch.Elapsed.TotalMilliseconds < timeAvailable && !stopped)
    {
        IEnumerable<UCTNode> children;
        if (!childrenDic.TryGetValue(current, out children))
        {
            // Not expanded yet: generate the children and look at them on the next pass.
            Expand(current);
            continue;
        }

        foreach (UCTNode child in children)
        {
            if (!isTried(child))
            {
                return child;
            }
        }

        UCTNode next = BestChild(children);
        if (next == null)
        {
            // BestChild returns null for an empty child list (a non-terminal dead end).
            // Stop here instead of dereferencing null; the dead-end node itself is returned.
            break;
        }

        next.N += 1;
        current = next;
    }

    if (isTerminal(current))
    {
        FoundGoal = current;
    }

    return current;
}
/// <summary>
/// Reports whether the search tree holds a children entry for the given state.
/// </summary>
/// <param name="s">State to look up.</param>
/// <returns><c>true</c> when <c>uct.childrenDic</c> contains a key equal to a parentless node for <paramref name="s"/>.</returns>
public bool definedFor(IState s)
{
    // Parentless probe node used only as a dictionary key.
    // NOTE(review): presumably UCTNode's equality members match nodes by state - confirm.
    UCTNode probe = new UCTNode { State = s, Parent = null };
    bool known = uct.childrenDic.ContainsKey(probe);
    return known;
}
/// <summary>
/// Computes the UCT selection score of <paramref name="node"/>:
/// |Q| / N + Cp * sqrt(2 * ln(parent.N) / N).
/// </summary>
/// <param name="node">Node to score; its <c>Parent</c> is cast to <c>UCTNode</c> and read.</param>
/// <returns>The sum of the average-value term and the exploration term.</returns>
private double UCTFunc(UCTNode node)
{
    // NOTE(review): nothing here guards against a null Parent, node.N == 0 or parent.N == 0;
    // confirm callers only pass visited, non-root nodes.
    UCTNode parent = (UCTNode)node.Parent;

    double averageValue = Math.Abs(node.Q) / node.N;
    double explorationTerm = Cp * Math.Sqrt(2 * Math.Log(parent.N) / node.N);

    return averageValue + explorationTerm;
}
/// <summary>
/// Adds <paramref name="delta"/> to the Q value of <paramref name="node"/> and of every
/// ancestor reached by following <c>Parent</c> links until a null parent is found.
/// </summary>
/// <param name="node">Node at which the update starts; a null node makes this a no-op.</param>
/// <param name="delta">Amount added to each visited node's Q.</param>
private void Backup(UCTNode node, double delta)
{
    // Visit counts (N) are deliberately left untouched here; TreePolicy increments N
    // while it descends.
    for (UCTNode current = node; current != null; current = (UCTNode)current.Parent)
    {
        current.Q += delta;
    }
}
/// <summary>
/// Creates the <c>Start</c> and <c>Goal</c> nodes from the given nodes and seeds their statistics.
/// </summary>
/// <param name="start">Node wrapped into <c>Start</c>.</param>
/// <param name="goal">Node wrapped into <c>Goal</c>.</param>
/// <returns>Always <c>true</c>.</returns>
public bool InitStates(INode start, INode goal)
{
    // Q == double.MaxValue is the value isTried treats as "not tried yet".
    Start = new UCTNode(start) { Q = double.MaxValue, N = 0 };

    // Goal.Q is what DefaultPolicy returns when a rollout ends in a terminal state.
    Goal = new UCTNode(goal) { Q = 100, N = 0 };

    return true;
}
/// <summary>
/// Generates the children of <paramref name="node"/> with <c>SuccGen.Generate</c>, stores them
/// in <c>childrenDic</c> and adds their number to <c>NumberOfGeneratedNodes</c>.
/// Does nothing when the node already has an entry.
/// </summary>
/// <param name="node">Node to expand.</param>
private void Expand(UCTNode node)
{
    if (childrenDic.ContainsKey(node))
    {
        return;
    }

    // Materialise the successors once. The stored sequence is enumerated repeatedly during
    // the search; if the generator were deferred, each enumeration would produce fresh node
    // objects and the Q / N statistics written to earlier ones would be lost.
    IEnumerable<UCTNode> generated = SuccGen.Generate(node);
    List<UCTNode> children = generated.ToList();

    childrenDic.Add(node, children);
    NumberOfGeneratedNodes += children.Count;
}
/// <summary>
/// Random source shared by all rollouts of this instance. Creating a new generator per
/// rollout can repeat the same sequence when rollouts start within one clock tick on
/// runtimes that seed from the system clock.
/// NOTE(review): System.Random is not thread-safe; this assumes rollouts of one instance
/// run on a single thread - confirm.
/// </summary>
private readonly Random rolloutRandom = new Random();

/// <summary>
/// Runs a random rollout from <paramref name="node"/> and returns its value.
/// </summary>
/// <remarks>
/// If the node's state is terminal, its current Q is returned unchanged. Otherwise the node's
/// Q is reset to 0 and its N to 1, and random applicable operators are applied through
/// <c>Env.act</c>, summing <c>outcome.Reward</c>, until <c>maxDepth</c> steps were taken, the
/// time budget elapsed, the search was stopped, a terminal state was reached, or no operator
/// is applicable.
/// </remarks>
/// <param name="node">Node the rollout starts from.</param>
/// <returns>
/// <c>Goal.Q</c> when the rollout ended in a terminal state (the summed reward is discarded
/// in that case); otherwise the summed reward.
/// </returns>
private double DefaultPolicy(UCTNode node)
{
    if (isTerminal(node.State))
    {
        return node.Q;
    }

    node.Q = 0;
    node.N = 1;

    int depth = 0;
    double sumQ = 0;
    IState currentState = node.State;

    while (depth < maxDepth && stopwatch.Elapsed.TotalMilliseconds < timeAvailable && !isTerminal(currentState) && !stopped)
    {
        // Enumerate the applicable operators once; counting and indexing then work on the list.
        IEnumerable<IOperator> applicable = Env.ApplicableOperators(currentState);
        List<IOperator> operators = applicable.ToList();
        if (operators.Count == 0)
        {
            // Dead end: nothing more can be applied from this state.
            return sumQ;
        }

        IOperator usedOp = operators[rolloutRandom.Next(operators.Count)];
        IOutcome outcome = Env.act(currentState, usedOp);

        sumQ += outcome.Reward;
        depth += 1;
        currentState = outcome.State;
    }

    if (isTerminal(currentState))
    {
        return Goal.Q;
    }

    return sumQ;
}
/// <summary>
/// Picks the element of <paramref name="children"/> with the largest <c>UCTFunc</c> value.
/// </summary>
/// <param name="children">Candidate nodes.</param>
/// <returns>
/// The highest-scoring node (the earliest one on ties), or <c>null</c> when the sequence is empty.
/// </returns>
private UCTNode BestChild(IEnumerable<UCTNode> children)
{
    if (!children.Any())
    {
        return null;
    }

    // Start from the first element and let later ones replace it only on a strictly higher score.
    UCTNode winner = children.First();
    double winnerScore = UCTFunc(winner);

    foreach (UCTNode candidate in children.Skip(1))
    {
        double candidateScore = UCTFunc(candidate);
        if (candidateScore > winnerScore)
        {
            winnerScore = candidateScore;
            winner = candidate;
        }
    }

    return winner;
}
/// <summary>
/// Tells whether <paramref name="node"/> has been tried, i.e. its Q differs from
/// <c>double.MaxValue</c>.
/// </summary>
/// <param name="node">Node to inspect.</param>
/// <returns><c>false</c> when the node's Q equals <c>double.MaxValue</c>; otherwise <c>true</c>.</returns>
private bool isTried(UCTNode node)
{
    // Exact comparison is intended: double.MaxValue is used as a marker value, not a measurement.
    bool stillUntried = node.Q == double.MaxValue;
    return !stillUntried;
}
/// <summary>
/// Node-level convenience wrapper: a node is terminal when its state is.
/// </summary>
/// <param name="node">Node whose <c>State</c> is tested.</param>
/// <returns>The result of the state-based <c>isTerminal</c> check for the node's state.</returns>
private bool isTerminal(UCTNode node)
{
    IState state = node.State;
    return isTerminal(state);
}