/// <summary>
/// Backpropagates a playout result from <paramref name="node"/> up to the root,
/// updating the regular statistics (N, Q) of every node on the path and the RAVE
/// statistics (NRAVE, QRAVE) of the siblings whose action was played by the same
/// player further down the path.
///
/// Pseudocode being implemented:
///   while v is not null do
///       N(v) &lt;- N(v) + 1
///       Q(v) &lt;- Q(v) + r(v, Player(Parent(v)))
///       if Parent(v) is not null, record (Player(Parent(v)), A(v)) in the action history
///       v &lt;- Parent(v)
///       if v is not null do
///           p &lt;- Player(v)
///           foreach c in Children(v)
///               if (p, A(c)) in actionHistory do
///                   Nrave(c) &lt;- Nrave(c) + 1
///                   Qrave(c) &lt;- Qrave(c) + r(c, p)
/// </summary>
/// <param name="node">Node from which the backpropagation starts; null makes this a no-op.</param>
/// <param name="reward">Reward object queried for the value credited to each node.</param>
protected override void Backpropagate(MCTSNode node, Reward reward)
{
    MCTSNode currentNode = node;

    while (currentNode != null)
    {
        currentNode.N = currentNode.N + 1;
        // NOTE(review): the pseudocode credits r(v, ...) for the node being visited, yet the
        // reward is queried for the starting node on every step. Left unchanged because the
        // semantics of Reward.GetRewardForNode are not visible here - confirm the intent.
        currentNode.Q = currentNode.Q + reward.GetRewardForNode(node);

        MCTSNode parent = currentNode.Parent;
        if (parent != null)
        {
            // A fresh pair per step: sharing one mutable pair would make every history
            // entry alias the same object and change whenever that object is reassigned.
            ActionHistory.Add(new Pair<int, Action>(parent.PlayerID, currentNode.Action));
        }

        currentNode = parent;
        if (currentNode != null)
        {
            int player = currentNode.PlayerID;

            foreach (MCTSNode child in currentNode.ChildNodes)
            {
                if (IsInActionHistory(player, child.Action))
                {
                    child.NRAVE = child.NRAVE + 1;
                    child.QRAVE = child.QRAVE + reward.GetRewardForNode(child);
                }
            }
        }
    }
}

/// <summary>
/// Tells whether the action history holds an entry for the given player and action.
/// Compares the pair members directly, so the result does not depend on how Pair
/// implements equality, and the history is never modified by the lookup.
/// </summary>
private bool IsInActionHistory(int player, Action action)
{
    foreach (Pair<int, Action> entry in ActionHistory)
    {
        if (entry.Left == player && object.Equals(entry.Right, action))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Walks from <paramref name="node"/> up through the Parent links until there is no
/// parent left, incrementing the visit count N and adding the reward to Q on each node.
/// </summary>
/// <param name="node">Node where the walk starts; when null nothing is updated.</param>
/// <param name="reward">Reward object queried for the value added to each Q.</param>
protected virtual void Backpropagate(MCTSNode node, Reward reward)
{
    for (MCTSNode visited = node; visited != null; visited = visited.Parent)
    {
        visited.N += 1;
        // The reward is queried for the starting node on every step, not for the node
        // currently being visited.
        visited.Q += reward.GetRewardForNode(node);
    }
}
/// <summary>
/// Runs a random playout on a child copy of <paramref name="initialPlayoutState"/>:
/// actions returned by getRandomAction are applied until the state reports it is
/// terminal, then the final state is wrapped in a node and turned into a reward.
/// </summary>
/// <param name="initialPlayoutState">State the playout starts from; only its generated child model is advanced here.</param>
/// <returns>A new Reward whose Value was obtained from GetRewardForNode on the final state.</returns>
protected override Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel simulation = initialPlayoutState.GenerateChildWorldModel();

    while (true)
    {
        if (simulation.IsTerminal())
        {
            break;
        }

        var chosenAction = getRandomAction(simulation);
        chosenAction.ApplyActionEffects(simulation);
    }

    Reward outcome = new Reward();
    MCTSNode finalNode = new MCTSNode(simulation);
    outcome.Value = outcome.GetRewardForNode(finalNode);
    return outcome;
}