コード例 #1
0
        /// <summary>
        /// Walks from <paramref name="node"/> up through its ancestors, updating the visit
        /// count and accumulated reward of every node on the path, recording the
        /// (player, action) pairs taken along the way in ActionHistory, and updating the
        /// RAVE statistics of every child whose (player, action) pair is in that history.
        /// </summary>
        /// <param name="node">Node at which backpropagation starts.</param>
        /// <param name="reward">Reward object queried for the value credited to each node.</param>
        protected override void Backpropagate(MCTSNode node, Reward reward)
        {
            /*
             *  while v is not null do
             *      N(v) <- N(v) + 1
             *      Q(v) <- Q(v) + r(v, Player(Parent(v)))
             *      if Parent(v) is not null do
             *          actionHistory <- actionHistory + (Player(Parent(v)), A(v))
             *      v <- parent of v
             *      if v is not null do
             *          p <- Player(v)
             *          foreach (c in Children(v))
             *              if ((p, A(c)) in actionHistory) do
             *                  Nrave(c) <- Nrave(c) + 1
             *                  Qrave(c) <- Qrave(c) + r(c, p)
             */
            MCTSNode currentNode = node;

            while (currentNode != null)
            {
                currentNode.N = currentNode.N + 1;
                // NOTE(review): the pseudo-code above credits r(v, ...) for the node being
                // visited, but this call passes the starting node on every iteration.
                // Kept as-is because GetRewardForNode is not visible here - confirm intent.
                currentNode.Q = currentNode.Q + reward.GetRewardForNode(node);

                if (currentNode.Parent != null)
                {
                    // Store a fresh pair per step. Re-adding one shared, mutated Pair
                    // instance would (assuming Pair is a reference type) make every
                    // history entry alias the same object.
                    ActionHistory.Add(new Pair<int, Action>(currentNode.Parent.PlayerID, currentNode.Action));
                }

                currentNode = currentNode.Parent;

                if (currentNode != null)
                {
                    int player = currentNode.PlayerID;

                    foreach (MCTSNode child in currentNode.ChildNodes)
                    {
                        if (IsInActionHistory(player, child.Action))
                        {
                            child.NRAVE = child.NRAVE + 1;
                            child.QRAVE = child.QRAVE + reward.GetRewardForNode(child);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Returns true when ActionHistory holds an entry whose player id equals
        /// <paramref name="player"/> and whose action equals <paramref name="action"/>.
        /// </summary>
        /// <remarks>
        /// Entries are compared field by field, so the result does not depend on whether
        /// Pair overrides Equals. Actions are compared with object.Equals, which uses the
        /// action type's own Equals override if there is one and reference equality otherwise.
        /// </remarks>
        private bool IsInActionHistory(int player, Action action)
        {
            foreach (Pair<int, Action> entry in ActionHistory)
            {
                if (entry.Left == player && object.Equals(entry.Right, action))
                {
                    return true;
                }
            }

            return false;
        }
コード例 #2
0
        /// <summary>
        /// Visits <paramref name="node"/> and each of its ancestors in turn, incrementing
        /// the visit count N and adding the reward to the accumulated value Q of each.
        /// </summary>
        /// <param name="node">Node at which backpropagation starts.</param>
        /// <param name="reward">Reward object queried for the value to accumulate.</param>
        protected virtual void Backpropagate(MCTSNode node, Reward reward)
        {
            // Follow the Parent links until the top of the tree (Parent == null) is passed.
            for (MCTSNode visited = node; visited != null; visited = visited.Parent)
            {
                visited.N += 1;

                // NOTE(review): the reward is always requested for the starting node, not
                // for the node currently being updated - confirm this is intended.
                visited.Q += reward.GetRewardForNode(node);
            }
        }
コード例 #3
0
        /// <summary>
        /// Plays actions supplied by getRandomAction on a world model generated from
        /// <paramref name="initialPlayoutState"/> until that model reports it is terminal,
        /// then returns a Reward whose Value is computed for the final state.
        /// </summary>
        /// <param name="initialPlayoutState">World model the playout starts from.</param>
        /// <returns>A new Reward with Value set from the state the playout ended in.</returns>
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            // Simulate on the model returned by GenerateChildWorldModel, not on the argument.
            WorldModel simulation = initialPlayoutState.GenerateChildWorldModel();

            for (;;)
            {
                if (simulation.IsTerminal())
                {
                    break;
                }

                var chosenAction = getRandomAction(simulation);
                chosenAction.ApplyActionEffects(simulation);
            }

            Reward outcome = new Reward();

            // Wrap the final state in a node so the reward can be evaluated for it.
            MCTSNode finalNode = new MCTSNode(simulation);
            outcome.Value = outcome.GetRewardForNode(finalNode);

            return outcome;
        }