예제 #1
0
        /// <summary>
        /// Credits every non-root node on the path from <paramref name="node"/> up to the root with a
        /// normalized reward margin, and increments the visit count of every node on that path
        /// (the root included).
        /// </summary>
        /// <remarks>
        /// The margin for a player is that player's final reward minus the highest final reward among
        /// all other players. When there is no other player, the player's own reward is used as the
        /// margin so that the <see cref="double.MinValue"/> search sentinel never reaches the normalizer.
        /// </remarks>
        /// <param name="node">Node from which the walk towards the root starts.</param>
        /// <param name="finalState">State that supplies the player count and each player's reward.</param>
        public void Update(Node node, IStateProtocol finalState)
        {
            var rewardDifferences = new double[MAX_PLAYER_COUNT];
            var playerCount       = finalState.PlayerCount;

            // Read each reward once instead of once per (player, opponent) pair.
            // NOTE(review): assumes GetReward is a side-effect-free accessor - confirm.
            var rewards = new double[playerCount];
            for (uint i = 0; i < playerCount; i++)
            {
                rewards[i] = finalState.GetReward(i);
            }

            for (uint i = 0; i < playerCount; i++)
            {
                var hasOpponent       = false;
                var otherPlayerReward = double.MinValue;
                for (uint j = 0; j < playerCount; j++)
                {
                    if (i != j)
                    {
                        hasOpponent       = true;
                        otherPlayerReward = Math.Max(otherPlayerReward, rewards[j]);
                    }
                }

                // Without an opponent the sentinel would still be in place; subtracting it would
                // produce a value near double.MaxValue, so fall back to the raw reward instead.
                var difference = hasOpponent ? rewards[i] - otherPlayerReward : rewards[i];

                rewardDifferences[i] = Normalizer.Normalize(difference);
            }

            var currentNode = node;

            while (!currentNode.IsRoot)
            {
                // Each node is rewarded from the point of view of the player it reports as PlayerIndex.
                var playerIndex = currentNode.PlayerIndex;
                currentNode.AddReward(rewardDifferences[playerIndex]);
                currentNode.IncreaseVisitCount();
                currentNode = currentNode.Parent;
            }

            // Root node: only its visit count is updated, no reward is added.
            currentNode.IncreaseVisitCount();
        }
예제 #2
0
        /// <summary>
        /// Adds each player's normalized final reward to the non-root nodes on the path from
        /// <paramref name="node"/> up to the root, and increments the visit count of every node
        /// on that path (the root included).
        /// </summary>
        /// <param name="node">Node from which the walk towards the root starts.</param>
        /// <param name="finalState">State that supplies the player count and each player's reward.</param>
        public void Update(Node node, IStateProtocol finalState)
        {
            // Slots for players beyond finalState.PlayerCount keep the default value 0.
            var normalizedRewards = new double[MAX_PLAYER_COUNT];

            for (uint player = 0; player < finalState.PlayerCount; player++)
            {
                var rawReward = finalState.GetReward(player);
                normalizedRewards[player] = Normalizer.Normalize(rawReward);
            }

            // Climb through Parent links until a node reports itself as the root.
            var cursor = node;
            for (; !cursor.IsRoot; cursor = cursor.Parent)
            {
                // Reward the node from the point of view of the player it reports as PlayerIndex.
                cursor.AddReward(normalizedRewards[cursor.PlayerIndex]);
                cursor.IncreaseVisitCount();
            }

            // The root receives a visit but no reward.
            cursor.IncreaseVisitCount();
        }