Example #1
        /*
         * Phase 1: Selection
         * Starting from the node for the given state, repeatedly descend to
         * the child with the highest UCB1 score, stopping as soon as the
         * current node is EITHER not fully expanded OR a leaf node.
         */
        public MonteCarloNode Select(GameState state)
        {
            // Single dictionary lookup instead of ContainsKey + indexer.
            // The original logged the error and then indexed anyway, which
            // threw KeyNotFoundException; keep that failure mode explicit.
            if (!this.nodes.TryGetValue(state.GetId(), out MonteCarloNode node))
            {
                UnityEngine.Debug.LogError("Key not found in the map: " + String.Join(",", this.nodes.Keys) + ", key = " + state.GetId());
                throw new KeyNotFoundException("Key not found in the map, key = " + state.GetId());
            }

            while (node.IsFullyExpanded() && !node.IsLeaf())
            {
                List<int> actions    = node.AllActions();
                int       bestAction = -1;
                double    bestUCB1   = Double.NegativeInfinity;

                // Pick the child action with the highest UCB1 value.
                foreach (int action in actions)
                {
                    double childUCB1 = node.ChildNode(action).GetUCB1(this.UCB1ExploreParam);
                    if (childUCB1 > bestUCB1)
                    {
                        bestAction = action;
                        bestUCB1   = childUCB1;
                    }
                }
                // NOTE(review): assumes AllActions() is non-empty for a fully
                // expanded, non-leaf node; otherwise bestAction stays -1 and
                // ChildNode(-1) is called — confirm that invariant upstream.
                node = node.ChildNode(bestAction);
            }
            return node;
        }
Example #2
        /// <summary>
        /// Computes a scalar training reward by comparing the ANN's chosen
        /// action(s) against the MCTS recommendation at the current root node.
        /// Returns 1 when the ANN's best-rated action ties the overall MCTS
        /// best action, -1 when the ANN picked a worse (or no) recommended
        /// non-move action, and 0 when there is nothing to compare.
        /// </summary>
        /// <param name="vectorAction">Raw ANN action vector; converted to a discrete action set via the MCTS helper.</param>
        /// <returns>Reward in {-1, 0, 1}.</returns>
        public float CalculateMCTSActionReward(float[] vectorAction)
        {
            float          mctsReward  = 0;
            MonteCarloNode rootNode    = mcts.GetRootNode();
            List<int>      mctsActions = mcts.GetActionsWithoutMove(rootNode);
            ISet<int>      annActions  = mcts.ConvertMCTSActions(vectorAction);

            double mctsBestUcb = Double.NegativeInfinity;
            double annBestUcb  = Double.NegativeInfinity;

            // Find the best UCB over all MCTS actions, and over the subset
            // that the ANN actually selected.
            foreach (int action in mctsActions)
            {
                MonteCarloNode childNode = rootNode.ChildNode(action);
                if (childNode == null)
                {
                    continue;
                }
                double ucb = childNode.GetUCB1(UCB1ExploreParam);
                if (ucb > mctsBestUcb)
                {
                    mctsBestUcb = ucb;
                }
                if (annActions.Contains(action) && ucb > annBestUcb)
                {
                    annBestUcb = ucb;
                }
            }

            // No reward is given if no suitable (non-move) ANN action was found.
            if (annBestUcb != Double.NegativeInfinity)
            {
                // Exact double comparison is safe here: both sides are copies
                // of the same GetUCB1 results, so equality means the ANN's
                // best action is the overall best action.
                mctsReward = annBestUcb == mctsBestUcb ? 1f : -1f;
            }
            else if (mctsActions.Count > 0)
            {
                // Negative reward for non-move actions that MCTS does not recommend.
                mctsReward = -1f;
            }

            if (name.Equals(BattleArena.RED_AGENT_TAG) && mctsReward != 0)
            {
                Debug.Log(name + " " + mctsReward + " reward given: vectorActions=" + vectorAction[0] + "," + vectorAction[1] + "," + vectorAction[2] +
                          " convertedActions=" + String.Join(",", annActions) + " mctsActions=" + String.Join(",", mctsActions));
            }
            return mctsReward;
        }