/*
 * Phase 1: Selection
 * Starting at the node registered for the given state, repeatedly descend to
 * the child with the highest UCB1 value. Stops at the first node that is
 * EITHER not fully expanded OR a leaf, and returns that node.
 *
 * Throws KeyNotFoundException (after logging an error) when no node is
 * registered under state.GetId().
 */
public MonteCarloNode Select(GameState state)
{
    var stateId = state.GetId();

    // Single lookup instead of ContainsKey + indexer. On a miss, fail with the
    // same exception type the indexer would raise, but with a useful message.
    MonteCarloNode node;
    if (!this.nodes.TryGetValue(stateId, out node))
    {
        string message = "Key not found in the map: " + String.Join(",", this.nodes.Keys) + ", key = " + stateId;
        UnityEngine.Debug.LogError(message);
        throw new KeyNotFoundException(message);
    }

    while (node.IsFullyExpanded() && !node.IsLeaf())
    {
        List<int> actions = node.AllActions();
        int bestAction = -1;
        double bestUCB1 = Double.NegativeInfinity;

        // Strict '>' keeps the first action encountered when UCB1 values tie.
        foreach (int action in actions)
        {
            double childUCB1 = node.ChildNode(action).GetUCB1(this.UCB1ExploreParam);
            if (childUCB1 > bestUCB1)
            {
                bestAction = action;
                bestUCB1 = childUCB1;
            }
        }

        // NOTE(review): if 'actions' is empty or no child reports a UCB1 above
        // negative infinity (e.g. NaN), bestAction stays -1 and ChildNode(-1)
        // is called - confirm this cannot happen for a fully expanded,
        // non-leaf node.
        node = node.ChildNode(bestAction);
    }

    return node;
}
/*
 * Scores the action(s) encoded in vectorAction against the UCB1 values of the
 * MCTS root node's children.
 *
 * Only the actions returned by mcts.GetActionsWithoutMove(rootNode) are
 * considered, and only those that have a child node under the root.
 *
 * Returns:
 *   1  when the best UCB1 among the ANN's actions equals the best UCB1 among
 *      all considered actions,
 *  -1  when the ANN's best is lower, or when none of the ANN's actions was
 *      found among the considered actions while mctsActions is non-empty,
 *   0  when mctsActions is empty.
 */
public float CalculateMCTSActionReward(float[] vectorAction)
{
    MonteCarloNode rootNode = mcts.GetRootNode();
    List<int> mctsActions = mcts.GetActionsWithoutMove(rootNode);
    ISet<int> annActions = mcts.ConvertMCTSActions(vectorAction);

    double mctsBestUcb = Double.NegativeInfinity;
    double annBestUcb = Double.NegativeInfinity;

    // Find the best UCB1 over all considered actions and over the ANN's subset.
    foreach (int action in mctsActions)
    {
        MonteCarloNode childNode = rootNode.ChildNode(action);
        if (childNode == null)
        {
            continue;
        }

        double ucb = childNode.GetUCB1(UCB1ExploreParam);

        if (ucb > mctsBestUcb)
        {
            mctsBestUcb = ucb;
        }

        if (annActions.Contains(action) && ucb > annBestUcb)
        {
            annBestUcb = ucb;
        }
    }

    // A graded alternative (normalising the ANN's UCB between the min and max
    // child UCB and shaping it with 1.3 * exp(-5.84 * d^2) - 0.01) existed here
    // only as commented-out code and was not executed; it has been dropped
    // together with the min-UCB tracking that only it required.
    float mctsReward = 0;
    if (annBestUcb != Double.NegativeInfinity)
    {
        // Exact equality is intended: both values are copies of UCB1 results
        // from the loop above, so they match bit-for-bit when the ANN picked
        // (one of) the top-scoring action(s).
        mctsReward = annBestUcb == mctsBestUcb ? 1f : -1f;
    }
    else if (mctsActions.Count > 0)
    {
        // None of the ANN's actions matched a scored MCTS action: penalise.
        mctsReward = -1f;
    }

    if (name.Equals(BattleArena.RED_AGENT_TAG) && mctsReward != 0)
    {
        // String.Join avoids indexing vectorAction[0..2] directly, so the
        // diagnostic cannot throw on arrays shorter than three elements.
        Debug.Log(name + " " + mctsReward + " reward given: vectorActions=" + String.Join(",", vectorAction)
                  + " convertedActions=" + String.Join(",", annActions)
                  + " mctsActions=" + String.Join(",", mctsActions));
    }

    return mctsReward;
}