/// <summary>
/// Selects the next action to try from this search node using a UCB-style rule:
/// actions with an unvisited child get a fixed unexplored bias, visited children
/// are scored by mean reward plus an exploration bonus, and a tiny random jitter
/// breaks ties between equal-priority actions.
/// </summary>
/// <param name="agent">Agent supplying the horizon, maximum reward, and the set of valid actions.</param>
/// <returns>The selected action, or -1 if the agent's environment reports no valid actions.</returns>
public int SelectAction(Agent agent)
{
    Debug.Assert(agent.MaximumReward() != null, "SelectAction requires agent.MaximumReward() to be non-null");

    // UCB scaling factor: horizon times the maximum achievable single-step reward.
    double exploreBias = (double)agent.Horizon * agent.MaximumReward().Value;
    double explorationNumerator = this.ExplorationConstant * Math.Log(this.Visits);

    int bestAction = -1;
    double bestPriority = double.NegativeInfinity;

    foreach (int action in agent.Environment.ValidActions)
    {
        // Single dictionary lookup (TryGetValue) instead of ContainsKey + indexer;
        // node is left null when there is no child for this action.
        MonteCarloSearchNode node;
        this.Children.TryGetValue(action, out node);

        double priority;
        if (node == null || node.Visits == 0)
        {
            // Previously unexplored action: fixed bias encourages trying it at least once.
            priority = this.UnexploredBias;
        }
        else
        {
            // UCB1-style score: exploitation (mean reward) + exploration bonus.
            priority = node.Mean + exploreBias * Math.Sqrt(explorationNumerator / node.Visits);
        }

        // The small random offset (0..0.001) breaks ties randomly between
        // actions whose priorities are effectively equal.
        if (priority > (bestPriority + Utils.RandomDouble(0, 0.001)))
        {
            bestAction = action;
            bestPriority = priority;
        }
    }

    return bestAction;
}
/// <summary>
/// Chooses which action to explore next from this node. Unvisited children
/// receive a fixed "unexplored" priority; visited children are ranked by mean
/// reward plus a UCB exploration term. A small random jitter randomizes
/// tie-breaking among near-equal priorities.
/// </summary>
/// <param name="agent">Agent providing the horizon, maximum reward, and valid action set.</param>
/// <returns>The best-priority action, or -1 when no valid actions exist.</returns>
public int SelectAction(Agent agent)
{
    Debug.Assert(agent.MaximumReward() != null, "SelectAction requires agent.MaximumReward() to be non-null");

    // Scale of the exploration bonus: horizon * best possible per-step reward.
    double exploreBias = (double)agent.Horizon * agent.MaximumReward().Value;
    double explorationNumerator = this.ExplorationConstant * Math.Log(this.Visits);

    int bestAction = -1;
    double bestPriority = double.NegativeInfinity;

    foreach (int action in agent.Environment.ValidActions)
    {
        // One-shot lookup; 'child' stays null when the action has no node yet.
        MonteCarloSearchNode child;
        this.Children.TryGetValue(action, out child);

        double priority;
        if (child == null || child.Visits == 0)
        {
            // No statistics yet: use the configured unexplored bias.
            priority = this.UnexploredBias;
        }
        else
        {
            // UCB1-style priority: mean value plus visit-count-scaled exploration term.
            priority = child.Mean + exploreBias * Math.Sqrt(explorationNumerator / child.Visits);
        }

        // Random epsilon (0..0.001) ensures ties are broken stochastically.
        if (priority > (bestPriority + Utils.RandomDouble(0, 0.001)))
        {
            bestAction = action;
            bestPriority = priority;
        }
    }

    return bestAction;
}