MaximumReward() public method

public MaximumReward ( ) : int?
return int?
Esempio n. 1
0
        /// <summary>
        /// Ranks every valid action of the agent's environment from this node and
        /// returns the one with the highest priority.
        /// </summary>
        /// <remarks>
        /// An action whose child node is missing or has zero visits gets
        /// <c>UnexploredBias</c> as its priority. Any other action gets the child's
        /// <c>Mean</c> plus an exploration bonus:
        /// <c>Horizon * MaximumReward * sqrt(ExplorationConstant * ln(Visits) / child.Visits)</c>
        /// (a UCB-style formula).
        /// </remarks>
        /// <param name="agent">
        /// Agent supplying the horizon, the maximum reward and the environment whose
        /// valid actions are ranked. Its maximum reward is expected to be non-null.
        /// </param>
        /// <returns>
        /// The selected action, or -1 when no action was selected (for example when the
        /// environment reports no valid actions).
        /// </returns>
        public int SelectAction(Agent agent)
        {
            // Read the maximum reward once and reuse it for both the sanity check and the bias.
            int? maximumReward = agent.MaximumReward();
            Debug.Assert(maximumReward != null, "this is weird place, - in selection action");

            double exploreBias          = (double)agent.Horizon * maximumReward.Value;
            // Loop-invariant part of the exploration term; only consumed for children
            // that have already been visited.
            double explorationNumerator = this.ExplorationConstant * Math.Log(this.Visits);
            int    bestAction           = -1;
            double bestPriority         = double.NegativeInfinity;

            foreach (int action in agent.Environment.ValidActions)
            {
                // Single lookup instead of ContainsKey + indexer; node stays null when
                // the action has no child yet.
                MonteCarloSearchNode node;
                if (!this.Children.TryGetValue(action, out node))
                {
                    node = null;
                }

                double priority;
                if (node == null || node.Visits == 0)
                {
                    // previously unexplored node
                    priority = this.UnexploredBias;
                }
                else
                {
                    priority = node.Mean + exploreBias * Math.Sqrt(explorationNumerator / node.Visits);
                }

                // The current best is nudged up by a small random amount before the
                // comparison, so near-equal priorities do not always resolve to the same
                // action (presumably RandomDouble draws from [0, 0.001) - confirm in Utils).
                if (priority > (bestPriority + Utils.RandomDouble(0, 0.001)))
                {
                    bestAction   = action;
                    bestPriority = priority;
                }
            }

            return bestAction;
        }
Esempio n. 2
0
        /// <summary>
        /// Chooses which action to take from this search node by scoring each valid
        /// action of the agent's environment and keeping the highest-scoring one.
        /// </summary>
        /// <remarks>
        /// Actions without a child node, or whose child has never been visited, are
        /// scored with <c>UnexploredBias</c>. Visited children are scored as
        /// <c>Mean + Horizon * MaximumReward * sqrt(ExplorationConstant * ln(Visits) / child.Visits)</c>
        /// (a UCB-style formula).
        /// </remarks>
        /// <param name="agent">
        /// Agent providing the horizon, the maximum reward (expected to be non-null)
        /// and the environment that lists the valid actions.
        /// </param>
        /// <returns>
        /// The chosen action, or -1 if no action was chosen (for example when there
        /// are no valid actions).
        /// </returns>
        public int SelectAction(Agent agent)
        {
            // Fetch the maximum reward a single time; it feeds both the assertion and the bias.
            int? maximumReward = agent.MaximumReward();
            Debug.Assert(maximumReward != null, "this is weird place, - in selection action");

            double exploreBias = (double)agent.Horizon * maximumReward.Value;
            // Does not depend on the action, so it is computed once outside the loop.
            double explorationNumerator = this.ExplorationConstant * Math.Log(this.Visits);
            int bestAction = -1;
            double bestPriority = double.NegativeInfinity;

            foreach (int action in agent.Environment.ValidActions) {
                // One dictionary lookup rather than ContainsKey followed by the indexer;
                // a missing child is represented by null.
                MonteCarloSearchNode node;
                if (!this.Children.TryGetValue(action, out node)) {
                    node = null;
                }

                double priority;
                if (node == null || node.Visits == 0) {
                    // previously unexplored node
                    priority = this.UnexploredBias;
                }
                else {
                    priority = node.Mean + exploreBias * Math.Sqrt(explorationNumerator / node.Visits);
                }

                // A small random offset is added to the current best before comparing,
                // which randomises the outcome between near-equal priorities
                // (presumably RandomDouble returns a value in [0, 0.001) - confirm in Utils).
                if (priority > (bestPriority + Utils.RandomDouble(0, 0.001))) {
                    bestAction = action;
                    bestPriority = priority;
                }
            }

            return bestAction;
        }