/// <summary>
/// Runs Monte Carlo tree search: performs <c>McSimulations</c> sampled
/// rollouts from a fresh decision node, reverting the agent's model after
/// each one, then returns the explored action with the highest mean value.
/// </summary>
/// <returns>
/// The best explored action, or -1 if no valid action was ever expanded
/// in the search tree.
/// </returns>
override public int Search()
{
    // Snapshot the model so every simulation can be rolled back to the
    // same starting state before the next one begins.
    CtwContextTreeUndo undoInstance = new CtwContextTreeUndo(this);
    MonteCarloSearchNode searchTree = new MonteCarloSearchNode(MonteCarloSearchNode.DecisionNode);
    for (int i = 0; i < this.McSimulations; i++)
    {
        searchTree.Sample(this, this.Horizon);
        this.model_revert(undoInstance);
    }

    int bestAction = -1;
    double bestMean = double.NegativeInfinity;
    foreach (int action in this.Environment.ValidActions)
    {
        // TryGetValue avoids the ContainsKey + indexer double lookup.
        MonteCarloSearchNode child;
        if (!searchTree.Children.TryGetValue(action, out child))
        {
            continue;
        }
        // Tiny random jitter breaks ties between actions with equal means.
        double mean = child.Mean + Utils.RandomDouble(0, 0.0001);
        if (mean > bestMean)
        {
            bestMean = mean;
            bestAction = action;
        }
    }
    return bestAction;
}
/// <summary>
/// Chooses the next action during tree search using a UCB-style rule:
/// unexplored actions get a fixed optimistic bias; explored actions are
/// scored by their mean value plus an exploration bonus that shrinks with
/// the child's visit count.
/// </summary>
/// <param name="agent">Agent supplying the horizon, reward bound, and valid actions.</param>
/// <returns>
/// The action with the highest priority, or -1 if the agent has no valid
/// actions. Ties are broken by a small random epsilon.
/// </returns>
public int SelectAction(Agent agent)
{
    Debug.Assert(agent.MaximumReward() != null, "this is weird place, - in selection action");

    // Scale of the exploration bonus: horizon * max achievable reward.
    double exploreBias = (double)agent.Horizon * agent.MaximumReward().Value;
    // Numerator of the UCB term is invariant across actions, so hoist it.
    double explorationNumerator = this.ExplorationConstant * Math.Log(this.Visits);

    int bestAction = -1;
    double bestPriority = double.NegativeInfinity;
    foreach (int action in agent.Environment.ValidActions)
    {
        // TryGetValue avoids the ContainsKey + indexer double lookup;
        // node stays null for actions without a child yet.
        MonteCarloSearchNode node;
        this.Children.TryGetValue(action, out node);

        double priority;
        if (node == null || node.Visits == 0)
        {
            // Previously unexplored action: fixed optimistic bias.
            priority = this.UnexploredBias;
        }
        else
        {
            // UCB1-style score: exploitation (mean) + exploration bonus.
            priority = node.Mean + exploreBias * Math.Sqrt(explorationNumerator / node.Visits);
        }

        // Random epsilon breaks ties between equal priorities.
        if (priority > (bestPriority + Utils.RandomDouble(0, 0.001)))
        {
            bestAction = action;
            bestPriority = priority;
        }
    }
    return bestAction;
}
/// <summary>
/// Estimates the value of a playout by drawing a single uniform random
/// reward between the environment's reward bounds and scaling it by the
/// remaining horizon.
/// </summary>
/// <param name="horizon">Number of remaining time steps in the rollout.</param>
/// <returns>The random reward estimate multiplied by the horizon.</returns>
override public double Playout(int horizon)
{
    // NOTE(review): the (int) casts truncate fractional reward bounds
    // before the random draw — confirm this truncation is intentional.
    int lowerBound = (int)this.Environment.minimum_reward();
    int upperBound = (int)this.Environment.maximum_reward();
    return Utils.RandomDouble(lowerBound, upperBound) * horizon;
}