public override int Search()
{
    // Snapshot the model state so it can be restored after each simulation.
    CtwContextTreeUndo undoInstance = new CtwContextTreeUndo(this);
    MonteCarloSearchNode searchTree = new MonteCarloSearchNode(MonteCarloSearchNode.DecisionNode);

    for (int i = 0; i < this.McSimulations; i++)
    {
        searchTree.Sample(this, this.Horizon);
        this.model_revert(undoInstance);
    }

    // Choose the action whose chance child has the highest sampled mean reward.
    // The tiny random perturbation breaks ties so equal-valued actions are not
    // always resolved in favour of the lowest-numbered one.
    int bestAction = -1;
    double bestMean = double.NegativeInfinity;
    foreach (int action in this.Environment.ValidActions)
    {
        if (!searchTree.Children.ContainsKey(action))
        {
            continue;
        }
        double mean = searchTree.Children[action].Mean + Utils.RandomDouble(0, 0.0001);
        if (mean > bestMean)
        {
            bestMean = mean;
            bestAction = action;
        }
    }
    return bestAction;
}
public double Sample(Agent agent, int horizon)
{
    double reward = 0.0;

    if (horizon == 0)
    {
        // Reached the search horizon: no further reward can be accumulated.
        return reward;
    }
    else if (this.Type == ChanceNode)
    {
        // Chance node: sample a percept from the model, then recurse into the
        // matching decision child. The horizon is consumed here, one step per percept.
        var percept = agent.GeneratePerceptAndUpdate();
        int observation = percept.Item1;
        int observedReward = percept.Item2;

        if (!this.Children.ContainsKey(observation))
        {
            // New observation: add it as a decision-node child.
            this.Children[observation] = new MonteCarloSearchNode(DecisionNode);
        }
        MonteCarloSearchNode observationChild = this.Children[observation];
        reward = observedReward + observationChild.Sample(agent, horizon - 1);
    }
    else if (this.Visits == 0)
    {
        // Unvisited decision node: estimate the return with a random playout.
        reward = agent.Playout(horizon);
    }
    else
    {
        // Previously visited decision node: select an action, then recurse into
        // its chance child. The horizon is deliberately not decremented here;
        // the chance node decrements it once the percept has been generated.
        int action = this.SelectAction(agent);
        agent.ModelUpdateAction(action);

        if (!this.Children.ContainsKey(action))
        {
            // First time this action is tried: add a chance-node child.
            this.Children[action] = new MonteCarloSearchNode(ChanceNode);
        }
        MonteCarloSearchNode actionChild = this.Children[action];
        reward = actionChild.Sample(agent, horizon);
    }

    // Incrementally update the running mean reward and the visit count.
    double visits = this.Visits;
    this.Mean = (reward + visits * this.Mean) / (visits + 1.0);
    this.Visits += 1;

    return reward;
}
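// Sample() defers the exploration/exploitation decision to SelectAction(),
// which is not shown in this section. The following is a minimal sketch of
// what that step typically looks like in rho-UCT / MC-AIXI-style searches,
// assuming a standard UCB1 rule. The names ExplorationConstant,
// agent.MaximumReward(), agent.MinimumReward(), agent.Horizon and
// agent.Environment.ValidActions are assumptions, not confirmed by the
// code above.
private int SelectAction(Agent agent)
{
    double explorationConstant = 2.0;  // assumed exploration weight
    int bestAction = -1;
    double bestValue = double.NegativeInfinity;

    foreach (int action in agent.Environment.ValidActions)
    {
        if (!this.Children.ContainsKey(action) || this.Children[action].Visits == 0)
        {
            // Untried actions are explored before any UCB comparison is made.
            return action;
        }

        MonteCarloSearchNode child = this.Children[action];

        // Exploitation term: the child's mean reward, normalised by the
        // largest return reachable within the horizon (assumed helpers).
        double rewardRange = agent.MaximumReward() - agent.MinimumReward();
        double exploitation = child.Mean / (agent.Horizon * rewardRange);

        // Exploration bonus: grows for rarely visited children.
        double exploration = explorationConstant
            * Math.Sqrt(Math.Log(this.Visits) / child.Visits);

        double value = exploitation + exploration;
        if (value > bestValue)
        {
            bestValue = value;
            bestAction = action;
        }
    }
    return bestAction;
}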
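// At unvisited decision nodes, Sample() falls back to agent.Playout(horizon),
// which is also not shown in this section. A minimal sketch of such a rollout,
// assuming a GenerateRandomAction() helper on the agent (an assumption; only
// ModelUpdateAction and GeneratePerceptAndUpdate appear in the code above):
public double Playout(int horizon)
{
    double totalReward = 0.0;
    for (int i = 0; i < horizon; i++)
    {
        // Act uniformly at random, then sample a percept from the model,
        // accumulating the reward component (Item2, as in Sample()).
        int action = this.GenerateRandomAction();  // assumed helper
        this.ModelUpdateAction(action);
        var percept = this.GeneratePerceptAndUpdate();
        totalReward += percept.Item2;
    }
    return totalReward;
}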