/// <summary>
/// Chooses an action by sampling a freshly built search tree
/// <c>McSimulations</c> times and then comparing the children of its root.
/// </summary>
/// <returns>
/// The valid action whose root child scored highest, or -1 if no candidate
/// was selected (for example when no valid action has a child in the tree).
/// </returns>
public override int Search()
{
    // Undo record built from this agent before any sampling; it is handed to
    // model_revert after every simulation.
    CtwContextTreeUndo checkpoint = new CtwContextTreeUndo(this);
    MonteCarloSearchNode root = new MonteCarloSearchNode(MonteCarloSearchNode.DecisionNode);

    int run = 0;
    while (run < this.McSimulations)
    {
        root.Sample(this, this.Horizon);
        this.model_revert(checkpoint);
        run++;
    }

    int chosenAction = -1;
    double chosenScore = double.NegativeInfinity;

    foreach (int candidate in this.Environment.ValidActions)
    {
        // Only actions that have a child under the root can be scored.
        if (root.Children.ContainsKey(candidate))
        {
            double childMean = root.Children[candidate].Mean;

            // NOTE(review): the small random term presumably breaks ties
            // between actions with equal means - confirm against Utils.RandomDouble.
            double score = childMean + Utils.RandomDouble(0, 0.0001);

            if (score > chosenScore)
            {
                chosenScore = score;
                chosenAction = candidate;
            }
        }
    }

    return chosenAction;
}
/// <summary>
/// Undoes updates one at a time until <c>history_size()</c> is no longer
/// greater than <c>undoInstance.HistorySize</c>, then copies the recorded
/// <c>Age</c>, <c>TotalReward</c> and <c>LastUpdate</c> back onto this agent.
/// </summary>
/// <param name="undoInstance">Record holding the state to return to.</param>
public void model_revert(CtwContextTreeUndo undoInstance)
{
    while (this.history_size() > undoInstance.HistorySize)
    {
        bool perceptWasLast = this.LastUpdate == PerceptUpdate;

        if (!perceptWasLast)
        {
            // The most recent update was an action: it is undone through
            // revert_tree_history, after which a percept is the latest update.
            this.ContextTree.revert_tree_history(this.Environment.actionBits());
            this.LastUpdate = PerceptUpdate;
            continue;
        }

        // The most recent update was a percept: it is undone through
        // revert_tree, after which an action is the latest update.
        this.ContextTree.revert_tree(this.Environment.perceptBits());
        this.LastUpdate = ActionUpdate;
    }

    // Restore the scalar bookkeeping captured in the undo record.
    this.Age = undoInstance.Age;
    this.TotalReward = undoInstance.TotalReward;
    this.LastUpdate = undoInstance.LastUpdate;
}
/// <summary>
/// Runs <c>McSimulations</c> samples through a new search tree and returns
/// the valid action whose child of the root has the best score.
/// </summary>
/// <returns>
/// The selected action, or -1 if nothing was selected (for example when
/// none of the valid actions has a child under the root).
/// </returns>
public override int Search()
{
    // Built from this agent up front and passed to model_revert after each sample.
    CtwContextTreeUndo savedState = new CtwContextTreeUndo(this);
    MonteCarloSearchNode tree = new MonteCarloSearchNode(MonteCarloSearchNode.DecisionNode);

    for (int sample = 0; sample < this.McSimulations; sample++)
    {
        tree.Sample(this, this.Horizon);
        this.model_revert(savedState);
    }

    int winner = -1;
    double winnerScore = double.NegativeInfinity;

    foreach (int option in this.Environment.ValidActions)
    {
        bool explored = tree.Children.ContainsKey(option);
        if (explored)
        {
            // NOTE(review): the added Utils.RandomDouble(0, 0.0001) term looks
            // like tie-breaking noise - confirm its range with Utils.
            double noisyMean = tree.Children[option].Mean + Utils.RandomDouble(0, 0.0001);

            if (noisyMean > winnerScore)
            {
                winner = option;
                winnerScore = noisyMean;
            }
        }
    }

    return winner;
}
/// <summary>
/// Steps the agent backwards, one update per iteration, while
/// <c>history_size()</c> exceeds <c>undoInstance.HistorySize</c>, and then
/// restores <c>Age</c>, <c>TotalReward</c> and <c>LastUpdate</c> from the record.
/// </summary>
/// <param name="undoInstance">Record describing the state to restore.</param>
public void model_revert(CtwContextTreeUndo undoInstance)
{
    while (true)
    {
        bool moreToUndo = this.history_size() > undoInstance.HistorySize;
        if (!moreToUndo)
        {
            break;
        }

        bool lastWasPercept = this.LastUpdate == PerceptUpdate;
        if (lastWasPercept)
        {
            // Undo a percept via revert_tree; the update before it is treated as an action.
            this.ContextTree.revert_tree(this.Environment.perceptBits());
            this.LastUpdate = ActionUpdate;
        }
        else
        {
            // Undo an action via revert_tree_history; the update before it is treated as a percept.
            this.ContextTree.revert_tree_history(this.Environment.actionBits());
            this.LastUpdate = PerceptUpdate;
        }
    }

    // Copy the recorded bookkeeping values back onto the agent.
    this.Age = undoInstance.Age;
    this.TotalReward = undoInstance.TotalReward;
    this.LastUpdate = undoInstance.LastUpdate;
}