예제 #1
0
        /// <summary>
        /// Picks an action by sampling a freshly built Monte Carlo search tree.
        /// </summary>
        /// <remarks>
        /// An undo record is created from this agent before sampling. The tree is then sampled
        /// <c>McSimulations</c> times with horizon <c>Horizon</c>, and <c>model_revert</c> is called
        /// with that undo record after every sample.
        /// </remarks>
        /// <returns>
        /// The action from <c>Environment.ValidActions</c> whose child node has the largest
        /// mean (after a small random offset), or -1 when none of the valid actions has a
        /// child in the search tree (for example when <c>McSimulations</c> is not positive).
        /// </returns>
        public override int Search()
        {
            CtwContextTreeUndo undoInstance = new CtwContextTreeUndo(this);

            MonteCarloSearchNode searchTree = new MonteCarloSearchNode(MonteCarloSearchNode.DecisionNode);

            for (int i = 0; i < this.McSimulations; i++)
            {
                searchTree.Sample(this, this.Horizon);
                this.model_revert(undoInstance);
            }

            int    bestAction = -1;
            double bestMean   = double.NegativeInfinity;

            foreach (int action in this.Environment.ValidActions)
            {
                // Single lookup: fetch the child and test for its presence in one call
                // instead of ContainsKey followed by the indexer.
                if (!searchTree.Children.TryGetValue(action, out var child))
                {
                    continue;
                }

                // The random offset presumably breaks ties between equal means so the first
                // enumerated action is not always preferred -- confirm against Utils.RandomDouble.
                double mean = child.Mean + Utils.RandomDouble(0, 0.0001);
                if (mean > bestMean)
                {
                    bestMean   = mean;
                    bestAction = action;
                }
            }

            return bestAction;
        }
예제 #2
0
        /// <summary>
        /// Rolls this agent back to the state captured in <paramref name="undoInstance"/>.
        /// </summary>
        /// <remarks>
        /// Updates are undone one at a time until <c>history_size()</c> is no longer greater than
        /// <c>undoInstance.HistorySize</c>; afterwards <c>Age</c>, <c>TotalReward</c> and
        /// <c>LastUpdate</c> are overwritten with the values stored in the undo record.
        /// </remarks>
        /// <param name="undoInstance">Undo record supplying the target history size and the scalar fields to restore.</param>
        public void model_revert(CtwContextTreeUndo undoInstance)
        {
            while (this.history_size() > undoInstance.HistorySize)
            {
                bool lastWasPercept = (this.LastUpdate == PerceptUpdate);

                if (!lastWasPercept)
                {
                    // Last update was not a percept: undo it via revert_tree_history using the
                    // action bit count, then mark a percept as the update now on top.
                    // NOTE(review): presumably actions only extend the history -- confirm in ContextTree.
                    this.ContextTree.revert_tree_history(this.Environment.actionBits());
                    this.LastUpdate = PerceptUpdate;
                    continue;
                }

                // Last update was a percept: undo it via revert_tree using the percept bit count,
                // then mark an action as the update now on top.
                this.ContextTree.revert_tree(this.Environment.perceptBits());
                this.LastUpdate = ActionUpdate;
            }

            // Restore the scalar bookkeeping from the undo record. LastUpdate is overwritten
            // here regardless of what the loop above left in it.
            this.Age         = undoInstance.Age;
            this.TotalReward = undoInstance.TotalReward;
            this.LastUpdate  = undoInstance.LastUpdate;
        }
예제 #3
0
        /// <summary>
        /// Runs Monte Carlo simulations on a new search tree and returns the best-looking action.
        /// </summary>
        /// <remarks>
        /// Each of the <c>McSimulations</c> iterations samples the tree with horizon
        /// <c>Horizon</c> and then calls <c>model_revert</c> with an undo record that was
        /// created from this agent before the first sample.
        /// </remarks>
        /// <returns>
        /// The entry of <c>Environment.ValidActions</c> whose child node has the highest mean
        /// once a small random offset is added, or -1 if no valid action has a child node in
        /// the search tree.
        /// </returns>
        public override int Search()
        {
            CtwContextTreeUndo undoInstance = new CtwContextTreeUndo(this);

            MonteCarloSearchNode searchTree = new MonteCarloSearchNode(MonteCarloSearchNode.DecisionNode);

            for (int i = 0; i < this.McSimulations; i++) {
                searchTree.Sample(this, this.Horizon);
                this.model_revert(undoInstance);
            }

            int bestAction = -1;
            double bestMean = double.NegativeInfinity;

            foreach (int action in this.Environment.ValidActions) {
                // One TryGetValue call replaces the ContainsKey + indexer pair, so the
                // dictionary is searched once per action rather than twice.
                if (searchTree.Children.TryGetValue(action, out var child)) {
                    // Random offset presumably exists to break ties between equal means;
                    // verify the range semantics of Utils.RandomDouble.
                    double mean = child.Mean + Utils.RandomDouble(0, 0.0001);
                    if (mean > bestMean) {
                        bestMean = mean;
                        bestAction = action;
                    }
                }
            }

            return bestAction;
        }
예제 #4
0
        /// <summary>
        /// Undoes updates until the agent matches the supplied undo record.
        /// </summary>
        /// <remarks>
        /// While <c>history_size()</c> exceeds <c>undoInstance.HistorySize</c>, one update is
        /// reverted per iteration and <c>LastUpdate</c> is switched to the other update kind.
        /// Finally <c>Age</c>, <c>TotalReward</c> and <c>LastUpdate</c> are copied from the record.
        /// </remarks>
        /// <param name="undoInstance">Record holding the history size to return to and the field values to restore.</param>
        public void model_revert(CtwContextTreeUndo undoInstance)
        {
            while (true) {
                bool moreToUndo = this.history_size() > undoInstance.HistorySize;
                if (!moreToUndo) {
                    break;
                }

                if (this.LastUpdate == PerceptUpdate) {
                    // A percept is on top: revert it through revert_tree with the percept bit count.
                    int perceptBitCount = this.Environment.perceptBits();
                    this.ContextTree.revert_tree(perceptBitCount);

                    this.LastUpdate = ActionUpdate;
                }
                else {
                    // Otherwise revert through revert_tree_history with the action bit count.
                    // NOTE(review): why actions use the history-only revert is not visible here -- confirm.
                    int actionBitCount = this.Environment.actionBits();
                    this.ContextTree.revert_tree_history(actionBitCount);

                    this.LastUpdate = PerceptUpdate;
                }
            }

            // Copy the remaining scalar state back from the undo record; this replaces
            // whatever LastUpdate value the loop finished with.
            this.Age = undoInstance.Age;
            this.TotalReward = undoInstance.TotalReward;
            this.LastUpdate = undoInstance.LastUpdate;
        }