Exemplo n.º 1
0
        /// <summary>
        /// Picks the next task to play during a simulation, dispatching on the
        /// name of the requested simulation policy.
        /// </summary>
        /// <param name="simulationPolicy">Policy name; "RandomPolicy" and "GreedyPolicy" are recognised.</param>
        /// <param name="poGame">Game state handed to the chosen policy helper.</param>
        /// <param name="Rnd">Random source forwarded to the chosen policy helper.</param>
        /// <param name="greedyAgent">Agent forwarded to the greedy policy helper.</param>
        /// <param name="CHILDREN_CONSIDERED_SIMULATING">Tuning value forwarded unchanged to the greedy policy helper.</param>
        /// <returns>
        /// The task produced by <c>randomTask</c> or <c>greedyTask</c>, or <c>null</c>
        /// when <paramref name="simulationPolicy"/> matches neither known name.
        /// </returns>
        public static PlayerTask selectSimulationPolicy(string simulationPolicy, POGame poGame, Random Rnd, ParametricGreedyAgent greedyAgent,
                                                        double CHILDREN_CONSIDERED_SIMULATING)
        {
            if (simulationPolicy == "RandomPolicy")
            {
                return randomTask(poGame, Rnd);
            }

            if (simulationPolicy == "GreedyPolicy")
            {
                return greedyTask(poGame, greedyAgent, Rnd, CHILDREN_CONSIDERED_SIMULATING);
            }

            // Unknown (or null) policy name: callers receive null.
            return null;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Computes a UCB1-style selection value for <paramref name="node"/>, extended
        /// with an extra term based on the greedy agent's score for the node's task.
        /// </summary>
        /// <param name="node">Node whose <c>task</c>, <c>totalValue</c> and <c>timesVisited</c> are read.</param>
        /// <param name="iterations">Iteration count used inside the logarithm of the exploration term.</param>
        /// <param name="EXPLORE_CONSTANT">Multiplier applied to the exploration term.</param>
        /// <param name="poGame">Game state the node's task is simulated from; it is not reassigned here.</param>
        /// <param name="SCORE_IMPORTANCE">Multiplier applied to the per-visit greedy score term.</param>
        /// <param name="greedyAgent">Agent whose <c>scoreTask</c> rates the simulated state.</param>
        /// <returns>
        /// <see cref="Double.MaxValue"/> when the node has not been visited; otherwise the
        /// sum of the average value, the exploration term and the weighted score term.
        /// </returns>
        public static double ucb1Heuristic(Node node, int iterations, double EXPLORE_CONSTANT, ref POGame.POGame poGame, double SCORE_IMPORTANCE, ParametricGreedyAgent greedyAgent)
        {
            if (node.timesVisited > 0)
            {
                // Simulate only this node's task and look up the resulting state.
                List <PlayerTask> singleTask = new List <PlayerTask> { node.task };
                POGame.POGame simulatedState = poGame.Simulate(singleTask)[node.task];

                double greedyScore = greedyAgent.scoreTask(poGame, simulatedState);

                double averageValue    = node.totalValue / (double)node.timesVisited;
                double explorationTerm = EXPLORE_CONSTANT * Math.Sqrt(Math.Log(iterations) / node.timesVisited);
                double scoreTerm       = SCORE_IMPORTANCE * (greedyScore / (double)node.timesVisited);

                return averageValue + explorationTerm + scoreTerm;
            }

            // Unvisited nodes get the largest possible value.
            return Double.MaxValue;
        }