/// <summary>
/// Picks the next task to play during a simulation according to the named policy.
/// </summary>
/// <param name="simulationPolicy">Policy name; "RandomPolicy" and "GreedyPolicy" are recognised.</param>
/// <param name="poGame">Game state forwarded to the selected policy helper.</param>
/// <param name="Rnd">Random source forwarded to the selected policy helper.</param>
/// <param name="greedyAgent">Agent forwarded to <c>greedyTask</c>; unused by the random policy.</param>
/// <param name="CHILDREN_CONSIDERED_SIMULATING">Value forwarded to <c>greedyTask</c>; unused by the random policy.</param>
/// <returns>
/// The task produced by <c>randomTask</c> or <c>greedyTask</c>, or <c>null</c> when
/// <paramref name="simulationPolicy"/> matches neither recognised name.
/// </returns>
public static PlayerTask selectSimulationPolicy(string simulationPolicy, POGame poGame, Random Rnd, ParametricGreedyAgent greedyAgent, double CHILDREN_CONSIDERED_SIMULATING)
{
    if (simulationPolicy == "RandomPolicy")
    {
        return randomTask(poGame, Rnd);
    }

    if (simulationPolicy == "GreedyPolicy")
    {
        return greedyTask(poGame, greedyAgent, Rnd, CHILDREN_CONSIDERED_SIMULATING);
    }

    // Unrecognised (or null) policy name: callers receive null.
    return null;
}
/// <summary>
/// Computes the selection value of a tree node as the sum of three terms:
/// the node's mean value, an exploration bonus, and a greedy-score bonus.
/// </summary>
/// <param name="node">Node being evaluated; its <c>task</c>, <c>totalValue</c> and <c>timesVisited</c> are read.</param>
/// <param name="iterations">Count fed to <c>Math.Log</c> in the exploration term.</param>
/// <param name="EXPLORE_CONSTANT">Multiplier applied to the exploration term.</param>
/// <param name="poGame">Game state from which <c>node.task</c> is simulated; it is not reassigned here.</param>
/// <param name="SCORE_IMPORTANCE">Multiplier applied to the greedy-score term.</param>
/// <param name="greedyAgent">Agent whose <c>scoreTask</c> rates the state reached after simulating the node's task.</param>
/// <returns>
/// <c>Double.MaxValue</c> when the node has not been visited; otherwise
/// <c>totalValue / timesVisited + EXPLORE_CONSTANT * sqrt(ln(iterations) / timesVisited)
/// + SCORE_IMPORTANCE * (score / timesVisited)</c>.
/// </returns>
public static double ucb1Heuristic(Node node, int iterations, double EXPLORE_CONSTANT, ref POGame.POGame poGame, double SCORE_IMPORTANCE, ParametricGreedyAgent greedyAgent)
{
    if (node.timesVisited > 0)
    {
        // Simulate only this node's task and pick its resulting state out of the result.
        List<PlayerTask> singleTask = new List<PlayerTask> { node.task };
        POGame.POGame resultingState = poGame.Simulate(singleTask)[node.task];
        double greedyScore = greedyAgent.scoreTask(poGame, resultingState);

        double meanValue = node.totalValue / (double)node.timesVisited;
        double explorationBonus = EXPLORE_CONSTANT * Math.Sqrt(Math.Log(iterations) / node.timesVisited);
        double scoreBonus = SCORE_IMPORTANCE * (greedyScore / (double)node.timesVisited);

        // Summed left to right, in the same order as the three terms are listed above.
        return meanValue + explorationBonus + scoreBonus;
    }

    // Unvisited nodes get the largest possible value.
    return Double.MaxValue;
}