// Plays PolicyProvider.Policy against SubmittedPolicyProvider.Policy1 via
// RolloutPerformer.Winner and requires the mean WinRate over all returned
// rollouts to be strictly greater than 0.5.
public void TestAgainstPolicy1()
{
    Rollout[] rollouts = RolloutPerformer.Winner(
        PolicyProvider.Policy,
        SubmittedPolicyProvider.Policy1);

    // Project each rollout to its win rate first, then average the values.
    double meanWinRate = rollouts
        .Select(rollout => rollout.WinRate)
        .Average();

    Assert.IsTrue(meanWinRate > 0.5);
}
// Plays SubmittedPolicyProvider.Policy1 against itself and requires the mean
// WinRate over all returned rollouts to equal 0.5 within a tolerance of 0.05.
public void TestEqualMatch()
{
    // The rollouts are sorted by Team0[0], Team0[1], Team1[0], Team1[1] of their
    // matchup. The ordering is not needed for the assertion itself; presumably it
    // exists so the optional debug dump below prints in a predictable order.
    Rollout[] ordered = (
        from rollout in RolloutPerformer.Winner(
            SubmittedPolicyProvider.Policy1,
            SubmittedPolicyProvider.Policy1)
        orderby rollout.Matchup.Team0[0],
                rollout.Matchup.Team0[1],
                rollout.Matchup.Team1[0],
                rollout.Matchup.Team1[1]
        select rollout).ToArray();

    // Debug aid (disabled): print one tab-separated row per matchup.
    //
    // foreach (var result in ordered) {
    //     Debug.WriteLine(
    //         "{0}\t{1}\t{2}\t{3}\t{4}",
    //         result.Matchup.Team0[0],
    //         result.Matchup.Team0[1],
    //         result.Matchup.Team1[0],
    //         result.Matchup.Team1[1],
    //         result.WinRate);
    // }

    double meanWinRate = ordered
        .Select(rollout => rollout.WinRate)
        .Average();

    Assert.AreEqual(0.5, meanWinRate, 0.05);
}
/// <summary>
/// Runs <c>RolloutPerformer.Rollout</c> for <paramref name="policy"/> against
/// <paramref name="incumbent"/> and wraps the outcome in a <c>PolicyCandidate</c>.
/// </summary>
/// <param name="policy">The policy being evaluated; stored on the returned candidate.</param>
/// <param name="incumbent">The policy that <paramref name="policy"/> is rolled out against.</param>
/// <returns>
/// A candidate carrying the policy, its rollouts, and a win rate equal to the
/// average of <c>IntermediateWinner</c> over those rollouts.
/// </returns>
private PolicyCandidate EvaluatePolicy(Policy policy, Policy incumbent)
{
    Rollout[] played = RolloutPerformer.Rollout(policy, incumbent);

    PolicyCandidate candidate = new PolicyCandidate
    {
        Policy = policy,
        Rollouts = played,
        // Score every rollout individually, then take the mean of the scores.
        WinRate = played
            .Select(rollout => IntermediateWinner(rollout))
            .Average(),
    };

    return candidate;
}
/// <summary>
/// Training entry point. Prints environment and <c>World</c> settings, reports the
/// starting policy's win rate against every submitted policy, then loops forever:
/// strengthen the policy, re-evaluate it against the submitted policies,
/// re-optimize its hero matchups from those results, and (when an output path was
/// given) write the serialized policy to disk.
/// </summary>
/// <param name="args">
/// Command-line arguments. <c>args[0]</c>, when present, is the file path the
/// serialized policy is written to after every iteration. When it is absent the
/// policy is trained but never saved.
/// </param>
public static void Main(string[] args)
{
#if DEBUGCUDA
    Telogis.RouteCloud.GPUManagement.KernelManager.SynchroniseAfterEveryKernel = true;
#endif

    // The output path is optional. Reading args[0] unconditionally would throw
    // IndexOutOfRangeException when no argument is supplied and would make the
    // null check before the write below unreachable.
    string outputPath = (args != null && args.Length > 0) ? args[0] : null;

    Console.WriteLine("Environment.ProcessorCount: " + Environment.ProcessorCount);
    Console.WriteLine("64-bit process? " + Environment.Is64BitProcess);
    Console.WriteLine("World.NumHeroesPerTeam = " + World.NumHeroesPerTeam);
    Console.WriteLine("World.SpawnMinions = " + World.SpawnMinions);
    Console.WriteLine("World.EnableBuySell = " + World.EnableBuySell);
    Console.WriteLine("World.EnableSpells = " + World.EnableSpells);

    using (Trainer trainer = new Trainer())
    {
        Policy policy = PolicyProvider.Policy;
        Policy[] incumbents = SubmittedPolicyProvider.Submissions.ToArray();

        // Baseline: mean win rate of the starting policy against each submission.
        double[] incumbentWinRates = incumbents
            .Select(oldPolicy => RolloutPerformer.Winner(policy, oldPolicy).Average(x => x.WinRate))
            .ToArray();
        Console.WriteLine("Initial win rates: " + string.Join(" ", incumbentWinRates));

        int improvements = 0;

        // No termination condition: the loop runs until the process is stopped
        // externally (or an exception escapes).
        for (int iteration = 0; ; ++iteration)
        {
            // The policy is strengthened against its own current version.
            Policy incumbent = policy;
            Console.WriteLine("Iteration #{0}", iteration);

            TrainingResult trainingResult = trainer.StrengthenPolicy(policy, incumbent);
            policy = trainingResult.Policy;

            // Keep the raw rollouts: they feed both the win-rate report and the
            // matchup optimizer below.
            List<Rollout[]> incumbentResults = incumbents
                .Select(oldPolicy => RolloutPerformer.Winner(policy, oldPolicy))
                .ToList();
            incumbentWinRates = incumbentResults
                .Select(submissionResult => submissionResult.Average(x => x.WinRate))
                .ToArray();
            Console.WriteLine("Against incumbents: " + string.Join(" ", incumbentWinRates));

            policy.HeroMatchups = MatchupOptimizer.Optimize(incumbentResults.SelectMany(x => x));

            if (trainingResult.IsImprovement)
            {
                ++improvements;
                Console.WriteLine("Improvement #{0}", improvements);
            }

            // Written every iteration, whether or not it was an improvement.
            if (outputPath != null)
            {
                File.WriteAllText(outputPath, PolicySerializer.Serialize(policy));
            }

            Console.WriteLine();
        }
    }
}