/// <summary>
/// Plays every generated matchup to completion, pitting <paramref name="policy"/>
/// (team 0) against <paramref name="adversary"/> (team 1), and returns one
/// <see cref="Rollout"/> per matchup. Matchups are simulated in parallel.
/// </summary>
/// <param name="policy">Policy controlling team 0's heroes.</param>
/// <param name="adversary">Policy controlling team 1's heroes.</param>
/// <param name="storeRollout">When true, per-tick tactics/worlds and the final world are recorded.</param>
/// <returns>One rollout per matchup; WinRate is 1.0 / 0.0 / 0.5 for team-0 win / loss / draw.</returns>
public static Rollout[] Winner(Policy policy, Policy adversary, bool storeRollout = false)
{
    // Index by team id: team 0 plays `policy`, team 1 plays `adversary`.
    Policy[] teamPolicies = new[] { policy, adversary };
    return WorldGenerator.AllMatchups().AsParallel().Select(matchup =>
    {
        World current = WorldGenerator.GenerateInitial(matchup.Team0, matchup.Team1);
        // Tick history is only kept when the caller asked for a full rollout.
        List<RolloutTick> ticks = storeRollout ? new List<RolloutTick>() : null;
        int? winner = null;
        while (winner == null)
        {
            Dictionary<int, Tactic> heroTactics = storeRollout ? new Dictionary<int, Tactic>() : null;
            Dictionary<int, GameAction> actions = new Dictionary<int, GameAction>();
            Simulator.AddEnvironmentalActions(current, actions);
            foreach (Unit unit in current.Units)
            {
                if (unit.UnitType != UnitType.Hero)
                {
                    continue;
                }
                Tactic tactic = PolicyEvaluator.Evaluate(current, unit, teamPolicies[unit.Team]);
                actions[unit.UnitId] = PolicyEvaluator.TacticToAction(current, unit, tactic);
                heroTactics?.Add(unit.UnitId, tactic);
            }
            ticks?.Add(new RolloutTick
            {
                HeroTactics = heroTactics,
                World = current,
            });
            current = Simulator.Forward(current, actions);
            winner = current.Winner();
        }
        // winner: 0 => team 0 won, 1 => team 1 won, any other value => draw.
        double winRate = winner == 0 ? 1.0 : (winner == 1 ? 0.0 : 0.5);
        return new Rollout
        {
            Matchup = matchup,
            WinRate = winRate,
            Ticks = ticks,
            FinalWorld = storeRollout ? current : null,
            // Evaluation is always taken from team 0's perspective.
            FinalEvaluation = IntermediateEvaluator.Evaluate(current, 0),
        };
    }).ToArray();
}
/// <summary>
/// Computes the weighted fraction of training episodes on which
/// <paramref name="policy"/> reproduces the recorded tactic.
/// </summary>
/// <param name="trainingSet">Episodes with per-episode weights.</param>
/// <param name="policy">Policy under evaluation.</param>
/// <returns>Weighted accuracy in [0, 1]; 0.0 when the set is empty or all weights are zero.</returns>
private double CalculatePolicyAccuracy(List<Episode> trainingSet, Policy policy)
{
    double total = trainingSet.Sum(ep => ep.Weight);
    // Guard the division: an empty training set (or all-zero weights) would
    // otherwise produce 0.0 / 0.0 == NaN and silently poison downstream metrics.
    if (total == 0)
    {
        return 0.0;
    }
    double correct = trainingSet.AsParallel()
        .Sum(episode => PolicyEvaluator.Evaluate(episode.World, episode.Hero, policy) == episode.Tactic
            ? episode.Weight
            : 0);
    return correct / total;
}
/// <summary>
/// Simulates NumLearningTicks + TicksForward ticks forward from <paramref name="current"/>,
/// recording one <see cref="Episode"/> per learning tick for the hero identified by
/// <paramref name="myHeroId"/>, and evaluating every world snapshot from
/// <paramref name="myTeam"/>'s perspective.
/// </summary>
/// <param name="current">Starting world state.</param>
/// <param name="policies">Per-team policies, indexed by team id.</param>
/// <param name="myTeam">Team whose perspective is used for evaluation.</param>
/// <param name="myHeroId">Unit id of the hero whose episodes are collected.</param>
/// <param name="initialTactic">If set, overrides the hero's policy choice on tick 0 only.</param>
private static PartialRollout GenerateRollout(
    World current, Policy[] policies, int myTeam, int myHeroId, Tactic? initialTactic)
{
    // Snapshot list includes the starting world, so it holds one more entry than ticks run.
    List<World> snapshots = new List<World> { current };
    List<Episode> episodes = new List<Episode>();
    for (int tick = 0; tick < NumLearningTicks + TicksForward; ++tick)
    {
        Dictionary<int, GameAction> actions = new Dictionary<int, GameAction>();
        Simulator.AddEnvironmentalActions(current, actions);
        foreach (Unit unit in current.Units)
        {
            if (unit.UnitType != UnitType.Hero)
            {
                continue;
            }
            // On the first tick the tracked hero may be forced onto a specific tactic;
            // everyone else (and every later tick) follows their team's policy.
            bool forceInitial = tick == 0 && unit.UnitId == myHeroId && initialTactic.HasValue;
            Tactic tactic = forceInitial
                ? initialTactic.Value
                : PolicyEvaluator.Evaluate(current, unit, policies[unit.Team]);
            // Only the learning window contributes training episodes.
            if (unit.UnitId == myHeroId && tick < NumLearningTicks)
            {
                episodes.Add(new Episode
                {
                    World = current,
                    Hero = unit,
                    Tactic = tactic,
                    Weight = 1.0,
                });
            }
            actions[unit.UnitId] = PolicyEvaluator.TacticToAction(current, unit, tactic);
        }
        current = Simulator.Forward(current, actions);
        snapshots.Add(current);
    }
    List<IntermediateEvaluator.Evaluation> evaluations =
        snapshots.Select(world => IntermediateEvaluator.Evaluate(world, myTeam)).ToList();
    return new PartialRollout
    {
        Episodes = episodes,
        Evaluations = evaluations,
        Score = DiscountedRewards(evaluations.Select(ev => ev.Score), DiscountRate),
    };
}
/// <summary>
/// Advances the world by one tick: gathers environmental actions, asks each team's
/// policy for every hero's tactic, converts tactics to actions, and steps the simulator.
/// </summary>
/// <param name="current">World state to advance.</param>
/// <param name="teamPolicies">Per-team policies, indexed by team id.</param>
/// <returns>The world after one simulated tick.</returns>
public static World Forward(World current, Policy[] teamPolicies)
{
    Dictionary<int, GameAction> actions = new Dictionary<int, GameAction>();
    Simulator.AddEnvironmentalActions(current, actions);
    foreach (Unit unit in current.Units)
    {
        if (unit.UnitType != UnitType.Hero)
        {
            continue;
        }
        Tactic tactic = PolicyEvaluator.Evaluate(current, unit, teamPolicies[unit.Team]);
        actions[unit.UnitId] = PolicyEvaluator.TacticToAction(current, unit, tactic);
    }
    return Simulator.Forward(current, actions);
}