/// <summary>
/// Plays <paramref name="policy"/> against <paramref name="adversary"/> on every matchup produced by
/// <c>WorldGenerator.AllMatchups()</c> and returns one <c>Rollout</c> per matchup.
/// </summary>
/// <param name="policy">Policy used for heroes whose <c>Team</c> is 0.</param>
/// <param name="adversary">Policy used for heroes whose <c>Team</c> is 1.</param>
/// <param name="storeRollout">
/// When true, every simulated tick (world plus the tactic chosen by each hero) and the final world are
/// kept in the result; when false, <c>Ticks</c> and <c>FinalWorld</c> are left null.
/// </param>
/// <returns>
/// One rollout per matchup. <c>WinRate</c> is 1.0 when <c>World.Winner()</c> reports 0, 0.0 when it
/// reports 1, and 0.5 for any other non-null value. Matchups run through PLINQ without
/// <c>AsOrdered</c>, so the order of the array is not guaranteed to follow the matchup order.
/// </returns>
public static Rollout[] Winner(Policy policy, Policy adversary, bool storeRollout = false)
        {
            // Indexed by Unit.Team: slot 0 holds the policy under test, slot 1 the adversary.
            Policy[] policiesByTeam = { policy, adversary };

            return WorldGenerator.AllMatchups()
                .AsParallel()
                .Select(matchup =>
                {
                    World world = WorldGenerator.GenerateInitial(matchup.Team0, matchup.Team1);

                    // Debug.WriteLine("Simulating " + matchup.ToString());
                    List<RolloutTick> history = null;
                    if (storeRollout)
                    {
                        history = new List<RolloutTick>();
                    }

                    // The initial world is never asked for a winner: at least one tick is always simulated.
                    int? victor;
                    do
                    {
                        Dictionary<int, Tactic> chosenTactics = null;
                        if (storeRollout)
                        {
                            chosenTactics = new Dictionary<int, Tactic>();
                        }

                        Dictionary<int, GameAction> pending = new Dictionary<int, GameAction>();
                        Simulator.AddEnvironmentalActions(world, pending);

                        foreach (Unit unit in world.Units)
                        {
                            if (unit.UnitType != UnitType.Hero)
                            {
                                continue;
                            }

                            Tactic tactic = PolicyEvaluator.Evaluate(world, unit, policiesByTeam[unit.Team]);
                            pending[unit.UnitId] = PolicyEvaluator.TacticToAction(world, unit, tactic);

                            if (chosenTactics != null)
                            {
                                chosenTactics.Add(unit.UnitId, tactic);
                            }
                        }

                        // Record the world as it was *before* this tick's actions are applied.
                        if (history != null)
                        {
                            history.Add(new RolloutTick
                            {
                                HeroTactics = chosenTactics,
                                World = world,
                            });
                        }

                        world = Simulator.Forward(world, pending);
                        victor = world.Winner();
                    }
                    while (victor == null);

                    // Expressed from the point of view of team 0; anything other than 0 or 1 counts as a draw.
                    double winRate = victor == 0 ? 1.0 : (victor == 1 ? 0.0 : 0.5);

                    return new Rollout
                    {
                        Matchup = matchup,
                        WinRate = winRate,
                        Ticks = history,
                        FinalWorld = storeRollout ? world : null,
                        FinalEvaluation = IntermediateEvaluator.Evaluate(world, 0),
                    };
                })
                .ToArray();
        }
Esempio n. 2
0
        /// <summary>
        /// Computes the weighted fraction of episodes in <paramref name="trainingSet"/> for which
        /// <paramref name="policy"/> selects the same tactic as the one recorded in the episode.
        /// </summary>
        /// <param name="trainingSet">Episodes to score; each contributes its <c>Weight</c>.</param>
        /// <param name="policy">Policy evaluated against each episode's world and hero.</param>
        /// <returns>
        /// Sum of the weights of correctly predicted episodes divided by the sum of all weights,
        /// or 0.0 when the total weight is zero (for example, an empty training set).
        /// </returns>
        private double CalculatePolicyAccuracy(List<Episode> trainingSet, Policy policy)
        {
            double total = trainingSet.Sum(ep => ep.Weight);

            // Exact comparison is intentional: it guards the division below. Without it an empty
            // (or zero-weight) training set would compute 0.0 / 0.0 and return NaN.
            if (total == 0.0)
            {
                return 0.0;
            }

            // Policy evaluation is the expensive part, so it is spread across cores with PLINQ.
            double correct = trainingSet
                .AsParallel()
                .Sum(episode => PolicyEvaluator.Evaluate(episode.World, episode.Hero, policy) == episode.Tactic
                    ? episode.Weight
                    : 0);

            return correct / total;
        }
Esempio n. 3
0
        /// <summary>
        /// Simulates <c>NumLearningTicks + TicksForward</c> ticks starting from <paramref name="current"/>,
        /// recording the decisions of one hero and evaluating every world visited along the way.
        /// </summary>
        /// <param name="current">World the simulation starts from.</param>
        /// <param name="policies">Policies indexed by <c>Unit.Team</c>.</param>
        /// <param name="myTeam">Team passed to <c>IntermediateEvaluator.Evaluate</c> for each snapshot.</param>
        /// <param name="myHeroId">Unit id of the hero whose decisions are recorded as episodes.</param>
        /// <param name="initialTactic">
        /// When it has a value, the tactic forced on hero <paramref name="myHeroId"/> at tick 0 instead of
        /// the one its policy would choose; later ticks always follow the policy.
        /// </param>
        /// <returns>
        /// A <c>PartialRollout</c> holding the recorded episodes (first <c>NumLearningTicks</c> ticks only,
        /// each with weight 1.0), one evaluation per snapshot including the starting world, and the
        /// discounted score computed from those evaluations.
        /// </returns>
        private static PartialRollout GenerateRollout(
            World current,
            Policy[] policies,
            int myTeam,
            int myHeroId,
            Tactic? initialTactic)
        {
            List<Episode> recorded = new List<Episode>();
            List<World> history = new List<World>();
            history.Add(current);

            for (int tick = 0; tick < NumLearningTicks + TicksForward; ++tick)
            {
                Dictionary<int, GameAction> pending = new Dictionary<int, GameAction>();
                Simulator.AddEnvironmentalActions(current, pending);

                foreach (Unit unit in current.Units)
                {
                    if (unit.UnitType != UnitType.Hero)
                    {
                        continue;
                    }

                    bool isMyHero = unit.UnitId == myHeroId;

                    // Only the very first decision of the tracked hero can be overridden by the caller.
                    bool forceInitial = isMyHero && tick == 0 && initialTactic.HasValue;
                    Tactic chosen = forceInitial
                        ? initialTactic.Value
                        : PolicyEvaluator.Evaluate(current, unit, policies[unit.Team]);

                    // Ticks past the learning window are still simulated but produce no episodes.
                    if (isMyHero && tick < NumLearningTicks)
                    {
                        Episode episode = new Episode
                        {
                            World = current,
                            Hero = unit,
                            Tactic = chosen,
                            Weight = 1.0,
                        };
                        recorded.Add(episode);
                    }

                    pending[unit.UnitId] = PolicyEvaluator.TacticToAction(current, unit, chosen);
                }

                current = Simulator.Forward(current, pending);
                history.Add(current);
            }

            // One evaluation per snapshot, in simulation order, from myTeam's side.
            List<IntermediateEvaluator.Evaluation> evaluations =
                new List<IntermediateEvaluator.Evaluation>(history.Count);
            foreach (World snapshot in history)
            {
                evaluations.Add(IntermediateEvaluator.Evaluate(snapshot, myTeam));
            }

            PartialRollout result = new PartialRollout
            {
                Episodes = recorded,
                Evaluations = evaluations,
                Score = DiscountedRewards(evaluations.Select(ev => ev.Score), DiscountRate),
            };
            return result;
        }
        /// <summary>
        /// Advances <paramref name="current"/> by one simulation step, letting every hero act according
        /// to its team's policy.
        /// </summary>
        /// <param name="current">World to advance.</param>
        /// <param name="teamPolicies">Policies indexed by <c>Unit.Team</c>.</param>
        /// <returns>The world returned by <c>Simulator.Forward</c> for the collected actions.</returns>
        public static World Forward(World current, Policy[] teamPolicies)
        {
            Dictionary<int, GameAction> pending = new Dictionary<int, GameAction>();
            Simulator.AddEnvironmentalActions(current, pending);

            foreach (Unit unit in current.Units)
            {
                // Only heroes are policy-driven; other unit types get no action from this method.
                if (unit.UnitType != UnitType.Hero)
                {
                    continue;
                }

                Policy teamPolicy = teamPolicies[unit.Team];
                Tactic chosen = PolicyEvaluator.Evaluate(current, unit, teamPolicy);
                pending[unit.UnitId] = PolicyEvaluator.TacticToAction(current, unit, chosen);
                // Debug.WriteLine(string.Format("{0}> {1} team {2}: {3}: {4}", current.Tick, unit.HeroType, unit.Team, chosen, CodinGame.Program.FormatHeroAction(pending[unit.UnitId])));
            }

            return Simulator.Forward(current, pending);
        }
Esempio n. 5
0
 /// <summary>
 /// Creates the evaluator, keeping <paramref name="defaultEvaluator"/> in the
 /// <c>_defaultEvaluator</c> field.
 /// </summary>
 /// <param name="defaultEvaluator">Evaluator to retain; stored as-is, without a null check.</param>
 public ForbidUnauthenticatedPolicyEvaluator(PolicyEvaluator defaultEvaluator)
     => _defaultEvaluator = defaultEvaluator;