/// <summary>
/// Plays every matchup returned by WorldGenerator.AllMatchups() until a winner is reported,
/// with <paramref name="policy"/> choosing tactics for team 0 heroes and
/// <paramref name="adversary"/> choosing tactics for team 1 heroes.
/// </summary>
/// <param name="policy">Policy applied to heroes whose Team is 0.</param>
/// <param name="adversary">Policy applied to heroes whose Team is 1.</param>
/// <param name="storeRollout">
/// When true, every simulated tick (world plus the tactic picked per hero) and the final
/// world are kept on the returned rollouts; when false, Ticks and FinalWorld are null.
/// </param>
/// <returns>
/// One Rollout per matchup. WinRate is 1.0 if the world reports winner 0, 0.0 if it reports
/// winner 1, and 0.5 for any other reported value. The matchups are processed through PLINQ
/// without AsOrdered(), so the array order is not guaranteed to follow the enumeration order
/// of the matchups; use Rollout.Matchup to identify each entry.
/// </returns>
public static Rollout[] Winner(Policy policy, Policy adversary, bool storeRollout = false)
        {
            // Indexed by Unit.Team: slot 0 is the evaluated policy, slot 1 is its opponent.
            Policy[] policiesByTeam = { policy, adversary };

            return WorldGenerator.AllMatchups()
                .AsParallel()
                .Select(matchup =>
                {
                    World world = WorldGenerator.GenerateInitial(matchup.Team0, matchup.Team1);

                    // Only allocated when the caller asked for the full history.
                    List<RolloutTick> recordedTicks = null;
                    if (storeRollout)
                    {
                        recordedTicks = new List<RolloutTick>();
                    }

                    // The simulation always runs at least one tick before the winner is checked.
                    int? outcome;
                    do
                    {
                        Dictionary<int, Tactic> chosenTactics = null;
                        if (storeRollout)
                        {
                            chosenTactics = new Dictionary<int, Tactic>();
                        }

                        Dictionary<int, GameAction> pendingActions = new Dictionary<int, GameAction>();
                        Simulator.AddEnvironmentalActions(world, pendingActions);

                        foreach (Unit unit in world.Units)
                        {
                            if (unit.UnitType != UnitType.Hero)
                            {
                                continue;
                            }

                            Tactic choice = PolicyEvaluator.Evaluate(world, unit, policiesByTeam[unit.Team]);
                            pendingActions[unit.UnitId] = PolicyEvaluator.TacticToAction(world, unit, choice);

                            if (chosenTactics != null)
                            {
                                chosenTactics.Add(unit.UnitId, choice);
                            }
                        }

                        // The tick captures the world as it was *before* the actions are applied.
                        if (recordedTicks != null)
                        {
                            recordedTicks.Add(new RolloutTick
                            {
                                HeroTactics = chosenTactics,
                                World = world,
                            });
                        }

                        world = Simulator.Forward(world, pendingActions);
                        outcome = world.Winner();
                    }
                    while (!outcome.HasValue);

                    // Win rate is expressed from team 0's point of view.
                    double winRate;
                    switch (outcome.Value)
                    {
                        case 0:
                            winRate = 1.0;
                            break;
                        case 1:
                            winRate = 0.0;
                            break;
                        default:
                            winRate = 0.5;
                            break;
                    }

                    return new Rollout
                    {
                        Matchup = matchup,
                        WinRate = winRate,
                        Ticks = recordedTicks,
                        FinalWorld = storeRollout ? world : null,
                        FinalEvaluation = IntermediateEvaluator.Evaluate(world, 0),
                    };
                })
                .ToArray();
        }
// Example #2
// 0
        /// <summary>
        /// Scores the final world of <paramref name="rollout"/> once for team 0 and once for
        /// team 1 and reports which side is ahead.
        /// </summary>
        /// <param name="rollout">
        /// Rollout whose FinalWorld is evaluated.
        /// NOTE(review): FinalWorld is passed straight to the evaluator; presumably it must be
        /// non-null — confirm against whatever produces the rollout.
        /// </param>
        /// <returns>
        /// 1 when team 0's score is strictly higher, 0 when team 1's score is strictly higher,
        /// and 0.5 when neither score is strictly greater than the other.
        /// </returns>
        private double IntermediateWinner(Rollout rollout)
        {
            double scoreTeam0 = IntermediateEvaluator.Evaluate(rollout.FinalWorld, 0).Score;
            double scoreTeam1 = IntermediateEvaluator.Evaluate(rollout.FinalWorld, 1).Score;

            if (scoreTeam0 > scoreTeam1)
            {
                return 1.0;
            }

            if (scoreTeam0 < scoreTeam1)
            {
                return 0.0;
            }

            // Neither side is strictly ahead.
            return 0.5;
        }
// Example #3
// 0
        /// <summary>
        /// Simulates NumLearningTicks + TicksForward ticks starting from <paramref name="current"/>,
        /// recording the decisions of one hero and evaluating every world snapshot along the way.
        /// </summary>
        /// <param name="current">World the simulation starts from; it is also the first snapshot.</param>
        /// <param name="policies">Policies indexed by Unit.Team, used to pick each hero's tactic.</param>
        /// <param name="myTeam">Team index passed to the evaluator for every snapshot.</param>
        /// <param name="myHeroId">UnitId of the hero whose decisions are recorded as episodes.</param>
        /// <param name="initialTactic">
        /// When it has a value, this tactic is used for the hero <paramref name="myHeroId"/> on the
        /// first tick instead of consulting the policy; every other decision comes from the policy.
        /// </param>
        /// <returns>
        /// A PartialRollout holding the episodes recorded for the hero during the first
        /// NumLearningTicks ticks, one evaluation per snapshot (the starting world plus the world
        /// after each tick), and the result of DiscountedRewards over the evaluation scores using
        /// DiscountRate.
        /// </returns>
        private static PartialRollout GenerateRollout(
            World current,
            Policy[] policies,
            int myTeam,
            int myHeroId,
            Tactic? initialTactic)
        {
            List<World> history = new List<World>();
            history.Add(current);

            List<Episode> recorded = new List<Episode>();

            for (int step = 0; step < NumLearningTicks + TicksForward; step++)
            {
                Dictionary<int, GameAction> pendingActions = new Dictionary<int, GameAction>();
                Simulator.AddEnvironmentalActions(current, pendingActions);

                foreach (Unit unit in current.Units)
                {
                    if (unit.UnitType != UnitType.Hero)
                    {
                        continue;
                    }

                    bool isMyHero = unit.UnitId == myHeroId;

                    // Only the very first decision of the tracked hero may be forced by the caller.
                    Tactic choice = (step == 0 && isMyHero && initialTactic.HasValue)
                        ? initialTactic.Value
                        : PolicyEvaluator.Evaluate(current, unit, policies[unit.Team]);

                    // Episodes are collected only during the learning window, not the look-ahead ticks.
                    if (isMyHero && step < NumLearningTicks)
                    {
                        recorded.Add(new Episode
                        {
                            World = current,
                            Hero = unit,
                            Tactic = choice,
                            Weight = 1.0,
                        });
                    }

                    pendingActions[unit.UnitId] = PolicyEvaluator.TacticToAction(current, unit, choice);
                }

                current = Simulator.Forward(current, pendingActions);
                history.Add(current);
            }

            // Evaluate every snapshot, in simulation order, for the requested team.
            List<IntermediateEvaluator.Evaluation> evaluations =
                new List<IntermediateEvaluator.Evaluation>(history.Count);
            foreach (World snapshot in history)
            {
                evaluations.Add(IntermediateEvaluator.Evaluate(snapshot, myTeam));
            }

            return new PartialRollout
            {
                Episodes = recorded,
                Evaluations = evaluations,
                Score = DiscountedRewards(evaluations.Select(evaluation => evaluation.Score), DiscountRate),
            };
        }