Ejemplo n.º 1
0
    // Constructs the agent for the given player number, forwarding it to the base class.
    // Sets up a default (non-learning) QLearner, the initial RunAway strategy, and the
    // level 1 / level 2 searchers that are built from that strategy.
    public AIAgent(int player) : base(player)
    {
        // Record our own player number; the opponent is 2 when we are 1, otherwise 1
        playerNum = player;
        if (playerNum == 1)
        {
            opponentNum = 2;
        }
        else
        {
            opponentNum = 1;
        }

        // Default QLearner with learning switched off. Callers usually replace
        // this value when they want more control.
        QLearner   = new QLearner();
        IsLearning = false;

        // Initial strategy - must exist before the searchers below, which take
        // their heuristic, cost, and goal functions from it
        strategy = Strategy.StrategyWithType(playerNum, StrategyType.RunAway);

        // Level 1: discrete adversarial search
        level1Searcher = new DiscreteAdversarialSearch(
            playerNum,
            strategy.Level1Heuristic,
            getFillerAction,
            getNewPathIndex,
            Level1StepSize,
            4);

        // Level 2: A* search
        level2Searcher = new AStar(
            Level2MaxNodesInPrioQueue,
            Level2MaxExpansions,
            strategy.Level2CostFunction,
            strategy.Level2GoalFunction,
            strategy.Level2HeuristicFunction);

        // Timers and per-frame bookkeeping
        decisionTimer          = 0;
        strategyTimer          = MaxStrategyTime;
        fillerAction           = WorldAction.NoAction;
        isFirstTime            = true;
        calculatePathNextFrame = false;
    }
Ejemplo n.º 2
0
    // The center of the AI - get an action.
    //
    // When decisionTimer has run out, the level 1 searcher computes a fresh action and a
    // filler action. On every other call the stored filler action is replayed and the call
    // is used for level 2 work instead: either running A* (if the previous pass scheduled
    // it) or checking whether the current strategy has to be replaced.
    //
    // world:   the current world state
    // returns: a single-valued list holding the chosen action
    public override List <WorldAction> GetAction(World world)
    {
        // The player this agent controls; needed to advance the path position below
        World.Player player = playerNum == 1 ? world.Player1 : world.Player2;

        // The immediate action comes from level 1
        WorldAction bestAction;

        if (decisionTimer <= 0)
        {
            // Timer is up: calculate a new level 1 action
            bestAction = computeLevel1Decision(world);
        }
        else
        {
            // Otherwise do the filler action and spend this call on level 2 planning
            bestAction = fillerAction;

            if (calculatePathNextFrame)
            {
                // This frame has been designated to path calculation
                computeScheduledPath();
            }
            else
            {
                refreshStrategyIfStale(world);
            }

            // Debug rendering of danger zone
#if DANGER_RENDER
            dangerZone.Render(ResourceScript);
            dangerZone.RenderPlayerBeliefs(ResourceScript);
#endif
        }

        // Advance path position
        strategy.NextPathIndex = getNewPathIndex(player, strategy.NextPathIndex);

        decisionTimer--;
        strategyTimer--;

#if PATH_RENDER
        if (strategy.SearchPath != null)
        {
            strategy.SearchPath.Render(ResourceScript, strategy.NextPathIndex);
        }
#endif

        // Return a single-valued list with the best action
        return new List <WorldAction> { bestAction };
    }

    // Runs the level 1 search, stores the filler action it produced for the following
    // calls, restarts the decision timer, and returns the action to perform now.
    private WorldAction computeLevel1Decision(World world)
    {
        ActionWithFiller decision = level1Searcher.ComputeBestAction(world, fillerAction, strategy.NextPathIndex);
        WorldAction      action   = decision.Action;

        fillerAction  = decision.FillerAction;
        decisionTimer = Level1StepSize;

        return action;
    }

    // Runs A* on the stored block world and installs the result as the strategy's path.
    private void computeScheduledPath()
    {
        // Run A*
        Path path = level2Searcher.ComputeBestPath(blockWorld);

        // Must be set before using the level 1 heuristic with a path
        strategy.SearchPath    = path;
        strategy.NextPathIndex = 0;
        calculatePathNextFrame = false;

        // If no path is able to be calculated, then check again sooner than normal
        if (path == null)
        {
            strategyTimer = NoPathFoundRefreshTimer;
        }
    }

    // Computes a new strategy if the old one is no longer valid: updates Q-values when
    // learning, asks the QLearner for a strategy, rebuilds both searchers, the block world
    // and the danger zone, and schedules the path calculation for the next frame.
    private void refreshStrategyIfStale(World world)
    {
        // Check distance to path: done once the next index reaches the last path state
        bool doneWithPath = strategy.SearchPath != null &&
                            strategy.NextPathIndex >= strategy.SearchPath.States.Count - 1;

        SimplifiedWorld currentState = new SimplifiedWorld(world, playerNum);

        // The order of these checks is significant: isFirstTime must short-circuit before
        // previousState is dereferenced, and the helper calls keep their original sequence.
        bool strategyIsStale = isFirstTime ||
                               !previousState.IsEquivalent(currentState) ||
                               doneWithPath ||
                               dangerZoneShifted(world) ||
                               playerLeftPath(world, strategy.SearchPath) ||
                               strategyTimer <= 0 ||
                               world.IsTerminal();
        if (!strategyIsStale)
        {
            return;
        }

        // NOTE(review): because world.IsTerminal() is part of the check above, a terminal
        // world appears to re-enter this rebuild on every non-decision call - confirm
        // that this is intended.

        // On the very first pass there is no earlier state, so use the current one
        if (isFirstTime)
        {
            previousState = currentState;
        }
        isFirstTime = false;

        // Get reward and update QValues if learning
        if (IsLearning)
        {
            float reward = SimplifiedWorld.Reward(previousState, strategy.Type, currentState);
            QLearner.UpdateQValue(previousState, strategy.Type, currentState, reward);

            // Don't learn once world is terminal
            if (world.IsTerminal())
            {
                IsLearning = false;
            }
        }

        // Get a new strategy
        StrategyType newStrategy = QLearner.GetStrategy(currentState);

#if STRATEGY_PRINT
        Debug.Log("Player " + playerNum.ToString() + " selects strategy: " + newStrategy.ToString());
#endif
        strategy = Strategy.StrategyWithType(playerNum, newStrategy);

        // Both searchers take their functions from the strategy, so rebuild them
        level1Searcher = new DiscreteAdversarialSearch(playerNum,
                                                       strategy.Level1Heuristic,
                                                       getFillerAction,
                                                       getNewPathIndex,
                                                       Level1StepSize,
                                                       4);
        level2Searcher = new AStar(Level2MaxNodesInPrioQueue, Level2MaxExpansions, strategy.Level2CostFunction,
                                   strategy.Level2GoalFunction, strategy.Level2HeuristicFunction);

        // Create block world and danger zone
        blockWorld = new BlockWorld(playerNum, world);

        // Recalc danger zone
        dangerZone = new DangerZone(opponentNum, world, blockWorld);

        // Must be set before using the level 2 reward, cost, and goal functions
        strategy.Level2DangerZone = dangerZone;

        // Calculate the path in the next frame, unless the world is terminal
        // (skipping it speeds up framerate after player has died)
        calculatePathNextFrame = !world.IsTerminal();

        // Reset previous state
        previousState = currentState;
        strategyTimer = MaxStrategyTime;
    }