Example #1
0
    // A commander with 2 planes with an abstract command list just says "First plane does this, second plane does that" without any regard for what the plane actually is.
    // The Q-learning algorithm learns in this way with the understanding that what each of those "first" and second "plane" (s) are is stored as the game state


    /// <summary>
    /// Converts a command list keyed by specific Aircraft into an abstract command list
    /// that refers to units only by their position in this commander's unitsCommanded list.
    /// </summary>
    /// <param name="commands">Concrete commands, keyed by Aircraft.</param>
    /// <param name="commander">Commander whose unitsCommanded ordering defines the abstract indices.</param>
    /// <returns>Abstract command list mapping unit index to the commanded maneuver's id.</returns>
    CommandListAbstract CommandListConcreteToAbstract(CommandList commands, Commander commander)
    {
        CommandListAbstract result = new CommandListAbstract();

        int unitCount = commander.unitsCommanded.Count;
        for (int unitIdx = 0; unitIdx < unitCount; unitIdx++)
        {
            var unit = commander.unitsCommanded[unitIdx];
            result[unitIdx] = commands[unit].id;
        }

        return result;
    }
Example #2
0
    /// <summary>
    /// Converts an abstract command list (units referenced by index into this commander's
    /// plane list) back into a concrete command list keyed by specific Aircraft.
    /// </summary>
    /// <param name="commands">Abstract commands: unit index -> maneuver id.</param>
    /// <param name="commander">Commander whose unitsCommanded list resolves indices to Aircraft.</param>
    /// <returns>Concrete command list mapping each Aircraft to its Maneuver.</returns>
    CommandList CommandListAbstractToConcrete(CommandListAbstract commands, Commander commander)
    {
        CommandList result = new CommandList();

        int commandCount = commands.Count;
        for (int unitIdx = 0; unitIdx < commandCount; unitIdx++)
        {
            // The abstract entry stores a maneuver id; look the Maneuver back up by id.
            short maneuverId = (short)commands[unitIdx];
            result.Add(commander.unitsCommanded[unitIdx], Maneuver.BasicManeuvers[maneuverId]);
        }

        return result;
    }
Example #3
0
    /// <summary>
    /// Chooses and submits a move for the given commander from the learned policy.
    /// </summary>
    /// <param name="commander">Commander to command; its command list is overwritten and submitted.</param>
    /// <param name="randomness">Probability in [0,1] of picking a uniformly random action (exploration).</param>
    /// <param name="difficultyLevel">Difficulty from 0 to 1; 1 meaning always choose the best move, 0 meaning always choose the worst move.</param>
    void SelectMove(ref Commander commander, float randomness, float difficultyLevel)
    {
        if (policy == null)
        {
            Debug.Log("No Policy!");
            return;
        }

        List <Tuple <CommandListAbstract, float> > potentialActions = policy[gameState];

        // BUG FIX: the old code checked for an empty action list twice; the second
        // check did not return, so an empty list would still be indexed below.
        if (potentialActions.Count == 0)
        {
            Debug.Log("Policy does not have any actions for state: " + gameState);
            return;
        }

        int   chosenActionIndex;
        float randomFloat = UnityEngine.Random.Range(0.0f, 1.0f);

        if (randomFloat <= randomness)
        {
            // Explore: choose a uniformly random available action.
            // (int overload of Range excludes the max, so this is in-bounds.)
            Debug.Log("random action");
            chosenActionIndex = UnityEngine.Random.Range(0, potentialActions.Count);
        }
        else
        {
            // Exploit: lower difficulty deliberately picks a worse-ranked action.
            // BUG FIX: clamp to the last valid index — at difficultyLevel == 0 the raw
            // value equals potentialActions.Count, which is out of range.
            chosenActionIndex = Mathf.Min(
                Mathf.FloorToInt(potentialActions.Count * (1.0f - difficultyLevel)),
                potentialActions.Count - 1);
        }

        CommandListAbstract actionToTake = potentialActions[chosenActionIndex].Item1;

        commander.commands = CommandListAbstractToConcrete(actionToTake, commander);

        // BUG FIX: the log message was missing a separator ("...To do Maneuver").
        Debug.Log("Commanded " + commander.unitsCommanded[0].callsign + " to do Maneuver: " + actionToTake[0].ToString());

        // Remember the (state, action) pair so the next Q-update can credit it.
        lastActionIndex = chosenActionIndex;
        lastState       = gameState;

        if (isTraining)
        {
            roundCountThisEpisode++;
        }

        commander.SubmitCommands();
    }
Example #4
0
    /// <summary>
    /// Builds the initial Q-learning policy: for every possible state, enumerate every
    /// possible action (a set of commands given to units) and seed it with an optimistic
    /// initial Q-value. FOR NOW IT ONLY COMMANDS ONE UNIT!!!!
    /// </summary>
    void InitializePolicy()
    {
        Commander c = commandersToControl[0];

        Debug.Log("Creating policy...");

        // Optimistic initial Q-value encourages early exploration of untried actions.
        float initialQValue = 0.8f;

        // IMPROVEMENT: warn about the unimplemented multi-unit case once up front,
        // instead of once per unit per state inside the enumeration loops.
        if (c.unitsOwned.Count > 1)
        {
            Debug.LogError("Whoops, did not implement Policy creation for commanding multiple units yet...");
        }

        policy = new List <Tuple <CommandListAbstract, float> > [stateSpace.states.Count];

        for (int s = 0; s < stateSpace.states.Count; s++)
        {
            List <Tuple <CommandListAbstract, float> > commandLists = new List <Tuple <CommandListAbstract, float> >();

            // IMPROVEMENT (hoisted loop invariant): the aircraft — and therefore its
            // available maneuvers — depends only on the state s, not on unitIdx, so
            // compute it once per state instead of once per unit.
            var aircraft           = stateSpace.GetAircraftAtState(s);
            var potentialManeuvers = gameController.GetAvailableManeuvers(aircraft);

            // One candidate action per available maneuver, for each (currently single) unit.
            for (int unitIdx = 0; unitIdx < c.unitsOwned.Count; unitIdx++)
            {
                for (int maneuverIdx = 0; maneuverIdx < potentialManeuvers.Count; maneuverIdx++)
                {
                    CommandListAbstract action = new CommandListAbstract();
                    action.Add(unitIdx, potentialManeuvers[maneuverIdx].id);
                    commandLists.Add(new Tuple <CommandListAbstract, float>(action, initialQValue));
                }
            }
            policy[s] = commandLists;
        }

        Debug.Log("Policy creation finished!");
    }