// An abstract command list addresses a commander's planes purely by position ("first plane
// does this, second plane does that") without any regard for what each plane actually is.
// The Q-learning algorithm learns on that representation, with the understanding that what
// the "first" and "second" planes really are is stored in the game state.

/// <summary>
/// Converts a command list that is indexed by specific aircraft into an abstract command list
/// that is indexed by each aircraft's position in the commander's <c>unitsCommanded</c> list.
/// </summary>
/// <param name="commands">Concrete command list; it is looked up once per commanded unit.</param>
/// <param name="commander">Commander whose <c>unitsCommanded</c> ordering defines the abstract indices.</param>
/// <returns>A new abstract command list holding, per unit position, the <c>id</c> of that unit's command.</returns>
CommandListAbstract CommandListConcreteToAbstract(CommandList commands, Commander commander)
{
    var result = new CommandListAbstract();

    int slot = 0;
    while (slot < commander.unitsCommanded.Count)
    {
        // Position in unitsCommanded becomes the abstract index for this unit's command id.
        var unit = commander.unitsCommanded[slot];
        result[slot] = commands[unit].id;
        slot++;
    }

    return result;
}
/// <summary>
/// Converts an abstract command list (entries addressed by position in the commander's
/// <c>unitsCommanded</c> list) into a command list bound to the specific units.
/// </summary>
/// <param name="commands">Abstract commands; entry <c>i</c> is cast to <c>short</c> and used to index <c>Maneuver.BasicManeuvers</c>.</param>
/// <param name="commander">Commander whose <c>unitsCommanded[i]</c> is paired with entry <c>i</c>.</param>
/// <returns>A new command list containing one (unit, maneuver) entry per abstract command.</returns>
CommandList CommandListAbstractToConcrete(CommandListAbstract commands, Commander commander)
{
    var result = new CommandList();

    int slot = 0;
    while (slot < commands.Count)
    {
        // Resolve the maneuver first, then attach it to the unit at the same position.
        Maneuver selected = Maneuver.BasicManeuvers[(short)commands[slot]];
        result.Add(commander.unitsCommanded[slot], selected);
        slot++;
    }

    return result;
}
/// <summary>
/// Chooses an action for the commander from the policy entry of the current game state,
/// assigns it to <c>commander.commands</c> and submits it.
/// </summary>
/// <remarks>
/// Records the chosen index and state in <c>lastActionIndex</c> / <c>lastState</c>, and
/// increments <c>roundCountThisEpisode</c> while <c>isTraining</c> is set. Logs and returns
/// without submitting anything when there is no policy or no action for the current state.
/// </remarks>
/// <param name="commander">Commander that receives the concrete commands and submits them.</param>
/// <param name="randomness">
/// Exploration threshold: a value drawn from <c>Random.Range(0.0f, 1.0f)</c> that is less than
/// or equal to this selects a uniformly random action instead of a ranked one.
/// </param>
/// <param name="difficultyLevel">
/// Difficulty from 0 to 1: 1 means always choose the best move, 0 means always choose the
/// worst move. Values outside that range are clamped to the nearest valid action.
/// </param>
void SelectMove(ref Commander commander, float randomness, float difficultyLevel)
{
    if (policy == null)
    {
        Debug.Log("No Policy!");
        return;
    }

    // Read the entry once; an absent or empty entry leaves nothing to choose from.
    List<Tuple<CommandListAbstract, float>> potentialActions = policy[gameState];
    if (potentialActions == null || potentialActions.Count == 0)
    {
        Debug.Log("Policy does not have any actions for state: " + gameState);
        return;
    }

    int chosenActionIndex;
    float randomFloat = UnityEngine.Random.Range(0.0f, 1.0f);
    if (randomFloat <= randomness)
    {
        // Choose a random available action (the int overload excludes the upper bound).
        Debug.Log("random action");
        chosenActionIndex = UnityEngine.Random.Range(0, potentialActions.Count);
    }
    else
    {
        // May choose the second, third, fourth best action etc. based on the difficulty factor.
        // NOTE(review): this presumes the list is ordered best-first — confirm where the
        // policy is updated.
        int rankedIndex = Mathf.FloorToInt(potentialActions.Count * (1.0f - difficultyLevel));

        // difficultyLevel == 0 would otherwise yield Count (one past the end), and values
        // above 1 a negative index; keep the result inside the list.
        chosenActionIndex = Mathf.Clamp(rankedIndex, 0, potentialActions.Count - 1);
    }

    CommandListAbstract actionToTake = potentialActions[chosenActionIndex].Item1;
    commander.commands = CommandListAbstractToConcrete(actionToTake, commander);
    Debug.Log("Commanded " + commander.unitsCommanded[0].callsign + "To do Maneuver: " + actionToTake[0].ToString());

    // Remember what was done and from where, for use after this call.
    lastActionIndex = chosenActionIndex;
    lastState = gameState;

    if (isTraining)
    {
        roundCountThisEpisode++;
    }

    commander.SubmitCommands();
}
/// <summary>
/// Builds the policy table by enumerating every possible action to take from every possible
/// state, where one action is a set of commands given to units. Each action starts with the
/// same initial Q-value. FOR NOW IT ONLY COMMANDS ONE UNIT!
/// </summary>
/// <remarks>
/// Uses the first entry of <c>commandersToControl</c> as the commander. When that commander
/// owns more than one unit an error is logged once and the policy is still built, with one
/// single-unit action per (unit, maneuver) pair.
/// </remarks>
void InitializePolicy()
{
    Commander c = commandersToControl[0];
    Debug.Log("Creating policy...");
    float initialQValue = 0.8f;

    // Joint actions over several units are not implemented. Report it once up front rather
    // than once per state and unit inside the loops below.
    if (c.unitsOwned.Count > 1)
    {
        Debug.LogError("Whoops, did not implement Policy creation for commanding multiple units yet...");
    }

    policy = new List<Tuple<CommandListAbstract, float>>[stateSpace.states.Count];

    for (int s = 0; s < stateSpace.states.Count; s++)
    {
        var commandLists = new List<Tuple<CommandListAbstract, float>>();

        for (int unitIdx = 0; unitIdx < c.unitsOwned.Count; unitIdx++)
        {
            // NOTE(review): the aircraft is looked up by state only, not by unitIdx, so every
            // unit sees the same aircraft — consistent with the single-unit limitation.
            var aircraft = stateSpace.GetAircraftAtState(s);
            List<Maneuver> maneuvers = gameController.GetAvailableManeuvers(aircraft);

            // One single-unit action per maneuver available to this unit.
            for (int maneuverIdx = 0; maneuverIdx < maneuvers.Count; maneuverIdx++)
            {
                CommandListAbstract action = new CommandListAbstract();
                action.Add(unitIdx, maneuvers[maneuverIdx].id);
                commandLists.Add(new Tuple<CommandListAbstract, float>(action, initialQValue));
            }
        }

        policy[s] = commandLists;
    }

    Debug.Log("Policy creation finished!");
}