// Hard-coded fallback recommendation for states that have no entry in the dictionary.
// The decision depends only on who is armed and on whether the player is standing
// inside the reach of the enemy's current weapon.
static StrategyType strategyRecommendationFromState(SimplifiedWorld state)
{
    // A side counts as unarmed when it has zero ammo or no weapon at all
    bool enemyUnarmed = state.EnemyAmmoAmount == 0 || state.EnemyWeapon == WeaponType.None;
    bool playerUnarmed = state.AmmoAmount == 0 || state.Weapon == WeaponType.None;

    // An unarmed enemy is no threat: arm up first if needed, otherwise go on the offensive
    if (enemyUnarmed)
    {
        return playerUnarmed ? StrategyType.GetAmmo : StrategyType.Attack;
    }

    // Rockets threaten when vertically near (positive Y closeness is player below);
    // bombs and lightning threaten when horizontally near
    bool verticallyNear = state.YDistanceToEnemy == YCloseness.PosNear
                          || state.YDistanceToEnemy == YCloseness.NegNear;
    bool horizontallyNear = state.XDistanceToEnemy == XCloseness.Near;

    bool exposedToEnemyWeapon =
        (state.EnemyWeapon == WeaponType.Rockets && verticallyNear)
        || (state.EnemyWeapon == WeaponType.Bombs && horizontallyNear)
        || (state.EnemyWeapon == WeaponType.Lightning && horizontallyNear);

    // Armed enemy with the player in range: retreat regardless of the player's own ammo
    if (exposedToEnemyWeapon)
    {
        return StrategyType.RunAway;
    }

    // Player is probably safe from enemy attack: fetch ammo if unarmed, otherwise attack
    return playerUnarmed ? StrategyType.GetAmmo : StrategyType.Attack;
}
// Applies one Q-learning update to the (state, strategy) entry:
//   Q <- Q + Alpha * (reward + Discount * V(nextState) - Q)
// where V(nextState) comes from ComputeValueFromQValues.
public void UpdateQValue(SimplifiedWorld state, StrategyType strategy, SimplifiedWorld nextState, float reward)
{
    // Current estimate (falls back to the hard-coded recommendation for unseen pairs)
    float previousEstimate = getQValue(state, strategy);

    // Value of the state we ended up in
    float nextStateValue = ComputeValueFromQValues(nextState);

    // Store the blended estimate under this pair's dictionary key
    string entryKey = new Key(state, strategy).ToString();
    utilities[entryKey] = previousEstimate + Alpha * (reward + Discount * nextStateValue - previousEstimate);
}
// Returns true when every simplified property of comparisonState (weapons, ammo
// amounts and both distance buckets) matches this state's value.
public bool IsEquivalent(SimplifiedWorld comparisonState)
{
    bool samePlayerLoadout = comparisonState.Weapon == this.Weapon
                             && comparisonState.AmmoAmount == this.AmmoAmount;

    bool sameEnemyLoadout = comparisonState.EnemyWeapon == this.EnemyWeapon
                            && comparisonState.EnemyAmmoAmount == this.EnemyAmmoAmount;

    bool sameRelativePosition = comparisonState.XDistanceToEnemy == this.XDistanceToEnemy
                                && comparisonState.YDistanceToEnemy == this.YDistanceToEnemy;

    return samePlayerLoadout && sameEnemyLoadout && sameRelativePosition;
}
// Returns the q value of a state-action tuple.
// If the pair has a stored entry in utilities, that value is returned. Otherwise the
// hard-coded recommendation seeds the estimate: 1.0 when strategy is the recommended
// one for this state, 0.0 for every other strategy.
public float getQValue(SimplifiedWorld state, StrategyType strategy)
{
    string key = (new Key(state, strategy)).ToString();

    // Single lookup instead of ContainsKey followed by the indexer
    float storedValue;
    if (utilities.TryGetValue(key, out storedValue))
    {
        return storedValue;
    }

    // Unseen pair: fall back to the hand-written policy
    StrategyType recommended = strategyRecommendationFromState(state);
    return recommended == strategy ? 1.0f : 0.0f;
}
// Epsilon-greedy strategy selection: when a random draw from [0, 1] is at most Epsilon
// a random strategy is returned, otherwise the best strategy according to the
// current q values.
public StrategyType GetStrategy(SimplifiedWorld state)
{
    float roll = UnityEngine.Random.Range(0.0f, 1.0f);
    bool explore = roll <= Epsilon;

    // Exploit: take the best known strategy
    if (!explore)
    {
        return ComputeStrategyFromQValues(state);
    }

    // Explore: pick an index bounded by the number of StrategyType names.
    // NOTE(review): the bound comes from the enum, not from allPossibleStrategies, and the
    // sibling methods iterate allStrategies instead - confirm both collections line up.
    int strategyCount = Enum.GetNames(typeof(StrategyType)).Length;
    int pickedIndex = UnityEngine.Random.Range(0, strategyCount);
    return allPossibleStrategies[pickedIndex];
}
// The reward function between states.
// The base reward is the health lead in the resulting state (own health minus enemy
// health). On top of that, WinningReward is added when the player is alive and the enemy
// is dead, and subtracted when the player is dead and the enemy is alive.
// initState and strategy are not used by the computation.
public static float Reward(SimplifiedWorld initState, StrategyType strategy, SimplifiedWorld resultState)
{
    bool playerAlive = resultState.health > 0.0f;
    bool enemyAlive = resultState.enemyHealth > 0.0f;

    float winningBonus = 0.0f;
    if (playerAlive && !enemyAlive)
    {
        // If you've won, then you get huge reward
        winningBonus = WinningReward;
    }
    else if (!playerAlive && enemyAlive)
    {
        // If you've lost, then you get an equally huge penalty. The original repeated the
        // winning condition here, which turned every win into a penalty and left losses
        // unpunished.
        winningBonus = -WinningReward;
    }

    // Hold on to your health
    return resultState.health - resultState.enemyHealth + winningBonus;
}
// Returns the utility value of a state: the largest q value over all strategies.
// Returns 0 when allStrategies yields no strategies.
public float ComputeValueFromQValues(SimplifiedWorld state)
{
    bool foundAny = false;
    float bestQValue = 0.0f;

    // Iterate through all possible strategies
    foreach (StrategyType strategy in allStrategies)
    {
        float qValue = getQValue(state, strategy);

        // The first strategy always seeds the maximum. Seeding with 0 (as before) clamped
        // the value of any state whose q values are all negative up to 0, which disagrees
        // with the strategy picked by ComputeStrategyFromQValues.
        if (!foundAny || qValue > bestQValue)
        {
            bestQValue = qValue;
            foundAny = true;
        }
    }

    return bestQValue;
}
// Returns the strategy with the highest q value at the given state.
// Ties keep the strategy encountered first; StrategyType.Attack is returned when no
// strategy beats the float.MinValue starting point.
public StrategyType ComputeStrategyFromQValues(SimplifiedWorld state)
{
    StrategyType bestStrategy = StrategyType.Attack;
    float bestValue = float.MinValue;

    foreach (StrategyType candidate in allStrategies)
    {
        float candidateValue = getQValue(state, candidate);

        // Only a strictly better value replaces the current best
        if (!(candidateValue > bestValue))
        {
            continue;
        }

        bestValue = candidateValue;
        bestStrategy = candidate;
    }

    return bestStrategy;
}
// Parses a key from its space-separated string form. The last token is the strategy
// name; everything up to and including the last space is handed to
// SimplifiedWorld.FromString as the state portion.
static public Key FromString(string keyString)
{
    Key parsedKey = new Key();

    // With no space present LastIndexOf returns -1, so the state portion is empty and the
    // whole string is treated as the strategy token
    int lastSpace = keyString.LastIndexOf(' ');
    string statePortion = keyString.Substring(0, lastSpace + 1);
    string strategyToken = keyString.Substring(lastSpace + 1);

    parsedKey.state = SimplifiedWorld.FromString(statePortion);
    parsedKey.strategy = (StrategyType)Enum.Parse(typeof(StrategyType), strategyToken);

    return parsedKey;
}
// Parses a state from its space-separated string form. Tokens are positional:
// weapon, ammo amount, enemy weapon, enemy ammo amount, x closeness, y closeness.
// Missing trailing tokens leave the corresponding property at its default; tokens past
// the sixth are ignored.
static public SimplifiedWorld FromString(string stateString)
{
    SimplifiedWorld parsed = new SimplifiedWorld();
    string[] tokens = stateString.Split(' ');

    for (int position = 0; position < tokens.Length; position++)
    {
        string token = tokens[position];

        switch (position)
        {
            case 0:
                parsed.Weapon = (WeaponType)Enum.Parse(typeof(WeaponType), token);
                break;
            case 1:
                parsed.AmmoAmount = int.Parse(token);
                break;
            case 2:
                parsed.EnemyWeapon = (WeaponType)Enum.Parse(typeof(WeaponType), token);
                break;
            case 3:
                parsed.EnemyAmmoAmount = int.Parse(token);
                break;
            case 4:
                parsed.XDistanceToEnemy = (XCloseness)Enum.Parse(typeof(XCloseness), token);
                break;
            case 5:
                parsed.YDistanceToEnemy = (YCloseness)Enum.Parse(typeof(YCloseness), token);
                break;
            default:
                // Extra tokens carry no state information
                break;
        }
    }

    return parsed;
}
// The center of the AI - get an action.
// Called with the current world; returns a single-element list holding the action to
// perform. Work is spread over calls:
//   * when decisionTimer has run out, the level 1 searcher computes a fresh action plus a
//     filler action, and decisionTimer is reset to Level1StepSize;
//   * otherwise the filler action is reused and the call either runs the level 2 path
//     search (if flagged by calculatePathNextFrame) or checks whether the current
//     strategy must be replaced, performing the Q-learning update when IsLearning is set.
// Every call advances the path index and decrements decisionTimer and strategyTimer.
override public List <WorldAction> GetAction(World world) {
    // The immediate action comes from level 1
    WorldAction bestAction = WorldAction.NoAction;

    // Update level 1 heuristic parameters
    World.Player player = playerNum == 1 ? world.Player1 : world.Player2;

    // Calculate new level 1 action if timer is up
    if (decisionTimer <= 0) {
        ActionWithFiller decision = level1Searcher.ComputeBestAction(world, fillerAction, strategy.NextPathIndex);
        bestAction = decision.Action;
        // Remember the filler so the calls between decisions can reuse it
        fillerAction = decision.FillerAction;
        decisionTimer = Level1StepSize;
    // Otherwise do the filler action
    } else {
        bestAction = fillerAction;

        // Check distance to path: the path counts as done once the next index reaches the last state
        bool doneWithPath = false;
        if (strategy.SearchPath != null) {
            doneWithPath = strategy.NextPathIndex >= strategy.SearchPath.States.Count - 1;
        }

        // Calculate the path if this frame has been designated to it
        if (calculatePathNextFrame) {
            // Run A*
            Path path = level2Searcher.ComputeBestPath(blockWorld);

            // Must be set before using the level 1 heuristic with a path
            strategy.SearchPath = path;
            strategy.NextPathIndex = 0;
            calculatePathNextFrame = false;

            // If no path is able to be calculated, then check again sooner than normal
            if (path == null) {
                strategyTimer = NoPathFoundRefreshTimer;
            }
        } else {
            // Compute a new strategy if the old one is no longer valid
            SimplifiedWorld currentState = new SimplifiedWorld(world, playerNum);
            if (isFirstTime || !previousState.IsEquivalent(currentState) || doneWithPath || dangerZoneShifted(world) || playerLeftPath(world, strategy.SearchPath) || strategyTimer <= 0 || world.IsTerminal()) {
                // On the very first pass there is no earlier state, so the current one stands in
                // for it (the first learning update below therefore sees identical states)
                if (isFirstTime) {
                    previousState = currentState;
                }
                isFirstTime = false;

                // Get reward and update QValues if learning
                if (IsLearning) {
                    float reward = SimplifiedWorld.Reward(previousState, strategy.Type, currentState);
                    QLearner.UpdateQValue(previousState, strategy.Type, currentState, reward);

                    // Don't learn once world is terminal
                    if (world.IsTerminal()) {
                        IsLearning = false;
                    }
                }

                // Get a new strategy
                StrategyType newStrategy = QLearner.GetStrategy(currentState);
#if STRATEGY_PRINT
                Debug.Log("Player " + playerNum.ToString() + " selects strategy: " + newStrategy.ToString());
#endif
                // Rebuild the strategy object and both searchers around the new strategy's functions
                strategy = Strategy.StrategyWithType(playerNum, newStrategy);
                level1Searcher = new DiscreteAdversarialSearch(playerNum, strategy.Level1Heuristic, getFillerAction, getNewPathIndex, Level1StepSize, 4);
                level2Searcher = new AStar(Level2MaxNodesInPrioQueue, Level2MaxExpansions, strategy.Level2CostFunction, strategy.Level2GoalFunction, strategy.Level2HeuristicFunction);

                // Create block world and danger zone
                blockWorld = new BlockWorld(playerNum, world);

                // Recalc danger zone
                dangerZone = new DangerZone(opponentNum, world, blockWorld);

                // Must be set before using the level 2 reward, cost, and goal functions
                strategy.Level2DangerZone = dangerZone;

                // Calculate the path in the next frame
                calculatePathNextFrame = true;

                // Speeds up framerate after player has died
                if (world.IsTerminal()) {
                    calculatePathNextFrame = false;
                }

                // Reset previous state
                previousState = currentState;
                strategyTimer = MaxStrategyTime;
            }
        }

        // Debug rendering of danger zone
#if DANGER_RENDER
        dangerZone.Render(ResourceScript);
        dangerZone.RenderPlayerBeliefs(ResourceScript);
#endif
    }

    // Advance path position
    strategy.NextPathIndex = getNewPathIndex(player, strategy.NextPathIndex);

    // Both timers tick down once per call
    decisionTimer--;
    strategyTimer--;

#if PATH_RENDER
    if (strategy.SearchPath != null) {
        strategy.SearchPath.Render(ResourceScript, strategy.NextPathIndex);
    }
#endif

    // Return a single-valued list with the best action
    return(new List <WorldAction>() { bestAction });
}
// Default constructor: a freshly constructed SimplifiedWorld paired with StrategyType.Attack.
public Key() : this(new SimplifiedWorld(), StrategyType.Attack)
{
}
// Constructs a key for the given state-strategy pair. The state reference is stored
// as passed, not copied.
public Key(SimplifiedWorld state, StrategyType strategy)
{
    this.strategy = strategy;
    this.state = state;
}
// Returns all possible states: one SimplifiedWorld for every combination of
// weapon, ammo amount, enemy weapon, enemy ammo amount, x closeness and y closeness.
// The rightmost property (y closeness) varies fastest in the returned list.
static public List <SimplifiedWorld> AllPossible()
{
    // Candidate values for each property (weapons and ammo are shared by both players)
    WeaponType[] weapons =
    {
        WeaponType.None, WeaponType.Bombs, WeaponType.Rockets, WeaponType.Minions, WeaponType.Lightning
    };
    int[] ammoAmounts = { -1, 0, 1, 2, 3 };
    XCloseness[] xBuckets =
    {
        XCloseness.Near, XCloseness.Medium, XCloseness.Far, XCloseness.WallBetween
    };
    YCloseness[] yBuckets =
    {
        YCloseness.PosNear, YCloseness.PosMedium, YCloseness.PosFar,
        YCloseness.NegNear, YCloseness.NegMedium, YCloseness.NegFar,
        YCloseness.WallBetween
    };

    List <SimplifiedWorld> allStates = new List <SimplifiedWorld>();

    foreach (WeaponType ownWeapon in weapons)
    {
        foreach (int ownAmmo in ammoAmounts)
        {
            foreach (WeaponType foeWeapon in weapons)
            {
                foreach (int foeAmmo in ammoAmounts)
                {
                    foreach (XCloseness xBucket in xBuckets)
                    {
                        foreach (YCloseness yBucket in yBuckets)
                        {
                            // One state per combination of the six properties
                            allStates.Add(new SimplifiedWorld
                            {
                                Weapon = ownWeapon,
                                AmmoAmount = ownAmmo,
                                EnemyWeapon = foeWeapon,
                                EnemyAmmoAmount = foeAmmo,
                                XDistanceToEnemy = xBucket,
                                YDistanceToEnemy = yBucket
                            });
                        }
                    }
                }
            }
        }
    }

    return allStates;
}