/// <summary>
/// Heuristic-guided playout: repeatedly scores every executable action with
/// its heuristic value, lets chooseAction pick one, and applies it until the
/// state is terminal or the depth limit is reached. The returned reward holds
/// the heuristic value of the LAST action applied and the player to move next.
/// </summary>
protected override Reward Playout(WorldModel initialPlayoutState)
{
    // Copy the state so the playout does not mutate the caller's world model.
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
    Reward reward = new Reward();
    int numberOfIterations = 0;

    // DEPTH_LIMIT <= 0 means "no depth limit", matching the sibling Playout
    // overrides in this file. The original condition
    // (numberOfIterations <= DEPTH_LIMIT) skipped the loop entirely for a
    // negative limit and ran DEPTH_LIMIT + 1 steps for a positive one.
    while (!newState.IsTerminal() && (DEPTH_LIMIT <= 0 || numberOfIterations < DEPTH_LIMIT))
    {
        Action[] possibleActions = newState.GetExecutableActions();

        // Score each candidate action against the current state.
        List<float> heuristics = new List<float>();
        for (int i = 0; i < possibleActions.Length; i++)
        {
            heuristics.Add(possibleActions[i].GetHValue(newState));
        }

        // Pick an action (selection strategy lives in chooseAction) and
        // advance the simulated state.
        int index = chooseAction(possibleActions, heuristics);
        Action bestAction = possibleActions[index];
        bestAction.ApplyActionEffects(newState);
        newState.CalculateNextPlayer();

        // Overwritten every iteration: the reward reflects the final step.
        reward.PlayerID = newState.GetNextPlayer();
        reward.Value = heuristics[index];

        if (DEPTH_LIMIT > 0)
        {
            numberOfIterations++;
        }
    }
    return reward;
}
/// <summary>
/// Purely random playout: applies uniformly random executable actions until
/// the state is terminal, counting each step in MaxPlayoutDepthReached.
/// </summary>
private Reward Playout(WorldModel initialPlayoutState)
{
    // Work on a child copy so the argument state is left untouched.
    FutureStateWorldModel state = new FutureStateWorldModel(initialPlayoutState.GenerateChildWorldModel());

    while (!state.IsTerminal())
    {
        GOB.Action[] actions = state.GetExecutableActions();
        // Next(maxValue) yields a uniform index in [0, Length); the original
        // Next() % Length carried a (slight) modulo bias.
        actions[RandomGenerator.Next(actions.Length)].ApplyActionEffects(state);
        this.MaxPlayoutDepthReached++;
    }

    // NOTE(review): the reward is returned empty (default Value/PlayerID) —
    // presumably a stub; confirm callers do not read these fields.
    return new Reward();
}
/// <summary>
/// Softmax playout: turns each action's utility into a probability via a
/// softmax and samples the next action from that distribution, until the
/// state is terminal or the depth limit (DEPTH_LIMIT <= 0 disables it) is
/// reached. The reward carries the softmax probability of the LAST action
/// chosen, attributed to player 0.
/// </summary>
protected override Reward Playout(WorldModel initialPlayoutState)
{
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
    Reward reward = new Reward();
    int numberOfIterations = 0;

    while (!newState.IsTerminal() && (DEPTH_LIMIT <= 0 || numberOfIterations < DEPTH_LIMIT))
    {
        GOB.Action[] possibleActions = newState.GetExecutableActions();

        // Utility of each action, then softmax-normalise into probabilities.
        List<double> results = new List<double>();
        for (int i = 0; i < possibleActions.Length; i++)
        {
            results.Add(possibleActions[i].GetUtility());
        }
        List<double> exponentialResults = results.Select(Math.Exp).ToList();
        double sumExponentials = exponentialResults.Sum();
        List<double> softmax = exponentialResults.Select(e => e / sumExponentials).ToList();

        // Sample one action from the softmax distribution (inverse CDF).
        GOB.Action bestAction = null;
        float chosenScore = 0f;
        double prob = this.RandomGenerator.NextDouble();
        double probabilitySum = 0;
        for (int i = 0; i < possibleActions.Length; i++)
        {
            probabilitySum += softmax[i];
            if (probabilitySum >= prob)
            {
                bestAction = possibleActions[i];
                chosenScore = (float)softmax[i];
                break;
            }
        }

        // Floating-point rounding can leave probabilitySum fractionally below
        // prob after the last iteration; fall back to the last action instead
        // of dereferencing null (the original crashed in that case).
        if (bestAction == null && possibleActions.Length > 0)
        {
            int last = possibleActions.Length - 1;
            bestAction = possibleActions[last];
            chosenScore = (float)softmax[last];
        }

        bestAction.ApplyActionEffects(newState);
        newState.CalculateNextPlayer();

        // Overwritten every iteration: the reward reflects the final step.
        reward.Value = chosenScore;
        reward.PlayerID = 0;

        if (DEPTH_LIMIT > 0)
        {
            numberOfIterations++;
        }
    }
    return reward;
}
/// <summary>
/// Default playout policy: advances the simulated state with uniformly
/// random executable actions until it is terminal. The reward holds the
/// final state's score, attributed to player 0.
/// </summary>
virtual protected Reward Playout(WorldModel initialPlayoutState)
{
    FutureStateWorldModel currentState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
    Reward playoutReward = new Reward();

    while (!currentState.IsTerminal())
    {
        GOB.Action[] executable = currentState.GetExecutableActions();
        GOB.Action picked = executable[this.RandomGenerator.Next(0, executable.Length)];

        picked.ApplyActionEffects(currentState);
        currentState.CalculateNextPlayer();

        // Updated every step; the value that survives is the terminal score.
        playoutReward.Value = currentState.GetScore();
        playoutReward.PlayerID = 0;
    }

    return playoutReward;
}
/// <summary>
/// Biased action selection for playouts: scores each executable action with a
/// linear feature heuristic (money and XP of the simulated next state), turns
/// the scores into a softmax distribution, and samples from it. Suicidal
/// sword attacks get zero probability; picking up a chest whose guardian is
/// already dead gets a dominating score. Falls back to the first action when
/// the normalisation constant is zero (every action filtered out).
/// </summary>
private GOB.Action ChooseBias(FutureStateWorldModel state)
{
    GOB.Action[] actions = state.GetExecutableActions();
    int[] features = new int[2];
    int size = features.Length;
    double H = 0;                               // normalisation constant (sum of exponentials)
    double[] exp = new double[actions.Length];  // e^h per action, cached so it is computed once
    double[] P = new double[actions.Length];    // cumulative selection probabilities

    for (int j = 0; j < actions.Length; j++)
    {
        float h = 0;

        // A sword attack that would drop our HP to zero or below is never
        // worth sampling: give it zero probability mass.
        if (actions[j] is SwordAttack && (int)state.GetProperty(Properties.HP) + ((SwordAttack)actions[j]).hpChange <= 0)
        {
            exp[j] = 0;
            continue;
        }

        // Free loot: the guardian is dead and the chest is still there.
        if (ChestDead(state, actions[j], "Skeleton1", "Chest1") ||
            ChestDead(state, actions[j], "Skeleton2", "Chest4") ||
            ChestDead(state, actions[j], "Orc1", "Chest3") ||
            ChestDead(state, actions[j], "Orc2", "Chest2") ||
            ChestDead(state, actions[j], "Dragon", "Chest5"))
        {
            h = 91; // dominating score so this action almost always wins the sample
            exp[j] = Mathf.Exp(h);
            H += Mathf.Exp(h);
            continue;
        }

        // Simulate the action and score the resulting state.
        FutureStateWorldModel possibleState = (FutureStateWorldModel)state.GenerateChildWorldModel();
        actions[j].ApplyActionEffects(possibleState);
        possibleState.CalculateNextPlayer();

        features[WMoney] = (int)possibleState.GetProperty(Properties.MONEY);
        features[WXP] = (int)possibleState.GetProperty(Properties.XP);

        // Linear combination of features — one weight per feature.
        for (int i = 0; i < size; i++)
        {
            h += features[i] * weights[i];
        }

        exp[j] = Mathf.Exp(h);
        H += Mathf.Exp(h);
    }

    if (H == 0)
    {
        // Every action was filtered out: no distribution to sample from,
        // so just take the first executable action.
        return actions[0];
    }

    P[0] = exp[0] / H; // first entry is not cumulative
    for (int j = 1; j < actions.Length; j++)
    {
        P[j] = P[j - 1] + exp[j] / H;
    }

    double rand = RandomGenerator.NextDouble();

    // Smallest cumulative probability >= rand. Floating-point rounding can
    // leave P[last] fractionally below rand, making FindIndex return -1 —
    // fall back to the last action instead of indexing out of range
    // (the original threw IndexOutOfRangeException in that case).
    int chosen = Array.FindIndex(P, val => val >= rand);
    return actions[chosen >= 0 ? chosen : actions.Length - 1];
}
/// <summary>
/// Uniformly selects one of the currently executable actions.
/// </summary>
private GOB.Action ChooseRandom(FutureStateWorldModel state)
{
    GOB.Action[] actions = state.GetExecutableActions();
    // Next(maxValue) samples uniformly in [0, Length); the original
    // Next() % Length carried a slight modulo bias.
    return actions[RandomGenerator.Next(actions.Length)];
}