protected override Reward Playout(WorldModel initialPlayoutState)
{
    /* while s is nonterminal do
     *     choose a from Actions(s) (bias-weighted random here)
     *     s <- Result(s, a)
     * return reward for state s
     */
    FutureStateWorldModel currentState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;
    GOB.Action randomAction;
    ActionHistory.Clear();
    int currentDepth = 0;

    while (!currentState.IsTerminal())
    {
        randomAction = currentState.getNextBiasRandomAction(this.RandomGenerator, currentState);
        randomAction.ApplyActionEffects(currentState);
        currentState.CalculateNextPlayer();

        // BUG FIX: the original allocated ONE Pair before the loop and mutated
        // it every iteration, so every ActionHistory entry aliased the same
        // object and ended up holding only the last player/action. Allocate a
        // fresh Pair per step so the history is meaningful.
        ActionHistory.Add(new Pair<int, Action>(currentState.GetNextPlayer(), randomAction));
        currentDepth++;
    }

    // Track the deepest playout seen so far (statistic, not control flow).
    if (currentDepth > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = currentDepth;
    }

    return new Reward()
    {
        PlayerID = currentState.GetNextPlayer(),
        Value = currentState.GetScore()
    };
}
protected override Reward Playout(WorldModel initialPlayoutState)
{
    // Heuristic-guided playout: score every executable action with its
    // H-value, let chooseAction pick an index, and advance the cloned state
    // until it is terminal or the depth limit is reached. The reward carries
    // the heuristic of the last action applied and the next player to move.
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
    Reward reward = new Reward();
    int numberOfIterations = 0;

    // DEPTH_LIMIT <= 0 means "no limit", matching the softmax Playout variant
    // elsewhere in this file. FIXES two defects in the original condition
    // (numberOfIterations <= DEPTH_LIMIT with a conditional increment):
    // a positive limit ran one extra iteration, and a negative limit skipped
    // the playout entirely, returning an empty reward.
    while (!newState.IsTerminal() && (DEPTH_LIMIT <= 0 || numberOfIterations < DEPTH_LIMIT))
    {
        Action[] possibleActions = newState.GetExecutableActions();
        List<float> heuristics = new List<float>(possibleActions.Length);
        for (int i = 0; i < possibleActions.Length; i++)
        {
            heuristics.Add(possibleActions[i].GetHValue(newState));
        }

        int index = chooseAction(possibleActions, heuristics);
        Action bestAction = possibleActions[index];
        bestAction.ApplyActionEffects(newState);
        newState.CalculateNextPlayer();

        reward.PlayerID = newState.GetNextPlayer();
        reward.Value = heuristics[index];
        numberOfIterations++;
    }

    return reward;
}
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    // Uniform random playout: apply random executable actions on a child copy
    // of the state until it is terminal. Returns a default Reward — the score
    // is presumably computed by the caller; TODO confirm that is intended.
    FutureStateWorldModel state = (FutureStateWorldModel)initialPlayoutState.GenerateChildWorldModel();

    while (!state.IsTerminal())
    {
        GOB.Action[] actions = state.GetExecutableActions();
        // FIX: use the bounded Next(maxValue) overload instead of
        // Next() % length — the modulo form is slightly biased toward lower
        // indices; Next(length) samples uniformly in [0, length).
        actions[RandomGenerator.Next(actions.Length)].ApplyActionEffects(state);
        state.CalculateNextPlayer();
        // NOTE(review): this increments cumulatively across all playouts rather
        // than tracking a per-playout maximum like the other Playout variants —
        // confirm which semantics the statistic is meant to have.
        this.MaxPlayoutDepthReached++;
    }

    return new Reward();
}
protected virtual MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Clone the parent's state, simulate the chosen action on the clone, and
    // wrap the resulting state in a fresh child node linked to the parent.
    FutureStateWorldModel resultingState = new FutureStateWorldModel((FutureStateWorldModel)parent.State);
    action.ApplyActionEffects(resultingState);
    resultingState.CalculateNextPlayer();

    MCTSNode child = new MCTSNode(resultingState)
    {
        Parent = parent,
        Action = action,
        Q = 0, // no accumulated value yet
        N = 0  // never visited yet
    };
    parent.ChildNodes.Add(child);
    return child;
}
protected virtual MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Derive the successor world model by applying the action to a child copy
    // of the parent's state, then attach a new node for it under the parent.
    FutureStateWorldModel successor = parent.State.GenerateChildWorldModel() as FutureStateWorldModel;
    action.ApplyActionEffects(successor);
    successor.CalculateNextPlayer();

    MCTSNode child = new MCTSNode(successor)
    {
        Action = action,
        Parent = parent
    };
    parent.ChildNodes.Add(child);
    return child;
}
protected override Reward Playout(WorldModel initialPlayoutState)
{
    // Softmax playout: weight each executable action by exp(utility) and
    // sample one proportionally (roulette wheel) until the state is terminal
    // or the depth limit is hit. DEPTH_LIMIT <= 0 means "no limit".
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
    Reward reward = new Reward();
    int numberOfIterations = 0;

    while (!newState.IsTerminal() && (DEPTH_LIMIT <= 0 || !(numberOfIterations >= DEPTH_LIMIT)))
    {
        // Assumes a non-terminal state always has at least one executable
        // action — TODO confirm with GetExecutableActions.
        GOB.Action[] possibleActions = newState.GetExecutableActions();
        List<double> results = new List<double>();
        float chosenScore = 0f;
        int i;
        for (i = 0; i < possibleActions.Length; i++)
        {
            results.Add(possibleActions[i].GetUtility());
        }

        // Softmax over utilities: P(a_i) = exp(u_i) / sum_j exp(u_j).
        List<double> exponentialResults = results.Select(Math.Exp).ToList();
        double sumExponentials = exponentialResults.Sum();
        List<double> softmax = exponentialResults.Select(j => j / sumExponentials).ToList();

        // Roulette-wheel selection: pick the first action whose cumulative
        // probability reaches the random draw.
        GOB.Action bestAction = null;
        double prob = this.RandomGenerator.NextDouble();
        double probabilitySum = 0;
        for (i = 0; i < possibleActions.Length; i++)
        {
            probabilitySum += softmax[i];
            if (probabilitySum >= prob)
            {
                bestAction = possibleActions[i];
                chosenScore = (float)softmax[i];
                break;
            }
        }

        // BUG FIX: floating-point rounding can leave the cumulative sum just
        // below `prob`, so the loop could finish with bestAction == null and
        // the original then dereferenced it (NullReferenceException). Fall
        // back to the last action in that case.
        if (bestAction == null)
        {
            int last = possibleActions.Length - 1;
            bestAction = possibleActions[last];
            chosenScore = (float)softmax[last];
        }

        bestAction.ApplyActionEffects(newState);
        newState.CalculateNextPlayer();

        // Reward reflects the probability of the last sampled action.
        reward.Value = chosenScore;
        reward.PlayerID = 0;

        if (DEPTH_LIMIT > 0)
        {
            numberOfIterations++;
        }
    }
    return reward;
}
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    // Uniform random playout: repeatedly apply a randomly chosen executable
    // action to a copy of the state until it is terminal. The reward carries
    // the score of the final state reached (updated every step, so the last
    // write wins) and a fixed PlayerID of 0.
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
    Reward reward = new Reward();

    while (!newState.IsTerminal())
    {
        GOB.Action[] possibleActions = newState.GetExecutableActions();
        GOB.Action chosenAction = possibleActions[this.RandomGenerator.Next(0, possibleActions.Length)];

        chosenAction.ApplyActionEffects(newState);
        newState.CalculateNextPlayer();

        reward.Value = newState.GetScore();
        reward.PlayerID = 0;
    }

    return reward;
}
private Reward Playout(WorldModel initialPlayoutState)
{
    // Bias-guided playout: advance a child copy of the state with ChooseBias
    // until a terminal state is reached, then report the final score.
    FutureStateWorldModel state = (FutureStateWorldModel)initialPlayoutState.GenerateChildWorldModel();

    // Switch between plain MCTS and biased MCTS here:
    //ChooseRandom(state).ApplyActionEffects(state);
    for (; !state.IsTerminal(); this.MaxPlayoutDepthReached++)
    {
        ChooseBias(state).ApplyActionEffects(state);
        state.CalculateNextPlayer();
    }

    return new Reward { Value = state.GetScore() };
}
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    // Uniform random playout from a child copy of the given state; also keeps
    // the longest-playout statistic up to date.
    FutureStateWorldModel currentState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;
    int depth = 0;

    while (!currentState.IsTerminal())
    {
        GOB.Action chosen = currentState.getNextRandomAction(this.RandomGenerator);
        chosen.ApplyActionEffects(currentState);
        currentState.CalculateNextPlayer();
        depth++;
    }

    // Record the deepest playout seen so far.
    this.MaxPlayoutDepthReached = Math.Max(this.MaxPlayoutDepthReached, depth);

    return new Reward
    {
        PlayerID = currentState.GetNextPlayer(),
        Value = currentState.GetScore()
    };
}
/* ChestReallyDead (commented-out predecessor of ChestDead) removed:
 * it checked !state.GetProperty(enemyName) && state.GetProperty(chestName)
 * && action is PickUpChest targeting chestName. ChestDead supersedes it.
 */
private GOB.Action ChooseBias(FutureStateWorldModel state)
{
    // Softmax ("Gibbs") action selection biased by hand-tuned features:
    // each action gets weight exp(h), where h is a weighted sum of the money
    // and XP in the simulated successor state, with a large fixed bonus
    // (h = 91) for picking up a chest whose guarding enemy is already dead.
    GOB.Action[] actions = state.GetExecutableActions();
    int[] features = new int[2];
    int size = features.Length;
    double H = 0;                              // normalization: sum of all exponentials
    double[] exp = new double[actions.Length]; // cached exp(h) per action
    double[] P = new double[actions.Length];   // cumulative selection probabilities

    for (int j = 0; j < actions.Length; j++)
    {
        float h = 0;

        // Never consider a sword attack that would reduce HP to 0 or below.
        if (actions[j] is SwordAttack && (int)state.GetProperty(Properties.HP) + ((SwordAttack)actions[j]).hpChange <= 0)
        {
            exp[j] = 0; // zero weight: this action can never be sampled
            continue;
        }

        if (ChestDead(state, actions[j], "Skeleton1", "Chest1") ||
            ChestDead(state, actions[j], "Skeleton2", "Chest4") ||
            ChestDead(state, actions[j], "Orc1", "Chest3") ||
            ChestDead(state, actions[j], "Orc2", "Chest2") ||
            ChestDead(state, actions[j], "Dragon", "Chest5"))
        {
            // Unguarded chest pickup: give it a weight that dominates the rest.
            h = 91;
            exp[j] = Mathf.Exp(h);
            H += exp[j]; // PERF FIX: reuse the cached exponential instead of recomputing Mathf.Exp(h)
            continue;
        }
        else
        {
            // Simulate the action and score the resulting state.
            FutureStateWorldModel possibleState = (FutureStateWorldModel)state.GenerateChildWorldModel();
            actions[j].ApplyActionEffects(possibleState);
            possibleState.CalculateNextPlayer();

            features[WMoney] = (int)possibleState.GetProperty(Properties.MONEY);
            //features[WTime] = (int) (float) possibleState.GetProperty(Properties.TIME);
            features[WXP] = (int)possibleState.GetProperty(Properties.XP);
            //features[WLevel] = (int)possibleState.GetProperty(Properties.LEVEL);

            for (int i = 0; i < size; i++)
            {
                h += features[i] * weights[i]; // one weight per feature
            }
            exp[j] = Mathf.Exp(h); // cache the exponential so it is computed once
            H += exp[j];           // PERF FIX: reuse the cached value
        }
    }

    if (H == 0)
    {
        // Every action was filtered out (total weight zero): fall back to the first.
        return actions[0];
    }

    P[0] = exp[0] / H; // first entry is not cumulative
    for (int j = 1; j < actions.Length; j++)
    {
        P[j] = P[j - 1] + exp[j] / H; // build the cumulative distribution
    }

    // Sample the smallest cumulative probability >= rand.
    double rand = RandomGenerator.NextDouble();
    int chosen = Array.FindIndex(P, val => val >= rand);

    // BUG FIX: floating-point rounding can leave P[last] just below `rand`,
    // making FindIndex return -1 and the original throw IndexOutOfRange.
    // Fall back to the last action in that case.
    return actions[chosen >= 0 ? chosen : actions.Length - 1];
}