// Runs one rollout per executable action of the initial state and averages the scores.
// Fixes: the seeding action is now actually applied to the child model (the original
// never used the foreach variable), the action list is re-queried from the evolving
// model each step (the original sampled from the stale initial list, risking illegal
// actions and a non-terminating loop), and an empty action list no longer divides by zero.
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    Action[] actions = initialPlayoutState.GetExecutableActions();
    float score = 0;

    foreach (Action action in actions)
    {
        WorldModel worldModel = initialPlayoutState.GenerateChildWorldModel();
        // Seed the rollout with the action this iteration is meant to evaluate.
        action.ApplyActionEffects(worldModel);

        int depthCount = 0;
        while (!worldModel.IsTerminal())
        {
            // Actions executable in the CURRENT state, not the initial one.
            Action[] stepActions = worldModel.GetExecutableActions();
            if (stepActions.Length == 0)
            {
                break; // dead end: stop this rollout and score what we reached
            }
            Action randomAction = stepActions[RandomGenerator.Next(stepActions.Length)];
            randomAction.ApplyActionEffects(worldModel);
            depthCount++;
        }

        if (depthCount > MaxPlayoutDepthReached)
        {
            MaxPlayoutDepthReached = depthCount;
        }
        score += worldModel.GetScore();
    }

    return new Reward()
    {
        // Average over rollouts; guard the empty-action-list case.
        Value = actions.Length > 0 ? score / actions.Length : 0,
        PlayerID = initialPlayoutState.GetNextPlayer(),
    };
}
// Greedy heuristic playout: each step applies the executable action with the lowest
// H value, up to MaxPlayoutDepth steps, then scores the resulting state.
// Fixes: the incumbent's H value is cached (the original re-evaluated GetHValue for
// the current best on every comparison, doubling heuristic calls per step), and an
// empty action list no longer throws from First().
protected override Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel worldModel = initialPlayoutState.GenerateChildWorldModel();
    int depthCount = 0;
    while (!worldModel.IsTerminal() && depthCount <= MaxPlayoutDepth)
    {
        Action[] actions = worldModel.GetExecutableActions();
        if (actions.Length == 0)
        {
            break; // dead end: stop the playout early
        }

        Action biasedAction = actions[0];
        var bestH = biasedAction.GetHValue(worldModel); // cache the incumbent's score
        for (int i = 1; i < actions.Length; i++)
        {
            var h = actions[i].GetHValue(worldModel);
            if (h < bestH)
            {
                bestH = h;
                biasedAction = actions[i];
            }
        }

        biasedAction.ApplyActionEffects(worldModel);
        depthCount++;
    }
    if (depthCount > MaxPlayoutDepthReached)
    {
        base.MaxPlayoutDepthReached = depthCount;
    }
    return new Reward()
    {
        Value = GetWorldModelScore(worldModel),
        PlayerID = initialPlayoutState.GetNextPlayer(),
    };
}
// Uniform random playout of a child of the initial state.
// Fixes: the executable-action list is refreshed from the evolving model each step
// (it was fetched once before the loop, so stale or no-longer-legal actions could be
// applied), and an empty action list now ends the rollout instead of spinning forever
// while only incrementing the depth counter.
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel childWorldModel = initialPlayoutState.GenerateChildWorldModel();
    int DepthReached = 0;
    while (!childWorldModel.IsTerminal())
    {
        GOB.Action[] actions = childWorldModel.GetExecutableActions();
        if (actions.Length == 0)
        {
            break; // no executable action: stop instead of looping forever
        }
        int index = this.RandomGenerator.Next(actions.Length);
        GOB.Action a = actions[index];
        a.ApplyActionEffects(childWorldModel);
        childWorldModel.CalculateNextPlayer();
        DepthReached++;
    }
    if (DepthReached > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = DepthReached;
    }
    Reward reward = new Reward
    {
        PlayerID = childWorldModel.GetNextPlayer(),
        Value = childWorldModel.GetScore()
    };
    return reward;
}
// Uniform random playout.
// BUG FIX: when no action was executable the original hit `continue` without changing
// the state, which loops forever on any non-terminal state with an empty action list —
// break out and score the state reached so far instead.
public virtual Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel state = initialPlayoutState.GenerateChildWorldModel();
    while (!state.IsTerminal())
    {
        // Choose uniformly among the actions executable in the current state.
        GOB.Action[] actions = state.GetExecutableActions();
        if (actions.Length == 0)
        {
            break; // FIX: was `continue` — an infinite loop
        }
        int random = RandomGenerator.Next(0, actions.Length);
        GOB.Action action = actions[random];
        action.ApplyActionEffects(state);
    }
    Reward r = new Reward();
    r.Value = state.GetScore();
    r.PlayerID = state.GetNextPlayer();
    return r;
}
// Uniform random playout: advance a child of the initial state with randomly chosen
// executable actions until the state is terminal (or no action is available), then
// report the reached state's score and next player.
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel rolloutState = initialPlayoutState.GenerateChildWorldModel();

    while (!rolloutState.IsTerminal())
    {
        GOB.Action[] available = rolloutState.GetExecutableActions();
        if (available.Length == 0)
        {
            break; // dead end: nothing executable, stop the rollout early
        }

        int pick = RandomGenerator.Next(0, available.Length);
        available[pick].ApplyActionEffects(rolloutState);
        rolloutState.CalculateNextPlayer();
    }

    return new Reward
    {
        PlayerID = rolloutState.GetNextPlayer(),
        Value = rolloutState.GetScore(),
    };
}
// Biased random playout that records the (player, action) history for RAVE-style backups:
//   while s is nonterminal: choose a from Actions(s), s <- Result(s, a)
//   return the reward for the terminal state s.
// BUG FIX: the original allocated ONE Pair before the loop and mutated it every step,
// so all ActionHistory entries aliased the same object and ended up holding only the
// final step's values. A fresh Pair is now created per step, and the garbage
// placeholder action ("asdasdas") is gone.
protected override Reward Playout(WorldModel initialPlayoutState)
{
    FutureStateWorldModel currentState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;
    ActionHistory.Clear();
    int currentDepth = 0;

    while (!currentState.IsTerminal())
    {
        GOB.Action randomAction = currentState.getNextBiasRandomAction(this.RandomGenerator, currentState);
        randomAction.ApplyActionEffects(currentState);
        currentState.CalculateNextPlayer();
        // One independent pair per step so history entries do not overwrite each other.
        ActionHistory.Add(new Pair<int, Action>(currentState.GetNextPlayer(), randomAction));
        currentDepth++;
    }

    if (currentDepth > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = currentDepth;
    }

    return new Reward()
    {
        PlayerID = currentState.GetNextPlayer(),
        Value = currentState.GetScore()
    };
}
protected override float RunPlayout(WorldModel initialPlayoutState) { GOB.Action nextAction; WorldModel currentState = initialPlayoutState; var currentPlayoutDepth = 0; while (!currentState.IsTerminal()) { var executableActions = currentState.GetExecutableActions(); //Bias: Choose among the 50% best var maxIndex = Convert.ToInt32(Math.Ceiling(executableActions.Count * 0.5)); var orderedExecutableActions = executableActions.OrderByDescending(x => this.Heuristic.H(currentState, x)).Take(maxIndex).ToList(); var index = this.RandomGenerator.Next(0, maxIndex); nextAction = executableActions[index]; currentState = currentState.GenerateChildWorldModel(nextAction); currentPlayoutDepth++; } if (currentPlayoutDepth > this.MaxPlayoutDepthReached) { this.MaxPlayoutDepthReached = currentPlayoutDepth; } //var currentPlayer = currentState.GetNextPlayer(); //var value = initialPlayoutState.GetNextPlayer() == currentPlayer ? score : -score; return(currentState.GetScore()); }
// Greedy heuristic playout: each step applies the lowest-H executable action.
// Fixes three bugs: (1) the action list is refreshed from the CURRENT state each step
// (the original evaluated the initial state's actions forever); (2) the best-H
// tracking is reset per step (the original carried bestHvalue/bestActionIndex across
// iterations, so after the first step the same action was always replayed); (3) the
// child state is generated from the current state (the original always branched off
// the initial state, so the playout never advanced past one step and could loop forever).
protected override Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel currentState = initialPlayoutState;
    while (!currentState.IsTerminal())
    {
        GOB.Action[] actions = currentState.GetExecutableActions();
        if (actions.Length == 0)
        {
            break; // no executable action: score the state reached so far
        }

        // Reset the best-so-far for this step.
        int bestHvalue = int.MaxValue;
        int bestActionIndex = 0;
        for (int i = 0; i < actions.Length; i++)
        {
            int h = actions[i].getHvalue();
            if (h < bestHvalue)
            {
                bestActionIndex = i;
                bestHvalue = h;
            }
        }

        WorldModel childState = currentState.GenerateChildWorldModel();
        actions[bestActionIndex].ApplyActionEffects(childState);
        childState.CalculateNextPlayer();
        currentState = childState;
        base.CurrentDepth++;
    }
    Reward r = new Reward();
    r.Value = currentState.GetScore();
    // NOTE(review): r.PlayerID is left at its default, matching the original —
    // confirm whether callers expect it to be set.
    return r;
}
// Uniform random playout.
// Fixes: the action list is re-queried from the CURRENT state on every step (the
// original used the initial state's list for the whole rollout), and an empty list
// now ends the rollout (the original would still enter the loop and index actions[0]
// of an empty array via Next(0, 0)).
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    Reward reward = new Reward();
    WorldModel current = initialPlayoutState;

    while (!current.IsTerminal())
    {
        current = current.GenerateChildWorldModel();
        GOB.Action[] actions = current.GetExecutableActions();
        if (actions.Length == 0)
        {
            break; // dead end: score the state reached so far
        }
        int random = RandomGenerator.Next(0, actions.Length);
        GOB.Action action = actions[random];
        action.ApplyActionEffects(current);
        current.CalculateNextPlayer();
    }

    reward.PlayerID = current.GetNextPlayer();
    reward.Value = current.GetScore();
    return reward;
}
// RAVE + biased playout: each step samples an action with probability proportional to
// e^(-H(action)) (softmax over the heuristic — lower H is more likely), records every
// (acting player, action) pair in ActionHistory for RAVE backups, and tracks the
// deepest rollout seen in MaxPlayoutDepthReached.
protected override Reward Playout(WorldModel initialPlayoutState)
{
    ActionHistory = new List<Pair<int, GOB.Action>>();
    WorldModel rolloutState = initialPlayoutState.GenerateChildWorldModel();
    int depth = 0;

    while (!rolloutState.IsTerminal())
    {
        GOB.Action[] candidates = rolloutState.GetExecutableActions();

        // Cumulative e^{-H} weights over the candidate actions.
        double[] cumulative = new double[candidates.Length];
        double total = 0.0;
        for (int i = 0; i < candidates.Length; i++)
        {
            total += Math.Pow(Math.E, -candidates[i].H(rolloutState));
            cumulative[i] = total;
        }

        // Roulette-wheel: draw in [0, total) and take the first bucket containing it.
        double sample = this.RandomGenerator.NextDouble() * total;
        int chosen = 0;
        for (int i = 0; i < candidates.Length; i++)
        {
            if (sample <= cumulative[i])
            {
                chosen = i;
                break;
            }
        }

        // Record the acting player BEFORE the state advances, then apply the action.
        ActionHistory.Add(new Pair<int, GOB.Action>(rolloutState.GetNextPlayer(), candidates[chosen]));
        candidates[chosen].ApplyActionEffects(rolloutState);
        rolloutState.CalculateNextPlayer();
        depth++;
    }

    if (depth > MaxPlayoutDepthReached)
    {
        MaxPlayoutDepthReached = depth;
    }

    return new Reward
    {
        PlayerID = rolloutState.GetNextPlayer(),
        Value = rolloutState.GetScore()
    };
}
// Uniform random playout.
// Fixes: the playout result is now returned (the original returned an EMPTY Reward,
// discarding the score entirely); the depth is counted locally and folded into
// MaxPlayoutDepthReached as a maximum (the original incremented the field once per
// step of every playout, so it grew without bound instead of recording the deepest
// rollout); Next(n) replaces the modulo-biased Next() % n; and an empty action list
// no longer divides by zero / loops forever.
private Reward Playout(WorldModel initialPlayoutState)
{
    FutureStateWorldModel state = new FutureStateWorldModel(initialPlayoutState.GenerateChildWorldModel());
    int depth = 0;

    while (!state.IsTerminal())
    {
        GOB.Action[] actions = state.GetExecutableActions();
        if (actions.Length == 0)
        {
            break; // dead end: score the state reached so far
        }
        actions[RandomGenerator.Next(actions.Length)].ApplyActionEffects(state);
        depth++;
    }

    if (depth > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = depth;
    }

    return new Reward()
    {
        Value = state.GetScore(),
        PlayerID = state.GetNextPlayer()
    };
}
// Only the stochastic Sword Attack benefits from repeated sampling: run it
// MaxPlayouts times and average the resulting states; every other action is
// deterministic, so a single child state suffices.
protected virtual WorldModel StochasticPlayout(Action action, WorldModel currState)
{
    bool multiSample = action.Name.Equals("SwordAttack") && this.MaxPlayouts > 0;
    if (!multiSample)
    {
        // Deterministic path: one child state.
        WorldModel next = currState.GenerateChildWorldModel();
        action.ApplyActionEffects(next);
        return next;
    }

    // Sample the attack MaxPlayouts times and collapse into an averaged state.
    WorldModel[] samples = new WorldModel[this.MaxPlayouts];
    for (int i = 0; i < this.MaxPlayouts; i++)
    {
        samples[i] = currState.GenerateChildWorldModel();
        action.ApplyActionEffects(samples[i]);
    }
    return AverageState(samples, (SwordAttack)action);
}
// Random playout: roll a child of the initial state forward with random actions
// until terminal, then score it through the node-reward helper.
protected override Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel rolloutState = initialPlayoutState.GenerateChildWorldModel();

    while (!rolloutState.IsTerminal())
    {
        getRandomAction(rolloutState).ApplyActionEffects(rolloutState);
    }

    Reward reward = new Reward();
    reward.Value = reward.GetRewardForNode(new MCTSNode(rolloutState));
    return reward;
}
// Softmax-biased playout: each step samples an executable action with probability
// proportional to e^(-H(action)) (lower heuristic => more likely) until the state is
// terminal. Tracks the deepest rollout seen and scores the terminal state for the
// root node's player.
protected override Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel rolloutState = initialPlayoutState.GenerateChildWorldModel();
    int steps = 0;

    while (!rolloutState.IsTerminal())
    {
        GOB.Action[] candidates = rolloutState.GetExecutableActions();

        // Cumulative e^{-H} weights over the candidates.
        double[] thresholds = new double[candidates.Length];
        double mass = 0.0;
        for (int i = 0; i < candidates.Length; i++)
        {
            mass += Math.Pow(Math.E, -candidates[i].H(rolloutState));
            thresholds[i] = mass;
        }

        // Roulette-wheel selection over the cumulative weights.
        double draw = this.RandomGenerator.NextDouble() * mass;
        int pick = 0;
        for (int i = 0; i < candidates.Length; i++)
        {
            if (draw <= thresholds[i])
            {
                pick = i;
                break;
            }
        }

        candidates[pick].ApplyActionEffects(rolloutState);
        rolloutState.CalculateNextPlayer();
        steps++;
    }

    if (steps > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = steps;
    }

    return new Reward
    {
        PlayerID = this.InitialNode.PlayerID,
        Value = rolloutState.GetScore()
    };
}
// Biased playout (swap in ChooseRandom for plain MCTS).
// Fixes: the playout depth is counted locally and folded into MaxPlayoutDepthReached
// as a maximum (the original incremented the field once per step of EVERY playout, so
// it grew without bound across iterations instead of recording the deepest single
// rollout), and the reward now carries the player id like the sibling implementations.
private Reward Playout(WorldModel initialPlayoutState)
{
    FutureStateWorldModel state = (FutureStateWorldModel)initialPlayoutState.GenerateChildWorldModel();
    int depth = 0;

    while (!state.IsTerminal())
    {
        // Biased action selection; ChooseRandom(state) gives the unbiased variant.
        ChooseBias(state).ApplyActionEffects(state);
        state.CalculateNextPlayer();
        depth++;
    }

    if (depth > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = depth;
    }

    Reward reward = new Reward();
    reward.Value = state.GetScore();
    reward.PlayerID = state.GetNextPlayer();
    return reward;
}
// Uniform random playout.
// BUG FIX: an empty executable-action list hit `continue` without changing the state,
// looping forever on any non-terminal dead end — break out and score the state instead.
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel currentState = initialPlayoutState;
    while (!currentState.IsTerminal())
    {
        GOB.Action[] actions = currentState.GetExecutableActions();
        if (actions.Length == 0)
        {
            break; // FIX: was `continue` — an infinite loop
        }
        int index = this.RandomGenerator.Next(0, actions.Length);
        GOB.Action action = actions[index];
        currentState = currentState.GenerateChildWorldModel();
        action.ApplyActionEffects(currentState);
        this.CurrentDepth++;
    }
    Reward reward = new Reward();
    reward.Value = currentState.GetScore();
    return reward;
}
// Greedy playout: each step applies the action whose resulting state maximizes a
// goal-based heuristic (BeQuick + 1/GainXP + Survive + GetRich).
// Fixes: an empty action list now ends the rollout (the original left `action` null —
// or stale from the previous step — and dereferenced it), and the GainXP term guards
// against division by zero (1/0 = float Infinity would otherwise dominate the sum and
// force that action to win every comparison).
public override Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel state = initialPlayoutState.GenerateChildWorldModel();
    while (!state.IsTerminal())
    {
        GOB.Action[] actions = state.GetExecutableActions();
        if (actions.Length == 0)
        {
            break; // FIX: original applied a null/stale action here
        }

        GOB.Action action = null;
        float best = float.MinValue;
        foreach (var a in actions)
        {
            // Evaluate the action on a scratch child state.
            WorldModel w = state.GenerateChildWorldModel();
            a.ApplyActionEffects(w);
            float gainXp = w.GetGoalValue("GainXP");
            var heuristic = w.GetGoalValue("BeQuick")
                + (gainXp != 0 ? 1 / gainXp : 0) // FIX: avoid Infinity on GainXP == 0
                + w.GetGoalValue("Survive")
                + w.GetGoalValue("GetRich");
            if (heuristic > best)
            {
                best = heuristic;
                action = a;
            }
        }
        action.ApplyActionEffects(state);
    }
    Reward r = new Reward();
    r.Value = state.GetScore();
    r.PlayerID = state.GetNextPlayer();
    return r;
}
// Random playout with RAVE history recording.
// BUG FIX: when no action was executable the loop body was skipped without changing
// the state, looping forever on any non-terminal dead end — break out and score the
// state reached so far instead.
protected override Reward Playout(WorldModel initialPlayoutState)
{
    ActionHistory = new List<Pair<int, GOB.Action>>();
    WorldModel state = initialPlayoutState.GenerateChildWorldModel();

    while (!state.IsTerminal())
    {
        Action[] actions = state.GetExecutableActions();
        if (actions.Length == 0)
        {
            break; // FIX: original spun forever here
        }
        Action nextAction = actions[RandomGenerator.Next() % actions.Length];
        // Record the acting player before the state advances.
        ActionHistory.Add(new Pair<int, GOB.Action>(state.GetNextPlayer(), nextAction));
        nextAction.ApplyActionEffects(state);
        state.CalculateNextPlayer();
    }

    Reward r = new Reward();
    r.Value = state.GetScore();
    return r;
}
// Uniform random playout: advance the state with uniformly chosen executable actions
// until terminal, track the deepest rollout seen, and return the terminal score.
protected virtual float RunPlayout(WorldModel currentState)
{
    var depth = 0;
    while (!currentState.IsTerminal())
    {
        var available = currentState.GetExecutableActions();
        var pick = available[this.RandomGenerator.Next(0, available.Count)];
        currentState = currentState.GenerateChildWorldModel(pick);
        depth++;
    }

    if (depth > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = depth;
    }

    return currentState.GetScore();
}
// Uniform random playout using the model's own random-action helper; records the
// deepest rollout seen in MaxPlayoutDepthReached and scores the terminal state.
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    var state = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;
    var depth = 0;

    while (!state.IsTerminal())
    {
        GOB.Action step = state.getNextRandomAction(this.RandomGenerator);
        step.ApplyActionEffects(state);
        state.CalculateNextPlayer();
        depth++;
    }

    if (depth > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = depth;
    }

    return new Reward
    {
        PlayerID = state.GetNextPlayer(),
        Value = state.GetScore()
    };
}
// Biased playout over the full action list. Per step, each executable action gets a
// normalized weight e^H(a) / sum(e^H) — NOTE(review): this is e^{+H}; the sibling
// playouts in this file weight by e^{-H} (lower heuristic = more likely). Confirm the
// intended sign. Selection: a SortedDictionary keyed by that weight is built, a value
// is drawn from GetRandomNumber(minWeight, maxWeight), and the first entry with
// key >= draw is applied. NOTE(review): actions with identical weights are silently
// dropped by the ContainsKey guard, and the draw is over individual weights rather
// than cumulative mass (non-standard roulette selection) — confirm this is intended.
// Also note `nextAction` keeps its previous iteration's value when no entry matches
// the draw, so the null-check break only triggers before the first successful pick.
// Returns the reached state's score and next player. Doc-only review pass: code below
// is untouched.
protected override Reward Playout(WorldModel initialPlayoutState) { WorldModel model = initialPlayoutState.GenerateChildWorldModel(); List <GOB.Action> actions; List <GOB.Action> executableActions = new List <GOB.Action>(); GOB.Action nextAction = null; Reward reward = new Reward(); double heuristicValue; double accumulatedHeuristicValue; double bestValue, minValue; SortedDictionary <double, GOB.Action> heuristicList = new SortedDictionary <double, GOB.Action>(); actions = model.GetActions(); while (!model.IsTerminal()) { heuristicList.Clear(); executableActions.Clear(); heuristicValue = 0; accumulatedHeuristicValue = 0; bestValue = -1; minValue = float.MaxValue; if (actions.Count == 0) { break; } foreach (GOB.Action action in actions) { if (action.CanExecute(model)) { accumulatedHeuristicValue += Math.Pow(Math.E, action.H(model)); executableActions.Add(action); } } foreach (GOB.Action action in executableActions) { heuristicValue = Math.Pow(Math.E, action.H(model)) / accumulatedHeuristicValue; if (!heuristicList.ContainsKey(heuristicValue)) { heuristicList.Add(heuristicValue, action); } if (heuristicValue > bestValue) { bestValue = heuristicValue; } if (heuristicValue < minValue) { minValue = heuristicValue; } } double randomNumber = GetRandomNumber(minValue, bestValue); foreach (KeyValuePair <double, GOB.Action> actionHeuristic in heuristicList) { if (actionHeuristic.Key >= randomNumber) { nextAction = actionHeuristic.Value; break; } } if (nextAction == null) { break; } nextAction.ApplyActionEffects(model); model.CalculateNextPlayer(); } reward.PlayerID = model.GetNextPlayer(); reward.Value = model.GetScore(); return(reward); }
// Heuristic for a (state, action) pair: evaluate H on the state that results from
// applying the action to the given state.
public float H(WorldModel state, Action action) => H(state.GenerateChildWorldModel(action));
// Softmax-biased playout: weights each executable action by a hand-tuned, name-keyed
// heuristic scaled by inverse distance to the action's target, then samples from the
// accumulated weights. Doc-only review pass — several suspect spots are flagged with
// NOTE(review) below but the code is left untouched. (Reformatted from a collapsed
// single line; tokens unchanged.)
protected override Reward Playout(WorldModel initialPlayoutState)
{
    GOB.Action action;
    GOB.Action[] actions;
    Reward reward = new Reward();
    WorldModel current = initialPlayoutState;
    double random;
    float h = 0;
    double accumulate = 0;
    float euclidean = 0;
    double softmax = 0;
    List <double> interval = new List <double>();
    WalkToTargetAndExecuteAction wa;

    // NOTE(review): the action list is taken once from the INITIAL state and never
    // refreshed inside the loop below, even as effects are applied — confirm intended.
    actions = current.GetExecutableActions();
    if (actions.Length == 0)
    {
        // NOTE(review): the reward is filled here but the while loop is still entered;
        // with zero actions `softmax` stays 0 and the inner for loop never runs, so a
        // non-terminal state would loop forever. Flagged, not changed.
        reward.PlayerID = current.GetNextPlayer();
        reward.Value = 0;
    }
    while (!current.IsTerminal())
    {
        accumulate = 0;
        interval.Clear();
        //if (actions.Length == 0)
        //    break;
        foreach (var a in actions)
        {
            h = 0;
            // NOTE(review): heuristics read the LIVE game state via
            // CurrentStateWorldModel, not the simulated `current` model — confirm intended.
            var gameMan = this.CurrentStateWorldModel.GetGameManager();
            var character = gameMan.characterData;
            wa = a as WalkToTargetAndExecuteAction;
            if (wa != null)
            {
                // Distance from the character to the action target; clamp to 1 to avoid
                // division by zero below.
                euclidean = (wa.Target.transform.position - wa.Character.transform.position).magnitude;
                if (euclidean <= 0)
                {
                    euclidean = 1;
                }
            }
            // Hand-tuned base weights per action name (inline comments give the
            // author's expected ranges).
            if (a.Name.Contains("LevelUp")) //1000
            {
                h = 1000;
            }
            if (a.Name.Contains("GetHealthPotion")) //0-25
            {
                h = (character.MaxHP - character.HP) * 1.5f;
            }
            else if (a.Name.Contains("PickUpChest")) //5-25
            {
                h = (character.Money + 5) * 3.5f;
            }
            else if (a.Name.Contains("FireballSkeleton") || a.Name.Contains("FireballOrc")) //0-25
            {
                h = character.Mana * 30;
            }
            else if (a.Name.Contains("SwordAttackSkeleton"))
            {
                h = (character.HP - 5) * 2;
            }
            else if (a.Name.Contains("SwordAttackOrc"))
            {
                h = (character.HP - 10) * 2;
            }
            else if (a.Name.Contains("SwordAttackDragon"))
            {
                h = character.HP - 20;
            }
            if (h < 0)
            {
                h = 0;
            }
            // Scale the weight by inverse distance to the target.
            h = h * 1000 / euclidean;
            accumulate += h;
            if (h > 0)
            {
                // NOTE(review): `softmax` is never reset between outer-loop iterations,
                // so the cumulative weights keep growing across playout steps and draws
                // skew toward later entries — confirm intended. Flagged, not changed.
                softmax += Math.Pow(Math.E, -h / accumulate);
                interval.Add(softmax);
                Debug.Log(softmax);
            }
            else
            {
                // Zero-weight actions get a 0 bucket: with `random <= interval[j]` below,
                // such a bucket can only match when random == 0.
                interval.Add(0);
            }
        }
        random = RandomGenerator.NextDouble() * softmax;
        for (int j = 0; j < interval.Count; j++)
        {
            if (random <= interval[j])
            {
                action = actions[j];
                current = current.GenerateChildWorldModel();
                action.ApplyActionEffects(current);
                current.CalculateNextPlayer();
                break;
            }
            if (j == interval.Count - 1)
            {
                // Fallback when no bucket matched the draw: bail out with a zero reward.
                current = current.GenerateChildWorldModel();
                reward.Value = 0;
                reward.PlayerID = current.GetNextPlayer();
                return(reward);
            }
        }
    }
    reward.PlayerID = current.GetNextPlayer();
    reward.Value = current.GetScore();
    return(reward);
}