protected virtual MCTSNode Selection(MCTSNode initialNode)
{
    // Descend the tree from the given node: expand the first untried action
    // encountered, otherwise follow the best UCT child until a terminal
    // state is reached. Tracks the deepest selection path for diagnostics.
    MCTSNode node = initialNode;
    GOB.Action untriedAction = initialNode.State.GetNextAction();
    int depth = 0;

    while (!node.State.IsTerminal())
    {
        if (untriedAction != null)
        {
            // An untried action exists here: expand and end the descent.
            return Expand(node, untriedAction);
        }

        node = BestUCTChild(node);
        untriedAction = node.State.GetNextAction();
        depth++;
    }

    // Record the deepest selection seen so far.
    if (depth > this.MaxSelectionDepthReached)
    {
        this.MaxSelectionDepthReached = depth;
    }

    return node;
}
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    // Random playout: simulate uniformly random actions on a copy of the
    // given state until a terminal state is reached, then score it.
    WorldModel childWorldModel = initialPlayoutState.GenerateChildWorldModel();
    int depthReached = 0;

    while (!childWorldModel.IsTerminal())
    {
        // Re-query executable actions every step: applying an action changes
        // the state, so the set computed before the loop (as the original
        // did) goes stale after the first application.
        GOB.Action[] actions = childWorldModel.GetExecutableActions();
        if (actions.Length == 0)
        {
            // Non-terminal state with no executable action: stop instead of
            // spinning forever without changing the state (original bug).
            break;
        }

        int index = this.RandomGenerator.Next(actions.Length);
        GOB.Action a = actions[index];
        a.ApplyActionEffects(childWorldModel);
        childWorldModel.CalculateNextPlayer();
        depthReached++;
    }

    // Diagnostics: deepest playout observed so far.
    if (depthReached > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = depthReached;
    }

    return new Reward
    {
        PlayerID = childWorldModel.GetNextPlayer(),
        Value = childWorldModel.GetScore()
    };
}
private MCTSNode Selection(MCTSNode initialNode)
{
    // Walk down the tree: expand immediately if the root of the walk has an
    // untried action, otherwise follow the best UCT child. Stops at terminal
    // states or when no child can be selected.
    MCTSNode node = initialNode;
    GOB.Action untried = node.State.GetNextAction();

    while (!node.State.IsTerminal())
    {
        if (untried != null)
        {
            return Expand(node, untried);
        }

        MCTSNode next = BestUCTChild(node);
        if (next == null)
        {
            // No selectable child: end the descent at the current node.
            return node;
        }
        node = next;
    }

    return node;
}
protected override Reward Playout(WorldModel initialPlayoutState)
{
    // Greedy playout: at every step apply the executable action with the
    // lowest heuristic value until a terminal state is reached.
    WorldModel currentState = initialPlayoutState;

    while (!currentState.IsTerminal())
    {
        // Actions must come from the *current* state on every step; the
        // original sampled them once from the initial state and reused them.
        GOB.Action[] actions = currentState.GetExecutableActions();
        if (actions.Length == 0)
        {
            // Non-terminal but nothing executable: end the rollout.
            break;
        }

        // Reset the best-so-far trackers each iteration — the original
        // carried them across iterations, locking in the first choice.
        int bestHvalue = int.MaxValue;
        int bestActionIndex = 0;
        for (int i = 0; i < actions.Length; i++)
        {
            int h = actions[i].getHvalue();
            if (h < bestHvalue)
            {
                bestActionIndex = i;
                bestHvalue = h;
            }
        }

        // Advance from the current state, not from initialPlayoutState —
        // otherwise the playout never progresses past one step.
        WorldModel childState = currentState.GenerateChildWorldModel();
        actions[bestActionIndex].ApplyActionEffects(childState);
        childState.CalculateNextPlayer();
        currentState = childState;
        base.CurrentDepth++;
    }

    Reward r = new Reward();
    r.Value = currentState.GetScore();
    return r;
}
// Runs the MCTS search for this frame and returns the best action found at
// the root (also recorded as the single entry of BestActionSequence).
// NOTE(review): this.CurrentIterationsInFrame is reset only once, before the
// outer loop, so the inner while (the actual search) only executes until the
// budget is spent; on later outer iterations it is skipped entirely and the
// same child averages are re-accumulated — confirm whether a per-iteration
// reset was intended.
public GOB.Action Run()
{
    //TODO: Execute MCTS PlayoutIterations times
    var startTime = Time.realtimeSinceStartup;
    this.CurrentIterationsInFrame = 0;
    // Per-frame iteration budget split evenly across the playout iterations.
    int MaxIterations = this.MaxIterationsProcessedPerFrame / this.PlayoutIterations;
    // Search root: a fresh copy of the current world state.
    MCTSNode selectedNode = new MCTSNode(this.CurrentStateWorldModel.GenerateChildWorldModel());
    // results[j] accumulates the mean value (Q/N) of root child j across
    // outer iterations; it is divided by PlayoutIterations on the last pass.
    List <float> results = new List <float>();
    int i = 0;
    for (i = 0; i < this.PlayoutIterations; i++)
    {
        Reward reward;
        // Core MCTS loop: select/expand a node, simulate, backpropagate.
        while (this.CurrentIterationsInFrame < MaxIterations)
        {
            MCTSNode newNode = Selection(selectedNode);
            reward = Playout(newNode.State);
            Backpropagate(newNode, reward);
            this.CurrentIterationsInFrame++;
        }
        for (int j = 0; j < selectedNode.ChildNodes.Count; j++)
        {
            if (results.Count <= j)
            {
                // First time seeing child j: start its accumulator.
                results.Add(selectedNode.ChildNodes[j].Q / selectedNode.ChildNodes[j].N);
            }
            else
            {
                results[j] += selectedNode.ChildNodes[j].Q / selectedNode.ChildNodes[j].N;
            }
            // Convert the accumulated sum into an average on the final pass.
            if (i == this.PlayoutIterations - 1)
            {
                results[j] /= this.PlayoutIterations;
            }
        }
    }
    this.TotalProcessingTime += Time.realtimeSinceStartup - startTime;
    this.InProgress = false;
    //MCTSNode best = BestChild(selectedNode);
    // Choose the root child with the best accumulated average value.
    MCTSNode best = BestAverageChild(selectedNode, results);
    BestActionSequence.Clear();
    GOB.Action bestAction = best != null ? best.Action : null;
    if (bestAction != null)
    {
        BestActionSequence.Add(bestAction);
    }
    return(bestAction);
}
float Heuristic(WorldModel state, GOB.Action action)
{
    // Scores an action for playout guidance; higher is better (roughly 0..1).
    // Fixed priorities for the strongest actions, independent of state.
    if (action.Name.Contains("LevelUp")) { return 1f; }
    if (action.Name.Contains("DivineWrath")) { return 1f; }
    if (action.Name.Contains("DivineSmite")) { return 0.95f; }
    if (action.Name.Contains("ShieldOfFaith")) { return 0.9f; }

    // State-dependent scores. (The original also read Properties.MONEY into
    // an unused moneyScore local; that dead code has been removed.)
    int mana = (int)state.GetProperty(Properties.MANA);
    int HP = (int)state.GetProperty(Properties.HP);
    int MaxHP = (int)state.GetProperty(Properties.MAXHP);
    float time = (float)state.GetProperty(Properties.TIME);

    float manaScore = (float)mana / 10f;
    float hpScore = (float)HP / (float)MaxHP;
    float timeScore = time / 200f;

    // Below half health: favor healing (shorter-duration potions score
    // higher) and strongly discourage melee.
    if (hpScore < 0.5f)
    {
        if (action.Name.Contains("LayOnHands")) { return 1f; }
        if (action.Name.Contains("GetHealthPotion")) { return 0.7f + 0.3f / (action.GetDuration() + 1f); }
        if (action.Name.Contains("SwordAttack")) { return 0.01f; }
    }

    // Below half mana: favor mana potions, again preferring shorter ones.
    if (manaScore < 0.5f)
    {
        if (action.Name.Contains("GetManaPotion")) { return 0.7f + 0.3f / (action.GetDuration() + 1f); }
    }

    // Default: urgency grows with elapsed time.
    return timeScore;
}
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Expand on a *copy* of the parent's state. The original constructed the
    // child node with parent.State itself and then applied the action
    // effects, mutating the parent's state in place and corrupting every
    // node that shared it (the sibling Expand implementations all clone via
    // GenerateChildWorldModel).
    WorldModel childState = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(childState);

    MCTSNode childNode = new MCTSNode(childState)
    {
        Parent = parent,
        Action = action
    };
    parent.ChildNodes.Add(childNode);
    return childNode;
}
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Child-state construction (action application included) is delegated to
    // GenerateChildWorldModel(action); the new node is linked under parent.
    var childState = parent.State.GenerateChildWorldModel(action);

    var childNode = new MCTSNode(childState);
    childNode.Parent = parent;
    childNode.Action = action;
    childNode.PlayerID = childState.GetNextPlayer();

    parent.ChildNodes.Add(childNode);
    return childNode;
}
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Apply the action to a copy of the parent's state, advance the turn,
    // and attach the resulting node as a new child of the parent.
    WorldModel childState = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(childState);
    childState.CalculateNextPlayer();

    MCTSNode child = new MCTSNode(childState)
    {
        Parent = parent,
        Action = action
    };
    parent.ChildNodes.Add(child);
    return child;
}
virtual protected MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Copy the parent's (future) state, apply the action, advance the turn,
    // and link the new node (with zeroed statistics) into the tree.
    FutureStateWorldModel childState = new FutureStateWorldModel((FutureStateWorldModel)parent.State);
    action.ApplyActionEffects(childState);
    childState.CalculateNextPlayer();

    MCTSNode child = new MCTSNode(childState)
    {
        Parent = parent,
        Action = action,
        Q = 0,
        N = 0
    };
    parent.ChildNodes.Add(child);
    return child;
}
protected virtual MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Apply the action to a copy of the parent's state and attach the
    // resulting node as a new child of the parent.
    WorldModel currentState = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(currentState);
    // Advance the turn after applying effects — the parallel Expand
    // implementations in this file all do this; the original omitted it,
    // leaving the child state on the wrong player.
    currentState.CalculateNextPlayer();

    MCTSNode newChild = new MCTSNode(currentState)
    {
        Parent = parent,
        Action = action
    };
    parent.ChildNodes.Add(newChild);
    return newChild;
}
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Expand from the *parent's* state, not from the agent's root state:
    // the original cloned CurrentStateWorldModel, so every expansion
    // restarted from the root regardless of the parent's depth in the tree
    // (the corrected sibling implementation comments out exactly that line).
    WorldModel worldmodel = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(worldmodel);
    // Advance the turn after applying effects, matching the sibling Expand.
    worldmodel.CalculateNextPlayer();

    MCTSNode n = new MCTSNode(worldmodel)
    {
        Action = action,
        Parent = parent,
        N = 0,
        Q = 0
    };
    parent.ChildNodes.Add(n);
    return n;
}
protected virtual MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Produce the successor state for this action and hang it off the
    // parent as a new tree node.
    var successor = parent.State.GenerateChildWorldModel() as FutureStateWorldModel;
    action.ApplyActionEffects(successor);
    successor.CalculateNextPlayer();

    var node = new MCTSNode(successor)
    {
        Action = action,
        Parent = parent
    };
    parent.ChildNodes.Add(node);
    return node;
}
protected override Reward Playout(WorldModel initialPlayoutState)
{
    // Softmax-guided playout: each step samples an action with probability
    // proportional to exp(utility), for up to DEPTH_LIMIT steps (a
    // DEPTH_LIMIT <= 0 means unlimited). The reward carries the softmax
    // weight of the last chosen action.
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
    Reward reward = new Reward();
    int numberOfIterations = 0;

    while (!newState.IsTerminal() && (DEPTH_LIMIT <= 0 || numberOfIterations < DEPTH_LIMIT))
    {
        GOB.Action[] possibleActions = newState.GetExecutableActions();
        if (possibleActions.Length == 0)
        {
            // Non-terminal state with no executable actions: stop instead of
            // dereferencing a null action below.
            break;
        }

        // Softmax over the action utilities.
        List<double> results = new List<double>();
        for (int i = 0; i < possibleActions.Length; i++)
        {
            //results.Add(Heuristic(newState, possibleActions[i]));
            results.Add(possibleActions[i].GetUtility());
        }
        List<double> exponentialResults = results.Select(Math.Exp).ToList();
        double sumExponentials = exponentialResults.Sum();
        List<double> softmax = exponentialResults.Select(j => j / sumExponentials).ToList();

        // Roulette-wheel selection over the softmax distribution.
        GOB.Action bestAction = null;
        float chosenScore = 0f;
        double prob = this.RandomGenerator.NextDouble();
        double probabilitySum = 0;
        for (int i = 0; i < possibleActions.Length; i++)
        {
            probabilitySum += softmax[i];
            if (probabilitySum >= prob)
            {
                bestAction = possibleActions[i];
                chosenScore = (float)softmax[i];
                break;
            }
        }
        if (bestAction == null)
        {
            // Floating-point rounding can leave the cumulative sum just
            // short of prob; fall back to the last action instead of a
            // NullReferenceException (original bug).
            int last = possibleActions.Length - 1;
            bestAction = possibleActions[last];
            chosenScore = (float)softmax[last];
        }

        bestAction.ApplyActionEffects(newState);
        newState.CalculateNextPlayer();
        reward.Value = chosenScore;
        reward.PlayerID = 0;
        if (DEPTH_LIMIT > 0)
        {
            numberOfIterations++;
        }
    }
    return reward;
}
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Build the successor state, advance the turn, and register the new
    // child node (with zeroed visit statistics) under the parent.
    WorldModel nextState = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(nextState);
    nextState.CalculateNextPlayer();

    MCTSNode child = new MCTSNode(nextState)
    {
        Parent = parent,
        Action = action,
        PlayerID = nextState.GetNextPlayer(),
        N = 0,
        Q = 0
    };
    parent.ChildNodes.Add(child);
    return child;
}
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Apply the action to a copy of the parent's state, advance the turn,
    // and attach the resulting node as a new child.
    var childWorldModel = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(childWorldModel);
    childWorldModel.CalculateNextPlayer();

    var childNode = new MCTSNode(childWorldModel)
    {
        Action = action,
        // The child node belongs to the player to move in the *child*
        // state. The original copied parent.PlayerID, ignoring the turn
        // advance performed by CalculateNextPlayer() above; the sibling
        // Expand implementations read GetNextPlayer() from the child state.
        PlayerID = childWorldModel.GetNextPlayer(),
        Parent = parent
    };
    parent.ChildNodes.Add(childNode);
    return childNode;
}
protected override MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Apply the action to a fresh copy of the parent's state, advance the
    // turn, and link the resulting node into the tree.
    WorldModel nextState = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(nextState);
    nextState.CalculateNextPlayer();

    MCTSNode child = new MCTSNode(nextState);
    child.Parent = parent;
    child.Action = action;
    parent.ChildNodes.Add(child);
    return child;
}
virtual protected Reward Playout(WorldModel initialPlayoutState)
{
    // Uniform random playout on a copy of the given state until a terminal
    // state; the reward is the score of the last state visited.
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
    Reward reward = new Reward();

    while (!newState.IsTerminal())
    {
        GOB.Action[] possibleActions = newState.GetExecutableActions();
        if (possibleActions.Length == 0)
        {
            // Guard: on an empty array Random.Next(0, 0) returns 0 and the
            // index below throws IndexOutOfRangeException (original bug) —
            // end the rollout instead.
            break;
        }

        int actionIndex = this.RandomGenerator.Next(0, possibleActions.Length);
        GOB.Action chosenAction = possibleActions[actionIndex];
        chosenAction.ApplyActionEffects(newState);
        newState.CalculateNextPlayer();
        reward.Value = newState.GetScore();
        reward.PlayerID = 0;
    }
    return reward;
}
private bool ChestDead(FutureStateWorldModel state, GOB.Action action, string enemyName, string chestName)
{
    // True when the guarding enemy is gone, the chest still exists in the
    // scene, and the action is picking up exactly that chest.
    // NOTE(review): this queries the live scene via GameObject.Find rather
    // than the world-model `state` (see the commented-out GetProperty
    // variants that were abandoned) — confirm that is intended.
    bool enemyGone = GameObject.Find(enemyName) == null;
    bool chestPresent = GameObject.Find(chestName) != null;
    // The original tested `action is PickUpChest` twice (cond3 was fully
    // subsumed by cond4); the redundant condition has been removed.
    bool picksThisChest = action is PickUpChest && ((PickUpChest)action).Target.name.Equals(chestName);

    return enemyGone && chestPresent && picksThisChest;
}
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    //TODO: implement
    // Clone the parent's state, apply the action, advance the turn, and
    // attach the resulting node (with zeroed statistics) as a new child.
    WorldModel nextState = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(nextState);
    nextState.CalculateNextPlayer();

    MCTSNode child = new MCTSNode(nextState);
    child.Action = action;
    child.Parent = parent;
    child.N = 0;
    child.Q = 0;

    parent.ChildNodes.Add(child);
    return child;
}
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    // Uniform random playout until a terminal state; each step advances to
    // a freshly generated child world model, so the input state itself is
    // never mutated.
    WorldModel currentState = initialPlayoutState;

    while (!currentState.IsTerminal())
    {
        GOB.Action[] actions = currentState.GetExecutableActions();
        if (actions.Length == 0)
        {
            // The original `continue`d here, re-querying the same unchanged
            // state forever (infinite loop) — break to end the rollout.
            break;
        }

        int index = this.RandomGenerator.Next(0, actions.Length);
        GOB.Action action = actions[index];
        currentState = currentState.GenerateChildWorldModel();
        action.ApplyActionEffects(currentState);
        // NOTE(review): unlike the other Playout variants in this file, this
        // one never calls CalculateNextPlayer() — confirm whether the game
        // here is single-player or whether the call is missing.
        this.CurrentDepth++;
    }

    Reward reward = new Reward();
    reward.Value = currentState.GetScore();
    return reward;
}
public override Reward Playout(WorldModel initialPlayoutState)
{
    // Greedy playout: at each step simulate every executable action on a
    // scratch child state and apply the one with the best goal heuristic.
    // (Marked "should choose randomly" in the original — this is a
    // deliberate greedy variant.)
    WorldModel state = initialPlayoutState.GenerateChildWorldModel();

    while (!state.IsTerminal())
    {
        GOB.Action[] actions = state.GetExecutableActions();
        if (actions.Length == 0)
        {
            // Guard: with no executable actions the original dereferenced a
            // null (or stale, carried-over) action — end the rollout.
            break;
        }

        // Best-so-far trackers reset every step; the original declared the
        // chosen action outside the loop, so it could leak across steps.
        GOB.Action bestAction = null;
        float best = float.MinValue;
        foreach (var a in actions)
        {
            WorldModel w = state.GenerateChildWorldModel();
            a.ApplyActionEffects(w);
            var heuristic = w.GetGoalValue("BeQuick") + 1 / w.GetGoalValue("GainXP")
                + w.GetGoalValue("Survive") + w.GetGoalValue("GetRich");
            if (heuristic > best)
            {
                best = heuristic;
                bestAction = a;
            }
        }
        bestAction.ApplyActionEffects(state);
    }

    Reward r = new Reward();
    r.Value = state.GetScore();
    r.PlayerID = state.GetNextPlayer();
    return r;
}
public ActionRAVE(GOB.Action action)
{
    // Wrap the given action with zeroed RAVE statistics.
    this.action = action;
    this.Plays = 0;
    this.Q = 0;
}
// Expansion step — not yet implemented.
// NOTE(review): unlike the other Expand methods visible in this file, this
// overload receives the parent *state* rather than a parent node, so an
// eventual implementation cannot set the new node's Parent from this
// argument alone — confirm the intended signature.
protected MCTSNode Expand(WorldModel parentState, GOB.Action action)
{
    //TODO: implement
    throw new NotImplementedException();
}
// Softmax-weighted playout: each step weighs every executable action by
// exp(H(model)) normalised over the executable set, then picks the first
// action (ascending weight order) whose weight reaches a random threshold
// drawn from [minValue, bestValue]; repeats until the model is terminal or
// no action can be chosen.
// NOTE(review): heuristicList is keyed by the weight value, so two actions
// with an identical weight collapse to one entry (the ContainsKey guard
// drops the second); and if no key reaches randomNumber, nextAction keeps
// its value from the previous iteration and is re-applied — confirm both
// behaviours are intended.
protected override Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel model = initialPlayoutState.GenerateChildWorldModel();
    List <GOB.Action> actions;
    List <GOB.Action> executableActions = new List <GOB.Action>();
    GOB.Action nextAction = null;
    Reward reward = new Reward();
    double heuristicValue;
    double accumulatedHeuristicValue;
    double bestValue, minValue;
    SortedDictionary <double, GOB.Action> heuristicList = new SortedDictionary <double, GOB.Action>();
    // The full action list is fetched once; executability is re-checked
    // against the evolving model on every loop iteration.
    actions = model.GetActions();
    while (!model.IsTerminal())
    {
        // Reset per-step working state.
        heuristicList.Clear();
        executableActions.Clear();
        heuristicValue = 0;
        accumulatedHeuristicValue = 0;
        bestValue = -1;
        minValue = float.MaxValue;
        if (actions.Count == 0)
        {
            break;
        }
        // Pass 1: softmax denominator over the currently executable actions.
        foreach (GOB.Action action in actions)
        {
            if (action.CanExecute(model))
            {
                accumulatedHeuristicValue += Math.Pow(Math.E, action.H(model));
                executableActions.Add(action);
            }
        }
        // Pass 2: normalised weight per action, tracking the min and max.
        foreach (GOB.Action action in executableActions)
        {
            heuristicValue = Math.Pow(Math.E, action.H(model)) / accumulatedHeuristicValue;
            if (!heuristicList.ContainsKey(heuristicValue))
            {
                heuristicList.Add(heuristicValue, action);
            }
            if (heuristicValue > bestValue)
            {
                bestValue = heuristicValue;
            }
            if (heuristicValue < minValue)
            {
                minValue = heuristicValue;
            }
        }
        // Sample a threshold and take the first action (SortedDictionary
        // iterates keys in ascending order) whose weight reaches it.
        double randomNumber = GetRandomNumber(minValue, bestValue);
        foreach (KeyValuePair <double, GOB.Action> actionHeuristic in heuristicList)
        {
            if (actionHeuristic.Key >= randomNumber)
            {
                nextAction = actionHeuristic.Value;
                break;
            }
        }
        if (nextAction == null)
        {
            // No action ever selected: abandon the playout.
            break;
        }
        nextAction.ApplyActionEffects(model);
        model.CalculateNextPlayer();
    }
    reward.PlayerID = model.GetNextPlayer();
    reward.Value = model.GetScore();
    return(reward);
}