protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    // Clone the state so the playout does not mutate the caller's world model.
    WorldModel childWorldModel = initialPlayoutState.GenerateChildWorldModel();
    int depthReached = 0;

    while (!childWorldModel.IsTerminal())
    {
        // Re-query the executable actions each step: applying an action
        // changes which actions are available in the new state.
        GOB.Action[] actions = childWorldModel.GetExecutableActions();
        if (actions.Length == 0)
        {
            break; // non-terminal state with no executable action: stop the playout
        }
        int index = this.RandomGenerator.Next(actions.Length);
        GOB.Action a = actions[index];
        a.ApplyActionEffects(childWorldModel);
        childWorldModel.CalculateNextPlayer();
        depthReached++;
    }

    if (depthReached > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = depthReached;
    }

    Reward reward = new Reward
    {
        PlayerID = childWorldModel.GetNextPlayer(),
        Value = childWorldModel.GetScore()
    };
    return reward;
}
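// Not shown in these snippets: the Reward returned by Playout must be folded
// back into the tree. A minimal Backpropagate sketch, assuming only the
// Parent, N (visit count) and Q (accumulated value) members that the Expand
// variants below initialise; per-player reward flipping is omitted and would
// be needed in an adversarial setting.
protected virtual void Backpropagate(MCTSNode node, Reward reward)
{
    while (node != null)
    {
        node.N += 1;            // one more visit through this node
        node.Q += reward.Value; // accumulate the simulated value
        node = node.Parent;     // climb toward the root
    }
}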
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Clone the parent's state first; constructing the child directly over
    // parent.State would make ApplyActionEffects mutate the parent's state.
    WorldModel childState = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(childState);
    childState.CalculateNextPlayer();

    MCTSNode childNode = new MCTSNode(childState);
    childNode.Parent = parent;
    childNode.Action = action;
    parent.ChildNodes.Add(childNode);
    return childNode;
}
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    WorldModel state = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(state);
    state.CalculateNextPlayer();

    MCTSNode child = new MCTSNode(state);
    child.Parent = parent;
    child.Action = action;
    parent.ChildNodes.Add(child);
    return child;
}
protected virtual MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    WorldModel currentState = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(currentState);
    currentState.CalculateNextPlayer(); // advance the turn, as the other variants do

    MCTSNode newChild = new MCTSNode(currentState)
    {
        Parent = parent,
        Action = action
    };
    parent.ChildNodes.Add(newChild);
    return newChild;
}
protected virtual MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)parent.State);
    action.ApplyActionEffects(newState);
    newState.CalculateNextPlayer();

    MCTSNode newNode = new MCTSNode(newState);
    newNode.Parent = parent;
    newNode.Action = action;
    newNode.N = 0;
    newNode.Q = 0;
    parent.ChildNodes.Add(newNode);
    return newNode;
}
protected virtual MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    FutureStateWorldModel newModel = parent.State.GenerateChildWorldModel() as FutureStateWorldModel;
    action.ApplyActionEffects(newModel);
    newModel.CalculateNextPlayer();

    MCTSNode childNode = new MCTSNode(newModel);
    childNode.Action = action;
    childNode.Parent = parent;
    parent.ChildNodes.Add(childNode);
    return childNode;
}
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    // Expand from the parent's state, not from CurrentStateWorldModel:
    // cloning the root state here would apply the action to the wrong node.
    WorldModel worldmodel = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(worldmodel);
    worldmodel.CalculateNextPlayer();

    MCTSNode n = new MCTSNode(worldmodel)
    {
        Action = action,
        Parent = parent,
        N = 0,
        Q = 0
    };
    parent.ChildNodes.Add(n);
    return n;
}
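// The Expand variants above pair with a selection policy that decides which
// child to descend into. A hedged UCB1 sketch, assuming the N/Q fields
// initialised above; the exploration constant C is an assumed default
// (~sqrt(2) is common), and Math comes from System.
protected virtual MCTSNode BestUCTChild(MCTSNode node)
{
    const double C = 1.41; // assumed exploration constant
    MCTSNode best = null;
    double bestUCT = double.MinValue;
    foreach (MCTSNode child in node.ChildNodes)
    {
        if (child.N == 0)
        {
            return child; // visit unvisited children before exploiting
        }
        double exploitation = (double)child.Q / child.N;
        double exploration = C * Math.Sqrt(Math.Log(node.N) / child.N);
        if (exploitation + exploration > bestUCT)
        {
            bestUCT = exploitation + exploration;
            best = child;
        }
    }
    return best;
}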
protected override Reward Playout(WorldModel initialPlayoutState)
{
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
    Reward reward = new Reward();
    int numberOfIterations = 0;

    // DEPTH_LIMIT <= 0 means "no depth limit".
    while (!newState.IsTerminal() && (DEPTH_LIMIT <= 0 || numberOfIterations < DEPTH_LIMIT))
    {
        GOB.Action[] possibleActions = newState.GetExecutableActions();
        if (possibleActions.Length == 0)
        {
            break; // no executable action: stop the playout
        }

        // Score every executable action, then sample one with probability
        // proportional to the softmax of its score (Gibbs/Boltzmann sampling).
        List<double> results = new List<double>();
        for (int i = 0; i < possibleActions.Length; i++)
        {
            //results.Add(Heuristic(newState, possibleActions[i]));
            results.Add(possibleActions[i].GetUtility());
        }
        List<double> exponentialResults = results.Select(Math.Exp).ToList();
        double sumExponentials = exponentialResults.Sum();
        List<double> softmax = exponentialResults.Select(j => j / sumExponentials).ToList();

        GOB.Action chosenAction = null;
        float chosenScore = 0f;
        double prob = this.RandomGenerator.NextDouble();
        double probabilitySum = 0;
        for (int i = 0; i < possibleActions.Length; i++)
        {
            probabilitySum += softmax[i];
            if (probabilitySum >= prob)
            {
                chosenAction = possibleActions[i];
                chosenScore = (float)softmax[i];
                break;
            }
        }
        if (chosenAction == null)
        {
            // Rounding can leave probabilitySum just below 1: fall back to the last action.
            chosenAction = possibleActions[possibleActions.Length - 1];
            chosenScore = (float)softmax[softmax.Count - 1];
        }

        chosenAction.ApplyActionEffects(newState);
        newState.CalculateNextPlayer();
        reward.Value = chosenScore;
        reward.PlayerID = 0;

        if (DEPTH_LIMIT > 0)
        {
            numberOfIterations++;
        }
    }
    return reward;
}
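// A side note on the softmax playout above: Math.Exp overflows for large
// utilities. A hedged helper showing the standard max-subtraction fix; the
// name StableSoftmax is hypothetical, and System.Linq is assumed (the
// snippet above already uses Select/Sum).
private static List<double> StableSoftmax(IList<double> values)
{
    double max = values.Max();                                   // shift so the largest exponent is 0
    List<double> exps = values.Select(v => Math.Exp(v - max)).ToList();
    double sum = exps.Sum();
    return exps.Select(e => e / sum).ToList();                   // same probabilities, no overflow
}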
protected override MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    WorldModel currentState = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(currentState);
    currentState.CalculateNextPlayer();

    MCTSNode newChild = new MCTSNode(currentState)
    {
        Parent = parent,
        Action = action
    };
    parent.ChildNodes.Add(newChild);
    return newChild;
}
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    WorldModel state = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(state);
    state.CalculateNextPlayer();

    MCTSNode expand = new MCTSNode(state);
    expand.Parent = parent;
    expand.Action = action;
    expand.PlayerID = state.GetNextPlayer();
    expand.N = 0;
    expand.Q = 0;
    parent.ChildNodes.Add(expand);
    return expand;
}
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    var childWorldModel = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(childWorldModel);
    childWorldModel.CalculateNextPlayer();

    var childNode = new MCTSNode(childWorldModel)
    {
        Action = action,
        PlayerID = parent.PlayerID,
        Parent = parent
    };
    parent.ChildNodes.Add(childNode);
    return childNode;
}
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
    Reward reward = new Reward();

    while (!newState.IsTerminal())
    {
        GOB.Action[] possibleActions = newState.GetExecutableActions();
        if (possibleActions.Length == 0)
        {
            break; // indexing into an empty array would throw
        }
        int actionIndex = this.RandomGenerator.Next(0, possibleActions.Length);
        GOB.Action chosenAction = possibleActions[actionIndex];
        chosenAction.ApplyActionEffects(newState);
        newState.CalculateNextPlayer();
        reward.Value = newState.GetScore();
        reward.PlayerID = 0;
    }
    return reward;
}
private MCTSNode Expand(MCTSNode parent, GOB.Action action)
{
    WorldModel worldmodel = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(worldmodel);
    worldmodel.CalculateNextPlayer();

    MCTSNode n = new MCTSNode(worldmodel)
    {
        Action = action,
        Parent = parent,
        N = 0,
        Q = 0
    };
    parent.ChildNodes.Add(n);
    return n;
}
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel currentState = initialPlayoutState;

    while (!currentState.IsTerminal())
    {
        GOB.Action[] actions = currentState.GetExecutableActions();
        if (actions.Length == 0)
        {
            break; // 'continue' here would loop forever on a non-terminal state with no actions
        }
        int index = this.RandomGenerator.Next(0, actions.Length);
        GOB.Action action = actions[index];
        currentState = currentState.GenerateChildWorldModel();
        action.ApplyActionEffects(currentState);
        this.CurrentDepth++;
    }

    Reward reward = new Reward();
    reward.Value = currentState.GetScore();
    return reward;
}
public override Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel state = initialPlayoutState.GenerateChildWorldModel();

    while (!state.IsTerminal())
    {
        // Greedy playout: simulate each executable action on a copy of the
        // state and keep the one with the best goal-based heuristic value.
        GOB.Action[] actions = state.GetExecutableActions();
        if (actions.Length == 0)
        {
            break;
        }
        GOB.Action action = null;
        float best = float.MinValue;
        foreach (var a in actions)
        {
            WorldModel w = state.GenerateChildWorldModel();
            a.ApplyActionEffects(w);
            var heuristic = w.GetGoalValue("BeQuick")
                          + 1 / w.GetGoalValue("GainXP")
                          + w.GetGoalValue("Survive")
                          + w.GetGoalValue("GetRich");
            if (heuristic > best)
            {
                best = heuristic;
                action = a;
            }
        }
        action.ApplyActionEffects(state);
        state.CalculateNextPlayer();
    }

    Reward r = new Reward();
    r.Value = state.GetScore();
    r.PlayerID = state.GetNextPlayer();
    return r;
}
protected override Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel model = initialPlayoutState.GenerateChildWorldModel();
    List<GOB.Action> actions = model.GetActions();
    List<GOB.Action> executableActions = new List<GOB.Action>();
    SortedDictionary<double, GOB.Action> heuristicList = new SortedDictionary<double, GOB.Action>();
    Reward reward = new Reward();

    while (!model.IsTerminal())
    {
        heuristicList.Clear();
        executableActions.Clear();
        double accumulatedHeuristicValue = 0;
        double bestValue = -1;
        double minValue = float.MaxValue;
        GOB.Action nextAction = null; // reset each iteration so the null guard below works

        if (actions.Count == 0)
        {
            break;
        }

        // Softmax-style weighting: e^H(model) for each executable action.
        foreach (GOB.Action action in actions)
        {
            if (action.CanExecute(model))
            {
                accumulatedHeuristicValue += Math.Pow(Math.E, action.H(model));
                executableActions.Add(action);
            }
        }

        foreach (GOB.Action action in executableActions)
        {
            double heuristicValue = Math.Pow(Math.E, action.H(model)) / accumulatedHeuristicValue;
            if (!heuristicList.ContainsKey(heuristicValue))
            {
                heuristicList.Add(heuristicValue, action);
            }
            if (heuristicValue > bestValue)
            {
                bestValue = heuristicValue;
            }
            if (heuristicValue < minValue)
            {
                minValue = heuristicValue;
            }
        }

        // Pick the first action whose normalized weight reaches a random
        // threshold drawn between the smallest and largest weights.
        double randomNumber = GetRandomNumber(minValue, bestValue);
        foreach (KeyValuePair<double, GOB.Action> actionHeuristic in heuristicList)
        {
            if (actionHeuristic.Key >= randomNumber)
            {
                nextAction = actionHeuristic.Value;
                break;
            }
        }
        if (nextAction == null)
        {
            break;
        }

        nextAction.ApplyActionEffects(model);
        model.CalculateNextPlayer();
    }

    reward.PlayerID = model.GetNextPlayer();
    reward.Value = model.GetScore();
    return reward;
}
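// How the pieces fit together: a hedged sketch of a complete MCTS iteration
// built only from the shapes shown above (Expand, Playout, plus the
// Backpropagate and BestUCTChild sketches earlier). The expansion order
// assumes GetExecutableActions returns a stable ordering, and the name Run
// is an assumption, not part of the original code.
protected GOB.Action Run(MCTSNode root, int iterations)
{
    for (int i = 0; i < iterations; i++)
    {
        // Selection: descend through fully expanded nodes via UCB1.
        MCTSNode node = root;
        while (!node.State.IsTerminal())
        {
            GOB.Action[] executable = node.State.GetExecutableActions();
            if (executable.Length == 0)
            {
                break;
            }
            if (node.ChildNodes.Count < executable.Length)
            {
                // Expansion: create a child for the next untried action.
                node = Expand(node, executable[node.ChildNodes.Count]);
                break;
            }
            node = BestUCTChild(node);
        }

        // Simulation and backpropagation.
        Reward reward = Playout(node.State);
        Backpropagate(node, reward);
    }

    // Final move: the most-visited root child is a common, robust choice.
    MCTSNode bestChild = null;
    foreach (MCTSNode child in root.ChildNodes)
    {
        if (bestChild == null || child.N > bestChild.N)
        {
            bestChild = child;
        }
    }
    return bestChild != null ? bestChild.Action : null;
}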