protected override Reward Playout(WorldModel initialPlayoutState)
{
    WorldModel worldModel = initialPlayoutState.GenerateChildWorldModel();
    int depthCount = 0;

    while (!worldModel.IsTerminal() && depthCount <= MaxPlayoutDepth)
    {
        Action[] actions = worldModel.GetExecutableActions();

        // Biased playout: pick the executable action with the lowest heuristic value.
        Action biasedAction = actions.First();
        foreach (Action action in actions)
        {
            if (action.GetHValue(worldModel) < biasedAction.GetHValue(worldModel))
            {
                biasedAction = action;
            }
        }

        biasedAction.ApplyActionEffects(worldModel);
        depthCount++;
    }

    // Keep track of the deepest playout seen so far.
    if (depthCount > MaxPlayoutDepthReached)
    {
        base.MaxPlayoutDepthReached = depthCount;
    }

    return new Reward()
    {
        Value = GetWorldModelScore(worldModel),
        PlayerID = initialPlayoutState.GetNextPlayer(),
    };
}
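The Reward produced by a Playout like the one above is normally pushed back up the tree in a backpropagation step. The sketch below is only an illustration of that step, assuming MCTSNode exposes the N, Q and Parent fields used in the Expand examples further down; the exact update rule (in particular how the reward is credited per player) depends on the surrounding MCTS class and is not shown in these examples.

protected virtual void Backpropagate(MCTSNode node, Reward reward)
{
    while (node != null)
    {
        node.N += 1;               // one more visit through this node
        node.Q += reward.Value;    // accumulate the playout value (single-player view)
        node = node.Parent;        // climb towards the root
    }
}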
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    Action[] actions = initialPlayoutState.GetExecutableActions();
    float score = 0;

    // Run one fully random playout per executable action and average the scores.
    // Note that every playout starts from the same initial state; the actions
    // array is only used to decide how many playouts to run and to sample from.
    for (int playout = 0; playout < actions.Length; playout++)
    {
        WorldModel worldModel = initialPlayoutState.GenerateChildWorldModel();
        int depthCount = 0;

        while (!worldModel.IsTerminal())
        {
            Action randomAction = actions[RandomGenerator.Next(actions.Length)];
            randomAction.ApplyActionEffects(worldModel);
            depthCount++;
        }

        if (depthCount > MaxPlayoutDepthReached)
        {
            MaxPlayoutDepthReached = depthCount;
        }
        score += worldModel.GetScore();
    }

    return new Reward()
    {
        Value = score / actions.Length,
        PlayerID = initialPlayoutState.GetNextPlayer(),
    };
}
protected override Reward Playout(WorldModel initialPlayoutState)
{
    //WorldModel newState = initialPlayoutState.GenerateChildWorldModel();
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
    Reward reward = new Reward();
    int numberOfIterations = 0;

    while (!newState.IsTerminal() && numberOfIterations <= DEPTH_LIMIT)
    {
        Action[] possibleActions = newState.GetExecutableActions();

        // Compute the heuristic value of every executable action in the current state.
        List<float> heuristics = new List<float>();
        for (int i = 0; i < possibleActions.Length; i++)
        {
            heuristics.Add(possibleActions[i].GetHValue(newState));
        }

        // Bias the playout towards the action with the best heuristic value.
        int index = chooseAction(possibleActions, heuristics);
        Action bestAction = possibleActions[index];

        bestAction.ApplyActionEffects(newState);
        newState.CalculateNextPlayer();

        reward.PlayerID = newState.GetNextPlayer();
        reward.Value = heuristics[index];

        // Depth is only counted when a limit is set; with DEPTH_LIMIT == 0 the
        // playout simply runs until a terminal state is reached.
        if (DEPTH_LIMIT > 0)
        {
            numberOfIterations++;
        }
    }

    return reward;
}
protected MCTSNode Expand(MCTSNode parent, Action action)
{
    WorldModel newState = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(newState);

    MCTSNode child = new MCTSNode(newState)
    {
        Action = action,
        Parent = parent,
        PlayerID = newState.GetNextPlayer(),
    };
    parent.ChildNodes.Add(child);
    return child;
}
protected int chooseAction(Action[] possibleActions, List<float> heuristics)
{
    // Greedy choice: return the index of the action with the highest heuristic value.
    // Starting from float.MinValue (rather than -1) keeps the comparison correct
    // even when all heuristic values are negative.
    float maxHeuristic = float.MinValue;
    int best = 0;
    for (int i = 0; i < possibleActions.Length; i++)
    {
        if (heuristics[i] > maxHeuristic)
        {
            maxHeuristic = heuristics[i];
            best = i;
        }
    }
    return best;
}
protected MCTSNode Expand(MCTSNode parent, Action action)
{
    WorldModel newModel = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(newModel);
    newModel.CalculateNextPlayer();

    MCTSNode node = new MCTSNode(newModel)
    {
        Action = action,
        Parent = parent,
        PlayerID = newModel.GetNextPlayer(),
        Q = 0,
        N = 0
    };
    parent.ChildNodes.Add(node);
    return node;
}
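For context, a selection step that would drive an Expand method like the one above could look roughly like the sketch below. GetNextUntriedAction and BestUCTChild are assumed helpers standing in for whatever untried-action bookkeeping and UCT computation the surrounding class provides; they are not part of the examples shown here.

protected virtual MCTSNode Selection(MCTSNode initialNode)
{
    MCTSNode currentNode = initialNode;
    while (!currentNode.State.IsTerminal())
    {
        Action nextAction = currentNode.State.GetNextUntriedAction(); // assumed helper
        if (nextAction != null)
        {
            // Found an action that has not been expanded yet: grow the tree by one node.
            return Expand(currentNode, nextAction);
        }
        // Fully expanded node: descend to the child with the best UCT value (assumed helper).
        currentNode = BestUCTChild(currentNode);
    }
    return currentNode;
}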
// Only worth running multiple playouts in case action is Sword Attack
protected virtual WorldModel StochasticPlayout(Action action, WorldModel currState)
{
    if (action.Name.Equals("SwordAttack") && this.MaxPlayouts > 0)
    {
        WorldModel[] tests = new WorldModel[this.MaxPlayouts];
        for (int i = 0; i < this.MaxPlayouts; i++)
        {
            tests[i] = currState.GenerateChildWorldModel();
            action.ApplyActionEffects(tests[i]);
        }
        currState = AverageState(tests, (SwordAttack)action);
    }
    else
    {
        currState = currState.GenerateChildWorldModel();
        action.ApplyActionEffects(currState);
    }
    return currState;
}
protected MCTSNode Expand(MCTSNode parent, Action action)
{
    // Originally left as a TODO stub; a possible implementation, following the same
    // pattern as the other Expand examples in this listing (apply the action to a
    // copy of the parent state, then wrap the result in a new child node):
    WorldModel newState = parent.State.GenerateChildWorldModel();
    action.ApplyActionEffects(newState);

    MCTSNode child = new MCTSNode(newState)
    {
        Action = action,
        Parent = parent,
        PlayerID = newState.GetNextPlayer(),
    };
    parent.ChildNodes.Add(child);
    return child;
}
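To show how the pieces above fit together, here is a hedged sketch of a top-level MCTS iteration loop. InitialNode, MaxIterations, Selection and Backpropagate (as sketched earlier), and the visit count N are assumptions about the surrounding class, consistent with the Expand and Playout examples but not taken from them.

public Action Run()
{
    int iterations = 0;
    while (iterations < MaxIterations)
    {
        MCTSNode selectedNode = Selection(InitialNode);   // descend / expand the tree
        Reward reward = Playout(selectedNode.State);      // simulate from the new node
        Backpropagate(selectedNode, reward);              // push the result back up
        iterations++;
    }

    // Return the action of the most visited child of the root.
    MCTSNode bestChild = InitialNode.ChildNodes[0];
    foreach (MCTSNode child in InitialNode.ChildNodes)
    {
        if (child.N > bestChild.N)
        {
            bestChild = child;
        }
    }
    return bestChild.Action;
}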