/// <summary>
/// Runs a playout from a child copy of the given state until a terminal state is
/// reached, recording every applied action in <c>ActionHistory</c>.
/// </summary>
/// <param name="initialPlayoutState">
/// State the playout starts from. The model returned by its
/// <c>GenerateChildWorldModel()</c> is the one that gets advanced.
/// </param>
/// <returns>
/// A reward holding the final state's score and the player reported by
/// <c>GetNextPlayer()</c> on that state.
/// </returns>
protected override Reward Playout(WorldModel initialPlayoutState)
{
    /*
     * while s is nonterminal do
     *     choose a from Actions(s) (selection is delegated to getNextBiasRandomAction)
     *     s <- Result(s, a)
     * return reward for state s
     */

    // Direct cast instead of "as": an unexpected model type fails right here with an
    // InvalidCastException rather than as a NullReferenceException further down.
    FutureStateWorldModel currentState = (FutureStateWorldModel)initialPlayoutState.GenerateChildWorldModel();
    int currentDepth = 0;

    // The history only describes the playout that is about to run.
    ActionHistory.Clear();

    while (!currentState.IsTerminal())
    {
        GOB.Action randomAction = currentState.getNextBiasRandomAction(this.RandomGenerator, currentState);
        randomAction.ApplyActionEffects(currentState);
        currentState.CalculateNextPlayer();

        // A fresh pair is stored for every step. Re-adding one shared, mutated instance
        // would make all history entries alias the last step if Pair is a reference type.
        ActionHistory.Add(new Pair<int, Action>(currentState.GetNextPlayer(), randomAction));
        currentDepth++;
    }

    // Keep track of the deepest playout seen so far.
    if (currentDepth > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = currentDepth;
    }

    return new Reward
    {
        PlayerID = currentState.GetNextPlayer(),
        Value = currentState.GetScore()
    };
}
/// <summary>
/// Runs a random playout on a copy of the given state: while the copy is not terminal,
/// one of its executable actions is picked with <c>RandomGenerator</c> and applied.
/// </summary>
/// <param name="initialPlayoutState">
/// State the playout starts from; it must be a <c>FutureStateWorldModel</c>, since it is
/// cast to that type before being copied.
/// </param>
/// <returns>
/// A reward whose <c>Value</c> is the score of the state the playout ended in and whose
/// <c>PlayerID</c> is always 0.
/// </returns>
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);

    while (!newState.IsTerminal())
    {
        GOB.Action[] possibleActions = newState.GetExecutableActions();

        // A non-terminal state without executable actions cannot be advanced; stop here
        // instead of indexing into an empty array.
        if (possibleActions.Length == 0)
        {
            break;
        }

        int actionIndex = this.RandomGenerator.Next(0, possibleActions.Length);
        GOB.Action chosenAction = possibleActions[actionIndex];
        chosenAction.ApplyActionEffects(newState);
        newState.CalculateNextPlayer();
    }

    // The reward is computed once, after the loop, so a start state that is already
    // terminal still reports its score instead of a default-initialised reward.
    Reward reward = new Reward();
    reward.Value = newState.GetScore();
    reward.PlayerID = 0;
    return reward;
}
/// <summary>
/// Runs a playout from a child copy of the given state, applying the action returned by
/// <c>ChooseBias</c> at every step until the state is terminal.
/// </summary>
/// <param name="initialPlayoutState">
/// State the playout starts from. The model returned by its
/// <c>GenerateChildWorldModel()</c> is the one that gets advanced.
/// </param>
/// <returns>A reward whose <c>Value</c> is the score of the final state.</returns>
private Reward Playout(WorldModel initialPlayoutState)
{
    FutureStateWorldModel state = (FutureStateWorldModel)initialPlayoutState.GenerateChildWorldModel();
    int currentDepth = 0;

    while (!state.IsTerminal())
    {
        // Biased action selection is the active strategy. The earlier, disabled
        // alternative used ChooseRandom(state) here for plain MCTS.
        ChooseBias(state).ApplyActionEffects(state);
        state.CalculateNextPlayer();
        currentDepth++;
    }

    // Track the deepest playout seen so far. Incrementing the field on every step
    // would turn it into a running total of steps across all playouts.
    if (currentDepth > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = currentDepth;
    }

    Reward reward = new Reward();
    reward.Value = state.GetScore();
    return reward;
}
/// <summary>
/// Plays the game forward from a child copy of the given state, applying the action
/// returned by <c>getNextRandomAction</c> at each step until the state is terminal.
/// </summary>
/// <param name="initialPlayoutState">
/// State the playout starts from. The model returned by its
/// <c>GenerateChildWorldModel()</c> is the one that gets advanced.
/// </param>
/// <returns>
/// A reward holding the final state's score and the player reported by
/// <c>GetNextPlayer()</c> on that state.
/// </returns>
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    FutureStateWorldModel state = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;

    // The loop counter doubles as the playout depth.
    int depth;
    for (depth = 0; !state.IsTerminal(); depth++)
    {
        GOB.Action action = state.getNextRandomAction(this.RandomGenerator);
        action.ApplyActionEffects(state);
        state.CalculateNextPlayer();
    }

    // Remember the deepest playout seen so far.
    bool isNewMaximum = depth > this.MaxPlayoutDepthReached;
    if (isNewMaximum)
    {
        this.MaxPlayoutDepthReached = depth;
    }

    Reward reward = new Reward
    {
        PlayerID = state.GetNextPlayer(),
        Value = state.GetScore()
    };
    return reward;
}