/// <summary>
/// Runs a biased playout from a child of the given state until a terminal
/// state is reached, recording every step in <c>ActionHistory</c>.
/// </summary>
/// <remarks>
/// Algorithm:
///   while s is non-terminal:
///       choose a via <c>getNextBiasRandomAction</c>
///       s = Result(s, a)
///   return the reward for s
/// <c>ActionHistory</c> is cleared at the start of each playout. Each entry
/// pairs the value of <c>GetNextPlayer()</c> read after the action was applied
/// with the action itself. <c>MaxPlayoutDepthReached</c> is raised when this
/// playout went deeper than any previous one.
/// </remarks>
/// <param name="initialPlayoutState">State the playout starts from; only a child model generated from it is mutated here.</param>
/// <returns>A reward holding the terminal state's next player and score.</returns>
protected override Reward Playout(WorldModel initialPlayoutState)
{
    // Direct cast: a child model of the wrong type fails right here with an
    // InvalidCastException instead of a NullReferenceException further down.
    FutureStateWorldModel currentState = (FutureStateWorldModel)initialPlayoutState.GenerateChildWorldModel();

    ActionHistory.Clear();
    int currentDepth = 0;

    while (!currentState.IsTerminal())
    {
        GOB.Action randomAction = currentState.getNextBiasRandomAction(this.RandomGenerator, currentState);
        randomAction.ApplyActionEffects(currentState);
        currentState.CalculateNextPlayer();

        // A fresh pair per step: re-using one mutable instance would make every
        // history entry alias the same object (and so show only the last move)
        // whenever Pair is a reference type.
        ActionHistory.Add(new Pair<int, Action>(currentState.GetNextPlayer(), randomAction));

        currentDepth++;
    }

    if (currentDepth > this.MaxPlayoutDepthReached)
    {
        this.MaxPlayoutDepthReached = currentDepth;
    }

    return new Reward()
    {
        PlayerID = currentState.GetNextPlayer(),
        Value = currentState.GetScore()
    };
}
/// <summary>
/// Runs a heuristic-guided playout: at every step the H-value of each
/// executable action is computed and <c>chooseAction</c> picks which one to apply.
/// </summary>
/// <remarks>
/// The playout stops when the state is terminal, when the depth limit is hit,
/// or when the state offers no executable actions.
/// Depth limit: the step counter only advances when <c>DEPTH_LIMIT &gt; 0</c>,
/// so a limit of 0 means "no limit".
/// NOTE(review): because the loop test is <c>&lt;=</c>, a positive limit allows
/// up to <c>DEPTH_LIMIT + 1</c> steps; confirm whether that is intended.
/// If no step is executed, the returned reward is a default-constructed <c>Reward</c>.
/// </remarks>
/// <param name="initialPlayoutState">State the playout starts from; must be a <c>FutureStateWorldModel</c>.</param>
/// <returns>
/// A reward whose <c>PlayerID</c> is the next player after the last applied
/// action and whose <c>Value</c> is the H-value computed for that action.
/// </returns>
protected override Reward Playout(WorldModel initialPlayoutState)
{
    // Simulate on a new model built from the initial state (copy constructor;
    // presumably leaves the caller's state untouched - confirm).
    FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
    Reward reward = new Reward();
    int numberOfIterations = 0;

    while (!newState.IsTerminal() && numberOfIterations <= DEPTH_LIMIT)
    {
        Action[] possibleActions = newState.GetExecutableActions();

        // A non-terminal state with nothing to execute cannot progress; stop
        // here instead of indexing into an empty array below.
        if (possibleActions == null || possibleActions.Length == 0)
        {
            break;
        }

        List<float> heuristics = new List<float>(possibleActions.Length);
        foreach (Action candidate in possibleActions)
        {
            heuristics.Add(candidate.GetHValue(newState));
        }

        int index = chooseAction(possibleActions, heuristics);
        possibleActions[index].ApplyActionEffects(newState);
        newState.CalculateNextPlayer();

        reward.PlayerID = newState.GetNextPlayer();
        reward.Value = heuristics[index];

        // Only count steps when a limit is configured (0 disables the limit).
        if (DEPTH_LIMIT > 0)
        {
            numberOfIterations++;
        }
    }

    return reward;
}
/// <summary>
/// Default playout: starting from a child model of the given state, keeps
/// applying actions obtained from <c>getNextRandomAction</c> until the state
/// reports that it is terminal.
/// </summary>
/// <remarks>
/// <c>MaxPlayoutDepthReached</c> is updated when this playout executed more
/// steps than any earlier one.
/// </remarks>
/// <param name="initialPlayoutState">State the playout starts from; only the generated child model is mutated here.</param>
/// <returns>A reward holding the final state's next player and score.</returns>
protected virtual Reward Playout(WorldModel initialPlayoutState)
{
    FutureStateWorldModel state = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;

    // One loop iteration per simulated move; 'depth' counts the moves made.
    int depth = 0;
    for (; !state.IsTerminal(); depth++)
    {
        GOB.Action move = state.getNextRandomAction(this.RandomGenerator);
        move.ApplyActionEffects(state);
        state.CalculateNextPlayer();
    }

    // Track the deepest playout seen so far.
    if (this.MaxPlayoutDepthReached < depth)
    {
        this.MaxPlayoutDepthReached = depth;
    }

    Reward outcome = new Reward();
    outcome.PlayerID = state.GetNextPlayer();
    outcome.Value = state.GetScore();
    return outcome;
}