/// <summary>
/// Advances the game by one frame: asks both agents for an action, applies the
/// actions to the shared game state, and reports the outcome back to each agent.
/// </summary>
/// <returns>
/// The array returned by <c>PacManGameState.Step</c>. This method reads index 0 as
/// player 1's reward flag, index 1 as player 2's reward flag, and index 2 as the
/// flag forwarded as the second argument of <c>Obs</c> (presumably "terminal state").
/// </returns>
public bool[] RunFrame()
{
    var action1 = agent1.Act(gs, 1);
    var action2 = agent2.Act(gs, 2);

    bool[] frameResult = PacManGameState.Step(gs, action1, action2, speed);
    bool endFlag = frameResult[2];

    // Each agent observes its OWN reward flag. The previous code called
    // agent1.Obs twice, so agent2 never received an observation and agent1
    // was additionally fed player 2's reward.
    agent1.Obs(frameResult[0] ? 1F : 0F, endFlag);
    agent2.Obs(frameResult[1] ? 1F : 0F, endFlag);

    return frameResult;
}
/// <summary>
/// Chooses an action by random rollouts: for every candidate movement intent,
/// runs <c>RolloutIterations</c> simulations on a scratch copy of the game state
/// and counts how often the final step result flags a success for this player.
/// </summary>
/// <param name="gs">Current game state; it is only copied from, the rollouts run on <c>gsCopy</c>.</param>
/// <param name="playerNumber">1-based player number (1 or 2) of the agent that is acting.</param>
/// <returns>
/// The candidate with the highest success count. If no candidate scores above zero,
/// the default intent (enum value 0) is returned.
/// </returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="playerNumber"/> is neither 1 nor 2.
/// </exception>
public MovementIntent Act(PacManGameState gs, int playerNumber)
{
    if (playerNumber != 1 && playerNumber != 2)
    {
        throw new System.ArgumentOutOfRangeException(
            nameof(playerNumber), playerNumber, "playerNumber must be 1 or 2.");
    }

    // Step results are laid out as [player 1 flag, player 2 flag, terminal flag],
    // while playerNumber is 1-based. Indexing with playerNumber directly read the
    // opponent's flag for player 1 and the terminal flag for player 2.
    int ownResultIndex = playerNumber - 1;

    int bestActionScore = 0;
    MovementIntent bestAction = 0;

    foreach (var action in movementIntentValues)
    {
        int actionScore = 0;

        for (var i = 0; i < RolloutIterations; i++)
        {
            gsCopy.CopyGS(gs);

            // First simulated frame: this player commits to the candidate action,
            // the opponent moves at random. The candidate must go into the Step
            // slot that belongs to this player, not always into player 1's slot.
            MovementIntent opponentAction = RandomMovementIntent();
            var result = playerNumber == 1
                ? PacManGameState.Step(gsCopy, action, opponentAction, Speed)
                : PacManGameState.Step(gsCopy, opponentAction, action, Speed);

            // Continue with fully random play until the state is terminal or the
            // exploration budget is used up.
            for (int j = 0; !result[2] && j < ExplorationFrames; j++)
            {
                MovementIntent randAction1 = RandomMovementIntent();
                MovementIntent randAction2 = RandomMovementIntent();
                result = PacManGameState.Step(gsCopy, randAction1, randAction2, Speed);
            }

            if (result[ownResultIndex])
            {
                actionScore++;
            }
        }

        // Strict comparison: on ties the earliest candidate wins.
        if (actionScore > bestActionScore)
        {
            bestActionScore = actionScore;
            bestAction = action;
        }
    }

    return bestAction;
}

/// <summary>
/// Draws one movement intent uniformly at random from <c>movementIntentValues</c>.
/// </summary>
private MovementIntent RandomMovementIntent()
{
    // Random.Range(int, int) excludes the upper bound, so Length is a safe maximum.
    int index = UnityEngine.Random.Range(0, movementIntentValues.Length);
    return (MovementIntent)movementIntentValues.GetValue(index);
}