/// <summary>
/// Builds a <see cref="GreedyGridWorldPolicy"/> by storing, for every state returned by
/// <c>world.AllStates()</c>, the action selected by <c>FindBestAction</c>.
/// </summary>
/// <param name="world">Grid world whose states are enumerated and passed on to the action search.</param>
/// <param name="gridWorldValueTable">Value table forwarded to the action search.</param>
/// <param name="gridWorldRewarder">Rewarder forwarded to the action search.</param>
/// <returns>A new policy with one action entry per enumerated state.</returns>
public static GreedyGridWorldPolicy Create(
    GridWorld world,
    GridWorldValueTable gridWorldValueTable,
    IGridWorldRewarder gridWorldRewarder)
{
    var policy = new GreedyGridWorldPolicy();

    foreach (var currentState in world.AllStates())
    {
        // Resolve the action first, then record it under the state it belongs to.
        var chosenAction = FindBestAction(world, currentState, gridWorldValueTable, gridWorldRewarder);
        policy._actions[currentState] = chosenAction;
    }

    return policy;
}
/// <summary>
/// Picks, among the actions available in <paramref name="state"/>, the one with the largest
/// sum of the rewarder's reward for (state, action) and the table value of the resulting state.
/// </summary>
/// <param name="world">Supplies the available actions and the state reached by each action.</param>
/// <param name="state">State for which an action is being chosen.</param>
/// <param name="gridWorldValueTable">Provides the value of each candidate next state.</param>
/// <param name="gridWorldRewarder">Provides the reward for taking an action in <paramref name="state"/>.</param>
/// <returns>
/// The first action whose score is strictly greater than every earlier score; when no score
/// exceeds <see cref="double.MinValue"/> (for example, when there are no available actions),
/// <c>GridWorldAction.Down</c> is returned.
/// </returns>
private static GridWorldAction FindBestAction(
    GridWorld world,
    GridWorldState state,
    GridWorldValueTable gridWorldValueTable,
    IGridWorldRewarder gridWorldRewarder)
{
    var bestScore = double.MinValue;
    var bestAction = GridWorldAction.Down;

    foreach (var candidate in world.AvailableActions(state))
    {
        var successor = world.NextState(state, candidate);
        var successorValue = gridWorldValueTable.Value(successor);
        var immediateReward = gridWorldRewarder.Reward(state, candidate);
        var score = immediateReward + successorValue;

        // Strict comparison: on ties the earlier candidate is kept.
        if (score > bestScore)
        {
            bestScore = score;
            bestAction = candidate;
        }
    }

    return bestAction;
}
/// <summary>
/// Creates a value table bound to <paramref name="world"/>, allocating one <c>double</c> slot
/// for each state returned by <c>world.AllStates()</c>.
/// </summary>
/// <param name="world">Grid world this table keeps a reference to and sizes itself from.</param>
public GridWorldValueTable(GridWorld world)
{
    _world = world;

    // Size the backing array from the number of states the world reports.
    var stateCount = world.AllStates().Count();
    _values = new double[stateCount];
}