/// <summary>
/// Computes a new value for <paramref name="state"/> by summing, over every
/// action the world reports as available there, the policy's probability of
/// that action multiplied by (reward for the action + stored value of the
/// state the action leads to).
/// </summary>
/// <param name="state">State whose value is being recomputed.</param>
/// <param name="policy">Supplies the probability of each action in <paramref name="state"/>.</param>
/// <param name="gridWorldRewarder">Supplies the reward for taking an action in <paramref name="state"/>.</param>
/// <returns>The probability-weighted sum described above; 0.0 when no actions are available.</returns>
private double CalculateValue(
    GridWorldState state,
    IGridWorldPolicy policy,
    IGridWorldRewarder gridWorldRewarder)
{
    double weightedSum = 0.0;

    foreach (var candidate in _world.AvailableActions(state))
    {
        var successor = _world.NextState(state, candidate);
        var immediateReward = gridWorldRewarder.Reward(state, candidate);
        var probability = policy.PAction(state, candidate);

        // Reward earned now plus the value currently stored for the successor state.
        var actionReturn = immediateReward + Value(successor);

        weightedSum += probability * actionReturn;
    }

    return weightedSum;
}
/// <summary>
/// Picks, among the actions available in <paramref name="state"/>, the one with
/// the largest (reward + value of the resulting state).
/// </summary>
/// <param name="world">Supplies the available actions and the state each one leads to.</param>
/// <param name="state">State for which an action is being chosen.</param>
/// <param name="gridWorldValueTable">Supplies the value of each successor state.</param>
/// <param name="gridWorldRewarder">Supplies the reward for taking an action in <paramref name="state"/>.</param>
/// <returns>
/// The first action (in enumeration order) whose score is strictly greater than
/// every earlier one. If no action scores above <see cref="double.MinValue"/>,
/// including when there are no available actions, <c>GridWorldAction.Down</c> is returned.
/// </returns>
private static GridWorldAction FindBestAction(
    GridWorld world,
    GridWorldState state,
    GridWorldValueTable gridWorldValueTable,
    IGridWorldRewarder gridWorldRewarder)
{
    // Fallback result when nothing beats the initial threshold.
    var bestAction = GridWorldAction.Down;
    var bestScore = double.MinValue;

    foreach (var candidate in world.AvailableActions(state))
    {
        var successor = world.NextState(state, candidate);
        var successorValue = gridWorldValueTable.Value(successor);
        var immediateReward = gridWorldRewarder.Reward(state, candidate);

        var score = immediateReward + successorValue;

        // Strict comparison: ties keep the earlier action.
        if (score > bestScore)
        {
            bestScore = score;
            bestAction = candidate;
        }
    }

    return bestAction;
}
/// <summary>
/// Probability of taking <paramref name="action"/> in <paramref name="state"/>.
/// </summary>
/// <param name="state">State used to look up the stored action in <c>_actions</c>.</param>
/// <param name="action">Action whose probability is requested.</param>
/// <returns>1 when <paramref name="action"/> equals the action stored for <paramref name="state"/>; otherwise 0.</returns>
public double PAction(GridWorldState state, GridWorldAction action)
{
    if (action == _actions[state])
    {
        return 1.0;
    }

    return 0.0;
}
/// <summary>
/// Looks up the value stored for <paramref name="state"/>.
/// </summary>
/// <param name="state">State whose <c>Position1D</c> is used as the index into <c>_values</c>.</param>
/// <returns>The entry of <c>_values</c> at the state's <c>Position1D</c>.</returns>
public double Value(GridWorldState state)
{
    var index = state.Position1D;
    return _values[index];
}