/// <summary>
/// Sweeps repeatedly over every state returned by <c>_world.AllStates()</c>, replacing each
/// state's entry in <c>_values</c> with the value computed by <c>CalculateValue</c> for the
/// given policy and rewarder.
/// </summary>
/// <remarks>
/// Entries are overwritten in place during a sweep, so a state processed later in a sweep
/// reads whatever was already written earlier in that same sweep. Sweeping ends when the
/// largest absolute change observed in a sweep does not exceed 0.000001, or when
/// <paramref name="sweepLimit"/> sweeps have completed, whichever happens first.
/// At least one sweep is always performed.
/// </remarks>
/// <param name="policy">Policy forwarded to <c>CalculateValue</c> for every state.</param>
/// <param name="gridWorldRewarder">Rewarder forwarded to <c>CalculateValue</c> for every state.</param>
/// <param name="sweepLimit">
/// Maximum number of sweeps to run. Zero or a negative number (the default is -1) disables the limit.
/// </param>
public void Evaluate(
    IGridWorldPolicy policy,
    IGridWorldRewarder gridWorldRewarder,
    int sweepLimit = -1)
{
    const double convergenceThreshold = 0.000001;
    var completedSweeps = 0;

    while (true)
    {
        var biggestDelta = 0.0;

        foreach (var state in _world.AllStates())
        {
            // Read the old value before overwriting it so the change can be measured.
            var previous = Value(state);
            var updated = CalculateValue(state, policy, gridWorldRewarder);
            _values[state.Position1D] = updated;

            var delta = Math.Abs(previous - updated);
            biggestDelta = delta > biggestDelta ? delta : biggestDelta;
        }

        // The sweep counter is only maintained when a limit is actually in force.
        if (sweepLimit > 0)
        {
            completedSweeps++;
            if (completedSweeps == sweepLimit)
            {
                return;
            }
        }

        // Stop once no state moved by more than the threshold in this sweep.
        if (!(biggestDelta > convergenceThreshold))
        {
            return;
        }
    }
}
/// <summary>
/// Builds a <see cref="GreedyGridWorldPolicy"/> by choosing, for every state returned by
/// <c>world.AllStates()</c>, the action that <c>FindBestAction</c> selects for that state.
/// </summary>
/// <param name="world">World whose states are enumerated; also forwarded to <c>FindBestAction</c>.</param>
/// <param name="gridWorldValueTable">Value table forwarded to <c>FindBestAction</c>.</param>
/// <param name="gridWorldRewarder">Rewarder forwarded to <c>FindBestAction</c>.</param>
/// <returns>A new policy whose <c>_actions</c> map holds one entry per enumerated state.</returns>
public static GreedyGridWorldPolicy Create(
    GridWorld world,
    GridWorldValueTable gridWorldValueTable,
    IGridWorldRewarder gridWorldRewarder)
{
    var policy = new GreedyGridWorldPolicy();

    foreach (var gridState in world.AllStates())
    {
        var chosenAction = FindBestAction(world, gridState, gridWorldValueTable, gridWorldRewarder);
        policy._actions[gridState] = chosenAction;
    }

    return policy;
}
/// <summary>
/// Computes a new value for <paramref name="state"/> as the sum, over every action returned by
/// <c>_world.AvailableActions(state)</c>, of the policy's probability for that action multiplied
/// by (reward for taking the action + current value of the resulting state).
/// </summary>
/// <param name="state">State whose value is being computed.</param>
/// <param name="policy">Supplies <c>PAction(state, action)</c>, the weight applied to each action.</param>
/// <param name="gridWorldRewarder">Supplies <c>Reward(state, action)</c> for each action.</param>
/// <returns>The weighted sum; 0.0 when the state has no available actions.</returns>
private double CalculateValue(
    GridWorldState state,
    IGridWorldPolicy policy,
    IGridWorldRewarder gridWorldRewarder)
{
    var total = 0.0;

    foreach (var candidate in _world.AvailableActions(state))
    {
        var successor = _world.NextState(state, candidate);
        var immediateReward = gridWorldRewarder.Reward(state, candidate);
        var probability = policy.PAction(state, candidate);
        var successorValue = Value(successor);

        total += probability * (immediateReward + successorValue);
    }

    return total;
}
/// <summary>
/// Picks, among the actions returned by <c>world.AvailableActions(state)</c>, the one with the
/// highest score, where the score is the reward for taking the action plus the table value of
/// the state that action leads to.
/// </summary>
/// <remarks>
/// The comparison is strict, so when several actions share the highest score the first one
/// enumerated is kept. If no action scores above <see cref="double.MinValue"/> (including when
/// there are no available actions), <c>GridWorldAction.Down</c> is returned.
/// </remarks>
/// <param name="world">Supplies the available actions and the next state for each action.</param>
/// <param name="state">State for which an action is being selected.</param>
/// <param name="gridWorldValueTable">Supplies <c>Value(nextState)</c> for each candidate.</param>
/// <param name="gridWorldRewarder">Supplies <c>Reward(state, action)</c> for each candidate.</param>
/// <returns>The highest-scoring action, or <c>GridWorldAction.Down</c> as the fallback.</returns>
private static GridWorldAction FindBestAction(
    GridWorld world,
    GridWorldState state,
    GridWorldValueTable gridWorldValueTable,
    IGridWorldRewarder gridWorldRewarder)
{
    var bestScore = double.MinValue;
    var bestAction = GridWorldAction.Down;

    foreach (var candidate in world.AvailableActions(state))
    {
        var successor = world.NextState(state, candidate);
        var successorValue = gridWorldValueTable.Value(successor);
        var immediateReward = gridWorldRewarder.Reward(state, candidate);
        var score = immediateReward + successorValue;

        // Strict comparison: an equal score never displaces the current best.
        if (score > bestScore)
        {
            bestAction = candidate;
            bestScore = score;
        }
    }

    return bestAction;
}