/// <summary>
/// Repeatedly sweeps over every state returned by <c>_world.AllStates()</c>,
/// overwriting the stored value of each state with the result of
/// <c>CalculateValue</c>, until the values stop changing or the sweep limit is hit.
/// </summary>
/// <param name="policy">Policy forwarded to <c>CalculateValue</c> for each state.</param>
/// <param name="gridWorldRewarder">Rewarder forwarded to <c>CalculateValue</c> for each state.</param>
/// <param name="sweepLimit">
/// Maximum number of sweeps to perform. Zero or a negative value (the default)
/// disables the limit, so only the convergence test ends the loop.
/// </param>
/// <remarks>
/// Values are written back to <c>_values</c> during the sweep rather than after it.
/// NOTE(review): presumably <c>Value</c> reads from <c>_values</c>, in which case states
/// visited later in a sweep see the values already updated in that sweep - confirm.
/// </remarks>
public void Evaluate(
    IGridWorldPolicy policy,
    IGridWorldRewarder gridWorldRewarder,
    int sweepLimit = -1)
{
    // A sweep whose largest absolute value change does not exceed this ends the evaluation.
    const double convergenceThreshold = 0.000001;

    var completedSweeps = 0;

    while (true)
    {
        var biggestDelta = 0.0;

        foreach (var state in _world.AllStates())
        {
            var before = Value(state);
            var after = CalculateValue(state, policy, gridWorldRewarder);
            _values[state.Position1D] = after;

            var delta = Math.Abs(before - after);
            if (delta > biggestDelta)
            {
                biggestDelta = delta;
            }
        }

        // The sweep counter is only maintained when a positive limit was requested.
        if (sweepLimit > 0)
        {
            completedSweeps++;
            if (completedSweeps == sweepLimit)
            {
                return;
            }
        }

        // Stop once no state moved by more than the threshold in this sweep.
        if (!(biggestDelta > convergenceThreshold))
        {
            return;
        }
    }
}
/// <summary>
/// Computes a new value for <paramref name="state"/> by summing, over every action
/// returned by <c>_world.AvailableActions(state)</c>, the policy's
/// <c>PAction(state, action)</c> multiplied by the reward for that action plus the
/// current value of the resulting next state.
/// </summary>
/// <param name="state">State whose value is being computed.</param>
/// <param name="policy">Supplies the per-action weight via <c>PAction</c>.</param>
/// <param name="gridWorldRewarder">Supplies the reward for taking an action in the state.</param>
/// <returns>The weighted sum described above; 0.0 when no actions are available.</returns>
private double CalculateValue(
    GridWorldState state,
    IGridWorldPolicy policy,
    IGridWorldRewarder gridWorldRewarder)
{
    var total = 0.0;

    foreach (var action in _world.AvailableActions(state))
    {
        var successor = _world.NextState(state, action);
        var immediateReward = gridWorldRewarder.Reward(state, action);
        var actionWeight = policy.PAction(state, action);

        // No discount factor is applied to the successor's value.
        total += actionWeight * (immediateReward + Value(successor));
    }

    return total;
}