/// <summary>
/// Computes a policy-weighted, one-step look-ahead value for <paramref name="state"/>.
/// </summary>
/// <remarks>
/// For every action returned by <c>_problem.AvailableActions(state)</c> and every
/// (successor, weight) pair returned by <c>_problem.PossibleStates(state, action)</c>,
/// the term <c>policy.PAction(state, action) * weight * (reward + Value(successor))</c>
/// is added to a running total.
/// NOTE(review): the successor weight is presumably a transition probability and
/// <c>PAction</c> presumably an action probability — confirm against their definitions.
/// NOTE(review): no discount factor is applied to <c>Value(successor)</c> in this method;
/// confirm that is intended.
/// </remarks>
/// <param name="state">State whose value is being computed.</param>
/// <param name="policy">Queried via <c>PAction</c> for the weight of each action in <paramref name="state"/>.</param>
/// <param name="rewarder">Queried via <c>Reward</c> for each (state, successor, action) triple.</param>
/// <returns>
/// The accumulated total; <c>0.0</c> when no action or no successor state is enumerated.
/// </returns>
private double CalculateValue(
    TState state,
    IPolicy<TState, TAction> policy,
    IRewarder<TState, TAction> rewarder)
{
    double total = 0.0;

    foreach (var candidate in _problem.AvailableActions(state))
    {
        foreach (var (successor, successorWeight) in _problem.PossibleStates(state, candidate))
        {
            // Call order (Reward, then PAction, then Value) is kept deliberately,
            // in case any of these collaborators has observable side effects.
            var transitionReward = rewarder.Reward(state, successor, candidate);
            var combinedWeight = policy.PAction(state, candidate) * successorWeight;
            var lookAhead = transitionReward + Value(successor);

            total += combinedWeight * lookAhead;
        }
    }

    return total;
}