/// <summary>
/// Picks the action available in <paramref name="state"/> with the highest expected value.
/// An action's value is the sum, over its possible next states, of
/// <c>probability * (reward + value of the next state)</c>.
/// </summary>
/// <param name="problem">Supplies the available actions and, per action, the possible next states with their probabilities.</param>
/// <param name="state">The state for which an action is chosen.</param>
/// <param name="valueTable">Supplies the current value of each next state.</param>
/// <param name="rewarder">Supplies the reward for a (state, next state, action) transition.</param>
/// <returns>
/// The first action whose expected value is strictly greatest. If no action's value
/// compares greater than negative infinity (for example every value is negative
/// infinity or NaN), the first available action is returned. When there are no
/// available actions at all, <c>default(TAction)</c> is returned.
/// </returns>
private static TAction FindBestAction(
    IProblem<TState, TAction> problem,
    TState state,
    ValueTable<TState, TAction> valueTable,
    IRewarder<TState, TAction> rewarder)
{
    // Start from negative infinity rather than double.MinValue so that an action
    // valued exactly double.MinValue is still selectable.
    var bestValue = double.NegativeInfinity;
    var bestAction = default(TAction);
    var hasCandidate = false;

    foreach (var action in problem.AvailableActions(state))
    {
        var actionValue = 0.0;
        foreach (var (nextState, pNextState) in problem.PossibleStates(state, action))
        {
            var nextStateValue = valueTable.Value(nextState);
            var reward = rewarder.Reward(state, nextState, action);
            actionValue += pNextState * (reward + nextStateValue);
        }

        // Fallback: remember the first available action so that a real action is
        // returned even when no value ever wins the strict comparison below
        // (negative infinity or NaN values).
        if (!hasCandidate)
        {
            bestAction = action;
            hasCandidate = true;
        }

        // Strict comparison keeps the earliest action on ties and skips NaN values.
        if (actionValue > bestValue)
        {
            bestValue = actionValue;
            bestAction = action;
        }
    }

    return bestAction;
}
/// <summary>
/// Returns the first available action in <paramref name="state"/> whose probability,
/// as reported by <c>PAction</c>, is approximately one.
/// </summary>
/// <param name="state">The state for which an action is requested.</param>
/// <returns>
/// The first available action with probability of at least <c>1 - 1e-9</c>, or
/// <c>default(TAction)</c> when no available action meets that threshold.
/// </returns>
public TAction Action(TState state)
{
    // double.Epsilon is the smallest positive subnormal double (~4.9e-324), not the
    // machine epsilon, so "1 - double.Epsilon" rounds to exactly 1.0 and gives no
    // tolerance at all. Use an explicit tolerance to absorb floating-point rounding
    // in computed probabilities.
    const double tolerance = 1e-9;
    const double approxOne = 1.0 - tolerance;

    return _problem
        .AvailableActions(state)
        .FirstOrDefault(action => PAction(state, action) >= approxOne);
}
/// <summary>
/// Returns the reciprocal of the number of actions available in <paramref name="state"/>.
/// </summary>
/// <param name="state">The state whose available actions are counted.</param>
/// <param name="action">Not consulted by this implementation; the result depends only on <paramref name="state"/>.</param>
/// <returns>
/// <c>1.0 / count</c> where <c>count</c> is the number of available actions,
/// or <c>0.0</c> when the state has no available actions.
/// </returns>
public double PAction(TState state, TAction action)
{
    var actionCount = _problem.AvailableActions(state).Count();

    // Guard against division by zero for states without any action.
    return actionCount == 0
        ? 0.0
        : 1.0 / actionCount;
}
/// <summary>
/// Computes the expected value of <paramref name="state"/> under <paramref name="policy"/>:
/// the sum, over every available action and every possible next state, of
/// <c>P(action) * P(next state) * (reward + value of the next state)</c>.
/// </summary>
/// <param name="state">The state whose value is computed.</param>
/// <param name="policy">Supplies the probability of taking each action in <paramref name="state"/>.</param>
/// <param name="rewarder">Supplies the reward for a (state, next state, action) transition.</param>
/// <returns>The accumulated expected value; <c>0.0</c> when there are no actions or no possible next states.</returns>
private double CalculateValue(
    TState state,
    IPolicy<TState, TAction> policy,
    IRewarder<TState, TAction> rewarder)
{
    var newValue = 0.0;

    foreach (var action in _problem.AvailableActions(state))
    {
        // The action probability does not depend on the next state, so query the
        // policy once per action instead of once per (action, next state) pair.
        var pAction = policy.PAction(state, action);

        foreach (var (nextState, pNextState) in _problem.PossibleStates(state, action))
        {
            var reward = rewarder.Reward(state, nextState, action);
            newValue += pAction * pNextState * (reward + Value(nextState));
        }
    }

    return newValue;
}