/// <summary>
/// Accumulates a probability-weighted <see cref="Value"/> over every state yielded by
/// <c>action[state]</c>, combining the summed rewards of the transition with the entry of
/// <paramref name="previousFunction"/> for the successor state.
/// </summary>
/// <param name="action">The action whose successor states and transition weights are read.</param>
/// <param name="state">The state the action is applied to.</param>
/// <param name="previousFunction">The value function that is looked up for each successor state.</param>
/// <returns>The sum of <c>action[state, next] * Value(...)</c> over all successors with a non-zero weight.</returns>
private Value GetExpectation(IMarkovAction<TState> action, TState state, IValueFunction<TState> previousFunction)
{
    var result = new Value(0.0, 0.0);

    foreach (var successor in action[state])
    {
        var weight = action[state, successor];

        // Transitions with a weight of exactly zero contribute nothing and are skipped.
        if (weight != 0)
        {
            // Sum the contribution of every entry in Rewards for this transition.
            var rewardSum = 0.0;
            foreach (var rewardSource in Rewards)
            {
                rewardSum += rewardSource[state, successor, action];
            }

            var previous = previousFunction[successor];

            // First component: reward plus the discounted previous mean.
            var firstComponent = rewardSum + (Discount * previous.Mean);

            // Second component: previous.Variance plus the squared reward plus the cross term.
            // NOTE(review): previous.Variance is added without any Discount factor — confirm this is intended.
            var secondComponent = previous.Variance + (rewardSum * rewardSum) + (2 * Discount * previous.Mean * rewardSum);

            result += weight * new Value(firstComponent, secondComponent);
        }
    }

    return result;
}
/// <inheritdoc />
/// <remarks>
/// Forwards to the <c>TAction</c>-typed indexer when <paramref name="action"/> is a
/// <c>TAction</c>; any other action yields <c>0.0</c>.
/// </remarks>
public double this[TState from, TState to, IMarkovAction<TState> action]
    => action is TAction typedAction
        ? this[from, to, typedAction]
        : 0.0;
/// <summary>
/// Returns <c>1.0</c> when <paramref name="action"/> is the kind of action selected for
/// <paramref name="state"/>, and <c>0.0</c> otherwise.
/// </summary>
/// <param name="action">The action being queried.</param>
/// <param name="state">The state the action would be applied to.</param>
/// <returns>
/// <c>0.0</c> once <c>state.FailStack</c> has reached <c>_targetFailstack</c>; otherwise
/// <c>1.0</c> for a <c>CleanseAction</c> when <c>state.Items[5]</c> is positive, or
/// <c>1.0</c> for an <c>EnhancementAction</c> when it is not; <c>0.0</c> in every other case.
/// </returns>
public double this[IMarkovAction<EnhancementState> action, EnhancementState state]
{
    get
    {
        // Nothing is selected once the target fail stack has been reached.
        if (state.FailStack >= _targetFailstack)
        {
            return 0.0;
        }

        // A positive count in slot 5 selects the cleanse action; otherwise enhancement is selected.
        bool isSelected = state.Items[5] > 0
            ? action is CleanseAction
            : action is EnhancementAction;

        return isSelected ? 1.0 : 0.0;
    }
}
/// <summary>
/// Returns <c>1.0</c> when <paramref name="action"/> is an <c>EnhancementAction</c> whose
/// <c>Grade</c> matches the grade derived from <paramref name="state"/>, and <c>0.0</c> otherwise.
/// </summary>
/// <param name="action">The action being queried.</param>
/// <param name="state">The state the action would be applied to.</param>
/// <returns>
/// <c>0.0</c> when <c>state.Items[0]</c> is not positive, or when <c>state.Items[_targetGrade]</c>
/// is positive and <c>StopAtOnce</c> is set; otherwise <c>1.0</c> only for an
/// <c>EnhancementAction</c> with the matching grade.
/// </returns>
public double this[IMarkovAction<EnhancementState> action, EnhancementState state]
{
    get
    {
        var nothingInFirstSlot = state.Items[0] <= 0;
        if (nothingInFirstSlot || (state.Items[_targetGrade] > 0 && StopAtOnce))
        {
            return 0.0;
        }

        // The grade is one past the last index below _targetGrade that holds a positive count
        // (FindLastIndex yields -1 when there is none, giving a grade of 0).
        var belowTarget = state.Items.Take(_targetGrade).ToList();
        var grade = belowTarget.FindLastIndex(count => count > 0) + 1;

        return action is EnhancementAction enhancement && enhancement.Grade == grade
            ? 1.0
            : 0.0;
    }
}
/// <summary>
/// Builds the level-by-level sets of states reached from <paramref name="state"/> by
/// following <paramref name="policy"/>, optionally replacing the first step with
/// <paramref name="nextAction"/>.
/// </summary>
/// <param name="policy">The policy whose action is looked up for every state of a level.</param>
/// <param name="state">The starting state.</param>
/// <param name="depth">
/// The number of levels to expand; also the index used to pick the value function from
/// <c>ValueFunctions[policy]</c>.
/// </param>
/// <param name="nextAction">
/// When not <c>null</c>, the first level holds the successors of <paramref name="state"/> under
/// this action that the selected value function does not already contain, and one level of
/// <paramref name="depth"/> is consumed. When <c>null</c>, the first level holds only
/// <paramref name="state"/>.
/// </param>
/// <returns>
/// A list whose first entry is the initial level and whose every following entry holds the
/// successors, under the policy's action, of the states in the preceding entry.
/// </returns>
private List<HashSet<TState>> GetValueDependencies(IDeterministicPolicy<TState> policy, TState state, int depth, IMarkovAction<TState> nextAction = null)
{
    var deps = new List<HashSet<TState>>(depth + 1);
    var states = new HashSet<TState>();
    var valueFunction = ValueFunctions[policy][depth];

    if (nextAction != null)
    {
        // Replace the policy action with the supplied next action for the first step.
        // The successor sequence is enumerated while the lock is held.
        lock (nextAction)
        {
            states.UnionWith(nextAction[state].Where(candidate => !valueFunction.HasState(candidate)));
        }

        depth--;
    }
    else
    {
        states.Add(state);
    }

    deps.Add(states);

    while (depth-- > 0)
    {
        var nextStates = new HashSet<TState>();

        foreach (var current in states)
        {
            var action = policy[current];
            if (action == null)
            {
                // States for which the policy has no action contribute no successors.
                continue;
            }

            // Enumerate the successors while holding the lock: a lazily evaluated sequence
            // would otherwise be walked after the lock has been released, unlike the
            // nextAction branch above.
            lock (action)
            {
                nextStates.UnionWith(action[current]);
            }
        }

        deps.Add(nextStates);
        states = nextStates;
    }

    return deps;
}
/// <summary>
/// Creates an edge whose <c>Action</c> is the string representation of <paramref name="action"/>.
/// </summary>
/// <param name="action">The action whose <c>ToString()</c> result is stored.</param>
public ActionEdge(IMarkovAction<EnhancementState> action) => Action = action.ToString();