/// <summary>
/// Returns the softmax probability of <paramref name="action"/> in <paramref name="state"/>:
/// exp(value of the action) divided by the sum of exp(value) over every entry returned by
/// <c>actionValueFunction.GetActionValues(state)</c>.
/// </summary>
/// <param name="action">Action whose selection probability is requested.</param>
/// <param name="state">State in which the action values are looked up.</param>
/// <param name="actionValueFunction">Source of the action values for <paramref name="state"/>.</param>
/// <returns>
/// The ratio described above. If <c>GetActionValues(state)</c> yields no entries the denominator is
/// zero, so the result is not a finite probability.
/// </returns>
public double GetProbabilityOfAction(AIAction action, State state, ActionValueFunction actionValueFunction)
{
    // Softmax is unchanged when the same constant is subtracted from every exponent.
    // Subtracting the largest value keeps every exponent <= 0, so Math.Exp cannot overflow to
    // Infinity (which would yield NaN) and the largest term contributes exactly 1 to the sum,
    // so the denominator cannot underflow to zero.
    double maxValue = double.NegativeInfinity;
    foreach (var actionValue in actionValueFunction.GetActionValues(state))
    {
        double candidate = actionValueFunction.GetValue(state, actionValue.Key);
        maxValue = Math.Max(maxValue, candidate);
    }

    double sumProbabilityOfActions = 0;
    foreach (var actionValue in actionValueFunction.GetActionValues(state))
    {
        sumProbabilityOfActions += Math.Exp(actionValueFunction.GetValue(state, actionValue.Key) - maxValue);
    }

    // The requested action is looked up last, after the enumeration has finished.
    return Math.Exp(actionValueFunction.GetValue(state, action) - maxValue) / sumProbabilityOfActions;
}
/// <summary>
/// Returns the probability of <paramref name="action"/> in <paramref name="state"/>.
/// The active implementation is a softmax: exp(value of the action) divided by the sum of
/// exp(value) over every entry returned by <c>actionValueFunction.GetActionValues(state)</c>.
/// </summary>
/// <param name="action">Action whose selection probability is requested.</param>
/// <param name="state">State in which the action values are looked up.</param>
/// <param name="actionValueFunction">Source of the action values for <paramref name="state"/>.</param>
/// <returns>
/// The ratio described above. If <c>GetActionValues(state)</c> yields no entries the denominator is
/// zero, so the result is not a finite probability.
/// </returns>
public double GetProbabilityOfAction(AIAction action, State state, ActionValueFunction actionValueFunction)
{
    // TODO: the sketch below (a probability weighted by `greedy` for the highest-valued actions)
    // is not active; the softmax computation after it is what currently runs.
    /*
     * int highestCount = ListUtils.GetAllHighest(actionValueFunction.GetActionValues(state), (x, y) => (x.Value - y.Value)).Count;
     * int allCount = actionValueFunction.GetActionValues(state).Count;
     * if (actionValueFunction.GetValue(state, action) == ListUtils.GetHighest(actionValueFunction.GetActionValues(state), (x, y) => x.Value > y.Value).Value)
     *     return greedy / highestCount;
     * return (1 - greedy) / allCount ;
     */

    // Softmax is unchanged when the same constant is subtracted from every exponent.
    // Subtracting the largest value keeps every exponent <= 0, so Math.Exp cannot overflow to
    // Infinity (which would yield NaN) and the largest term contributes exactly 1 to the sum,
    // so the denominator cannot underflow to zero.
    double maxValue = double.NegativeInfinity;
    foreach (var actionValue in actionValueFunction.GetActionValues(state))
    {
        double candidate = actionValueFunction.GetValue(state, actionValue.Key);
        maxValue = Math.Max(maxValue, candidate);
    }

    double sumProbabilityOfActions = 0;
    foreach (var actionValue in actionValueFunction.GetActionValues(state))
    {
        sumProbabilityOfActions += Math.Exp(actionValueFunction.GetValue(state, actionValue.Key) - maxValue);
    }

    // The requested action is looked up last, after the enumeration has finished.
    return Math.Exp(actionValueFunction.GetValue(state, action) - maxValue) / sumProbabilityOfActions;
}
/// <summary>
/// Moves the stored estimate for (<paramref name="state"/>, <paramref name="action"/>) toward
/// <paramref name="reward"/> plus the discounted, policy-weighted average of the action values in
/// <paramref name="nextState"/> (the Expected SARSA form of the update), then replaces
/// <c>ActionValueFunction</c> with the result of <c>UpdateValue</c>.
/// </summary>
/// <param name="state">State in which <paramref name="action"/> was taken.</param>
/// <param name="nextState">State whose action values form the expectation term.</param>
/// <param name="action">Action whose estimate is being updated.</param>
/// <param name="reward">Reward added to the discounted expectation to form the target.</param>
private void UpdateActionValueFunction(State state, State nextState, AIAction action, double reward)
{
    double currentEstimate = ActionValueFunction.GetValue(state, action);

    // Expectation over nextState: each action value weighted by the policy's probability for it.
    double expectedNextValue = 0;
    foreach (var candidate in ActionValueFunction.GetActionValues(nextState))
    {
        double probability = policy.GetProbabilityOfAction(candidate.Key, nextState, ActionValueFunction);
        expectedNextValue += probability * ActionValueFunction.GetValue(nextState, candidate.Key);
    }

    double target = reward + discount * expectedNextValue;

    // The estimate is read a second time for the error term.
    double error = target - ActionValueFunction.GetValue(state, action);

    double updatedEstimate = currentEstimate + stepSize * error;
    ActionValueFunction = ActionValueFunction.UpdateValue(state, action, updatedEstimate);
}