/// <summary>
/// Selects the action to take in <paramref name="state"/>.
/// </summary>
/// <remarks>
/// One value is drawn from Random(). When it exceeds the <c>greedy</c> threshold, the action is
/// picked by ChooseRandomAction from every action value known for the state. Otherwise the pick
/// is restricted to the entries that ListUtils.GetAllHighest reports as having the highest
/// probability. NOTE(review): presumably Random() is uniform in [0, 1), which would make
/// <c>greedy</c> the chance of the greedy branch - confirm against its definition.
/// </remarks>
/// <param name="state">State for which an action is requested.</param>
/// <param name="actionValueFunction">Source of the action values for the state.</param>
/// <returns>The action returned by ChooseRandomAction for the selected candidate list.</returns>
public AIAction ChooseAction(State state, ActionValueFunction actionValueFunction)
{
    bool explore = Random() > greedy;
    if (explore)
    {
        // Exploration branch: every action listed for the state is a candidate.
        var allActionValues = actionValueFunction.GetActionValues(state);
        return ChooseRandomAction(allActionValues);
    }

    // Greedy branch: only the actions tied for the highest probability are candidates.
    var probabilities = GetActionProbabilities(state, actionValueFunction);
    var mostProbable = ListUtils.GetAllHighest(probabilities, (left, right) => (left.Value - right.Value));
    return ChooseRandomAction(mostProbable);
}
/// <summary>
/// Returns the softmax probability of <paramref name="action"/> in <paramref name="state"/>:
/// exp(Q(state, action)) divided by the sum of exp(Q(state, a)) over every action listed for
/// the state by <paramref name="actionValueFunction"/>.
/// </summary>
/// <remarks>
/// All values are shifted by their maximum before being exponentiated. The quotient is
/// mathematically unchanged, but Math.Exp can no longer overflow to infinity, which would
/// otherwise turn the result into NaN for large action values.
/// </remarks>
/// <param name="action">Action whose probability is requested.</param>
/// <param name="state">State in which the action would be taken.</param>
/// <param name="actionValueFunction">Source of the action values.</param>
/// <returns>
/// The softmax weight of the action. When no actions are listed for the state the division is
/// by zero and the result is positive infinity.
/// </returns>
public double GetProbabilityOfAction(AIAction action, State state, ActionValueFunction actionValueFunction)
{
    var actionValues = actionValueFunction.GetActionValues(state);
    double requestedValue = actionValueFunction.GetValue(state, action);

    // Largest exponent involved; seeded with the requested action's own value so that the
    // shift is well defined even when the action list is empty.
    double maxValue = requestedValue;
    foreach (var actionValue in actionValues)
    {
        maxValue = Math.Max(maxValue, actionValueFunction.GetValue(state, actionValue.Key));
    }

    // Every shifted exponent is <= 0, so each term lies in [0, 1] and cannot overflow.
    double sumOfExponentials = 0;
    foreach (var actionValue in actionValues)
    {
        sumOfExponentials += Math.Exp(actionValueFunction.GetValue(state, actionValue.Key) - maxValue);
    }

    return Math.Exp(requestedValue - maxValue) / sumOfExponentials;
}
/// <summary>
/// Updates the stored value of (<paramref name="state"/>, <paramref name="action"/>) from one
/// observed transition, using the policy-weighted expectation of the next state's action
/// values as the bootstrap term (the Expected SARSA form of the update).
/// </summary>
/// <remarks>
/// newValue = Q(s, a) + stepSize * (reward + discount * sum over a' of pi(a' | s') * Q(s', a') - Q(s, a)).
/// The result of ActionValueFunction.UpdateValue replaces the current ActionValueFunction.
/// </remarks>
/// <param name="state">State in which the action was taken.</param>
/// <param name="nextState">State reached after taking the action.</param>
/// <param name="action">Action that was taken.</param>
/// <param name="reward">Reward observed for the transition.</param>
private void UpdateActionValueFunction(State state, State nextState, AIAction action, double reward)
{
    // Looked up once and reused for both the base value and the error term.
    double currentValue = ActionValueFunction.GetValue(state, action);

    // Expected value of the next state under the policy: sum of pi(a | nextState) * Q(nextState, a).
    double expectedNextValue = 0;
    foreach (var actionValue in ActionValueFunction.GetActionValues(nextState))
    {
        double probability = policy.GetProbabilityOfAction(actionValue.Key, nextState, ActionValueFunction);
        expectedNextValue += probability * ActionValueFunction.GetValue(nextState, actionValue.Key);
    }

    double target = reward + discount * expectedNextValue;
    double newValue = currentValue + stepSize * (target - currentValue);
    ActionValueFunction = ActionValueFunction.UpdateValue(state, action, newValue);
}
/// <summary>
/// Returns the probability assigned to <paramref name="action"/> in <paramref name="state"/>.
/// Currently this is the softmax of the action values: exp(Q(state, action)) divided by the sum
/// of exp(Q(state, a)) over every action listed for the state.
/// </summary>
/// <remarks>
/// All values are shifted by their maximum before being exponentiated. The quotient is
/// mathematically unchanged, but Math.Exp can no longer overflow to infinity, which would
/// otherwise turn the result into NaN for large action values.
/// </remarks>
/// <param name="action">Action whose probability is requested.</param>
/// <param name="state">State in which the action would be taken.</param>
/// <param name="actionValueFunction">Source of the action values.</param>
/// <returns>
/// The softmax weight of the action. When no actions are listed for the state the division is
/// by zero and the result is positive infinity.
/// </returns>
public double GetProbabilityOfAction(AIAction action, State state, ActionValueFunction actionValueFunction)
{
    // TODO: draft of a probability based on the `greedy` parameter, kept for reference.
    /*
     * int highestCount = ListUtils.GetAllHighest(actionValueFunction.GetActionValues(state), (x, y) => (x.Value - y.Value)).Count;
     * int allCount = actionValueFunction.GetActionValues(state).Count;
     * if (actionValueFunction.GetValue(state, action) == ListUtils.GetHighest(actionValueFunction.GetActionValues(state), (x, y) => x.Value > y.Value).Value)
     *     return greedy / highestCount;
     * return (1 - greedy) / allCount ;
     */

    var actionValues = actionValueFunction.GetActionValues(state);
    double requestedValue = actionValueFunction.GetValue(state, action);

    // Largest exponent involved; seeded with the requested action's own value so that the
    // shift is well defined even when the action list is empty.
    double maxValue = requestedValue;
    foreach (var actionValue in actionValues)
    {
        maxValue = Math.Max(maxValue, actionValueFunction.GetValue(state, actionValue.Key));
    }

    // Every shifted exponent is <= 0, so each term lies in [0, 1] and cannot overflow.
    double sumOfExponentials = 0;
    foreach (var actionValue in actionValues)
    {
        sumOfExponentials += Math.Exp(actionValueFunction.GetValue(state, actionValue.Key) - maxValue);
    }

    return Math.Exp(requestedValue - maxValue) / sumOfExponentials;
}
/// <summary>
/// Builds a list that pairs every action listed for <paramref name="state"/> with the
/// probability GetProbabilityOfAction reports for it, in the same order as the action values.
/// </summary>
/// <param name="state">State whose actions are evaluated.</param>
/// <param name="actionValueFunction">Source of the action values for the state.</param>
/// <returns>A new list of (action, probability) pairs.</returns>
private List<KeyValuePair<AIAction, double>> GetActionProbabilities(State state, ActionValueFunction actionValueFunction)
{
    var actionValues = actionValueFunction.GetActionValues(state);
    var probabilities = new List<KeyValuePair<AIAction, double>>(actionValues.Count);

    foreach (var entry in actionValues)
    {
        double probability = GetProbabilityOfAction(entry.Key, state, actionValueFunction);
        probabilities.Add(new KeyValuePair<AIAction, double>(entry.Key, probability));
    }

    return probabilities;
}
/// <summary>
/// Replaces ActionValueFunction with the result of calling InitActionValues on it for
/// <paramref name="currentState"/>.
/// </summary>
/// <param name="currentState">
/// State passed to InitActionValues. NOTE(review): presumably this registers initial action
/// values for the state - confirm against InitActionValues.
/// </param>
private void InitState(State currentState)
{
    var initialised = ActionValueFunction.InitActionValues(currentState);
    ActionValueFunction = initialised;
}