public MultiResValue(IEqualityComparer<int[]> StateComparer, IEqualityComparer<actionType> ActionComparer, List<actionType> AvailableActions, int[] StartState, params object[] numLevels_minLevel)
    : base(StateComparer, ActionComparer, AvailableActions, StartState, numLevels_minLevel)
{
    // Optional second parameter selects the lowest resolution level to use; defaults to 0
    minLevel = numLevels_minLevel.Length >= 2 ? (int)numLevels_minLevel[1] : 0;

    // One model, one cached transition, and one subgoal list per resolution level
    models = new ModelBasedValue<int[], actionType>[(int)numLevels_minLevel[0]];
    transitions = new StateTransition<int[], actionType>[(int)numLevels_minLevel[0]];
    subgoals = new List<Goal<int[], actionType>>[(int)numLevels_minLevel[0]];

    availableActions = AvailableActions;
    stateComparer = StateComparer;
    actionComparer = ActionComparer;

    stateTree = new intStateTree();
    //stateTree = new taxiStateTree();
    //stateTree = new learnedStateTree();

    pathFinder = new PathFinder<int[], actionType>(stateComparer);

    for (int i = 0; i < models.Length; i++)
    {
        models[i] = new ModelBasedValue<int[], actionType>(stateComparer, actionComparer, availableActions, StartState)
        {
            maxUpdates = 20,                // every level currently uses the same sweep budget
            defaultQ = i == 0 ? 15 : 0,     // optimistic initialization only at the finest level
            gamma = i == 0 ? 0.9 : 0.4
        };
        transitions[i] = new StateTransition<int[], actionType>(null, default(actionType), double.NegativeInfinity, null);
        subgoals[i] = new List<Goal<int[], actionType>>();
    }

    currentGoal = new Goal<int[], actionType>(0, null, default(actionType), null, 0, stateComparer, actionComparer);
}
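// Hypothetical construction sketch (not from the original source): it assumes the class is
// generic over the action type (here int), that IntArrayComparer is a project-supplied
// IEqualityComparer<int[]>, and the trailing values (3 levels, minLevel 0) are illustrative only.
var stateComparer = new IntArrayComparer();          // assumed element-wise int[] comparer
var actionComparer = EqualityComparer<int>.Default;  // actions encoded as plain ints here
var actions = new List<int> { 0, 1, 2, 3 };
int[] start = { 0, 0 };

// The params array carries the number of resolution levels and, optionally, minLevel.
var learner = new MultiResValue<int>(stateComparer, actionComparer, actions, start, 3, 0);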
// Given an observed transition, inspects the T-table of every stored model and returns,
// for each model, the empirical probability of that transition: the count of
// oldState/action -> newState divided by the total count recorded for oldState/action.
public List<double> t_TableValues(StateTransition<stateType, actionType> StaTran, Dictionary<int, ActionValue<stateType, actionType>> models)
{
    List<double> returnValues = new List<double>();
    for (int key = 0; key < models.Count; key++)
    {
        // Each stored ActionValue is expected to be a ModelBasedValue with a T-table
        ModelBasedValue<stateType, actionType> model = (ModelBasedValue<stateType, actionType>)models[key];
        Dictionary<stateType, int> s2Counts = model.T.GetStateValueTable(StaTran.oldState, StaTran.action);

        double thisS2Counts = 0;
        if (s2Counts.ContainsKey(StaTran.newState))
        {
            thisS2Counts = (double)s2Counts[StaTran.newState];
        }

        double total = (double)s2Counts.Values.Sum();
        // Guard against division by zero when the state-action pair has never been visited
        returnValues.Add(total == 0 ? 0 : thisS2Counts / total);
    }
    return returnValues;
}
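// Worked example of the probability computation above (the counts are hypothetical,
// not taken from the original source):
//   model 0: s2Counts for (oldState, action) = { newState: 3, someOtherState: 1 }
//            -> 3 / (3 + 1) = 0.75
//   model 1: s2Counts for (oldState, action) = { }   (pair never visited)
//            -> total == 0, so the method returns 0 for this model
// t_TableValues would then return the list { 0.75, 0.0 }.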
public LSValue(IEqualityComparer<stateType> StateComparer, IEqualityComparer<actionType> ActionComparer, List<actionType> AvailableActions, stateType StartState, params object[] parameters)
    : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters)
{
    stateComparer = StateComparer;
    actionComparer = ActionComparer;
    availableActions = AvailableActions;

    // Model-based learner with a large value-iteration sweep budget per update
    trueModel = new ModelBasedValue<stateType, actionType>(StateComparer, ActionComparer, availableActions, StartState);
    trueModel.maxUpdates = 1000;
}
public LinearEgoAlloValue(IEqualityComparer<int[]> StateComparer, IEqualityComparer<int[]> ActionComparer, List<int[]> AvailableActions, int[] StartState, params object[] parameters)
    : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters)
{
    // Optional parameters: [0] full-prediction mode flag, [1] termination step count,
    // [2] whether the allocentric learner is model-based (true) or model-free (false)
    if (parameters.Length > 0)
    {
        fullPredictionMode = (bool)parameters[0];
    }
    if (parameters.Length > 1)
    {
        updateTerminationStepCount = (int)parameters[1];
    }
    if (parameters.Length > 2)
    {
        if ((bool)parameters[2])
        {
            alloLearner = new ModelBasedValue<int[], int[]>(StateComparer, ActionComparer, AvailableActions, null, true);
        }
        else
        {
            alloLearner = new ModelFreeValue<int[], int[]>(StateComparer, ActionComparer, AvailableActions, null, true);
        }
    }

    stateComparer = StateComparer;
    actionComparer = ActionComparer;
    availableActions = AvailableActions;

    //alloModel = new ModelFreeValue<int[], int[]>(StateComparer, ActionComparer, availableActions, null, true);
    //{
    //    defaultQ = 10.3
    //};

    // Egocentric learner is model-free with a high learning rate
    egoLearner = new ModelFreeValue<int[], int[]>(StateComparer, actionComparer, availableActions, StartState)
    {
        alpha = 0.9
    };

    // Three one-step (gamma = 0) egocentric prediction models
    egoPredictionModels = new ModelBasedValue<int[], int[]>[3];
    for (int i = 0; i < egoPredictionModels.Length; i++)
    {
        egoPredictionModels[i] = new ModelBasedValue<int[], int[]>(StateComparer, ActionComparer, availableActions, StartState)
        {
            gamma = 0
        };
    }

    // Track visited states per action; size the array to the action set (previously a
    // hard-coded 4) so the loop below cannot index past the end of the array
    visitedStates = new Dictionary<int[], int>[availableActions.Count];
    for (int i = 0; i < availableActions.Count; i++)
    {
        visitedStates[i] = new Dictionary<int[], int>(StateComparer);
    }
}
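// Hypothetical construction sketch (not from the original source): IntArrayComparer is an
// assumed IEqualityComparer<int[]>, the action encoding is illustrative, and the trailing
// values only show how the optional params array is laid out.
var comparer = new IntArrayComparer();
var actions = new List<int[]> { new[] { 0 }, new[] { 1 }, new[] { 2 }, new[] { 3 } };
int[] start = { 0, 0 };

// parameters: fullPredictionMode = true, updateTerminationStepCount = 50,
// model-based allocentric learner = true
var agent = new LinearEgoAlloValue(comparer, comparer, actions, start, true, 50, true);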
public ContextChangeValueGeneric(IEqualityComparer<stateType> StateComparer, IEqualityComparer<actionType> ActionComparer, List<actionType> AvailableActions, stateType StartState, params object[] parameters)
    : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters)
{
    // Start with a single model-based learner stored as model 0 and mark it active
    ModelBasedValue<stateType, actionType> first = new ModelBasedValue<stateType, actionType>(StateComparer, ActionComparer, AvailableActions, StartState, parameters);
    models = new Dictionary<int, ActionValue<stateType, actionType>>();
    models.Add(0, first);
    activeModelKey = 0;
    modelType = first.GetType();

    // Keep the constructor arguments so additional models can be created later
    SC = StateComparer;
    AC = ActionComparer;
    AA = AvailableActions;
    SS = StartState;
    parames = parameters;
}
public EgoAlloValue(IEqualityComparer<int[]> StateComparer, IEqualityComparer<int[]> ActionComparer, List<int[]> AvailableActions, int[] StartState, params object[] parameters)
    : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters)
{
    // Optional parameter [0] switches on full-prediction mode
    if (parameters.Length > 0)
    {
        fullPredictionMode = (bool)parameters[0];
    }

    stateComparer = StateComparer;
    actionComparer = ActionComparer;
    availableActions = AvailableActions;

    // Allocentric learner: model-based with optimistic initial Q values
    alloModel = new ModelBasedValue<int[], int[]>(StateComparer, ActionComparer, availableActions, StartState, true)
    {
        defaultQ = 10.3
    };

    // Egocentric learner: model-free with a high learning rate
    egoModel = new ModelFreeValue<int[], int[]>(StateComparer, actionComparer, availableActions, StartState)
    {
        alpha = 0.9
    };

    // Neural network with 10 inputs, one hidden layer of 10 units, and 3 outputs,
    // trained with parallel resilient backpropagation
    network = new ActivationNetwork(new BipolarSigmoidFunction(2), 10, 10, 3);
    teacher = new ParallelResilientBackpropagationLearning(network);
}
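// Hypothetical training sketch for the network above, assuming the Accord.Neuro library
// (ActivationNetwork / ParallelResilientBackpropagationLearning) is referenced; the
// sample vectors are placeholders with 10 inputs and 3 outputs each, not real data.
double[][] inputs  = { new double[10], new double[10] };
double[][] outputs = { new double[] { 1, 0, 0 }, new double[] { 0, 1, 0 } };

// One pass over the samples; RunEpoch returns the summed squared error for the epoch.
double error = teacher.RunEpoch(inputs, outputs);

// After training, Compute maps a 10-element input vector to the 3 network outputs.
double[] prediction = network.Compute(new double[10]);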