/// <summary>
/// Builds the linear ego/allo learner. Optional <paramref name="parameters"/> are positional:
/// [0] bool  fullPredictionMode,
/// [1] int   updateTerminationStepCount,
/// [2] bool  use a model-based (true) or model-free (false) allocentric learner.
/// Also constructs the egocentric model-free learner, one prediction model per
/// egocentric channel, and a per-action visited-state tally.
/// </summary>
/// <param name="StateComparer">Equality semantics for state vectors (int[] has no structural equality by default).</param>
/// <param name="ActionComparer">Equality semantics for action vectors.</param>
/// <param name="AvailableActions">The discrete action set.</param>
/// <param name="StartState">Initial state handed to the sub-learners.</param>
/// <param name="parameters">Optional positional configuration; see summary.</param>
public LinearEgoAlloValue(IEqualityComparer<int[]> StateComparer, IEqualityComparer<int[]> ActionComparer, List<int[]> AvailableActions, int[] StartState, params object[] parameters)
    : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters) {
    // Positional optional parameters: each is applied only when supplied.
    if (parameters.Length > 0) {
        fullPredictionMode = (bool)parameters[0];
    }
    if (parameters.Length > 1) {
        updateTerminationStepCount = (int)parameters[1];
    }
    if (parameters.Length > 2) {
        // parameters[2] selects the allocentric learner flavor.
        // NOTE(review): both variants get a null start state here — presumably the
        // allo learner is (re)seeded later; confirm against the callers.
        if ((bool)parameters[2]) {
            alloLearner = new ModelBasedValue<int[], int[]>(StateComparer, ActionComparer, AvailableActions, null, true);
        } else {
            alloLearner = new ModelFreeValue<int[], int[]>(StateComparer, ActionComparer, AvailableActions, null, true);
        }
    }

    stateComparer = StateComparer;
    actionComparer = ActionComparer;
    availableActions = AvailableActions;

    //alloModel = new ModelFreeValue<int[], int[]>(StateComparer, ActionComparer, availableActions, null, true);
    //{
    //    defaultQ = 10.3
    //};

    // Egocentric learner: model-free with a high learning rate.
    egoLearner = new ModelFreeValue<int[], int[]>(StateComparer, actionComparer, availableActions, StartState) {
        alpha = 0.9
    };

    // Three one-step (gamma = 0) prediction models — TODO confirm what the three
    // egocentric channels correspond to; the count is fixed by design, not by the action set.
    egoPredictionModels = new ModelBasedValue<int[], int[]>[3];
    for (int i = 0; i < egoPredictionModels.Length; i++) {
        egoPredictionModels[i] = new ModelBasedValue<int[], int[]>(StateComparer, ActionComparer, availableActions, StartState) {
            gamma = 0
        };
    }

    // BUG FIX: the array was previously hard-coded to length 4 while the loop runs
    // over availableActions.Count — more than 4 actions threw IndexOutOfRangeException,
    // fewer left dead slots. Size it by the actual action count.
    visitedStates = new Dictionary<int[], int>[availableActions.Count];
    for (int i = 0; i < availableActions.Count; i++) {
        visitedStates[i] = new Dictionary<int[], int>(StateComparer);
    }
}
/// <summary>
/// Builds the ego/allo learner pair plus a neural network trained by resilient
/// backpropagation. Only parameters[0] (bool fullPredictionMode) is consumed here;
/// any remaining entries are passed through to the base class untouched.
/// </summary>
/// <param name="StateComparer">Equality semantics for state vectors.</param>
/// <param name="ActionComparer">Equality semantics for action vectors.</param>
/// <param name="AvailableActions">The discrete action set.</param>
/// <param name="StartState">Initial state handed to the sub-models.</param>
/// <param name="parameters">Optional positional configuration; see summary.</param>
public EgoAlloValue(IEqualityComparer<int[]> StateComparer, IEqualityComparer<int[]> ActionComparer, List<int[]> AvailableActions, int[] StartState, params object[] parameters)
    : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters) {
    if (parameters.Length > 0) {
        fullPredictionMode = (bool)parameters[0];
    }

    // Keep local references to the comparers and action set for later use.
    stateComparer = StateComparer;
    actionComparer = ActionComparer;
    availableActions = AvailableActions;

    // Allocentric model: model-based, optimistic initialization via defaultQ.
    alloModel = new ModelBasedValue<int[], int[]>(StateComparer, ActionComparer, availableActions, StartState, true) {
        defaultQ = 10.3
    };

    // Egocentric model: model-free with a high learning rate.
    egoModel = new ModelFreeValue<int[], int[]>(StateComparer, actionComparer, availableActions, StartState) {
        alpha = 0.9
    };

    // 10-input, 10-hidden, 3-output network with a bipolar sigmoid (alpha = 2),
    // trained by parallel resilient backpropagation.
    network = new ActivationNetwork(new BipolarSigmoidFunction(2), 10, 10, 3);
    teacher = new ParallelResilientBackpropagationLearning(network);
}