Example #1
0
        /// <summary>
        /// Builds the multi-resolution value hierarchy: one model-based learner,
        /// one cached transition, and one subgoal list per abstraction level.
        /// </summary>
        /// <param name="StateComparer">Equality comparer for int[] states.</param>
        /// <param name="ActionComparer">Equality comparer for actions.</param>
        /// <param name="AvailableActions">Actions available to every level's model.</param>
        /// <param name="StartState">Initial state handed to each level's model.</param>
        /// <param name="numLevels_minLevel">
        /// [0] = number of levels (required, int); [1] = minimum level (optional int, defaults to 0).
        /// </param>
        public MultiResValue(IEqualityComparer <int[]> StateComparer, IEqualityComparer <actionType> ActionComparer, List <actionType> AvailableActions, int[] StartState, params object[] numLevels_minLevel)
            : base(StateComparer, ActionComparer, AvailableActions, StartState, numLevels_minLevel)
        {
            // Hoist the repeated cast of the level count out of the three array allocations.
            int numLevels = (int)numLevels_minLevel[0];

            minLevel    = numLevels_minLevel.Length >= 2 ? (int)numLevels_minLevel[1] : 0;
            models      = new ModelBasedValue <int[], actionType> [numLevels];
            transitions = new StateTransition <int[], actionType> [numLevels];
            subgoals    = new List <Goal <int[], actionType> > [numLevels];

            availableActions = AvailableActions;
            stateComparer    = StateComparer;
            actionComparer   = ActionComparer;
            stateTree        = new intStateTree();

            pathFinder = new PathFinder <int[], actionType>(stateComparer);

            for (int i = 0; i < models.Length; i++)
            {
                models[i] = new ModelBasedValue <int[], actionType>(stateComparer, actionComparer, availableActions, StartState)
                {
                    // BUG FIX: the original nested ternary
                    // (i == 0 ? (minLevel > 0 ? 20 : 20) : 20) produced 20 on every
                    // branch — collapsed to the constant it always evaluated to.
                    maxUpdates = 20,
                    // Level 0 (the ground model) gets optimistic initial values and a
                    // longer horizon than the abstract levels.
                    defaultQ   = i == 0 ? 15 : 0,
                    gamma      = i == 0 ? 0.9 : 0.4
                };

                // Placeholder transition with -inf value, overwritten once a real one is observed.
                transitions[i] = new StateTransition <int[], actionType>(null, default(actionType), double.NegativeInfinity, null);
                subgoals[i]    = new List <Goal <int[], actionType> >();
            }

            currentGoal = new Goal <int[], actionType>(0, null, default(actionType), null, 0, stateComparer, actionComparer);
        }
Example #2
0
        /// <summary>
        /// For each stored model, estimates P(newState | oldState, action) from that
        /// model's transition-count table.
        /// </summary>
        /// <param name="StaTran">Transition whose probability is queried (oldState, action, newState are read).</param>
        /// <param name="models">Models keyed 0..Count-1; each value is assumed to be a ModelBasedValue — confirm against callers.</param>
        /// <returns>One probability per model, in key order; 0 for a (state, action) pair with no observations.</returns>
        public List <double> t_TableValues(StateTransition <stateType, actionType> StaTran,
                                           Dictionary <int, ActionValue <stateType, actionType> > models)
        {
            List <double> returnValues = new List <double>();

            for (int key = 0; key < models.Count; key++)
            {
                // BUG FIX: the original line cast the member group
                // `models[key].T.GetStateValueTable` to ModelBasedValue<...>, which is
                // invalid — cast the stored ActionValue itself, then query its table.
                ModelBasedValue <stateType, actionType> model    = (ModelBasedValue <stateType, actionType>)models[key];
                Dictionary <stateType, int>             s2Counts = model.T.GetStateValueTable(StaTran.oldState, StaTran.action);

                // Count of observed transitions into the successor state (single lookup
                // via TryGetValue instead of ContainsKey + indexer).
                int observed;
                s2Counts.TryGetValue(StaTran.newState, out observed);

                double total = s2Counts.Values.Sum();

                // No observations for this (state, action) pair => probability 0.
                returnValues.Add(total == 0 ? 0 : observed / total);
            }
            return(returnValues);
        }
Example #3
0
        /// <summary>
        /// Creates a least-squares value learner backed by a single reference model.
        /// </summary>
        /// <param name="StateComparer">Equality comparer for states.</param>
        /// <param name="ActionComparer">Equality comparer for actions.</param>
        /// <param name="AvailableActions">Actions the learner may choose from.</param>
        /// <param name="StartState">Initial state handed to the reference model.</param>
        /// <param name="parameters">Extra arguments forwarded to the base class.</param>
        public LSValue(IEqualityComparer <stateType> StateComparer, IEqualityComparer <actionType> ActionComparer, List <actionType> AvailableActions, stateType StartState, params object[] parameters)
            : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters)
        {
            stateComparer    = StateComparer;
            actionComparer   = ActionComparer;
            availableActions = AvailableActions;

            // Reference model, configured with a 1000-sweep update budget.
            trueModel = new ModelBasedValue <stateType, actionType>(StateComparer, ActionComparer, availableActions, StartState)
            {
                maxUpdates = 1000
            };
        }
Example #4
0
        /// <summary>
        /// Builds the linear egocentric/allocentric learner. Optional parameters:
        /// [0] bool fullPredictionMode; [1] int updateTerminationStepCount;
        /// [2] bool — true selects a model-based allocentric learner, false model-free.
        /// </summary>
        /// <param name="StateComparer">Equality comparer for int[] states.</param>
        /// <param name="ActionComparer">Equality comparer for int[] actions.</param>
        /// <param name="AvailableActions">Actions the learner may choose from.</param>
        /// <param name="StartState">Initial state handed to the learners.</param>
        /// <param name="parameters">Optional configuration values (see summary).</param>
        public LinearEgoAlloValue(IEqualityComparer <int[]> StateComparer, IEqualityComparer <int[]> ActionComparer, List <int[]> AvailableActions, int[] StartState, params object[] parameters)
            : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters)
        {
            if (parameters.Length > 0)
            {
                fullPredictionMode = (bool)parameters[0];
            }
            if (parameters.Length > 1)
            {
                updateTerminationStepCount = (int)parameters[1];
            }
            if (parameters.Length > 2)
            {
                // parameters[2] picks the allocentric learner flavor; when fewer than
                // three parameters are supplied, alloLearner is left unassigned.
                if ((bool)parameters[2])
                {
                    alloLearner = new ModelBasedValue <int[], int[]>(StateComparer, ActionComparer, AvailableActions, null, true);
                }
                else
                {
                    alloLearner = new ModelFreeValue <int[], int[]>(StateComparer, ActionComparer, AvailableActions, null, true);
                }
            }

            stateComparer    = StateComparer;
            actionComparer   = ActionComparer;
            availableActions = AvailableActions;

            egoLearner = new ModelFreeValue <int[], int[]>(StateComparer, actionComparer, availableActions, StartState)
            {
                alpha = 0.9
            };

            // Three prediction models, each configured with gamma = 0.
            egoPredictionModels = new ModelBasedValue <int[], int[]> [3];
            for (int i = 0; i < egoPredictionModels.Length; i++)
            {
                egoPredictionModels[i] = new ModelBasedValue <int[], int[]>(StateComparer, ActionComparer, availableActions, StartState)
                {
                    gamma = 0
                };
            }

            // BUG FIX: the array was hard-coded to length 4, but the loop below runs
            // availableActions.Count times — IndexOutOfRangeException for more than
            // four actions, null slots for fewer. Size it from the action set.
            visitedStates = new Dictionary <int[], int> [availableActions.Count];
            for (int i = 0; i < availableActions.Count; i++)
            {
                visitedStates[i] = new Dictionary <int[], int>(StateComparer);
            }
        }
Example #5
0
        /// <summary>
        /// Seeds the context-change learner with one model-based learner registered
        /// under key 0, and caches the constructor arguments — presumably so further
        /// models can be built with the same configuration (confirm against callers).
        /// </summary>
        /// <param name="StateComparer">Equality comparer for states.</param>
        /// <param name="ActionComparer">Equality comparer for actions.</param>
        /// <param name="AvailableActions">Actions the learner may choose from.</param>
        /// <param name="StartState">Initial state handed to the first model.</param>
        /// <param name="parameters">Extra arguments forwarded to the model and base class.</param>
        public ContextChangeValueGeneric(IEqualityComparer <stateType> StateComparer, IEqualityComparer <actionType> ActionComparer, List <actionType> AvailableActions, stateType StartState, params object[] parameters) : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters)
        {
            ModelBasedValue <stateType, actionType> initialModel = new ModelBasedValue <stateType, actionType>(StateComparer, ActionComparer, AvailableActions, StartState, parameters);

            // Register the seed model under key 0 and make it active.
            models = new Dictionary <int, ActionValue <stateType, actionType> >
            {
                { 0, initialModel }
            };
            activeModelKey = 0;
            modelType      = initialModel.GetType();

            // Cache the constructor inputs.
            SC      = StateComparer;
            AC      = ActionComparer;
            AA      = AvailableActions;
            SS      = StartState;
            parames = parameters;
        }
Example #6
0
        /// <summary>
        /// Constructs the egocentric/allocentric learner pair and the neural network
        /// used alongside them. parameters[0] (optional bool) toggles full-prediction mode.
        /// </summary>
        /// <param name="StateComparer">Equality comparer for int[] states.</param>
        /// <param name="ActionComparer">Equality comparer for int[] actions.</param>
        /// <param name="AvailableActions">Actions the learner may choose from.</param>
        /// <param name="StartState">Initial state handed to both learners.</param>
        /// <param name="parameters">Optional configuration values (see summary).</param>
        public EgoAlloValue(IEqualityComparer <int[]> StateComparer, IEqualityComparer <int[]> ActionComparer, List <int[]> AvailableActions, int[] StartState, params object[] parameters)
            : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters)
        {
            if (parameters.Length > 0)
            {
                fullPredictionMode = (bool)parameters[0];
            }

            stateComparer    = StateComparer;
            actionComparer   = ActionComparer;
            availableActions = AvailableActions;

            // Allocentric model: optimistic default value of 10.3.
            alloModel = new ModelBasedValue <int[], int[]>(StateComparer, ActionComparer, availableActions, StartState, true);
            alloModel.defaultQ = 10.3;

            // Egocentric model: learning rate 0.9.
            egoModel = new ModelFreeValue <int[], int[]>(StateComparer, actionComparer, availableActions, StartState);
            egoModel.alpha = 0.9;

            // Bipolar-sigmoid activation network (10 inputs, layers of 10 and 3 neurons)
            // trained with resilient backpropagation.
            network = new ActivationNetwork(new BipolarSigmoidFunction(2), 10, 10, 3);
            teacher = new ParallelResilientBackpropagationLearning(network);
        }