예제 #1
0
        /// <summary>
        /// Creates a LinearEgoAlloValue learner together with the sub-learners it owns:
        /// one egocentric model-free learner, three egocentric prediction models, an
        /// optional allocentric learner, and one visited-state table per action slot.
        /// </summary>
        /// <param name="StateComparer">Equality comparer for states; forwarded to the base class and to every sub-learner, and used to key the visited-state dictionaries.</param>
        /// <param name="ActionComparer">Equality comparer for actions; forwarded to the base class and to every sub-learner.</param>
        /// <param name="AvailableActions">Actions the learner may choose from; forwarded to the base class and to every sub-learner.</param>
        /// <param name="StartState">Initial state handed to the base class, the egocentric learner and the prediction models.</param>
        /// <param name="parameters">
        /// Optional positional settings:
        /// [0] bool stored in fullPredictionMode;
        /// [1] int stored in updateTerminationStepCount;
        /// [2] bool selecting the allocentric learner type (true: ModelBasedValue, false: ModelFreeValue).
        /// When [2] is omitted this constructor does not assign alloLearner.
        /// </param>
        public LinearEgoAlloValue(IEqualityComparer<int[]> StateComparer, IEqualityComparer<int[]> ActionComparer, List<int[]> AvailableActions, int[] StartState, params object[] parameters)
            : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters)
        {
            // Positional optional settings; each one is read only when the caller supplied it.
            if (parameters.Length > 0)
            {
                fullPredictionMode = (bool)parameters[0];
            }
            if (parameters.Length > 1)
            {
                updateTerminationStepCount = (int)parameters[1];
            }
            if (parameters.Length > 2)
            {
                // The allocentric learner is created without a start state (null) and with
                // one extra 'true' argument passed through to its constructor.
                if ((bool)parameters[2])
                {
                    alloLearner = new ModelBasedValue<int[], int[]>(StateComparer, ActionComparer, AvailableActions, null, true);
                }
                else
                {
                    alloLearner = new ModelFreeValue<int[], int[]>(StateComparer, ActionComparer, AvailableActions, null, true);
                }
            }

            stateComparer    = StateComparer;
            actionComparer   = ActionComparer;
            availableActions = AvailableActions;

            //alloModel = new ModelFreeValue<int[], int[]>(StateComparer, ActionComparer, availableActions, null, true);
            //{
            //    defaultQ = 10.3
            //};
            egoLearner = new ModelFreeValue<int[], int[]>(StateComparer, actionComparer, availableActions, StartState)
            {
                alpha = 0.9
            };

            // Three prediction models, each created with gamma = 0.
            egoPredictionModels = new ModelBasedValue<int[], int[]>[3];
            for (int i = 0; i < egoPredictionModels.Length; i++)
            {
                egoPredictionModels[i] = new ModelBasedValue<int[], int[]>(StateComparer, ActionComparer, availableActions, StartState)
                {
                    gamma = 0
                };
            }

            // One visited-state table per action. The array used to be fixed at 4 slots while
            // the fill loop ran to availableActions.Count: more than 4 actions threw
            // IndexOutOfRangeException and fewer than 4 left null entries behind.
            // Keep the historical minimum of 4 slots, grow when there are more actions,
            // and initialise every slot so no index in the array is ever null.
            int visitedSlotCount = availableActions.Count > 4 ? availableActions.Count : 4;
            visitedStates = new Dictionary<int[], int>[visitedSlotCount];
            for (int i = 0; i < visitedStates.Length; i++)
            {
                visitedStates[i] = new Dictionary<int[], int>(StateComparer);
            }
        }
예제 #2
0
        /// <summary>
        /// Creates an EgoAlloValue learner: stores the comparers and action list, builds the
        /// allocentric (model-based) and egocentric (model-free) sub-learners, and sets up
        /// the activation network with its resilient-backpropagation teacher.
        /// </summary>
        /// <param name="StateComparer">Equality comparer for states; forwarded to the base class and both sub-learners.</param>
        /// <param name="ActionComparer">Equality comparer for actions; forwarded to the base class and both sub-learners.</param>
        /// <param name="AvailableActions">Actions the learner may choose from.</param>
        /// <param name="StartState">Initial state handed to the base class and both sub-learners.</param>
        /// <param name="parameters">Optional positional settings; [0], when present, is a bool stored in fullPredictionMode.</param>
        public EgoAlloValue(IEqualityComparer<int[]> StateComparer, IEqualityComparer<int[]> ActionComparer, List<int[]> AvailableActions, int[] StartState, params object[] parameters)
            : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters)
        {
            // The prediction-mode flag is optional and only read when supplied.
            bool hasPredictionFlag = parameters.Length >= 1;
            if (hasPredictionFlag)
            {
                fullPredictionMode = (bool)parameters[0];
            }

            stateComparer = StateComparer;
            actionComparer = ActionComparer;
            availableActions = AvailableActions;

            // Allocentric learner: model-based, created with an extra 'true' argument
            // and a default Q of 10.3.
            var allocentric = new ModelBasedValue<int[], int[]>(StateComparer, ActionComparer, availableActions, StartState, true);
            allocentric.defaultQ = 10.3;
            alloModel = allocentric;

            // Egocentric learner: model-free with alpha set to 0.9.
            var egocentric = new ModelFreeValue<int[], int[]>(StateComparer, actionComparer, availableActions, StartState);
            egocentric.alpha = 0.9;
            egoModel = egocentric;

            // Network built from a bipolar sigmoid (argument 2) and the sizes 10, 10, 3,
            // trained by parallel resilient backpropagation.
            var activation = new BipolarSigmoidFunction(2);
            network = new ActivationNetwork(activation, 10, 10, 3);
            teacher = new ParallelResilientBackpropagationLearning(network);
        }