Example #1
        /// <summary>
        /// Initializes a new instance of the <see cref="Sarsa"/> class.
        /// </summary>
        ///
        /// <param name="states">Amount of possible states.</param>
        /// <param name="actions">Amount of possible actions.</param>
        /// <param name="explorationPolicy">Exploration policy.</param>
        /// <param name="randomize">Randomize action estimates or not.</param>
        ///
        /// <remarks>The <b>randomize</b> parameter specifies if initial action estimates should be randomized
        /// with small values or not. Randomization of action values may be useful, when greedy exploration
        /// policies are used. In this case randomization ensures that actions of the same type are not chosen always.</remarks>
        ///
        public Sarsa(int states, int actions, IExplorationPolicy explorationPolicy, bool randomize)
        {
            this.states            = states;
            this.actions           = actions;
            this.explorationPolicy = explorationPolicy;

            // create Q-array
            qvalues = new double[states][];
            for (int i = 0; i < states; i++)
            {
                qvalues[i] = new double[actions];
            }

            // do randomization
            if (randomize)
            {
                Random rand = new Random();

                for (int i = 0; i < states; i++)
                {
                    for (int j = 0; j < actions; j++)
                    {
                        qvalues[i][j] = rand.NextDouble() / 10;
                    }
                }
            }
        }
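A minimal usage sketch for this constructor, assuming AForge.NET-style types; EpsilonGreedyExploration stands in for any IExplorationPolicy implementation, and the 8x8 grid world (64 states, 4 actions) is illustrative:

        // hypothetical setup: epsilon-greedy exploration with 10% random moves;
        // randomize == true gives small random initial estimates so that greedy
        // selection does not always break ties toward the same action
        IExplorationPolicy policy = new EpsilonGreedyExploration(0.1);
        Sarsa sarsa = new Sarsa(64, 4, policy, true);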
Example #2
        public Sarsa(int stateCount, int actionCount, IExplorationPolicy explorationPolicy, double learningRate = 0.1,
                     double discountFactor = 0.9, bool initializeRandom = false)
        {
            StateCount        = stateCount;
            ActionCount       = actionCount;
            ExplorationPolicy = explorationPolicy;
            LearningRate      = learningRate;
            DiscountFactor    = discountFactor;

            _q = new double[stateCount][];
            for (var i = 0; i < stateCount; i++)
            {
                _q[i] = new double[actionCount];
            }

            if (initializeRandom)
            {
                var random = new Random();
                for (var i = 0; i < stateCount; i++)
                {
                    for (var j = 0; j < actionCount; j++)
                    {
                        _q[i][j] = random.NextDouble();
                    }
                }
            }
        }
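The LearningRate and DiscountFactor stored by this variant feed the standard on-policy SARSA update. A minimal sketch of such an update, assuming the _q layout from the constructor above; the method name and signature are illustrative, not taken from the original source:

        // illustrative SARSA update:
        // Q(s,a) <- Q(s,a) + lr * (r + gamma * Q(s',a') - Q(s,a)),
        // where a' is the action actually chosen next (on-policy, unlike Q-learning's max)
        public void UpdateState(int state, int action, double reward, int nextState, int nextAction)
        {
            var target = reward + DiscountFactor * _q[nextState][nextAction];
            _q[state][action] += LearningRate * (target - _q[state][action]);
        }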
Example #3
        /// <summary>
        /// Initializes a new instance of the <see cref="SARSA"/> class.
        /// </summary>
        /// <param name="numStates">The max number of states.</param>
        /// <param name="numActions">The max number of actions.</param>
        /// <param name="explorationPolicy">The exploration policy.</param>
        /// <param name="randomize">if set to <c>true</c> [randomize] the created quality array.</param>
        public SARSA(long numStates, int numActions, IExplorationPolicy explorationPolicy, bool randomize = true)
        {
            NumStates         = numStates;
            NumActions        = numActions;
            ExplorationPolicy = explorationPolicy;

            //quality array
            QualityValues = new double[NumStates][];
            for (long i = 0; i < NumStates; i++)
            {
                QualityValues[i] = new double[NumActions];
            }

            // do randomization on the initial values
            if (randomize)
            {
                Random rand = new Random();

                for (long i = 0; i < NumStates; i++)
                {
                    for (long j = 0; j < NumActions; j++)
                    {
                        QualityValues[i][j] = rand.NextDouble() / 10;
                    }
                }
            }
        }
Example #4
        public DynaQ(int stateCount, int actionCount, IExplorationPolicy explorationPolicy, int n = 5,
                     double learningRate = 0.1, double discountFactor = 0.9, bool initializeRandom = false)
        {
            StateCount        = stateCount;
            ActionCount       = actionCount;
            N                 = n;
            ExplorationPolicy = explorationPolicy;
            LearningRate      = learningRate;
            DiscountFactor    = discountFactor;

            _visited     = new Dictionary <int, HashSet <int> >();
            _random      = new Random();
            _q           = new double[stateCount][];
            _finalStates = new int[stateCount][];
            _rewards     = new double[stateCount][];
            for (var i = 0; i < stateCount; i++)
            {
                _q[i]           = new double[actionCount];
                _finalStates[i] = new int[actionCount];
                _rewards[i]     = new double[actionCount];
            }

            if (initializeRandom)
            {
                for (var i = 0; i < stateCount; i++)
                {
                    for (var j = 0; j < actionCount; j++)
                    {
                        _q[i][j] = _random.NextDouble();
                    }
                }
            }
        }
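The _visited, _finalStates, and _rewards fields form a deterministic one-step model, and N sets how many simulated updates follow each real one. A minimal sketch of that planning loop, assuming this field layout; the method is illustrative (not shown in the excerpt) and needs System.Linq for ToList and Max:

        // illustrative Dyna-Q planning: replay N transitions from the learned model
        private void Plan()
        {
            var seenStates = _visited.Keys.ToList();
            for (var k = 0; k < N && seenStates.Count > 0; k++)
            {
                var s = seenStates[_random.Next(seenStates.Count)];
                var seenActions = _visited[s].ToList();
                var a = seenActions[_random.Next(seenActions.Count)];

                // model prediction for (s, a), then a standard Q-learning backup
                var target = _rewards[s][a] + DiscountFactor * _q[_finalStates[s][a]].Max();
                _q[s][a] += LearningRate * (target - _q[s][a]);
            }
        }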
Example #5
        /// <summary>
        /// Initializes a new instance of the <see cref="TabuSearchExploration"/> class.
        /// </summary>
        /// 
        /// <param name="actions">Total actions count.</param>
        /// <param name="basePolicy">Base exploration policy.</param>
        /// 
        public TabuSearchExploration( int actions, IExplorationPolicy basePolicy )
        {
            this.actions = actions;
            this.basePolicy = basePolicy;

            // create tabu list
            tabuActions = new int[actions];
        }
Example #6
        /// <summary>
        /// Initializes a new instance of the <see cref="TabuSearchExploration"/> class.
        /// </summary>
        ///
        /// <param name="actions">Total actions count.</param>
        /// <param name="basePolicy">Base exploration policy.</param>
        ///
        public TabuSearchExploration(int actions, IExplorationPolicy basePolicy)
        {
            this.actions    = actions;
            this.basePolicy = basePolicy;

            // create tabu list
            tabuActions = new int[actions];
        }
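In AForge.NET the tabuActions array holds a per-action countdown: while an entry is positive the action is banned and the base policy chooses only among the rest. A minimal sketch of how such a tabu list is typically maintained; this is illustrative, not the library's exact code:

        // mark an action as tabu for the given number of upcoming decisions
        public void SetTabuAction(int action, int tabuTime)
        {
            tabuActions[action] = tabuTime;
        }

        // after each decision, decrement the countdowns so bans eventually expire
        private void TickTabuList()
        {
            for (var i = 0; i < actions; i++)
            {
                if (tabuActions[i] > 0)
                    tabuActions[i]--;
            }
        }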
Example #7
        /// <summary>
        /// Initializes a new instance of the <see cref="InfiniteQLearning"/> class.
        /// </summary>
        ///
        /// <param name="states">Amount of possible states.</param>
        /// <param name="actions">Amount of possible actions.</param>
        /// <param name="explorationPolicy">Exploration policy.</param>
        ///
        /// <remarks>The <b>randomize</b> parameter specifies if initial action estimates should be randomized
        /// with small values or not. Randomization of action values may be useful, when greedy exploration
        /// policies are used. In this case randomization ensures that actions of the same type are not chosen always.</remarks>
        ///
        public InfiniteQLearning(int states, int actions, IExplorationPolicy explorationPolicy)
        {
            this.states            = states;
            this.actions           = actions;
            this.explorationPolicy = explorationPolicy;

            // create Q-array
            qvalues = new Dictionary <BigInteger, double[]>();
        }
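Because qvalues is a Dictionary<BigInteger, double[]> rather than a pre-allocated array, per-state estimates can be created on first visit instead of reserving memory for every state up front. A minimal sketch of that lazy lookup; the helper name is illustrative:

        // illustrative: allocate a state's action estimates the first time it is seen
        private double[] GetActionEstimates(BigInteger state)
        {
            if (!qvalues.TryGetValue(state, out var estimates))
            {
                estimates = new double[actions];
                qvalues[state] = estimates;
            }
            return estimates;
        }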
Example #8
    public QLearning_FDGS(int actions, int goalX, int goalY, int[,] map, IExplorationPolicy explorationPolicy)
    {
        _mapWidth  = map.GetLength(1);
        _mapHeight = map.GetLength(0);

        _explorationPolicy = explorationPolicy;

        _rewardTable    = new double[_mapWidth * _mapHeight][];
        _qLearningTable = new double[_mapWidth * _mapHeight][];

        for (int i = 0; i < _qLearningTable.Length; i++)
        {
            _rewardTable[i]    = new double[actions];
            _qLearningTable[i] = new double[actions];
        }

        InitRewardMatrix();
        InitQLearningMatrix();

        for (int i = 1; i < (_mapWidth - 1); i++)
        {
            for (int j = 1; j < (_mapHeight - 1); j++)
            {
                var state = GetStateFromCoordinates(i, j);

                // set reward according to map (walls, no walls)
                _rewardTable[state][0] = (map[j + 1, i] == 0) && (_rewardTable[state][0] != 1) ? 0 : _rewardTable[state][0];
                _rewardTable[state][1] = (map[j, i + 1] == 0) && (_rewardTable[state][1] != 1) ? 0 : _rewardTable[state][1];
                _rewardTable[state][2] = (map[j - 1, i] == 0) && (_rewardTable[state][2] != 1) ? 0 : _rewardTable[state][2];
                _rewardTable[state][3] = (map[j, i - 1] == 0) && (_rewardTable[state][3] != 1) ? 0 : _rewardTable[state][3];

                // check for goal -> every state before reaching the goal will be rewarded
                if ((i == goalX) && (j == goalY))
                {
                    // below goal and moving up
                    _rewardTable[GetStateFromCoordinates(i, j - 1)][0] = map[j - 1, i] == -1 ? -1 : 1;
                    // right from goal and moving left
                    _rewardTable[GetStateFromCoordinates(i + 1, j)][3] = map[j, i + 1] == -1 ? -1 : 1;
                    // above goal and moving down
                    _rewardTable[GetStateFromCoordinates(i, j + 1)][2] = map[j + 1, i] == -1 ? -1 : 1;
                    // left from goal and moving right
                    _rewardTable[GetStateFromCoordinates(i - 1, j)][1] = map[j, i - 1] == -1 ? -1 : 1;
                }
            }
        }
    }
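The constructor leans on GetStateFromCoordinates to flatten 2-D map coordinates into a single state index. The original implementation is not shown; a row-major encoding consistent with the _mapWidth * _mapHeight table sizes above would be (an assumption, not the source's code):

    // illustrative row-major flattening: states run left-to-right, top-to-bottom
    private int GetStateFromCoordinates(int x, int y)
    {
        return y * _mapWidth + x;
    }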
Example #9
        /// <summary>
        /// Initializes a new instance of the <see cref="DoubleQLearning"/> class.
        /// </summary>
        ///
        /// <param name="states">Number of possible states.</param>
        /// <param name="actions">Number of possible actions.</param>
        /// <param name="explorationPolicy">Exploration policy.</param>
        /// <param name="randomize">Whether to randomize the initial action estimates.</param>
        ///
        /// <remarks>The <b>randomize</b> parameter specifies whether initial action estimates should be
        /// randomized with small values. Randomizing action values can be useful when greedy exploration
        /// policies are used, since it ensures that the same actions are not always chosen.</remarks>
        ///
        public DoubleQLearning(int states, int actions, IExplorationPolicy explorationPolicy, bool randomize)
        {
            this.states            = states;
            this.actions           = actions;
            this.explorationPolicy = explorationPolicy;

            // create Q-array
            qvalues = new double[states][];
            for (int i = 0; i < states; i++)
            {
                qvalues[i] = new double[actions];
            }
            qvalues2 = new double[states][];
            for (int i = 0; i < states; i++)
            {
                qvalues2[i] = new double[actions];
            }

            // do randomization
            if (randomize)
            {
                Random rand = new Random();

                for (int i = 0; i < states; i++)
                {
                    for (int j = 0; j < actions; j++)
                    {
                        var q1 = rand.NextDouble() / 10;
                        var q2 = rand.NextDouble() / 10;
                        if (q1 > q2)
                        {
                            qvalues[i][j]  = q1;
                            qvalues2[i][j] = q2;
                        }
                        else
                        {
                            qvalues[i][j]  = q2;
                            qvalues2[i][j] = q1;
                        }
                    }
                }
            }
        }
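Keeping two tables lets action selection and evaluation be decoupled, which counters the maximization bias of single-table Q-learning. A minimal sketch of the double Q-learning update these fields support, assuming rand, learningRate, and discountFactor members (none of which appear in the excerpt; the method signature is also illustrative):

        // illustrative double Q-learning: flip a coin to pick which table to update,
        // select the greedy action with that table, evaluate it with the other one
        public void UpdateState(int state, int action, double reward, int nextState)
        {
            var (updated, evaluator) = rand.NextDouble() < 0.5
                ? (qvalues, qvalues2)
                : (qvalues2, qvalues);

            var best = 0;
            for (var a = 1; a < actions; a++)
            {
                if (updated[nextState][a] > updated[nextState][best])
                    best = a;
            }

            var target = reward + discountFactor * evaluator[nextState][best];
            updated[state][action] += learningRate * (target - updated[state][action]);
        }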
Example #10
 /// <summary>
 /// Initializes a new instance of the <see cref="Sarsa"/> class.
 /// </summary>
 ///
 /// <param name="states">Amount of possible states.</param>
 /// <param name="actions">Amount of possible actions.</param>
 /// <param name="explorationPolicy">Exploration policy.</param>
 ///
 /// <remarks>Action estimates are randomized in the case of this constructor
 /// is used.</remarks>
 ///
 public Sarsa(int states, int actions, IExplorationPolicy explorationPolicy) :
     this(states, actions, explorationPolicy, true)
 {
 }
Example #11
        /// <summary>
        /// Initializes a new instance of the <see cref="QLearning"/> class.
        /// </summary>
        /// 
        /// <param name="states">Amount of possible states.</param>
        /// <param name="actions">Amount of possible actions.</param>
        /// <param name="explorationPolicy">Exploration policy.</param>
        /// <param name="randomize">Randomize action estimates or not.</param>
        /// 
        /// <remarks>The <b>randomize</b> parameter specifies if initial action estimates should be randomized
        /// with small values or not. Randomization of action values may be useful, when greedy exploration
        /// policies are used. In this case randomization ensures that actions of the same type are not chosen always.</remarks>
        /// 
        public QLearning(int states, int actions, IExplorationPolicy explorationPolicy, bool randomize)
        {
            this.states = states;
            this.actions = actions;
            this.explorationPolicy = explorationPolicy;

            // create Q-array
            qvalues = new double[states][];
            for (int i = 0; i < states; i++)
            {
                qvalues[i] = new double[actions];
            }

            // do randomization
            if (randomize)
            {
                Random rand = new Random();

                for (int i = 0; i < states; i++)
                {
                    for (int j = 0; j < actions; j++)
                    {
                        qvalues[i][j] = rand.NextDouble() / 10;
                    }
                }
            }
        }
Example #12
 /// <summary>
 /// Initializes a new instance of the <see cref="QLearning"/> class.
 /// </summary>
 /// 
 /// <param name="states">Amount of possible states.</param>
 /// <param name="actions">Amount of possible actions.</param>
 /// <param name="explorationPolicy">Exploration policy.</param>
 /// 
 /// <remarks>Action estimates are randomized in the case of this constructor
 /// is used.</remarks>
 /// 
 public QLearning(int states, int actions, IExplorationPolicy explorationPolicy) :
     this(states, actions, explorationPolicy, true)
 {
 }
Example #13
 /// <summary>
 /// Initializes a new instance of the <see cref="QLearning"/> class.
 /// </summary>
 ///
 /// <param name="states">Amount of possible states.</param>
 /// <param name="actions">Amount of possible actions.</param>
 /// <param name="explorationPolicy">Exploration policy.</param>
 ///
 /// <remarks>Action estimates are randomized in the case of this constructor
 /// is used.</remarks>
 ///
 public QLearning(int states, int actions, IExplorationPolicy explorationPolicy) :
     this(states, actions, explorationPolicy, true)
 {
 }
Example #14
 /// <summary>
 /// Initializes a new instance of the <see cref="Sarsa"/> class.
 /// </summary>
 /// 
 /// <param name="states">Amount of possible states.</param>
 /// <param name="actions">Amount of possible actions.</param>
 /// <param name="explorationPolicy">Exploration policy.</param>
 /// 
 /// <remarks>Action estimates are randomized in the case of this constructor
 /// is used.</remarks>
 /// 
 public Sarsa( int states, int actions, IExplorationPolicy explorationPolicy ) :
     this( states, actions, explorationPolicy, true )
 {
 }
Example #15
 public Agent(IExplorationPolicy explorationPolicy)
 {
     ExplorationPolicy = explorationPolicy;
 }