/// <summary>
/// Initializes a new instance of the <see cref="Sarsa"/> class.
/// </summary>
///
/// <param name="states">Amount of possible states.</param>
/// <param name="actions">Amount of possible actions.</param>
/// <param name="explorationPolicy">Exploration policy.</param>
/// <param name="randomize">Randomize action estimates or not.</param>
///
/// <remarks>The <b>randomize</b> parameter specifies if initial action estimates should be randomized
/// with small values or not. Randomization of action values may be useful, when greedy exploration
/// policies are used. In this case randomization ensures that actions of the same type are not chosen always.</remarks>
///
public Sarsa(int states, int actions, IExplorationPolicy explorationPolicy, bool randomize)
{
    this.states = states;
    this.actions = actions;
    this.explorationPolicy = explorationPolicy;

    // allocate one row of action estimates per state
    qvalues = new double[states][];
    for (int state = 0; state < states; state++)
    {
        qvalues[state] = new double[actions];
    }

    if (!randomize)
    {
        return;
    }

    // seed every estimate with a small random value so greedy policies
    // do not keep breaking ties toward the same action
    Random rand = new Random();
    for (int state = 0; state < states; state++)
    {
        for (int action = 0; action < actions; action++)
        {
            qvalues[state][action] = rand.NextDouble() / 10;
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="Sarsa"/> class.
/// </summary>
/// <param name="stateCount">Number of possible states.</param>
/// <param name="actionCount">Number of possible actions per state.</param>
/// <param name="explorationPolicy">Policy used to select actions.</param>
/// <param name="learningRate">Step size applied to Q-value updates.</param>
/// <param name="discountFactor">Discount applied to future rewards.</param>
/// <param name="initializeRandom">When <c>true</c>, Q-values start at random values in [0, 1) instead of zero.</param>
public Sarsa(int stateCount, int actionCount, IExplorationPolicy explorationPolicy,
    double learningRate = 0.1, double discountFactor = 0.9, bool initializeRandom = false)
{
    StateCount = stateCount;
    ActionCount = actionCount;
    ExplorationPolicy = explorationPolicy;
    LearningRate = learningRate;
    DiscountFactor = discountFactor;

    // one row of action values per state
    _q = new double[stateCount][];
    for (var state = 0; state < stateCount; state++)
    {
        _q[state] = new double[actionCount];
    }

    if (!initializeRandom)
    {
        return;
    }

    // optional random start breaks ties between otherwise-equal actions
    var random = new Random();
    foreach (var row in _q)
    {
        for (var action = 0; action < actionCount; action++)
        {
            row[action] = random.NextDouble();
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="SARSA"/> class.
/// </summary>
/// <param name="numStates">The max number of states.</param>
/// <param name="numActions">The max number of actions.</param>
/// <param name="explorationPolicy">The exploration policy.</param>
/// <param name="randomize">if set to <c>true</c> [randomize] the created quality array.</param>
public SARSA(long numStates, int numActions, IExplorationPolicy explorationPolicy, bool randomize = true)
{
    NumStates = numStates;
    NumActions = numActions;
    ExplorationPolicy = explorationPolicy;

    // quality (Q) table: one action-value row per state
    QualityValues = new double[NumStates][];
    for (long state = 0; state < NumStates; state++)
    {
        QualityValues[state] = new double[NumActions];
    }

    if (!randomize)
    {
        return;
    }

    // small random initial values keep greedy policies from always
    // selecting the same action among otherwise-equal ones
    Random rand = new Random();
    for (long state = 0; state < NumStates; state++)
    {
        for (long action = 0; action < NumActions; action++)
        {
            QualityValues[state][action] = rand.NextDouble() / 10;
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="DynaQ"/> class.
/// </summary>
/// <param name="stateCount">Number of possible states.</param>
/// <param name="actionCount">Number of possible actions per state.</param>
/// <param name="explorationPolicy">Policy used to select actions.</param>
/// <param name="n">Number of planning steps replayed per real step.</param>
/// <param name="learningRate">Step size applied to Q-value updates.</param>
/// <param name="discountFactor">Discount applied to future rewards.</param>
/// <param name="initializeRandom">When <c>true</c>, Q-values start at random values in [0, 1) instead of zero.</param>
public DynaQ(int stateCount, int actionCount, IExplorationPolicy explorationPolicy, int n = 5,
    double learningRate = 0.1, double discountFactor = 0.9, bool initializeRandom = false)
{
    StateCount = stateCount;
    ActionCount = actionCount;
    N = n;
    ExplorationPolicy = explorationPolicy;
    LearningRate = learningRate;
    DiscountFactor = discountFactor;

    _visited = new Dictionary<int, HashSet<int>>();
    _random = new Random();

    // Q-table plus the learned model (observed next state and reward per
    // state/action pair), all jagged arrays with one row per state.
    _q = new double[stateCount][];
    _finalStates = new int[stateCount][];
    _rewards = new double[stateCount][];
    for (var i = 0; i < stateCount; i++)
    {
        _q[i] = new double[actionCount];
        _finalStates[i] = new int[actionCount];
        _rewards[i] = new double[actionCount];
    }

    if (initializeRandom)
    {
        // Reuse the instance RNG instead of allocating a second Random here:
        // on .NET Framework two time-seeded Random instances created
        // back-to-back can produce identical (correlated) sequences.
        for (var i = 0; i < stateCount; i++)
        {
            for (var j = 0; j < actionCount; j++)
            {
                _q[i][j] = _random.NextDouble();
            }
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="TabuSearchExploration"/> class.
/// </summary>
///
/// <param name="actions">Total actions count.</param>
/// <param name="basePolicy">Base exploration policy.</param>
///
public TabuSearchExploration( int actions, IExplorationPolicy basePolicy )
{
    this.basePolicy = basePolicy;
    this.actions    = actions;

    // one tabu counter per action; zero means the action is not tabu
    tabuActions = new int[actions];
}
/// <summary>
/// Initializes a new instance of the <see cref="TabuSearchExploration"/> class.
/// </summary>
///
/// <param name="actions">Total actions count.</param>
/// <param name="basePolicy">Base exploration policy.</param>
///
public TabuSearchExploration(int actions, IExplorationPolicy basePolicy)
{
    this.actions = actions;
    this.basePolicy = basePolicy;

    // tabu list: per-action counters, all starting at zero (not tabu)
    tabuActions = new int[actions];
}
/// <summary>
/// Initializes a new instance of the <see cref="InfiniteQLearning"/> class.
/// </summary>
///
/// <param name="states">Amount of possible states.</param>
/// <param name="actions">Amount of possible actions.</param>
/// <param name="explorationPolicy">Exploration policy.</param>
///
/// <remarks>The <b>randomize</b> parameter specifies if initial action estimates should be randomized
/// with small values or not. Randomization of action values may be useful, when greedy exploration
/// policies are used. In this case randomization ensures that actions of the same type are not chosen always.</remarks>
///
public InfiniteQLearning(int states, int actions, IExplorationPolicy explorationPolicy)
{
    this.explorationPolicy = explorationPolicy;
    this.actions = actions;
    this.states = states;

    // Q-values are stored sparsely in a dictionary keyed by the
    // (unbounded) BigInteger state identifier; rows are added on demand.
    qvalues = new Dictionary<BigInteger, double[]>();
}
/// <summary>
/// Initializes a new instance of the <see cref="QLearning_FDGS"/> class, allocating the
/// reward and Q-learning tables and wiring goal rewards into the reward table.
/// </summary>
/// <param name="actions">Number of actions per state (the loop body indexes actions 0..3: up/right/down/left — TODO confirm ordering against the caller).</param>
/// <param name="goalX">Goal column (x) in map coordinates.</param>
/// <param name="goalY">Goal row (y) in map coordinates.</param>
/// <param name="map">Grid map indexed as [row, column]; 0 appears to mean a free cell and -1 a wall — NOTE(review): confirm against the map generator.</param>
/// <param name="explorationPolicy">Exploration policy used for action selection.</param>
public QLearning_FDGS(int actions, int goalX, int goalY, int[,] map, IExplorationPolicy explorationPolicy)
{
    _mapWidth = map.GetLength(1);
    _mapHeight = map.GetLength(0);
    _explorationPolicy = explorationPolicy;

    // one reward row and one Q row per grid cell (state = linearized coordinate)
    _rewardTable = new double[_mapWidth * _mapHeight][];
    _qLearningTable = new double[_mapWidth * _mapHeight][];
    for (int i = 0; i < _qLearningTable.Length; i++)
    {
        _rewardTable[i] = new double[actions];
        _qLearningTable[i] = new double[actions];
    }

    InitRewardMatrix();
    InitQLearningMatrix();

    // Interior cells only (1 .. size-2): neighbor lookups below touch i±1 / j±1,
    // so the border must act as a frame — presumably wall cells; verify with map data.
    for (int i = 1; i < (_mapWidth - 1); i++)
    {
        for (int j = 1; j < (_mapHeight - 1); j++)
        {
            var state = GetStateFromCoordinates(i, j);
            // (removed leftover debug probe for the hard-coded state 67)

            // set reward according to map (walls, no walls); an already-assigned
            // goal reward of 1 is never overwritten by the 0 for a free cell
            _rewardTable[state][0] = (map[j + 1, i] == 0) && (_rewardTable[state][0] != 1) ? 0 : _rewardTable[state][0];
            _rewardTable[state][1] = (map[j, i + 1] == 0) && (_rewardTable[state][1] != 1) ? 0 : _rewardTable[state][1];
            _rewardTable[state][2] = (map[j - 1, i] == 0) && (_rewardTable[state][2] != 1) ? 0 : _rewardTable[state][2];
            _rewardTable[state][3] = (map[j, i - 1] == 0) && (_rewardTable[state][3] != 1) ? 0 : _rewardTable[state][3];

            // check for goal -> every state adjacent to the goal gets a reward of 1
            // for the action that moves into it (or -1 if that cell is a wall)
            if ((i == goalX) && (j == goalY))
            {
                // below goal and moving up
                _rewardTable[GetStateFromCoordinates(i, j - 1)][0] = map[j - 1, i] == -1 ? -1 : 1;
                // right from goal and moving left
                _rewardTable[GetStateFromCoordinates(i + 1, j)][3] = map[j, i + 1] == -1 ? -1 : 1;
                // above goal and moving down
                _rewardTable[GetStateFromCoordinates(i, j + 1)][2] = map[j + 1, i] == -1 ? -1 : 1;
                // left from goal and moving right
                _rewardTable[GetStateFromCoordinates(i - 1, j)][1] = map[j, i - 1] == -1 ? -1 : 1;
            }
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="QLearning"/> class.
/// </summary>
///
/// <param name="states">Amount of possible states.</param>
/// <param name="actions">Amount of possible actions.</param>
/// <param name="explorationPolicy">Exploration policy.</param>
/// <param name="randomize">Randomize action estimates or not.</param>
///
/// <remarks>The <b>randomize</b> parameter specifies if initial action estimates should be randomized
/// with small values or not. Randomization of action values may be useful, when greedy exploration
/// policies are used. In this case randomization ensures that actions of the same type are not chosen always.
/// Of each random pair of draws, the larger value seeds the first estimator table and the
/// smaller one the second.</remarks>
///
public DoubleQLearning(int states, int actions, IExplorationPolicy explorationPolicy, bool randomize)
{
    this.states = states;
    this.actions = actions;
    this.explorationPolicy = explorationPolicy;

    // two independent estimator tables, as double Q-learning requires
    qvalues = new double[states][];
    qvalues2 = new double[states][];
    for (int state = 0; state < states; state++)
    {
        qvalues[state] = new double[actions];
        qvalues2[state] = new double[actions];
    }

    if (!randomize)
    {
        return;
    }

    // seed both tables with small random values; the larger of each pair goes
    // into the first table (same ordering the explicit comparison produced)
    Random rand = new Random();
    for (int state = 0; state < states; state++)
    {
        for (int action = 0; action < actions; action++)
        {
            double first = rand.NextDouble() / 10;
            double second = rand.NextDouble() / 10;
            qvalues[state][action] = Math.Max(first, second);
            qvalues2[state][action] = Math.Min(first, second);
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="Sarsa"/> class.
/// </summary>
///
/// <param name="states">Amount of possible states.</param>
/// <param name="actions">Amount of possible actions.</param>
/// <param name="explorationPolicy">Exploration policy.</param>
///
/// <remarks>Delegates to the main constructor with randomization enabled,
/// so action estimates start from small random values.</remarks>
///
public Sarsa(int states, int actions, IExplorationPolicy explorationPolicy)
    : this(states, actions, explorationPolicy, true)
{
}
/// <summary>
/// Initializes a new instance of the <see cref="QLearning"/> class.
/// </summary>
///
/// <param name="states">Amount of possible states.</param>
/// <param name="actions">Amount of possible actions.</param>
/// <param name="explorationPolicy">Exploration policy.</param>
/// <param name="randomize">Randomize action estimates or not.</param>
///
/// <remarks>The <b>randomize</b> parameter specifies if initial action estimates should be randomized
/// with small values or not. Randomization of action values may be useful, when greedy exploration
/// policies are used. In this case randomization ensures that actions of the same type are not chosen always.</remarks>
///
public QLearning(int states, int actions, IExplorationPolicy explorationPolicy, bool randomize)
{
    this.states = states;
    this.actions = actions;
    this.explorationPolicy = explorationPolicy;

    // allocate the jagged Q-table, one action-value row per state
    qvalues = new double[states][];
    for (int s = 0; s < states; s++)
    {
        qvalues[s] = new double[actions];
    }

    // optionally start from small random estimates so a greedy policy
    // does not always break ties toward the same action
    if (randomize)
    {
        Random generator = new Random();
        foreach (double[] row in qvalues)
        {
            for (int a = 0; a < row.Length; a++)
            {
                row[a] = generator.NextDouble() / 10;
            }
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="QLearning"/> class.
/// </summary>
///
/// <param name="states">Amount of possible states.</param>
/// <param name="actions">Amount of possible actions.</param>
/// <param name="explorationPolicy">Exploration policy.</param>
///
/// <remarks>Equivalent to calling the four-argument constructor with
/// <c>randomize</c> set to <c>true</c>: action estimates are randomized.</remarks>
///
public QLearning(int states, int actions, IExplorationPolicy explorationPolicy)
    : this(states, actions, explorationPolicy, true)
{
}
/// <summary>
/// Initializes a new instance of the <see cref="Sarsa"/> class.
/// </summary>
///
/// <param name="states">Amount of possible states.</param>
/// <param name="actions">Amount of possible actions.</param>
/// <param name="explorationPolicy">Exploration policy.</param>
///
/// <remarks>Forwards to the full constructor with randomization turned on,
/// so initial action estimates are small random values.</remarks>
///
public Sarsa( int states, int actions, IExplorationPolicy explorationPolicy ) :
    this( states, actions, explorationPolicy, true )
{
}
/// <summary>
/// Initializes a new instance of the <see cref="Agent"/> class.
/// </summary>
/// <param name="ExplorationPolicy">Exploration policy used to select actions; must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="ExplorationPolicy"/> is <c>null</c>.</exception>
public Agent(IExplorationPolicy ExplorationPolicy)
{
    // NOTE(review): the PascalCase parameter shadows the property and violates C#
    // naming conventions; kept as-is because renaming would break callers that
    // use named arguments.
    if (ExplorationPolicy == null)
    {
        throw new ArgumentNullException(nameof(ExplorationPolicy));
    }

    this.ExplorationPolicy = ExplorationPolicy;
}