/// <summary>
/// Choose an action among the actions that are not currently tabu.
/// </summary>
///
/// <param name="actionEstimates">Action estimates, indexed by action.</param>
///
/// <returns>Returns the index of the selected action.</returns>
///
/// <remarks>The method collects the estimates of all non-tabu actions (those whose
/// entry in <c>tabuActions</c> is zero), lets the base policy choose among them and
/// maps the choice back to the original action index. The estimates can be any sort
/// of estimate which values usefulness of the action (expected summary reward,
/// discounted reward, etc). As a side effect, the remaining tabu time of every tabu
/// action is decreased by one on each call.</remarks>
///
/// <exception cref="System.InvalidOperationException">No non-tabu action is
/// available to choose from.</exception>
///
public int ChooseAction(double[] actionEstimates)
{
    // count the actions that are currently allowed
    int nonTabuActions = 0;

    for (int i = 0; i < actions; i++)
    {
        if (tabuActions[i] == 0)
        {
            nonTabuActions++;
        }
    }

    // estimates of the allowed actions and, for each of them, its original index
    double[] allowedActionEstimates = new double[nonTabuActions];
    int[] allowedActionMap = new int[nonTabuActions];

    for (int i = 0, j = 0; i < actions; i++)
    {
        if (tabuActions[i] == 0)
        {
            // allowed action
            allowedActionEstimates[j] = actionEstimates[i];
            allowedActionMap[j] = i;
            j++;
        }
        else
        {
            // decrease tabu time of tabu action
            tabuActions[i]--;
        }
    }

    // Checked only after the countdown above, so tabu times still advance and a
    // later call can succeed once some action becomes allowed again.
    if (nonTabuActions == 0)
    {
        throw new System.InvalidOperationException("No non-tabu actions are available to choose from.");
    }

    // the base policy answers with an index into the reduced list of allowed actions
    int chosen = basePolicy.ChooseAction(allowedActionEstimates);

    return allowedActionMap[chosen];
}
/// <summary>
/// Get next action from the specified state.
/// </summary>
///
/// <param name="state">Current state to get an action for.</param>
///
/// <returns>Returns the action for the state.</returns>
///
/// <remarks>For every action the method averages the values stored for the state in
/// <c>qvalues</c> and <c>qvalues2</c>, then lets the current
/// <see cref="ExplorationPolicy">exploration policy</see> choose an action from
/// these averaged estimates.</remarks>
///
public int GetAction(int state)
{
    // per-action mean of the two value tables for this state
    double[] averagedEstimates = new double[actions];

    for (int action = 0; action < actions; action++)
    {
        averagedEstimates[action] = (qvalues[state][action] + qvalues2[state][action]) / 2;
    }

    int selectedAction = explorationPolicy.ChooseAction(averagedEstimates);

    return selectedAction;
}
/// <summary>
/// Get next action from the specified state.
/// </summary>
///
/// <param name="state">Current state to get an action for.</param>
///
/// <returns>Returns the action for the state.</returns>
///
/// <remarks>The method passes the action estimates stored for the state in
/// <c>qvalues</c> to the current
/// <see cref="ExplorationPolicy">exploration policy</see> and returns the action
/// it selects.</remarks>
///
public int GetAction(int state)
{
    // the policy receives the estimates of the state as they are stored
    return explorationPolicy.ChooseAction(qvalues[state]);
}
/// <summary>
/// Select the next action for the given state.
/// </summary>
///
/// <param name="state">State for which an action is requested.</param>
///
/// <returns>Returns the action selected for the state.</returns>
///
/// <remarks>The action estimates stored for the state in <c>qvalues</c> are handed
/// to the current <see cref="ExplorationPolicy">exploration policy</see>, and the
/// action it selects is returned.</remarks>
///
public int GetAction(int state)
{
    // delegate the decision to the exploration policy
    int selectedAction = explorationPolicy.ChooseAction(qvalues[state]);

    return selectedAction;
}
/// <summary>
/// Select the next action for the given state.
/// </summary>
///
/// <param name="state">State for which an action is requested.</param>
///
/// <returns>Returns the action selected for the state.</returns>
///
/// <remarks>The value returned by <c>Q(state)</c> is handed to the current
/// <see cref="ExplorationPolicy">exploration policy</see>, and the action it
/// selects is returned.</remarks>
///
public int GetAction(int state)
{
    // delegate the decision to the exploration policy
    int selectedAction = explorationPolicy.ChooseAction(Q(state));

    return selectedAction;
}
/// <summary>
/// Select the next action for the given state.
/// </summary>
///
/// <param name="state">State for which an action is requested; used as the index
/// into <c>_rewardTable</c>.</param>
///
/// <returns>Returns the action selected by <c>_explorationPolicy</c>.</returns>
///
/// <remarks>The <c>_rewardTable</c> entry for the state is passed to the exploration
/// policy as is. NOTE(review): the entry presumably holds one estimate per
/// action - confirm against the declaration of <c>_rewardTable</c>.</remarks>
///
public int GetAction(int state)
{
    // delegate the decision to the exploration policy
    int selectedAction = _explorationPolicy.ChooseAction(_rewardTable[state]);

    return selectedAction;
}