/// <summary>
        /// Choose an action.
        /// </summary>
        ///
        /// <param name="actionEstimates">Action estimates.</param>
        ///
        /// <returns>Returns selected action.</returns>
        ///
        /// <remarks>The method chooses an action depending on the provided estimates. The
        /// estimates can be any sort of estimate that values the usefulness of an action
        /// (expected summary reward, discounted reward, etc). The action is chosen from
        /// non-tabu actions only.</remarks>
        ///
        public int ChooseAction(double[] actionEstimates)
        {
            // start from the full action count and discount every action
            // which still has tabu time left
            int allowedCount = actions;

            for (int action = 0; action < actions; action++)
            {
                if (tabuActions[action] != 0)
                {
                    allowedCount--;
                }
            }

            // estimates of the allowed actions, plus the original index of each of them
            double[] candidateEstimates = new double[allowedCount];
            int[]    candidateIndexes   = new int[allowedCount];
            int      next               = 0;

            for (int action = 0; action < actions; action++)
            {
                if (tabuActions[action] != 0)
                {
                    // tabu action: leave it out, but reduce its remaining tabu time by one
                    tabuActions[action]--;
                    continue;
                }

                candidateEstimates[next] = actionEstimates[action];
                candidateIndexes[next]   = action;
                next++;
            }

            // the base policy selects among the candidates only, so its answer is
            // translated back into an index of the full action set
            return candidateIndexes[basePolicy.ChooseAction(candidateEstimates)];
        }
 /// <summary>
 /// Get next action from the specified state.
 /// </summary>
 ///
 /// <param name="state">Current state to get an action for.</param>
 ///
 /// <returns>Returns the action for the state.</returns>
 ///
 /// <remarks>The method returns an action according to the current
 /// <see cref="ExplorationPolicy">exploration policy</see>.</remarks>
 ///
 public int GetAction(int state)
 {
     // the policy is not given either table directly: for every action it
     // receives the mean of the two estimates stored for this state
     double[] blendedEstimates = new double[actions];

     for (int action = 0; action < blendedEstimates.Length; action++)
     {
         blendedEstimates[action] = (qvalues[state][action] + qvalues2[state][action]) / 2;
     }

     return explorationPolicy.ChooseAction(blendedEstimates);
 }
示例#3
0
        /// <summary>
        /// Get next action from the specified state.
        /// </summary>
        ///
        /// <param name="state">Current state to get an action for.</param>
        ///
        /// <returns>Returns the action for the state.</returns>
        ///
        /// <remarks>The method returns an action according to the current
        /// <see cref="ExplorationPolicy">exploration policy</see>.</remarks>
        ///
        public int GetAction(int state)
        {
            // The choice is delegated entirely to the exploration policy, which receives
            // this state's action estimates unchanged. No maximum estimate is computed
            // here: the policy's answer is the only value the method returns.
            return explorationPolicy.ChooseAction(qvalues[state]);
        }
示例#4
0
 /// <summary>
 /// Get next action from the specified state.
 /// </summary>
 ///
 /// <param name="state">Current state to get an action for.</param>
 ///
 /// <returns>Returns the action for the state.</returns>
 ///
 /// <remarks>The method returns an action according to the current
 /// <see cref="ExplorationPolicy">exploration policy</see>.</remarks>
 ///
 public int GetAction(int state)
 {
     // action estimates stored for the requested state
     var stateEstimates = qvalues[state];

     // the exploration policy makes the actual choice
     return explorationPolicy.ChooseAction(stateEstimates);
 }
示例#5
0
 /// <summary>
 /// Get next action from the specified state.
 /// </summary>
 ///
 /// <param name="state">Current state to get an action for.</param>
 ///
 /// <returns>Returns the action for the state.</returns>
 ///
 /// <remarks>The method returns an action according to the current
 /// <see cref="ExplorationPolicy">exploration policy</see>.</remarks>
 ///
 public int GetAction(int state)
 {
     // whatever Q yields for the state is handed to the policy as-is
     var stateEstimates = Q(state);

     // the exploration policy makes the actual choice
     return explorationPolicy.ChooseAction(stateEstimates);
 }
示例#6
0
 /// <summary>
 /// Get next action from the specified state.
 /// </summary>
 ///
 /// <param name="state">Current state to get an action for.</param>
 ///
 /// <returns>Returns the action picked by the exploration policy.</returns>
 ///
 public int GetAction(int state)
 {
     // the policy is given the reward table entry stored for this state
     var stateRewards = _rewardTable[state];

     return _explorationPolicy.ChooseAction(stateRewards);
 }