C# (CSharp) ObjectState.getState示例

    /*
     * Returns the largest Q value among the actions that are available in
     * nextState.
     *
     * nextState: the state whose row of the Q table is inspected.
     *
     * Returns 0 when nextState has no available action, so that such a state
     * contributes no future value. Without this the float.MinValue sentinel
     * would be returned and would swamp any Q update that uses the result.
     */
    float maxQ(ObjectState nextState)
    {
        bool[]  possibleActions = controller.getAvailableActions(nextState.getState());
        float[] Qactions        = (float[])Q[nextState.getId()];

        bool  anyAvailable = false;
        float maxValue     = float.MinValue;

        for (int i = 0; i < possibleActions.Length; i++)
        {
            if (!possibleActions[i])
            {
                continue; // only available actions take part in the maximum
            }

            anyAvailable = true;

            float value = Qactions[i];
            if (value > maxValue)
            {
                maxValue = value;
            }
        }

        if (!anyAvailable)
        {
            return(0f);
        }

        return(maxValue);
    } // end of maxQ

示例#2

显示文件

文件： QLearning.cs 项目： Stounis/project3d

    /*
     * returns the action with the highest q value
     */
    /*int bestAction(ObjectState s) {
     *
     *  updateStates();
     *
     *  int action = 0;
     *  float maxValue = float.MinValue;
     *
     *  bool[] possibleActions = controller.getAvailableActions(s.getState());
     *  float[] Qactions = (float[])Q[s.getId()];
     *  for (int i = 0; i < possibleActions.Length; i++) {
     *      if (possibleActions[i]) {
     *          float value = Qactions[i];
     *          if (value > maxValue) { // > or >=
     *              maxValue = value;
     *              action = i;
     *          }
     *      }
     *  }
     *  return action;
     * } // end of bestAction */

    /*
     * Epsilon-greedy action selection for state s.
     *
     * With probability epsilon a uniformly random available action is
     * returned (exploration). Otherwise the available action with the highest
     * Q value is returned; when several actions share that highest value one
     * of them is picked at random.
     *
     * s: the state to choose an action for.
     *
     * Returns the index of the chosen action, or 0 when no action is
     * available in s.
     */
    public int bestAction(ObjectState s)
    {
        int   action   = 0;
        float maxValue = float.MinValue;

        bool[]     possibleActions = controller.getAvailableActions(s.getState());
        List <int> actions         = new List <int>();

        for (int i = 0; i < possibleActions.Length; i++)
        {
            if (possibleActions[i])
            {
                actions.Add(i);
            }
        }
        float[] Qactions = (float[])Q[s.getId()];

        // Nothing to choose from: return the default action instead of
        // indexing into the empty list in the exploration branch below.
        if (actions.Count == 0)
        {
            return(action);
        }

        // add an element of randomness for exploration
        if (Random.Range(0.0f, 1.0f) < epsilon)
        {
            int random = Random.Range(0, actions.Count);
            action = actions[random];
        }
        else
        {
            // collect every action that shares the maximum Q value
            List <int> bestActions = new List <int>();
            foreach (int a in actions)
            {
                float tempValue = Qactions[a];
                if (tempValue > maxValue)
                {
                    // strictly better: restart the list of candidates
                    maxValue = tempValue;
                    bestActions.Clear();
                    bestActions.Add(a);
                }
                else if (tempValue == maxValue)
                {
                    // exact tie with the current maximum
                    bestActions.Add(a);
                }
            }

            // choose a random action from the best ones
            if (bestActions.Count > 1)
            {
                int random = Random.Range(0, bestActions.Count);
                action = bestActions[random];
            }
            else if (bestActions.Count == 1)
            {
                action = bestActions[0];
            }
        }

        return(action);
    }// end of ba

示例#3

显示文件

文件： QLearning.cs 项目： Stounis/project3d

    } // end of Constructor

    /*
     * Updates the Q table entry for (oldState, action) after the transition
     * to newState, then records the step in memory and adds the reward to
     * totalReward.
     * rl = reinforcement learning
     *
     * Update rule:
     *   Q(s,a) = Q(s,a) + alpha * (r(s,a) + gamma * maxQ(s') - Q(s,a))
     *
     * oldState: the state the action was taken in.
     * action:   index of the action that was taken.
     * newState: the state reached after taking the action.
     */
    public void rl(ObjectState oldState, int action, ObjectState newState)
    {
        updateStates(); // add state in the array if new state

        float q    = getQvalue(oldState.getId(), action);
        float maxq = maxQ(newState);
        int   r    = controller.reward(newState.getState(), action);

        // Guard against the float.MinValue sentinel (no available action in
        // newState) so that it cannot swamp the update; such a state adds no
        // future value.
        float future = (maxq == float.MinValue) ? 0f : maxq;

        // Use the estimated future value in the target, as the update rule
        // above states, rather than a hard-coded constant.
        float value = q + alpha * (r + gamma * future - q);

        Debug.Log("q: " + q + " maxq: " + maxq);
        Debug.Log("reward: " + r + " value: " + value);
        updateQvalue(oldState.getId(), action, value); // q[oldstate,action] = value

        addMemory(oldState.getId(), action);
        maintainMemory();

        totalReward += r;

        // NOTE: repeating the Q update backwards through the stored memory
        // entries is not performed here.
    } // end of rl

示例#4

显示文件

文件： QLearning.cs 项目： Stounis/project3d

    }// end of ba

    /*
     * Picks one of the actions available in state s uniformly at random.
     *
     * s: the state to choose an action for.
     *
     * Returns the index of the chosen action, or 0 when no action is
     * available in s.
     */
    int randomAction(ObjectState s)
    {
        updateStates();

        bool[] possibleActions = controller.getAvailableActions(s.getState());

        // Count the candidates first so that exactly one random draw is enough
        // and every available action (including the last index) can be chosen.
        int availableCount = 0;
        for (int i = 0; i < possibleActions.Length; i++)
        {
            if (possibleActions[i])
            {
                availableCount++;
            }
        }

        if (availableCount == 0)
        {
            return(0);
        }

        // The integer overload of Random.Range excludes its upper bound, so
        // this yields a value in [0, availableCount - 1].
        int pick = Random.Range(0, availableCount);

        // Walk to the pick-th available action.
        for (int i = 0; i < possibleActions.Length; i++)
        {
            if (!possibleActions[i])
            {
                continue;
            }

            if (pick == 0)
            {
                return(i);
            }

            pick--;
        }

        return(0); // not reached: pick is always smaller than availableCount
    } // end of randomaction