Ejemplo n.º 1
0
        /// <summary>
        /// Builds the action pool: scans every assembly loaded in the current AppDomain for
        /// instantiable subclasses of BTAction and registers each one that reports itself as
        /// poolable, keyed by the name returned from its GetName() method.
        /// </summary>
        public ActionPool()
        {
            actionDictionary = new Dictionary <string, Type>();

            //initialise the action pool with the types of actions that will be used
            System.Reflection.Assembly[] assemblies = AppDomain.CurrentDomain.GetAssemblies();
            for (int i = 0; i < assemblies.Length; i++)
            {
                //GetTypes throws if any type in the assembly fails to load; in that case
                //fall back to the types that did load (failed entries are null)
                Type[] loadedTypes;
                try
                {
                    loadedTypes = assemblies[i].GetTypes();
                }
                catch (System.Reflection.ReflectionTypeLoadException loadError)
                {
                    loadedTypes = loadError.Types;
                }

                //create an array for all of the actions that the user has implemented
                //currently only uses BTAction but could be implemented to work with all task types in future
                //abstract types and types without a public parameterless constructor cannot be instantiated, so skip them
                Type[] actions = loadedTypes
                                 .Where(t => t != null &&
                                        !t.IsAbstract &&
                                        t.IsSubclassOf(typeof(BTAction)) &&
                                        t.GetConstructor(Type.EmptyTypes) != null)
                                 .ToArray();
                for (int j = 0; j < actions.Length; j++)
                {
                    //create a temporary instance of the action class so that its name can be retrieved
                    //instantiate from the Type itself: Assembly.CreateInstance(string) looks the type up by its
                    //full (namespace-qualified) name and returns null when given only the simple name
                    BTTask task = Activator.CreateInstance(actions[j]) as BTTask;
                    if (task == null)
                    {
                        continue;
                    }

                    //only let this action be added if it is flagged as 'poolable'
                    //custom flag to prevent system breaking additions
                    if (!task.CheckIfPoolable())
                    {
                        continue;
                    }

                    //a null key would make the dictionary throw, and the first registration of a name wins
                    string taskName = task.GetName();
                    if (taskName != null && !actionDictionary.ContainsKey(taskName))
                    {
                        actionDictionary.Add(taskName, actions[j]);
                    }
                }
            }
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Asks the learner's previous state whether it has rejected the given task.
 /// </summary>
 /// <param name="task">The task whose name (from GetName()) is looked up.</param>
 /// <returns>True when the previous state reports the task's name as rejected, otherwise false.</returns>
 private bool CheckIfRejected(BTTask task)
 {
     string taskName = task.GetName();
     return learner.GetPreviousState().CheckIfRejected(taskName);
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Checks whether the task has been rejected from the current state.
 /// </summary>
 /// <param name="task">The task whose name (from GetName()) is looked up.</param>
 /// <returns>True when the state stored under CurrentStateName reports the task's name as rejected, otherwise false.</returns>
 private bool CheckIfRejected(BTTask task)
 {
     //the state entry is looked up first, then queried with the task's name
     var currentState = states[CurrentStateName];
     return currentState.CheckIfRejected(task.GetName());
 }
 /// <summary>
 /// Adds an action as a child of this node, unless a child with the same name is already present.
 /// </summary>
 /// <param name="newAction">The task to attach. When added, its tree depth is set to this node's depth plus one.</param>
 public void AddTask(BTTask newAction)
 {
     //look for an existing child that already carries this name
     bool alreadyPresent = false;
     foreach (BTTask child in children)
     {
         if (child.GetName() == newAction.GetName())
         {
             alreadyPresent = true;
             break;
         }
     }

     if (!alreadyPresent)
     {
         //new name: attach it and place it one level below this node
         children.Add(newAction);
         newAction.SetTreeDepth(treeDepth + 1);
     }
 }
    //INCOMPLETE
    /// <summary>
    /// Maps the selected action to a hand-tuned reward based on the values held in stateArray.
    /// Going by the comments in the original code, stateArray[0] is the agent's own health bucket
    /// (1 = low ... 4 = high/full), stateArray[1] is the enemy-count bucket (1 = one or none,
    /// 2 = some, 3 = lots) and stateArray[2] is the current enemy's health (5 = no enemy).
    /// </summary>
    /// <param name="state">Not read by this implementation; the reward is derived from stateArray.</param>
    /// <param name="action">The action whose GetName() selects the reward table.</param>
    /// <returns>The reward for the action, or 0 for actions without a reward table.</returns>
    public override float GetReward(StateClass state, BTTask action)
    {
        switch (action.GetName())
        {
        case "AttackSequence":
            return ComputeAttackSequenceReward();

        case "HealSelf":
            return ComputeHealSelfReward();

        //NOTE(review): the label is spelled "FleeSequnece"; confirm it matches the name the flee task reports
        case "FleeSequnece":        //not implemented in time
        default:
            return 0.0f;
        }
    }

    //reward table for attacking: punishes low health and high enemy numbers
    private float ComputeAttackSequenceReward()
    {
        var ownHealth = stateArray[0];
        var enemyCount = stateArray[1];
        var enemyHealth = stateArray[2];

        //no enemy to attack: this overrides every other rule
        if (enemyHealth == 5)
        {
            return -100.0f;
        }

        if (ownHealth == 1)     //low health
        {
            if (enemyCount == 1)        //one or no enemies
            {
                //current enemy is low health, get the last hit in
                return enemyHealth == 1 ? 10.0f : -50.0f;
            }
            else if (enemyCount == 2)   //some enemies
            {
                return -75.0f;
            }
            else if (enemyCount == 3)   //lots of enemies
            {
                return -100.0f;
            }
            return -50.0f;
        }
        else if (ownHealth == 2)        //medium-low health
        {
            if (enemyCount == 1)        //only one enemy, can probably attack
            {
                //enemy quite low on health
                return enemyHealth <= 2 ? 30.0f : 10.0f;
            }
            else if (enemyCount == 2)   //some enemies
            {
                //current enemy is low health, get the last hit in
                return enemyHealth == 1 ? 10.0f : -30.0f;
            }
            else if (enemyCount == 3)   //lots of enemies
            {
                return -50.0f;
            }
            return -25.0f;
        }
        else if (ownHealth == 3)        //medium high health
        {
            if (enemyCount == 1)        //one enemy or fewer
            {
                return 30.0f;
            }
            else if (enemyCount == 2)
            {
                return 25.0f;
            }
            return 0.0f;
        }
        else if (ownHealth == 4)        //high or full health
        {
            return 100.0f;              //it is always safe to attack at this health
        }

        return 0.0f;
    }

    //reward table for healing: healing should only be done when there are few or no enemies nearby and when not at full health
    private float ComputeHealSelfReward()
    {
        var ownHealth = stateArray[0];
        var enemyCount = stateArray[1];

        //lots of enemies (don't heal at all) or full health already (don't bother)
        if (enemyCount == 3 || ownHealth == 4)
        {
            return -100.0f;
        }

        if (enemyCount == 2)            //some enemies, reward depends on current health
        {
            if (ownHealth == 1)
            {
                return -10.0f;          //negative since they should be trying to flee at this stage
            }
            else if (ownHealth == 2)
            {
                return 0.0f;
            }
            else if (ownHealth == 3)
            {
                return 10.0f;
            }
            return 0.0f;
        }

        if (enemyCount == 1)            //1 or no enemies nearby, should be safe to heal
        {
            if (ownHealth == 1)
            {
                return 60.0f;           //definitely try and heal
            }
            else if (ownHealth == 2)
            {
                return 30.0f;
            }
            else if (ownHealth == 3)
            {
                return 10.0f;
            }
        }

        return 0.0f;
    }
            /// <summary>
            /// This is the general tick function, variations are made by overriding the FirstTimeInit, GetState and GetReward functions.
            /// On the first call it runs FirstTimeInit and seeds the learner's previous state. Whenever the last
            /// action is not still running it refreshes the learner's current state, may add a random task from the
            /// action pool (dynamic nodes only) and asks the learner to select a child. The selected child is then
            /// ticked; once it stops running, the learner's states, Q values, use counts and rejections are updated.
            /// </summary>
            /// <param name="blackboard">Passed through to FirstTimeInit, GetState, the child's Tick, ShouldActionBeRejected and QLearningValues.</param>
            /// <returns>StatusValue.RUNNING while the selected child is running; otherwise StatusValue.SUCCESS, whatever status the child finished with.</returns>
            public override StatusValue Tick(Blackboard blackboard)
            {
                if (!isInitialised)
                {
                    //initialise any custom variables
                    FirstTimeInit(blackboard);

                    //set the initial state
                    learner.SetPreviousStateName(GetState(blackboard));

                    isInitialised = true;
                }

                //only do this part when the current action has finished
                if (status != StatusValue.RUNNING)
                {
                    //get the current state for the learner
                    learner.SetCurrentStateName(GetState(blackboard));

                    //random probability to add an action
                    if (isNodeDynamic)
                    {
                        System.Random random = CustomExtensions.ThreadSafeRandom.ThisThreadsRandom;
                        //always try to add when there are no children yet, otherwise with a 25% chance
                        if (children.Count == 0 || random.NextDouble() < 0.25f)
                        {
                            //retrieve a random task from the action pool
                            BTTask newTask = (BTTask)ActionPool.Instance.GetRandomAction();
                            //add the task unless the learner's previous state has rejected it
                            //NOTE(review): the check uses GetPreviousState() although the current state was just set above - confirm which is intended
                            if (!learner.GetPreviousState().CheckIfRejected(newTask.GetName()))
                            {
                                //Debug.Log("learner has not rejected: " + newTask.GetName());
                                AddTask(newTask);
                            }
                        }
                    }

                    //get the index of the action that the RL decides to use
                    indexOfSelectedAction = learner.SelectAnAction(children);
                }

                //update this node's status by ticking the selected action
                //NOTE(review): nothing here guards against children being empty (non-dynamic node, or the random task was rejected) - confirm SelectAnAction and callers rule that out
                status = children[indexOfSelectedAction].Tick(blackboard);

                //if the action is still running, return running until it has finished
                if (status == StatusValue.RUNNING)
                {
                    return(status);
                }

                //update the states: the state the action started in becomes the previous state
                learner.SetPreviousStateName(learner.CurrentStateName);
                learner.SetCurrentStateName(GetState(blackboard));

                //get the reward for the previous state, i.e. the one this was in when starting the action
                float reward = GetReward(learner.GetStates()[learner.PreviousStateName], children[indexOfSelectedAction]);

                //update the Q value table
                learner.UpdateQValueTable(reward);

                //decrement the number of steps left to perform (might not be used)

                //update the use count for this action
                learner.GetPreviousState().UpdateActionUseCount(learner.CurrentActionName);

                //check if the action should be rejected
                //action should be performed a minimum number of times before checking this
                if (learner.GetPreviousState().GetActionUseCount(learner.CurrentActionName) > minimumActionPerformances)
                {
                    //check if rejected
                    if (ShouldActionBeRejected(blackboard))
                    {
                        //reject from this state
                        learner.GetPreviousState().RejectAction(children[indexOfSelectedAction].GetName());
                        //if all states reject the action then remove it from this node
                        if (ShouldActionBeRemoved(learner.CurrentActionName))
                        {
                            //NOTE(review): removal uses learner.CurrentActionIndex while the rest of this method uses indexOfSelectedAction - presumably the same value; confirm
                            children.RemoveAt(learner.CurrentActionIndex);
                        }
                    }
                }

                QLearningValues(blackboard);

                return(StatusValue.SUCCESS);
            }