/// <summary>
/// Builds the action pool by scanning every assembly loaded in the current
/// AppDomain for concrete subclasses of <c>BTAction</c>, instantiating each one
/// once to read its name and 'poolable' flag, and registering the poolable ones
/// in <c>actionDictionary</c> keyed by task name.
/// </summary>
/// <remarks>
/// Currently only BTAction subclasses are considered; other task types could be
/// supported in future. When two actions report the same name, the first one
/// found is kept.
/// </remarks>
public ActionPool()
{
    actionDictionary = new Dictionary<string, Type>();

    System.Reflection.Assembly[] assemblies = AppDomain.CurrentDomain.GetAssemblies();
    for (int i = 0; i < assemblies.Length; i++)
    {
        Type[] loadedTypes;
        try
        {
            loadedTypes = assemblies[i].GetTypes();
        }
        catch (System.Reflection.ReflectionTypeLoadException ex)
        {
            // Some types in this assembly could not be loaded; carry on with
            // the ones that could (unloadable entries are reported as null).
            loadedTypes = ex.Types.Where(t => t != null).ToArray();
        }

        // Only keep action types that can actually be instantiated: abstract
        // bases, open generics and types without a public parameterless
        // constructor cannot be created below.
        Type[] actions = loadedTypes
            .Where(t => t.IsSubclassOf(typeof(BTAction))
                        && !t.IsAbstract
                        && !t.ContainsGenericParameters
                        && t.GetConstructor(Type.EmptyTypes) != null)
            .ToArray();

        for (int j = 0; j < actions.Length; j++)
        {
            // Create a temporary instance so its name and flag can be read.
            // Instantiating from the Type itself (rather than looking the type
            // up again by its short name) also works for namespaced actions.
            BTTask task = (BTTask)Activator.CreateInstance(actions[j]);

            // Only actions flagged as 'poolable' are added; the custom flag
            // prevents system-breaking additions.
            if (!task.CheckIfPoolable())
            {
                continue;
            }

            if (!actionDictionary.ContainsKey(task.GetName()))
            {
                actionDictionary.Add(task.GetName(), actions[j]);
            }
        }
    }
}
/// <summary>
/// Reports whether the learner's previous state has rejected the given task,
/// looked up by the task's name.
/// </summary>
/// <param name="task">The task whose name is checked.</param>
/// <returns>True when the previous state reports the task as rejected.</returns>
private bool CheckIfRejected(BTTask task)
{
    var previousState = learner.GetPreviousState();
    return previousState.CheckIfRejected(task.GetName());
}
/// <summary>
/// Reports whether the state stored under <c>CurrentStateName</c> has rejected
/// the given task, looked up by the task's name.
/// </summary>
/// <param name="task">The task whose name is checked.</param>
/// <returns>True when the current state reports the task as rejected.</returns>
private bool CheckIfRejected(BTTask task)
{
    var currentState = states[CurrentStateName];
    return currentState.CheckIfRejected(task.GetName());
}
/// <summary>
/// Adds an action as a child of this node, unless a child with the same name
/// is already present.
/// </summary>
/// <param name="newAction">The task to append; its tree depth is set to one
/// below this node's depth once it has been added.</param>
public void AddTask(BTTask newAction)
{
    // Children are unique by name: bail out if this one is already here.
    foreach (var existingChild in children)
    {
        if (existingChild.GetName() == newAction.GetName())
        {
            return;
        }
    }

    children.Add(newAction);
    newAction.SetTreeDepth(treeDepth + 1);
}
//INCOMPLETE
/// <summary>
/// Maps a state-action pair to a hand-tuned reward, keyed on the action's name.
/// </summary>
/// <remarks>
/// The values are read from <c>stateArray</c>, not from <paramref name="state"/>
/// (which is not used here). Going by the branch comments:
/// index 0 is own health (1 low, 2 medium-low, 3 medium-high, 4 high/full),
/// index 1 is enemy count (1 one or none, 2 some, 3 lots) and
/// index 2 is the current enemy's health (1 low, 5 no enemy to attack).
/// </remarks>
/// <param name="state">The state the action was chosen in (currently unused).</param>
/// <param name="action">The action that was performed.</param>
/// <returns>The reward for the pair; 0 for any unhandled action or combination.</returns>
public override float GetReward(StateClass state, BTTask action)
{
    switch (action.GetName())
    {
        case "AttackSequence":
        {
            var ownHealth = stateArray[0];
            var enemyCount = stateArray[1];
            var targetHealth = stateArray[2];

            // no enemy to attack: this outranks every other consideration
            if (targetHealth == 5)
            {
                return(-100.0f);
            }

            if (ownHealth == 1) //low health: attacking is penalised...
            {
                if (enemyCount == 1) //...unless the single enemy is low too, get the last hit in
                {
                    return(targetHealth == 1 ? 10.0f : -50.0f);
                }
                if (enemyCount == 2) //some enemies
                {
                    return(-75.0f);
                }
                if (enemyCount == 3) //lots of enemies
                {
                    return(-100.0f);
                }
                return(-50.0f);
            }

            if (ownHealth == 2) //medium-low health
            {
                if (enemyCount == 1) //only one enemy, can probably attack; better if it is quite low
                {
                    return(targetHealth <= 2 ? 30.0f : 10.0f);
                }
                if (enemyCount == 2) //some enemies, only worth it for the last hit
                {
                    return(targetHealth == 1 ? 10.0f : -30.0f);
                }
                if (enemyCount == 3) //lots of enemies
                {
                    return(-50.0f);
                }
                return(-25.0f);
            }

            if (ownHealth == 3) //medium-high health
            {
                if (enemyCount == 1) //one enemy or fewer
                {
                    return(30.0f);
                }
                if (enemyCount == 2)
                {
                    return(25.0f);
                }
                return(0.0f);
            }

            if (ownHealth == 4) //high or full health, always safe to attack
            {
                return(100.0f);
            }

            return(0.0f);
        }

        // NOTE(review): the label is spelled "FleeSequnece"; confirm it matches
        // what the flee task's GetName() actually returns.
        case "FleeSequnece":
            //not implemented in time
            break;

        case "HealSelf":
        {
            //healing should only be done with few or no enemies nearby and when not at full health
            var enemyCount = stateArray[1];
            var ownHealth = stateArray[0];

            //lots of enemies, or already at full health: don't heal at all
            if (enemyCount == 3 || ownHealth == 4)
            {
                return(-100.0f);
            }

            if (enemyCount == 2) //some enemies, reward depends on current health
            {
                if (ownHealth == 1)
                {
                    return(-10.0f); //negative since they should be trying to flee at this stage
                }
                if (ownHealth == 3)
                {
                    return(10.0f);
                }
                return(0.0f);
            }

            if (enemyCount == 1) //1 or no enemies nearby, should be safe to heal
            {
                if (ownHealth == 1)
                {
                    return(60.0f); //definitely try and heal
                }
                if (ownHealth == 2)
                {
                    return(30.0f);
                }
                if (ownHealth == 3)
                {
                    return(10.0f);
                }
            }

            return(0.0f);
        }
    }

    //unhandled action names earn nothing
    return(0.0f);
}
//This is the general tick function, variations are made by overriding the FirstTimeInit, GetState and GetReward functions
//
//Flow of one tick:
//  1. on the first call, run FirstTimeInit and record the initial state as the learner's previous state
//  2. if no action is still running: refresh the current state, possibly add a random action from the
//     pool (dynamic nodes only), then ask the learner which child to run
//  3. tick the selected child; while it reports RUNNING, return RUNNING without learning anything
//  4. once the child has finished: shift the states, compute the reward, update the Q value table and
//     use counts, and possibly reject/remove the action
//Returns RUNNING while the selected child is running, otherwise SUCCESS (whatever the child's own result was).
public override StatusValue Tick(Blackboard blackboard)
{
    if (!isInitialised)
    {
        //initialise any custom variables
        FirstTimeInit(blackboard);

        //set the initial state
        learner.SetPreviousStateName(GetState(blackboard));
        isInitialised = true;
    }

    //only do this part when the current action has finished
    if (status != StatusValue.RUNNING)
    {
        //get the current state for the learner
        learner.SetCurrentStateName(GetState(blackboard));

        //random probability to add an action
        if (isNodeDynamic)
        {
            System.Random random = CustomExtensions.ThreadSafeRandom.ThisThreadsRandom;

            //always try to add when there are no children yet, otherwise roughly one time in four
            if (children.Count == 0 || random.NextDouble() < 0.25f)
            {
                //retrieve a random task from the action pool
                BTTask newTask = (BTTask)ActionPool.Instance.GetRandomAction();

                //add the task if it is not rejected
                //NOTE(review): the check is made against the learner's *previous* state, although the
                //current state name was refreshed just above - confirm which state is intended
                if (!learner.GetPreviousState().CheckIfRejected(newTask.GetName()))
                {
                    //Debug.Log("learner has not rejected: " + newTask.GetName());
                    AddTask(newTask);
                }
            }
        }

        //get the index of the action that the RL decides to use
        //NOTE(review): children may still be empty here (nothing added, or the pooled task was
        //rejected); the children[...] access below looks like it would fail in that case - confirm
        indexOfSelectedAction = learner.SelectAnAction(children);
    }

    //update this node's status by ticking the selected action
    status = children[indexOfSelectedAction].Tick(blackboard);

    //if the action is still running, return running until it has finished
    if (status == StatusValue.RUNNING)
    {
        return(status);
    }

    //update the states: what was current when the action started becomes previous,
    //and the current state is re-read now that the action has finished
    learner.SetPreviousStateName(learner.CurrentStateName);
    learner.SetCurrentStateName(GetState(blackboard));

    //get the reward for the previous state, i.e. the one this was in when starting the action
    float reward = GetReward(learner.GetStates()[learner.PreviousStateName], children[indexOfSelectedAction]);

    //update the Q value table
    learner.UpdateQValueTable(reward);

    //decrement the number of steps left to perform (might not be used)

    //update the use count for this action
    learner.GetPreviousState().UpdateActionUseCount(learner.CurrentActionName);

    //check if the action should be rejected
    //action should be performed a minimum number of times before checking this
    if (learner.GetPreviousState().GetActionUseCount(learner.CurrentActionName) > minimumActionPerformances)
    {
        //check if rejected
        if (ShouldActionBeRejected(blackboard))
        {
            //reject from this state
            learner.GetPreviousState().RejectAction(children[indexOfSelectedAction].GetName());

            //if all states reject the action then remove it from this node
            //NOTE(review): removal uses learner.CurrentActionIndex while the rejection above uses
            //indexOfSelectedAction - presumably these refer to the same child; verify
            if (ShouldActionBeRemoved(learner.CurrentActionName))
            {
                children.RemoveAt(learner.CurrentActionIndex);
            }
        }
    }

    QLearningValues(blackboard);

    //the finished action's own status is not propagated; this node reports success
    return(StatusValue.SUCCESS);
}