/// Uses the communicator to retrieve the actions, memories and values and
/// sends them to the agents
public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
{
    // All decision making is delegated to the external process; this brain
    // only ships the batched agent info through the batcher (when present).
    brainBatcher?.SendBrainInfo(brain.gameObject.name, agentInfo);
}
/// Uses the attached Decision component to choose which action to take.
public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
{
    brainBatcher?.SendBrainInfo(brain.gameObject.name, agentInfo);

    if (decision == null)
    {
        throw new UnityAgentsException(
            "The Brain is set to Heuristic, but no decision script attached to it");
    }

    // Pass 1: compute and apply the vector action for every agent.
    foreach (var pair in agentInfo)
    {
        var info = pair.Value;
        pair.Key.UpdateVectorAction(decision.Decide(
            info.stackedVectorObservation,
            info.visualObservations,
            info.reward,
            info.done,
            info.memories));
    }

    // Pass 2: compute and apply the updated memories. Kept as a separate
    // pass (as in the original) so a user Decision sees all vector actions
    // applied before any memory update.
    foreach (var pair in agentInfo)
    {
        var info = pair.Value;
        pair.Key.UpdateMemoriesAction(decision.MakeMemory(
            info.stackedVectorObservation,
            info.visualObservations,
            info.reward,
            info.done,
            info.memories));
    }
}
/// Uses the continuous inputs or discrete inputs of the player to
/// decide the action for each agent.
public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
{
    brainBatcher?.SendBrainInfo(brain.gameObject.name, agentInfo);

    if (brain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
    {
        foreach (var agent in agentInfo.Keys)
        {
            // BUGFIX: vectorActionSize is an int[] elsewhere in this file
            // (the internal brain uses .Length/.Sum()/[0]); the continuous
            // action count is element 0, not the array itself.
            var action = new float[brain.brainParameters.vectorActionSize[0]];

            // Key-driven continuous inputs: held key writes a fixed value
            // into one action slot.
            foreach (var cha in keyContinuousPlayerActions)
            {
                if (Input.GetKey(cha.key))
                {
                    action[cha.index] = cha.value;
                }
            }

            // Axis-driven continuous inputs: scaled axis value, with a
            // dead-zone so an idle axis does not overwrite key input.
            foreach (var axisAction in axisContinuousPlayerActions)
            {
                var axisValue = Input.GetAxis(axisAction.axis);
                axisValue *= axisAction.scale;
                if (Mathf.Abs(axisValue) > 0.0001)
                {
                    action[axisAction.index] = axisValue;
                }
            }

            agent.UpdateVectorAction(action);
        }
    }
    else
    {
        foreach (var agent in agentInfo.Keys)
        {
            // Discrete: single branch; first matching key wins, otherwise
            // the configured default action is used.
            var action = new float[1] { defaultAction };
            foreach (var dha in discretePlayerActions)
            {
                if (Input.GetKey(dha.key))
                {
                    action[0] = dha.value;
                    break;
                }
            }
            agent.UpdateVectorAction(action);
        }
    }
}
/// Uses the stored information to run the tensorflow graph and generate
/// the actions.
public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
{
#if ENABLE_TENSORFLOW
    if (brainBatcher != null)
    {
        brainBatcher.SendBrainInfo(brain.gameObject.name, agentInfo);
    }

    int currentBatchSize = agentInfo.Count();
    List<Agent> agentList = agentInfo.Keys.ToList();
    if (currentBatchSize == 0)
    {
        return;
    }

    // Create the state tensor: one row per agent, stacked observations.
    if (hasState)
    {
        int stateLength = 1;
        stateLength = brain.brainParameters.vectorObservationSize;
        inputState = new float[currentBatchSize,
            stateLength * brain.brainParameters.numStackedVectorObservations];

        var i = 0;
        foreach (Agent agent in agentList)
        {
            List<float> stateList = agentInfo[agent].stackedVectorObservation;
            for (int j = 0;
                j < stateLength * brain.brainParameters.numStackedVectorObservations;
                j++)
            {
                inputState[i, j] = stateList[j];
            }
            i++;
        }
    }

    // Create the previous-action tensor (floored to ints per branch).
    if (hasPrevAction)
    {
        int totalNumberActions = brain.brainParameters.vectorActionSize.Length;
        inputPrevAction = new int[currentBatchSize, totalNumberActions];
        var i = 0;
        foreach (Agent agent in agentList)
        {
            float[] actionList = agentInfo[agent].storedVectorActions;
            for (var j = 0; j < totalNumberActions; j++)
            {
                inputPrevAction[i, j] = Mathf.FloorToInt(actionList[j]);
            }
            i++;
        }
    }

    // Create the action-mask tensor: 0 = masked out, 1 = allowed.
    if (hasMaskedActions)
    {
        maskedActions = new float[
            currentBatchSize,
            brain.brainParameters.vectorActionSize.Sum()
        ];
        var i = 0;
        foreach (Agent agent in agentList)
        {
            for (int j = 0; j < brain.brainParameters.vectorActionSize.Sum(); j++)
            {
                if (agentInfo[agent].actionMasks != null)
                {
                    maskedActions[i, j] = agentInfo[agent].actionMasks[j] ? 0.0f : 1.0f;
                }
                else
                {
                    maskedActions[i, j] = 1.0f;
                }
            }
            i++;
        }
    }

    // Batch the visual observations per camera.
    observationMatrixList.Clear();
    for (int observationIndex = 0;
        observationIndex < brain.brainParameters.cameraResolutions.Length;
        observationIndex++)
    {
        texturesHolder.Clear();
        foreach (Agent agent in agentList)
        {
            texturesHolder.Add(agentInfo[agent].visualObservations[observationIndex]);
        }
        observationMatrixList.Add(
            BatchVisualObservations(texturesHolder,
                brain.brainParameters.cameraResolutions[observationIndex].blackAndWhite));
    }

    // Create the recurrent tensor
    if (hasRecurrent)
    {
        // Need to have variable memory size
        inputOldMemories = new float[currentBatchSize, memorySize];
        var i = 0;
        foreach (Agent agent in agentList)
        {
            float[] m = agentInfo[agent].memories.ToArray();
            for (int j = 0; j < m.Length; j++)
            {
                inputOldMemories[i, j] = m[j];
            }
            i++;
        }
    }

    var runner = session.GetRunner();
    try
    {
        runner.Fetch(graph[graphScope + ActionPlaceholderName][0]);
    }
    catch
    {
        throw new UnityAgentsException(string.Format(
            @"The node {0} could not be found. Please make sure the graphScope {1} is correct",
            graphScope + ActionPlaceholderName, graphScope));
    }

    if (hasBatchSize)
    {
        runner.AddInput(graph[graphScope + BatchSizePlaceholderName][0],
            new int[] { currentBatchSize });
    }

    foreach (TensorFlowAgentPlaceholder placeholder in graphPlaceholders)
    {
        try
        {
            if (placeholder.valueType == TensorFlowAgentPlaceholder.TensorType.FloatingPoint)
            {
                runner.AddInput(graph[graphScope + placeholder.name][0],
                    new float[] { Random.Range(placeholder.minValue, placeholder.maxValue) });
            }
            else if (placeholder.valueType == TensorFlowAgentPlaceholder.TensorType.Integer)
            {
                runner.AddInput(graph[graphScope + placeholder.name][0],
                    new int[] { Random.Range((int)placeholder.minValue, (int)placeholder.maxValue + 1) });
            }
        }
        catch
        {
            // Fixed garbled wording of the original message ("cound nout").
            throw new UnityAgentsException(string.Format(
                @"One of the Tensorflow placeholder could not be found.
In brain {0}, there are no {1} placeholder named {2}.",
                brain.gameObject.name, placeholder.valueType.ToString(),
                graphScope + placeholder.name));
        }
    }

    // Feed the state tensor
    if (hasState)
    {
        runner.AddInput(graph[graphScope + VectorObservationPlacholderName][0], inputState);
    }

    // Feed the previous action tensor
    if (hasPrevAction)
    {
        runner.AddInput(graph[graphScope + PreviousActionPlaceholderName][0], inputPrevAction);
    }

    // Feed the action-mask tensor
    if (hasMaskedActions)
    {
        runner.AddInput(graph[graphScope + ActionMaskPlaceholderName][0], maskedActions);
    }

    // Feed the observation tensors
    for (int obsNumber = 0;
        obsNumber < brain.brainParameters.cameraResolutions.Length;
        obsNumber++)
    {
        runner.AddInput(graph[graphScope + VisualObservationPlaceholderName[obsNumber]][0],
            observationMatrixList[obsNumber]);
    }

    if (hasRecurrent)
    {
        runner.AddInput(graph[graphScope + "sequence_length"][0], 1);
        runner.AddInput(graph[graphScope + RecurrentInPlaceholderName][0], inputOldMemories);
        runner.Fetch(graph[graphScope + RecurrentOutPlaceholderName][0]);
    }

    if (hasValueEstimate)
    {
        runner.Fetch(graph[graphScope + "value_estimate"][0]);
    }

    TFTensor[] networkOutput;
    try
    {
        networkOutput = runner.Run();
    }
    catch (TFException e)
    {
        string errorMessage = e.Message;
        try
        {
            errorMessage =
                $@"The tensorflow graph needs an input for {e.Message.Split(new string[] { "Node: " }, 0)[1].Split('=')[0]} of type {e.Message.Split(new string[] { "dtype=" }, 0)[1].Split(',')[0]}";
        }
        finally
        {
            throw new UnityAgentsException(errorMessage);
        }
    }

    // Apply the new memories from the recurrent output
    if (hasRecurrent)
    {
        float[,] recurrentTensor = networkOutput[1].GetValue() as float[,];
        var i = 0;
        foreach (Agent agent in agentList)
        {
            var m = new float[memorySize];
            for (int j = 0; j < memorySize; j++)
            {
                m[j] = recurrentTensor[i, j];
            }
            agent.UpdateMemoriesAction(m.ToList());
            i++;
        }
    }

    if (hasValueEstimate)
    {
        float[,] value_estimates = new float[currentBatchSize, 1];
        if (hasRecurrent)
        {
            value_estimates = networkOutput[2].GetValue() as float[,];
        }
        else
        {
            value_estimates = networkOutput[1].GetValue() as float[,];
        }

        var i = 0;
        foreach (Agent agent in agentList)
        {
            agent.UpdateValueAction(value_estimates[i, 0]);
            // BUGFIX: the original never incremented i here, so every agent
            // in the batch received the first agent's value estimate.
            i++;
        }
    }

    // Apply the actions to the agents
    if (brain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
    {
        var output = networkOutput[0].GetValue() as float[,];
        var i = 0;
        foreach (Agent agent in agentList)
        {
            var a = new float[brain.brainParameters.vectorActionSize[0]];
            for (int j = 0; j < brain.brainParameters.vectorActionSize[0]; j++)
            {
                a[j] = output[i, j];
            }
            agent.UpdateVectorAction(a);
            i++;
        }
    }
    else if (brain.brainParameters.vectorActionSpaceType == SpaceType.discrete)
    {
        long[,] output = networkOutput[0].GetValue() as long[,];
        var i = 0;
        foreach (Agent agent in agentList)
        {
            var actSize = brain.brainParameters.vectorActionSize.Length;
            var a = new float[actSize];
            for (int actIdx = 0; actIdx < actSize; actIdx++)
            {
                a[actIdx] = output[i, actIdx];
            }
            agent.UpdateVectorAction(a);
            i++;
        }
    }

#else
    if (agentInfo.Count > 0)
    {
        throw new UnityAgentsException(string.Format(
            @"The brain {0} was set to Internal but the Tensorflow library is not present in the Unity project.",
            brain.gameObject.name));
    }
#endif
}
/// <summary>
/// Calls the DecideAction method that the concrete brain implements.
/// </summary>
private void BrainDecideAction()
{
    // Forward the collected agent info to the batcher first (if attached),
    // then let the concrete brain decide.
    if (m_BrainBatcher != null)
    {
        m_BrainBatcher.SendBrainInfo(name, m_AgentInfos);
    }
    DecideAction();
}