/// Uses the communicator to retrieve the actions, memories and values, and
/// sends them to the agents.
public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
{
    if (brainBatcher != null)
    {
        brainBatcher.SendBrainInfo(brain.gameObject.name, agentInfo);
    }
}
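
For reference, the null check above is the long-hand form of the C# 6 null-conditional call used in Example #5; the two guards are equivalent:

// Equivalent guard with the null-conditional operator:
brainBatcher?.SendBrainInfo(brain.gameObject.name, agentInfo);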
Example #2
        /// Uses the Decision Component to decide what action to take
        public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
        {
            if (brainBatcher != null)
            {
                brainBatcher.SendBrainInfo(brain.gameObject.name, agentInfo);
            }

            if (decision == null)
            {
                throw new UnityAgentsException(
                    "The Brain is set to Heuristic, but no Decision script is attached to it");
            }

            foreach (var agent in agentInfo.Keys)
            {
                agent.UpdateVectorAction(decision.Decide(
                                             agentInfo[agent].stackedVectorObservation,
                                             agentInfo[agent].visualObservations,
                                             agentInfo[agent].reward,
                                             agentInfo[agent].done,
                                             agentInfo[agent].memories));
            }

            foreach (var agent in agentInfo.Keys)
            {
                agent.UpdateMemoriesAction(decision.MakeMemory(
                                               agentInfo[agent].stackedVectorObservation,
                                               agentInfo[agent].visualObservations,
                                               agentInfo[agent].reward,
                                               agentInfo[agent].done,
                                               agentInfo[agent].memories));
            }
        }
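
The Heuristic path above only needs a Decision object with the two methods it calls, Decide and MakeMemory. Below is a minimal sketch of one; the signatures are inferred from the call sites above, and the Decision type itself is assumed from context, so a given ML-Agents release may differ:

using System.Collections.Generic;
using UnityEngine;

public class ConstantDecision : MonoBehaviour, Decision
{
    // Always returns the same one-element action vector.
    public float[] Decide(List<float> vectorObs, List<Texture2D> visualObs,
                          float reward, bool done, List<float> memory)
    {
        return new float[] { 1.0f };
    }

    // This decision keeps no recurrent state, so the memory passes through unchanged.
    public List<float> MakeMemory(List<float> vectorObs, List<Texture2D> visualObs,
                                  float reward, bool done, List<float> memory)
    {
        return memory;
    }
}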
Example #3
        /// Uses the continuous or discrete inputs of the player to
        /// decide the action
        public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
        {
            if (brainBatcher != null)
            {
                brainBatcher.SendBrainInfo(brain.gameObject.name, agentInfo);
            }

            if (brain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
            {
                foreach (var agent in agentInfo.Keys)
                {
                    var action = new float[brain.brainParameters.vectorActionSize];
                    foreach (var cha in keyContinuousPlayerActions)
                    {
                        if (Input.GetKey(cha.key))
                        {
                            action[cha.index] = cha.value;
                        }
                    }

                    foreach (var axisAction in axisContinuousPlayerActions)
                    {
                        var axisValue = Input.GetAxis(axisAction.axis);
                        axisValue *= axisAction.scale;
                        if (Mathf.Abs(axisValue) > 0.0001)
                        {
                            action[axisAction.index] = axisValue;
                        }
                    }

                    agent.UpdateVectorAction(action);
                }
            }
            else
            {
                foreach (var agent in agentInfo.Keys)
                {
                    var action = new float[1] { defaultAction };
                    foreach (var dha in discretePlayerActions)
                    {
                        if (Input.GetKey(dha.key))
                        {
                            action[0] = dha.value;
                            break;
                        }
                    }

                    agent.UpdateVectorAction(action);
                }
            }
        }
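
Example #3 reads its bindings from fields that are not shown. Their shape can be inferred from the member accesses above; a plausible sketch, with the field names taken from those accesses and everything else a guess:

[System.Serializable]
public struct KeyContinuousPlayerAction
{
    public KeyCode key;  // key to watch, e.g. KeyCode.W
    public int index;    // slot in the continuous action vector
    public float value;  // value written while the key is held
}

[System.Serializable]
public struct AxisContinuousPlayerAction
{
    public string axis;  // Unity input axis name, e.g. "Horizontal"
    public int index;    // slot in the continuous action vector
    public float scale;  // multiplier applied to Input.GetAxis
}

[System.Serializable]
public struct DiscretePlayerAction
{
    public KeyCode key;  // key to watch
    public int value;    // discrete action written to action[0]
}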
Example #4
        /// Uses the stored information to run the TensorFlow graph and generate
        /// the actions.
        public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
        {
#if ENABLE_TENSORFLOW
            if (brainBatcher != null)
            {
                brainBatcher.SendBrainInfo(brain.gameObject.name, agentInfo);
            }

            int currentBatchSize = agentInfo.Count();
            List<Agent> agentList = agentInfo.Keys.ToList();
            if (currentBatchSize == 0)
            {
                return;
            }

            // Create the state tensor
            if (hasState)
            {
                int stateLength = brain.brainParameters.vectorObservationSize;
                inputState = new float[
                    currentBatchSize, stateLength * brain.brainParameters.numStackedVectorObservations];

                var i = 0;
                foreach (Agent agent in agentList)
                {
                    List<float> stateList = agentInfo[agent].stackedVectorObservation;
                    for (int j = 0;
                         j < stateLength * brain.brainParameters.numStackedVectorObservations;
                         j++)
                    {
                        inputState[i, j] = stateList[j];
                    }

                    i++;
                }
            }

            // Create the previous action tensor
            if (hasPrevAction)
            {
                int totalNumberActions = brain.brainParameters.vectorActionSize.Length;
                inputPrevAction = new int[currentBatchSize, totalNumberActions];
                var i = 0;
                foreach (Agent agent in agentList)
                {
                    float[] actionList = agentInfo[agent].storedVectorActions;
                    for (var j = 0; j < totalNumberActions; j++)
                    {
                        inputPrevAction[i, j] = Mathf.FloorToInt(actionList[j]);
                    }
                    i++;
                }
            }

            if (hasMaskedActions)
            {
                int totalActionOptions = brain.brainParameters.vectorActionSize.Sum();
                maskedActions = new float[currentBatchSize, totalActionOptions];
                var i = 0;
                foreach (Agent agent in agentList)
                {
                    for (int j = 0; j < totalActionOptions; j++)
                    {
                        if (agentInfo[agent].actionMasks != null)
                        {
                            maskedActions[i, j] = agentInfo[agent].actionMasks[j] ? 0.0f : 1.0f;
                        }
                        else
                        {
                            maskedActions[i, j] = 1.0f;
                        }
                    }
                    i++;
                }
            }

            observationMatrixList.Clear();
            for (int observationIndex = 0;
                 observationIndex < brain.brainParameters.cameraResolutions.Length;
                 observationIndex++)
            {
                texturesHolder.Clear();
                foreach (Agent agent in agentList)
                {
                    texturesHolder.Add(agentInfo[agent].visualObservations[observationIndex]);
                }

                observationMatrixList.Add(
                    BatchVisualObservations(texturesHolder,
                                            brain.brainParameters.cameraResolutions[observationIndex].blackAndWhite));
            }

            // Create the recurrent tensor
            if (hasRecurrent)
            {
                // Need to have variable memory size
                inputOldMemories = new float[currentBatchSize, memorySize];
                var i = 0;
                foreach (Agent agent in agentList)
                {
                    float[] m = agentInfo[agent].memories.ToArray();
                    for (int j = 0; j < m.Length; j++)
                    {
                        inputOldMemories[i, j] = m[j];
                    }

                    i++;
                }
            }

            var runner = session.GetRunner();
            try
            {
                runner.Fetch(graph[graphScope + ActionPlaceholderName][0]);
            }
            catch
            {
                throw new UnityAgentsException(string.Format(
                                                   @"The node {0} could not be found. Please make sure the graphScope {1} is correct",
                                                   graphScope + ActionPlaceholderName, graphScope));
            }

            if (hasBatchSize)
            {
                runner.AddInput(graph[graphScope + BatchSizePlaceholderName][0], new int[] { currentBatchSize });
            }

            foreach (TensorFlowAgentPlaceholder placeholder in graphPlaceholders)
            {
                try
                {
                    if (placeholder.valueType == TensorFlowAgentPlaceholder.TensorType.FloatingPoint)
                    {
                        runner.AddInput(graph[graphScope + placeholder.name][0],
                                        new float[] { Random.Range(placeholder.minValue, placeholder.maxValue) });
                    }
                    else if (placeholder.valueType == TensorFlowAgentPlaceholder.TensorType.Integer)
                    {
                        runner.AddInput(graph[graphScope + placeholder.name][0],
                                        new int[] { Random.Range((int)placeholder.minValue, (int)placeholder.maxValue + 1) });
                    }
                }
                catch
                {
                    throw new UnityAgentsException(string.Format(
                                                       @"One of the TensorFlow placeholders could not be found.
                In brain {0}, there is no {1} placeholder named {2}.",
                                                       brain.gameObject.name, placeholder.valueType.ToString(), graphScope + placeholder.name));
                }
            }

            // Create the state tensor
            if (hasState)
            {
                runner.AddInput(graph[graphScope + VectorObservationPlacholderName][0], inputState);
            }

            // Create the previous action tensor
            if (hasPrevAction)
            {
                runner.AddInput(graph[graphScope + PreviousActionPlaceholderName][0], inputPrevAction);
            }

            // Create the mask action tensor
            if (hasMaskedActions)
            {
                runner.AddInput(graph[graphScope + ActionMaskPlaceholderName][0], maskedActions);
            }

            // Create the observation tensors
            for (int obsNumber = 0;
                 obsNumber < brain.brainParameters.cameraResolutions.Length;
                 obsNumber++)
            {
                runner.AddInput(graph[graphScope + VisualObservationPlaceholderName[obsNumber]][0],
                                observationMatrixList[obsNumber]);
            }

            if (hasRecurrent)
            {
                runner.AddInput(graph[graphScope + "sequence_length"][0], 1);
                runner.AddInput(graph[graphScope + RecurrentInPlaceholderName][0], inputOldMemories);
                runner.Fetch(graph[graphScope + RecurrentOutPlaceholderName][0]);
            }

            if (hasValueEstimate)
            {
                runner.Fetch(graph[graphScope + "value_estimate"][0]);
            }

            TFTensor[] networkOutput;
            try
            {
                networkOutput = runner.Run();
            }
            catch (TFException e)
            {
                string errorMessage = e.Message;
                try
                {
                    errorMessage =
                        $@"The TensorFlow graph needs an input for {e.Message.Split(new string[] {"Node: "}, 0)[1].Split('=')[0]} of type {e.Message.Split(new string[] {"dtype="}, 0)[1].Split(',')[0]}";
                }
                finally
                {
                    throw new UnityAgentsException(errorMessage);
                }
            }
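
            // Note on output layout: outputs come back in the order they were
            // fetched above, so networkOutput[0] is the action node; with
            // hasRecurrent, networkOutput[1] is the recurrent state; and the
            // value estimate, when fetched, is the last element.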

            // Create the recurrent tensor
            if (hasRecurrent)
            {
                float[,] recurrentTensor = networkOutput[1].GetValue() as float[,];

                var i = 0;
                foreach (Agent agent in agentList)
                {
                    var m = new float[memorySize];
                    for (int j = 0; j < memorySize; j++)
                    {
                        m[j] = recurrentTensor[i, j];
                    }

                    agent.UpdateMemoriesAction(m.ToList());
                    i++;
                }
            }

            if (hasValueEstimate)
            {
                float[,] valueEstimates;
                if (hasRecurrent)
                {
                    valueEstimates = networkOutput[2].GetValue() as float[,];
                }
                else
                {
                    valueEstimates = networkOutput[1].GetValue() as float[,];
                }

                var i = 0;
                foreach (Agent agent in agentList)
                {
                    agent.UpdateValueAction(valueEstimates[i, 0]);
                    i++;
                }
            }

            if (brain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
            {
                var output = networkOutput[0].GetValue() as float[,];
                var i = 0;
                foreach (Agent agent in agentList)
                {
                    var a = new float[brain.brainParameters.vectorActionSize[0]];
                    for (int j = 0; j < brain.brainParameters.vectorActionSize[0]; j++)
                    {
                        a[j] = output[i, j];
                    }

                    agent.UpdateVectorAction(a);
                    i++;
                }
            }
            else if (brain.brainParameters.vectorActionSpaceType == SpaceType.discrete)
            {
                long[,] output = networkOutput[0].GetValue() as long[,];
                var i = 0;
                foreach (Agent agent in agentList)
                {
                    var actSize = brain.brainParameters.vectorActionSize.Length;
                    var a       = new float[actSize];
                    for (int actIdx = 0; actIdx < actSize; actIdx++)
                    {
                        a[actIdx] = output[i, actIdx];
                    }
                    agent.UpdateVectorAction(a);
                    i++;
                }
            }
#else
            if (agentInfo.Count > 0)
            {
                throw new UnityAgentsException(string.Format(
                                                   @"The brain {0} was set to Internal but the Tensorflow 
                        library is not present in the Unity project.",
                                                   brain.gameObject.name));
            }
#endif
        }
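
Example #4 also depends on a BatchVisualObservations helper that packs each agent's camera texture into one NHWC batch tensor. A minimal sketch of such a helper, assuming Texture2D inputs and [0, 1] normalization (the real method's layout and scaling may differ):

public static float[,,,] BatchVisualObservations(List<Texture2D> textures, bool blackAndWhite)
{
    int batchSize = textures.Count;
    int width = textures[0].width;
    int height = textures[0].height;
    int channels = blackAndWhite ? 1 : 3;
    var batch = new float[batchSize, height, width, channels];

    for (int b = 0; b < batchSize; b++)
    {
        Color32[] pixels = textures[b].GetPixels32();
        for (int h = 0; h < height; h++)
        {
            for (int w = 0; w < width; w++)
            {
                // GetPixels32 returns rows bottom-to-top; flip so row 0 is the top row.
                Color32 p = pixels[(height - h - 1) * width + w];
                if (blackAndWhite)
                {
                    batch[b, h, w, 0] = (p.r + p.g + p.b) / 3f / 255f;
                }
                else
                {
                    batch[b, h, w, 0] = p.r / 255f;
                    batch[b, h, w, 1] = p.g / 255f;
                    batch[b, h, w, 2] = p.b / 255f;
                }
            }
        }
    }

    return batch;
}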
Example #5
/// <summary>
/// Calls the DecideAction method that the concrete brain implements.
/// </summary>
private void BrainDecideAction()
{
    m_BrainBatcher?.SendBrainInfo(name, m_AgentInfos);
    DecideAction();
}
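
Example #5 comes from a later refactor in which each concrete brain overrides a parameterless DecideAction and reads the buffered infos directly. A sketch of the surrounding base class, assuming only the members visible in the snippet (the exact base type and field types are guesses):

public abstract class Brain : ScriptableObject
{
    protected Batcher m_BrainBatcher;  // assumed type
    protected Dictionary<Agent, AgentInfo> m_AgentInfos =
        new Dictionary<Agent, AgentInfo>();

    /// Implemented by each concrete brain (Player, Heuristic, Learning, ...).
    protected abstract void DecideAction();
}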