Code example #1
    public override Dictionary <Agent, TakeActionOutput> TakeAction(Dictionary <Agent, AgentInfoInternal> agentInfos)
    {
        var result = new Dictionary <Agent, TakeActionOutput>();

        var agentList = new List <Agent>(agentInfos.Keys);

        float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
        var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);
        var actionMasks  = CreateActionMasks(agentInfos, agentList, BrainToTrain.brainParameters.vectorActionSize);

        // Evaluate the neural-evolution model on the whole batch of observations.
        float[,] actions = modeNE.EvaluateActionNE(vectorObsAll, visualObsAll, actionMasks);

        int i = 0;

        foreach (var agent in agentList)
        {
            var info = agentInfos[agent];
            //use result from neural network directly
            var tempAction = new TakeActionOutput();
            tempAction.outputAction = actions.GetRow(i);
            result[agent]           = tempAction;
            i++;
        }

        return(result);
    }
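All of these examples read per-agent results out of the batched network output with actions.GetRow(i), which is not part of the standard float[,] API and is presumably an extension method in the source project. A minimal sketch of such an extension, assuming it simply copies row i of the 2-D array (one row per agent):

using System;

public static class Array2DExtensions
{
    // Hypothetical helper mirroring the GetRow(i) calls in the examples:
    // copies row 'rowIndex' of a 2-D float array into a new 1-D array.
    public static float[] GetRow(this float[,] source, int rowIndex)
    {
        int columns = source.GetLength(1);
        var row = new float[columns];
        for (int c = 0; c < columns; c++)
        {
            row[c] = source[rowIndex, c];
        }
        return row;
    }
}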
Code example #2
    public override Dictionary <Agent, TakeActionOutput> TakeAction(Dictionary <Agent, AgentInfoInternal> agentInfos)
    {
        var result    = new Dictionary <Agent, TakeActionOutput>();
        var agentList = new List <Agent>(agentInfos.Keys);

        if (agentList.Count <= 0)
        {
            return(result);
        }
        float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
        var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);
        var actionMasks  = CreateActionMasks(agentInfos, agentList, BrainToTrain.brainParameters.vectorActionSize);

        // Evaluate state values for the batch, then sample actions and their probabilities.
        float[,] actionProbs = null;
        var values  = iModelPPO.EvaluateValue(vectorObsAll, visualObsAll);
        var actions = iModelPPO.EvaluateAction(vectorObsAll, out actionProbs, visualObsAll, actionMasks);



        int i = 0;

        foreach (var agent in agentList)
        {
            var agentDecision = agent.GetComponent <AgentDependentDecision>();

            if (isTraining && agentDecision != null && agentDecision.useDecision && UnityEngine.Random.Range(0, 1.0f) <= parametersPPO.useHeuristicChance)
            {
                //if this agent will use the decision, use it
                var info   = agentInfos[agent];
                var action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List <float>(actions.GetRow(i)));
                float[,] vectorOb = CreateVectorInputBatch(agentInfos, new List <Agent>()
                {
                    agent
                });
                var visualOb = CreateVisualInputBatch(agentInfos, new List <Agent>()
                {
                    agent
                }, BrainToTrain.brainParameters.cameraResolutions);
                var probs = iModelPPO.EvaluateProbability(vectorOb, action.Reshape(1, action.Length), visualOb, actionMasks);

                var temp = new TakeActionOutput();
                temp.allProbabilities = probs.GetRow(0);
                temp.outputAction     = action;
                temp.value            = values[i];
                result[agent]         = temp;
            }
            else
            {
                var temp = new TakeActionOutput();
                temp.allProbabilities = actionProbs.GetRow(i);
                temp.outputAction     = actions.GetRow(i);
                temp.value            = values[i];
                result[agent]         = temp;
            }
            i++;
        }


        return(result);
    }
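Example #2 mixes a user-supplied heuristic into PPO training: with probability parametersPPO.useHeuristicChance the heuristic decision replaces the sampled network action, and its probability under the current policy is re-evaluated so the step can still be used for the policy update. A minimal, framework-free sketch of just the mixing rule (the names heuristicChance, networkAction and heuristicAction are placeholders, not identifiers from the source project):

using System;

public class ActionMixer
{
    private readonly Random random = new Random();

    // Probability of overriding the network action with the heuristic during training.
    public float heuristicChance = 0.2f;

    public float[] Choose(Func<float[]> networkAction, Func<float[]> heuristicAction, bool isTraining)
    {
        // Mirrors the useHeuristicChance branch in example #2.
        if (isTraining && random.NextDouble() <= heuristicChance)
        {
            return heuristicAction();
        }
        return networkAction();
    }
}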
Code example #3
    public Dictionary <Agent, TakeActionOutput> TakeAction(Dictionary <Agent, AgentInfo> agentInfos)
    {
        var result = new Dictionary <Agent, TakeActionOutput>();

        foreach (var a in agentInfos)
        {
            AgentES agent = a.Key as AgentES;
            if (agent != null)
            {
                if (agent.synchronizedDecision)
                {
                    result[agent] = new TakeActionOutput()
                    {
                        outputAction = Array.ConvertAll(agent.Optimize(), t => (float)t)
                    };
                }
                else
                {
                    agent.OptimizeAsync();
                }
            }
        }
        // Note: the populated 'result' dictionary above is not returned; an empty one is.
        return(new Dictionary <Agent, TakeActionOutput>());
    }
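Example #3 assumes agent.Optimize() returns an array of doubles (or a similar wider type), which Array.ConvertAll narrows to the float[] expected by outputAction. A standalone illustration of that conversion:

using System;

public static class ConvertDemo
{
    public static void Main()
    {
        double[] optimized = { 0.25, -1.5, 3.0 };   // stand-in for agent.Optimize()
        float[] outputAction = Array.ConvertAll(optimized, t => (float)t);
        Console.WriteLine(string.Join(", ", outputAction));
    }
}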
Code example #4
    public override Dictionary <Agent, TakeActionOutput> TakeAction(Dictionary <Agent, AgentInfo> agentInfos)
    {
        var result = new Dictionary <Agent, TakeActionOutput>();

        var agentList = new List <Agent>(agentInfos.Keys);

        if (agentList.Count <= 0)
        {
            return(result);
        }
        float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
        var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);

        float[,] actions = null;
        var evalOutput = modelSL.EvaluateAction(vectorObsAll, visualObsAll);

        actions = evalOutput.Item1;  // batch of actions, one row per agent
        var vars = evalOutput.Item2; // optional second output passed on to the heuristic decision; may be null

        int   i = 0;
        int   agentNumWithDecision = 0;
        float actionDiff           = 0; //difference between the heuristic decision and the network output

        foreach (var agent in agentList)
        {
            var info          = agentInfos[agent];
            var agentDecision = agent.GetComponent <AgentDependentDecision>();

            if (agentDecision != null && agentDecision.useDecision)
            {
                //if this agent will use the decision, use it
                float[] action = null;
                if (vars != null)
                {
                    action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List <float>(actions.GetRow(i)), new List <float>(vars.GetRow(i)));
                }
                else
                {
                    action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List <float>(actions.GetRow(i)));
                }
                var tempAction = new TakeActionOutput();
                tempAction.outputAction = action;
                result[agent]           = tempAction;
                if (BrainToTrain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
                {
                    actionDiff += Enumerable.Zip(action, actions.GetRow(i), (a, b) => Mathf.Sqrt((a - b) * (a - b))).Aggregate((a, v) => a + v);
                }
                else
                {
                    actionDiff += Enumerable.Zip(action, actions.GetRow(i), (a, b) => (Mathf.RoundToInt(a) == Mathf.RoundToInt(b))?0:1).Aggregate((a, v) => a + v);
                }
                agentNumWithDecision++;
            }
            else
            {
                //use result from neural network directly
                var tempAction = new TakeActionOutput();
                tempAction.outputAction = actions.GetRow(i);
                result[agent]           = tempAction;
            }
            i++;
        }

        if (agentNumWithDecision > 0)
        {
            stats.AddData("action difference", actionDiff / agentNumWithDecision);
        }

        return(result);
    }
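The "action difference" statistic logged in example #4 is a sum of absolute differences for continuous actions (Mathf.Sqrt((a - b) * (a - b)) is just |a - b|) and a count of mismatching choices for discrete actions, averaged over the agents that used a heuristic decision. A standalone sketch of the two per-agent metrics:

using System;
using System.Linq;

public static class ActionDifference
{
    // Continuous branch: sum of absolute differences between the two action vectors.
    public static float Continuous(float[] decided, float[] network)
    {
        return decided.Zip(network, (a, b) => Math.Abs(a - b)).Sum();
    }

    // Discrete branch: number of positions where the rounded choices disagree.
    public static int Discrete(float[] decided, float[] network)
    {
        return decided.Zip(network, (a, b) => (int)Math.Round(a) == (int)Math.Round(b) ? 0 : 1).Sum();
    }
}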
Code example #5
    public override Dictionary <Agent, TakeActionOutput> TakeAction(Dictionary <Agent, AgentInfoInternal> agentInfos)
    {
        var result    = new Dictionary <Agent, TakeActionOutput>();
        var agentList = new List <Agent>(agentInfos.Keys);

        if (agentList.Count <= 0)
        {
            return(result);
        }
        float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
        var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);


        var values  = iModelPPO.EvaluateValue(vectorObsAll, visualObsAll);
        var actions = iModelPPO.EvaluateAction(vectorObsAll, visualObsAll);

        /*if (GetStep() < pretrainDataCollectingSteps)
         * {
         *  //collecting data for pretrain
         *  for (int n = 0; n < vectorObsAll.GetLength(1); ++n)
         *  {
         *      for (int m = 0; m < vectorObsAll.GetLength(0); ++m)
         *      {
         *          pretrainObservationDataCollect[n].Add(vectorObsAll[m, n]);
         *      }
         *  }
         * }*/

        int i = 0;

        foreach (var agent in agentList)
        {
            var agentDecision = agent.GetComponent <AgentDependentDecision>();

            if (isTraining && agentDecision != null && agentDecision.useDecision)// && UnityEngine.Random.Range(0, 1.0f) <= parametersPPO.useHeuristicChance)
            {
                //if this agent will use the decision, use it
                var info   = agentInfos[agent];
                var action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List <float>(actions.GetRow(i)));
                float[,] vectorOb = CreateVectorInputBatch(agentInfos, new List <Agent>()
                {
                    agent
                });
                var visualOb = CreateVisualInputBatch(agentInfos, new List <Agent>()
                {
                    agent
                }, BrainToTrain.brainParameters.cameraResolutions);

                var temp = new TakeActionOutput();
                temp.outputAction = action;
                temp.value        = values[i];
                result[agent]     = temp;
            }
            else
            {
                var temp = new TakeActionOutput();
                //temp.allProbabilities = actionProbs.GetRow(i);
                temp.outputAction = actions.GetRow(i);
                temp.value        = values[i];
                result[agent]     = temp;
            }
            i++;
        }


        return(result);
    }