Example #1
    public override float[] Decide(List <float> vectorObs, List <Texture2D> visualObs, List <float> heuristicAction, List <float> heuristicVariance = null)
    {
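        // Get the action from the base class, then record MAES optimizer progress for logging.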
        var result = base.Decide(vectorObs, visualObs, heuristicAction, heuristicVariance);

        if (log)
        {
            logStep++;
            logger.AddData("Average MAES iteration", optimizer.Iteration);
            logger.AddData("Average MAES best Score", (float)optimizer.BestScore);
            if (logStep % logInterval == 0)
            {
                logger.LogAllCurrentData(logStep);
            }
        }
        return(result);
    }
    public override Dictionary <Agent, TakeActionOutput> TakeAction(Dictionary <Agent, AgentInfo> agentInfos)
    {
        var result = new Dictionary <Agent, TakeActionOutput>();

        var agentList = new List <Agent>(agentInfos.Keys);

        if (agentList.Count <= 0)
        {
            return(result);
        }
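        // Batch the vector and visual observations of every agent in agentInfos for a single network evaluation.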
        float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
        var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);

        // Evaluate the neural network on the batched observations; it returns the action batch and, when available, per-action variances.
        var evalOutput = modelSL.EvaluateAction(vectorObsAll, visualObsAll);

        float[,] actions = evalOutput.Item1;
        var vars = evalOutput.Item2;

        int   i = 0;
        int   agentNumWithDecision = 0;
        float actionDiff           = 0; //difference between the agents' own decisions and the network's actions

        // Give each agent an action: from its AgentDependentDecision if it uses one, otherwise straight from the network output.
        foreach (var agent in agentList)
        {
            var info          = agentInfos[agent];
            var agentDecision = agent.GetComponent <AgentDependentDecision>();

            if (agentDecision != null && agentDecision.useDecision)
            {
                //this agent supplies its own decision, so use it in place of the network output
                float[] action = null;
                if (vars != null)
                {
                    action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List <float>(actions.GetRow(i)), new List <float>(vars.GetRow(i)));
                }
                else
                {
                    action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List <float>(actions.GetRow(i)));
                }
                var tempAction = new TakeActionOutput();
                tempAction.outputAction = action;
                result[agent]           = tempAction;
                if (BrainToTrain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
                {
                    actionDiff += Enumerable.Zip(action, actions.GetRow(i), (a, b) => Mathf.Sqrt((a - b) * (a - b))).Aggregate((a, v) => a + v);
                }
                else
                {
                    actionDiff += Enumerable.Zip(action, actions.GetRow(i), (a, b) => (Mathf.RoundToInt(a) == Mathf.RoundToInt(b))?0:1).Aggregate((a, v) => a + v);
                }
                agentNumWithDecision++;
            }
            else
            {
                //use result from neural network directly
                var tempAction = new TakeActionOutput();
                tempAction.outputAction = actions.GetRow(i);
                result[agent]           = tempAction;
            }
            i++;
        }

        if (agentNumWithDecision > 0)
        {
            stats.AddData("action difference", actionDiff / agentNumWithDecision);
        }

        return(result);
    }
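
Both methods read rows out of the batched `float[,]` results with `GetRow(i)`. That extension method is not part of this listing; the sketch below is a hypothetical stand-in, assuming the project does not already provide an equivalent through a matrix/array utility library.

    // Hypothetical helper; the original project may already supply GetRow through an extension library.
    public static class ArrayExtensions
    {
        // Copy row `rowIndex` of a 2D array into a new 1D array.
        public static T[] GetRow<T>(this T[,] matrix, int rowIndex)
        {
            int cols = matrix.GetLength(1);
            var row  = new T[cols];
            for (int c = 0; c < cols; c++)
            {
                row[c] = matrix[rowIndex, c];
            }
            return row;
        }
    }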