// Decide an action via the base class (MAES optimization), then log the
// optimizer's progress at the configured interval.
public override float[] Decide(List<float> vectorObs, List<Texture2D> visualObs, List<float> heuristicAction, List<float> heuristicVariance = null)
{
    var result = base.Decide(vectorObs, visualObs, heuristicAction, heuristicVariance);
    if (log)
    {
        logStep++;
        logger.AddData("Average MAES iteration", optimizer.Iteration);
        logger.AddData("Average MAES best Score", (float)optimizer.BestScore);
        if (logStep % logInterval == 0)
        {
            logger.LogAllCurrentData(logStep);
        }
    }
    return result;
}
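// Illustrative sketch only, not part of this repository: a minimal
// AgentDependentDecision subclass showing the contract the Decide override
// above participates in. The base class name and signature are taken from
// this file; the clamping logic is a hypothetical placeholder.
/*
public class ClampedHeuristicDecision : AgentDependentDecision
{
    public override float[] Decide(List<float> vectorObs, List<Texture2D> visualObs,
        List<float> heuristicAction, List<float> heuristicVariance = null)
    {
        // Start from the network's suggested action and clamp each component to [-1, 1].
        var action = new float[heuristicAction.Count];
        for (int j = 0; j < heuristicAction.Count; j++)
        {
            action[j] = Mathf.Clamp(heuristicAction[j], -1f, 1f);
        }
        return action;
    }
}
*/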
public override Dictionary<Agent, TakeActionOutput> TakeAction(Dictionary<Agent, AgentInfo> agentInfos)
{
    var result = new Dictionary<Agent, TakeActionOutput>();
    var agentList = new List<Agent>(agentInfos.Keys);
    if (agentList.Count <= 0)
    {
        return result;
    }

    // Batch all agents' observations and evaluate the supervised-learning model once.
    float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
    var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);
    var evalOutput = modelSL.EvaluateAction(vectorObsAll, visualObsAll);
    float[,] actions = evalOutput.Item1;
    var vars = evalOutput.Item2;

    int i = 0;
    int agentNumWithDecision = 0;
    float actionDiff = 0;  // accumulated difference between the scripted decision and the network's action

    foreach (var agent in agentList)
    {
        var info = agentInfos[agent];
        var agentDecision = agent.GetComponent<AgentDependentDecision>();
        if (agentDecision != null && agentDecision.useDecision)
        {
            // This agent overrides the network output with its own decision, using the
            // network's action (and variance, if available) as a heuristic hint.
            float[] action;
            if (vars != null)
            {
                action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List<float>(actions.GetRow(i)), new List<float>(vars.GetRow(i)));
            }
            else
            {
                action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List<float>(actions.GetRow(i)));
            }
            var tempAction = new TakeActionOutput();
            tempAction.outputAction = action;
            result[agent] = tempAction;

            // Track how far the decision deviates from the network: summed per-component
            // absolute difference for continuous actions, mismatch count for discrete ones.
            if (BrainToTrain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
            {
                actionDiff += Enumerable.Zip(action, actions.GetRow(i), (a, b) => Mathf.Abs(a - b)).Sum();
            }
            else
            {
                actionDiff += Enumerable.Zip(action, actions.GetRow(i), (a, b) => (Mathf.RoundToInt(a) == Mathf.RoundToInt(b)) ? 0 : 1).Sum();
            }
            agentNumWithDecision++;
        }
        else
        {
            // No decision component: use the network's action directly.
            var tempAction = new TakeActionOutput();
            tempAction.outputAction = actions.GetRow(i);
            result[agent] = tempAction;
        }
        i++;
    }

    if (agentNumWithDecision > 0)
    {
        stats.AddData("action difference", actionDiff / agentNumWithDecision);
    }
    return result;
}
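// Illustrative sketch only: GetRow is not defined in this file. It is assumed to be
// a 2D-array row-copy extension (for example, Accord.NET's Matrix.GetRow), behaving
// roughly like the hypothetical helper below.
/*
public static class ArrayRowExtensions
{
    // Copies one row of a rectangular 2D array into a new 1D array.
    public static float[] GetRow(this float[,] matrix, int row)
    {
        int cols = matrix.GetLength(1);
        var result = new float[cols];
        for (int c = 0; c < cols; c++)
        {
            result[c] = matrix[row, c];
        }
        return result;
    }
}
*/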