/// <summary>
/// Evaluates the neuro-evolution model for every queried agent and returns each agent's action.
/// </summary>
/// <param name="agentInfos">Per-agent observation data collected by the brain this step.</param>
/// <returns>One <see cref="TakeActionOutput"/> per agent, with <c>outputAction</c> taken directly from the network.</returns>
public override Dictionary <Agent, TakeActionOutput> TakeAction(Dictionary <Agent, AgentInfoInternal> agentInfos)
{
    var result = new Dictionary <Agent, TakeActionOutput>();
    var agentList = new List <Agent>(agentInfos.Keys);

    // Guard against an empty query — the sibling trainers all do this, and the
    // model cannot evaluate a zero-row batch.
    if (agentList.Count <= 0)
    {
        return result;
    }

    float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
    var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);
    var actionMasks = CreateActionMasks(agentInfos, agentList, BrainToTrain.brainParameters.vectorActionSize);

    // Evaluate the whole batch at once; row i of the result belongs to agentList[i].
    float[,] actions = modeNE.EvaluateActionNE(vectorObsAll, visualObsAll, actionMasks);

    int i = 0;
    foreach (var agent in agentList)
    {
        // Use the result from the neural network directly; NE has no heuristic override.
        var tempAction = new TakeActionOutput();
        tempAction.outputAction = actions.GetRow(i);
        result[agent] = tempAction;
        i++;
    }
    return result;
}
/// <summary>
/// PPO action step: evaluates values and actions for the whole batch, optionally replacing the
/// network's action with a heuristic <see cref="AgentDependentDecision"/> during training.
/// </summary>
/// <param name="agentInfos">Per-agent observation data collected by the brain this step.</param>
/// <returns>Per-agent action, action probabilities, and value estimate.</returns>
public override Dictionary <Agent, TakeActionOutput> TakeAction(Dictionary <Agent, AgentInfoInternal> agentInfos)
{
    var result = new Dictionary <Agent, TakeActionOutput>();
    var agentList = new List <Agent>(agentInfos.Keys);
    if (agentList.Count <= 0)
    {
        return result;
    }

    float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
    var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);
    var actionMasks = CreateActionMasks(agentInfos, agentList, BrainToTrain.brainParameters.vectorActionSize);

    float[,] actionProbs = null;
    var values = iModelPPO.EvaluateValue(vectorObsAll, visualObsAll);
    var actions = iModelPPO.EvaluateAction(vectorObsAll, out actionProbs, visualObsAll, actionMasks);

    int i = 0;
    foreach (var agent in agentList)
    {
        var agentDecision = agent.GetComponent <AgentDependentDecision>();
        // During training, with probability useHeuristicChance, let the agent's own heuristic
        // decision override the sampled network action.
        if (isTraining && agentDecision != null && agentDecision.useDecision && UnityEngine.Random.Range(0, 1.0f) <= parametersPPO.useHeuristicChance)
        {
            var info = agentInfos[agent];
            var action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List <float>(actions.GetRow(i)));

            // Re-evaluate the probability of the heuristic action under the current policy,
            // so PPO can still compute its importance ratio for this sample.
            var singleAgentList = new List <Agent>() { agent };
            float[,] vectorOb = CreateVectorInputBatch(agentInfos, singleAgentList);
            var visualOb = CreateVisualInputBatch(agentInfos, singleAgentList, BrainToTrain.brainParameters.cameraResolutions);
            // BUGFIX: the full-batch actionMasks was previously passed here even though
            // vectorOb/visualOb form a single-agent batch — build a matching single-agent mask.
            var singleMask = CreateActionMasks(agentInfos, singleAgentList, BrainToTrain.brainParameters.vectorActionSize);
            var probs = iModelPPO.EvaluateProbability(vectorOb, action.Reshape(1, action.Length), visualOb, singleMask);

            var temp = new TakeActionOutput();
            temp.allProbabilities = probs.GetRow(0);
            temp.outputAction = action;
            temp.value = values[i];
            result[agent] = temp;
        }
        else
        {
            var temp = new TakeActionOutput();
            temp.allProbabilities = actionProbs.GetRow(i);
            temp.outputAction = actions.GetRow(i);
            temp.value = values[i];
            result[agent] = temp;
        }
        i++;
    }
    return result;
}
/// <summary>
/// Evolution-strategy action step. Agents with <c>synchronizedDecision</c> are optimized
/// synchronously and their result returned; all others are optimized asynchronously and
/// apply their own action when finished.
/// </summary>
/// <param name="agentInfos">Per-agent observation data; only <see cref="AgentES"/> keys are handled.</param>
/// <returns>Actions for the synchronously-optimized agents.</returns>
public Dictionary <Agent, TakeActionOutput> TakeAction(Dictionary <Agent, AgentInfo> agentInfos)
{
    var result = new Dictionary <Agent, TakeActionOutput>();
    foreach (var a in agentInfos)
    {
        AgentES agent = a.Key as AgentES;
        if (agent != null)
        {
            if (agent.synchronizedDecision)
            {
                // Optimize() yields doubles; convert to the float[] the output expects.
                result[agent] = new TakeActionOutput()
                {
                    outputAction = Array.ConvertAll(agent.Optimize(), t => (float)t)
                };
            }
            else
            {
                // Fire-and-forget; the agent consumes its own result asynchronously.
                agent.OptimizeAsync();
            }
        }
    }
    // BUGFIX: previously returned a fresh empty dictionary, silently discarding every
    // synchronously computed action stored in `result` above.
    return result;
}
/// <summary>
/// Supervised-learning action step: evaluates the network for the batch, lets agents with a
/// heuristic decision override the network's action, and records the mean difference between
/// heuristic and network actions as a training statistic.
/// </summary>
/// <param name="agentInfos">Per-agent observation data collected by the brain this step.</param>
/// <returns>One <see cref="TakeActionOutput"/> per agent.</returns>
public override Dictionary <Agent, TakeActionOutput> TakeAction(Dictionary <Agent, AgentInfo> agentInfos)
{
    var result = new Dictionary <Agent, TakeActionOutput>();
    var agentList = new List <Agent>(agentInfos.Keys);
    if (agentList.Count <= 0)
    {
        return result;
    }

    float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
    var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);

    var evalOutput = modelSL.EvaluateAction(vectorObsAll, visualObsAll);
    float[,] actions = evalOutput.Item1;
    var vars = evalOutput.Item2; // per-action variances; may be null when the model has none

    int i = 0;
    int agentNumWithDecision = 0;
    float actionDiff = 0; // accumulated difference between heuristic decision and network output

    foreach (var agent in agentList)
    {
        var info = agentInfos[agent];
        var agentDecision = agent.GetComponent <AgentDependentDecision>();
        if (agentDecision != null && agentDecision.useDecision)
        {
            // This agent supplies its own heuristic decision; pass variances along when available.
            float[] action = null;
            if (vars != null)
            {
                action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List <float>(actions.GetRow(i)), new List <float>(vars.GetRow(i)));
            }
            else
            {
                action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List <float>(actions.GetRow(i)));
            }
            var tempAction = new TakeActionOutput();
            tempAction.outputAction = action;
            result[agent] = tempAction;

            // Track how far the heuristic deviates from the network so training progress is visible.
            if (BrainToTrain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
            {
                // Sqrt((a-b)^2) is just |a-b|; Sum() is also safe on an empty action vector,
                // where the previous Aggregate() would have thrown InvalidOperationException.
                actionDiff += Enumerable.Zip(action, actions.GetRow(i), (a, b) => Mathf.Abs(a - b)).Sum();
            }
            else
            {
                // Count of discrete action dimensions where the rounded choices disagree.
                actionDiff += Enumerable.Zip(action, actions.GetRow(i), (a, b) => (Mathf.RoundToInt(a) == Mathf.RoundToInt(b)) ? 0 : 1).Sum();
            }
            agentNumWithDecision++;
        }
        else
        {
            // Use the result from the neural network directly.
            var tempAction = new TakeActionOutput();
            tempAction.outputAction = actions.GetRow(i);
            result[agent] = tempAction;
        }
        i++;
    }

    if (agentNumWithDecision > 0)
    {
        stats.AddData("action difference", actionDiff / agentNumWithDecision);
    }
    return result;
}
/// <summary>
/// PPO-style action step without probability evaluation: evaluates values and actions for the
/// batch, letting a heuristic <see cref="AgentDependentDecision"/> override the network action
/// during training.
/// </summary>
/// <param name="agentInfos">Per-agent observation data collected by the brain this step.</param>
/// <returns>Per-agent action and value estimate.</returns>
public override Dictionary <Agent, TakeActionOutput> TakeAction(Dictionary <Agent, AgentInfoInternal> agentInfos)
{
    var result = new Dictionary <Agent, TakeActionOutput>();
    var agentList = new List <Agent>(agentInfos.Keys);
    if (agentList.Count <= 0)
    {
        return result;
    }

    float[,] vectorObsAll = CreateVectorInputBatch(agentInfos, agentList);
    var visualObsAll = CreateVisualInputBatch(agentInfos, agentList, BrainToTrain.brainParameters.cameraResolutions);

    var values = iModelPPO.EvaluateValue(vectorObsAll, visualObsAll);
    var actions = iModelPPO.EvaluateAction(vectorObsAll, visualObsAll);

    int i = 0;
    foreach (var agent in agentList)
    {
        var agentDecision = agent.GetComponent <AgentDependentDecision>();
        // NOTE(review): unlike the probability-aware PPO trainer, the useHeuristicChance
        // random roll is disabled here — the heuristic is always used when available.
        if (isTraining && agentDecision != null && agentDecision.useDecision)
        {
            // This agent supplies its own heuristic decision.
            var info = agentInfos[agent];
            var action = agentDecision.Decide(info.stackedVectorObservation, info.visualObservations, new List <float>(actions.GetRow(i)));
            // (Removed a dead single-agent re-batching step: the batches it built were never used,
            // since no probability re-evaluation happens in this trainer.)
            var temp = new TakeActionOutput();
            temp.outputAction = action;
            temp.value = values[i];
            result[agent] = temp;
        }
        else
        {
            var temp = new TakeActionOutput();
            temp.outputAction = actions.GetRow(i);
            temp.value = values[i];
            result[agent] = temp;
        }
        i++;
    }
    return result;
}