/// Uses the stored information to run the trainer and generate
/// the actions for the agents.
public void DecideAction(Dictionary<Agent, AgentInfo> newAgentInfos)
{
    int currentBatchSize = newAgentInfos.Count();
    List<Agent> newAgentList = newAgentInfos.Keys.ToList();
    // Only agents that already have a recorded previous info and a previous
    // action output can contribute an experience this step.
    List<Agent> recordableAgentList = newAgentList.Where(
        (a) => currentInfo != null && currentInfo.ContainsKey(a) && prevActionOutput.ContainsKey(a)).ToList();
    /*if (currentBatchSize == 0)
    {
        return;
    }*/

    // Get the data only for the agents present in the new AgentInfo input.
    var prevInfo = GetValueForAgents(currentInfo, recordableAgentList);
    var prevActionActions = GetValueForAgents(prevActionOutput, recordableAgentList);
    var newInfo = GetValueForAgents(newAgentInfos, recordableAgentList);

    if (recordableAgentList.Count > 0 && trainerInterface.IsTraining()
        && trainerInterface.GetStep() <= trainerInterface.GetMaxStep())
    {
        trainerInterface.AddExperience(prevInfo, newInfo, prevActionActions);
        trainerInterface.ProcessExperience(prevInfo, newInfo);
    }
    if (trainerInterface.IsTraining() && trainerInterface.GetStep() <= trainerInterface.GetMaxStep())
    {
        trainerInterface.IncrementStep();
    }

    // Update the stored info with the newly received AgentInfos.
    UpdateInfos(ref currentInfo, newAgentInfos);

    var actionOutputs = trainerInterface.TakeAction(GetValueForAgents(currentInfo, newAgentList));
    UpdateActionOutputs(ref prevActionOutput, actionOutputs);

    // TODO: Update the agent's other info if there is any.
    foreach (Agent agent in newAgentList)
    {
        if (actionOutputs.ContainsKey(agent) && actionOutputs[agent].outputAction != null)
        {
            agent.UpdateVectorAction(trainerInterface.PostprocessingAction(actionOutputs[agent].outputAction));
        }
    }

    if (trainerInterface.IsReadyUpdate() && trainerInterface.IsTraining()
        && trainerInterface.GetStep() <= trainerInterface.GetMaxStep())
    {
        trainerInterface.UpdateModel();
    }

    // Clear the previous record if the agent is done or has reached its max step.
    foreach (Agent agent in newAgentList)
    {
        if (newAgentInfos[agent].done || newAgentInfos[agent].maxStepReached)
        {
            currentInfo.Remove(agent);
        }
    }
}
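
// NOTE: GetValueForAgents, UpdateInfos and UpdateActionOutputs are helper methods defined
// elsewhere in this class. As a rough, assumed sketch of the semantics the call sites above
// rely on, GetValueForAgents filters a per-agent dictionary down to the given list of agents:
//
//     private static Dictionary<Agent, T> GetValueForAgents<T>(
//         Dictionary<Agent, T> source, List<Agent> agents)
//     {
//         // Keep only the entries whose keys appear in the agent list.
//         return agents.ToDictionary(agent => agent, agent => source[agent]);
//     }
//
// The name and generic signature are inferred from the calls above; the actual
// implementation may differ (for example, it may be overloaded per value type
// rather than generic).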