/// Collects information from the agents and stores it for the
/// TensorFlow model, then forwards the brain info through the
/// coordinator when one is attached.
public void SendState()
{
#if ENABLE_TENSORFLOW
    agentKeys = new List<int>(brain.agents.Keys);
    currentBatchSize = brain.agents.Count;

    // With no agents there is nothing to batch; still notify the
    // coordinator so the external side stays in sync.
    if (currentBatchSize == 0)
    {
        if (coord != null)
        {
            coord.giveBrainInfo(brain);
        }
        return;
    }

    // Build the state tensor: one row per agent, rows in agentKeys order.
    if (hasState)
    {
        Dictionary<int, List<float>> states = brain.CollectStates();
        int stateSize = brain.brainParameters.stateSize;
        inputState = new float[currentBatchSize, stateSize];
        int row = 0;
        foreach (int key in agentKeys)
        {
            List<float> agentState = states[key];
            for (int col = 0; col < stateSize; col++)
            {
                inputState[row, col] = agentState[col];
            }
            row++;
        }
    }

    // Build the observation tensors.
    observationMatrixList = brain.GetObservationMatrixList(agentKeys);

    // Build the recurrent (memory) tensor: one row per agent.
    if (hasRecurrent)
    {
        Dictionary<int, float[]> oldMemories = brain.CollectMemories();
        int memorySize = brain.brainParameters.memorySize;
        inputOldMemories = new float[currentBatchSize, memorySize];
        int row = 0;
        foreach (int key in agentKeys)
        {
            float[] memory = oldMemories[key];
            for (int col = 0; col < memorySize; col++)
            {
                inputOldMemories[row, col] = memory[col];
            }
            row++;
        }
    }

    if (coord != null)
    {
        coord.giveBrainInfo(brain);
    }
#endif
}
/// Uses the communicator to send the states, observations, rewards and
/// dones outside of Unity.
public void SendState()
{
    // Without a communicator there is nowhere to send the brain info.
    if (coord == null)
    {
        return;
    }
    coord.giveBrainInfo(brain);
}
/// Nothing to implement, the Player does not use the state to make
/// decisions.
public void SendState()
{
    if (coord == null)
    {
        // No communicator attached: still collect the states so the
        // CollectStates method can be exercised and debugged.
        brain.CollectStates();
        return;
    }
    coord.giveBrainInfo(brain);
}
/// Gathers states, rewards and done flags from the agents; either
/// forwards them through the coordinator or feeds the rewards to the
/// local policy, triggering a learning step when an episode ends.
public void SendState()
{
    Dictionary<int, List<float>> states = brain.CollectStates();
    Dictionary<int, float> rewards = brain.CollectRewards();
    Dictionary<int, bool> dones = brain.CollectDones();

    // Lazily grab the Syft controller the first time through.
    if (ctrl == null)
    {
        ctrl = brain.brainParameters.syft.controller;
    }

    if (coord != null)
    {
        coord.giveBrainInfo(brain);
        return;
    }

    if (policy == null)
    {
        return;
    }

    // No external communicator: hook each agent's reward into the
    // policy, and learn for agents whose episode just finished.
    foreach (KeyValuePair<int, List<float>> idAgent in states)
    {
        int agentId = idAgent.Key;
        float rawReward = rewards[agentId];
        FloatTensor reward = ctrl.floatTensorFactory.Create(
            _shape: new int[] { 1 },
            _data: new float[] { rawReward });
        policy.HookReward(reward);
        if (dones[agentId])
        {
            policy.Learn();
        }
    }
}
/// Collects information from the agents and stores it for the
/// TensorFlow model. This variant additionally splits the flat state
/// vector into a spatial (grid) observation plus a small vector state
/// before handing it to the model.
public void SendState()
{
#if ENABLE_TENSORFLOW
    // Layout of each agent's flat state vector: a GridHeight x GridWidth
    // x GridChannels spatial block (row-major, channels fastest),
    // followed by VectorStateSize scalar values. This requires
    // brain.brainParameters.stateSize == GridSize + VectorStateSize
    // (2808) — TODO confirm against the agent's CollectState.
    const int GridHeight = 20;
    const int GridWidth = 20;
    const int GridChannels = 7;
    const int GridSize = GridHeight * GridWidth * GridChannels; // 2800
    const int VectorStateSize = 8;

    agentKeys = new List<int>(brain.agents.Keys);
    currentBatchSize = brain.agents.Count;

    // With no agents there is nothing to batch; still notify the
    // coordinator so the external side stays in sync.
    if (currentBatchSize == 0)
    {
        if (coord != null)
        {
            coord.giveBrainInfo(brain);
        }
        return;
    }

    // Create the state tensor: one row per agent, rows in agentKeys order.
    if (hasState)
    {
        Dictionary<int, List<float>> states = brain.CollectStates();
        inputState = new float[currentBatchSize, brain.brainParameters.stateSize];
        int i = 0;
        foreach (int k in agentKeys)
        {
            List<float> state_list = states[k];
            for (int j = 0; j < brain.brainParameters.stateSize; j++)
            {
                inputState[i, j] = state_list[j];
            }
            i++;
        }
    }

    // Create the observation tensors.
    observationMatrixList = brain.GetObservationMatrixList(agentKeys);

    // NOTE(review): the reshape below reads inputState unconditionally;
    // if hasState were false, inputState could be null or stale —
    // confirm this brain always runs with state enabled.
    // Unpack the spatial block of the flat state into a 4-D
    // [agent, height, width, channel] observation tensor.
    float[,,,] data = new float[agentKeys.Count, GridHeight, GridWidth, GridChannels];
    for (int agent = 0; agent < agentKeys.Count; agent++)
    {
        for (int h = 0; h < GridHeight; h++)
        {
            for (int w = 0; w < GridWidth; w++)
            {
                for (int c = 0; c < GridChannels; c++)
                {
                    // (h * GridWidth + w) * GridChannels + c == h*140 + w*7 + c
                    data[agent, h, w, c] =
                        inputState[agent, (h * GridWidth + w) * GridChannels + c];
                }
            }
        }
    }

    // Shrink inputState to just the trailing vector portion of the
    // state; the spatial portion now lives in the observation tensor.
    var fullState = inputState;
    inputState = new float[currentBatchSize, VectorStateSize];
    for (int agent = 0; agent < agentKeys.Count; agent++)
    {
        for (int j = 0; j < VectorStateSize; j++)
        {
            inputState[agent, j] = fullState[agent, GridSize + j];
        }
    }
    observationMatrixList.Add(data);

    // Create the recurrent (memory) tensor: one row per agent.
    if (hasRecurrent)
    {
        Dictionary<int, float[]> old_memories = brain.CollectMemories();
        inputOldMemories = new float[currentBatchSize, brain.brainParameters.memorySize];
        int i = 0;
        foreach (int k in agentKeys)
        {
            float[] m = old_memories[k];
            for (int j = 0; j < brain.brainParameters.memorySize; j++)
            {
                inputOldMemories[i, j] = m[j];
            }
            i++;
        }
    }

    if (coord != null)
    {
        coord.giveBrainInfo(brain);
    }
#endif
}
/// Uses the communicator to send the states, observations, rewards and
/// dones outside of Unity.
public void SendState()
{
    // Guard against a missing communicator: every sibling CoreBrain
    // implementation null-checks coord before use, and calling through
    // a null coord here would throw a NullReferenceException.
    if (coord != null)
    {
        coord.giveBrainInfo(brain);
    }
}