/// <summary>
/// Fills <paramref name="tensorProxy"/> with the concatenated vector observations
/// of every agent in <paramref name="infos"/>, one batch row per agent.
/// </summary>
/// <param name="tensorProxy">Destination tensor; resized to <paramref name="batchSize"/> rows.</param>
/// <param name="batchSize">Number of agents in this batch.</param>
/// <param name="infos">Per-agent info/sensor pairs, in batch order.</param>
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
{
    TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
    // Last shape dimension is the total vector observation size expected by the model.
    var expectedObsSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
    var batchIndex = 0;
    foreach (var pair in infos)
    {
        if (pair.agentInfo.done)
        {
            // A done agent may hold stale sensor references (e.g. a dependent object
            // was already disposed), so fill its row with zeroes instead of calling
            // sensor.Write.
            TensorUtils.FillTensorBatch(tensorProxy, batchIndex, 0.0f);
        }
        else
        {
            // Write each selected sensor back-to-back into this agent's row.
            var offset = 0;
            foreach (var sensorIndex in m_SensorIndices)
            {
                m_ObservationWriter.SetTarget(tensorProxy, batchIndex, offset);
                offset += pair.sensors[sensorIndex].Write(m_ObservationWriter);
            }
            Debug.AssertFormat(
                offset == expectedObsSize,
                "mismatch between vector observation size ({0}) and number of observations written ({1})",
                expectedObsSize,
                offset
            );
        }
        batchIndex++;
    }
}
/// <summary>
/// Writes the current batch size into a 1x1 tensor. The <paramref name="infos"/>
/// parameter is part of the shared generator signature and is unused here.
/// </summary>
/// <param name="tensorProxy">Destination tensor; its data buffer is reallocated.</param>
/// <param name="batchSize">Value written into the tensor.</param>
/// <param name="infos">Unused.</param>
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
{
    // Release the previous buffer (if any) before allocating a fresh 1x1 one.
    tensorProxy.data?.Dispose();
    tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1));
    tensorProxy.data[0] = batchSize;
}
/// <summary>
/// Resizes the tensor to the batch size and fills it with samples drawn from
/// m_RandomNormal. The <paramref name="infos"/> parameter is part of the shared
/// generator signature and is unused here.
/// </summary>
/// <param name="tensorProxy">Destination tensor; resized then filled with noise.</param>
/// <param name="batchSize">Number of rows to resize to.</param>
/// <param name="infos">Unused.</param>
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
{
    TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
    TensorUtils.FillTensorWithRandomNormal(tensorProxy, m_RandomNormal);
}
/// <summary>
/// Resizes the tensor to the given batch size without writing any data into it.
/// The <paramref name="infos"/> parameter is part of the shared generator
/// signature and is unused here.
/// </summary>
/// <param name="tensorProxy">Tensor to resize.</param>
/// <param name="batchSize">Number of rows to resize to.</param>
/// <param name="infos">Unused.</param>
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
{
    TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
}
/// <summary>
/// Samples one discrete action per branch from the per-branch probability
/// distributions in <paramref name="tensorProxy"/> and copies the sampled
/// actions into each agent's buffer in <paramref name="lastActions"/>.
/// </summary>
/// <param name="tensorProxy">
/// Tensor of concatenated branch probabilities, one batch row per agent;
/// branch i occupies columns [startActionIndices[i], startActionIndices[i] + m_ActionSize[i]).
/// </param>
/// <param name="actionIds">Agent ids, in the same order as the tensor's batch dimension.</param>
/// <param name="lastActions">Map from agent id to that agent's action buffer (updated in place).</param>
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
{
    // Materialize once; the original re-enumerated the IEnumerable a second time
    // below, which is unsafe for deferred sequences.
    var agentIds = actionIds as List<int> ?? actionIds.ToList();
    var batchSize = agentIds.Count;
    var actionValues = new float[batchSize, m_ActionSize.Length];
    var startActionIndices = Utilities.CumSum(m_ActionSize);
    for (var actionIndex = 0; actionIndex < m_ActionSize.Length; actionIndex++)
    {
        var nBranchAction = m_ActionSize[actionIndex];
        var actionProbs = new TensorProxy()
        {
            valueType = TensorProxy.TensorType.FloatingPoint,
            shape = new long[] { batchSize, nBranchAction },
            data = m_Allocator.Alloc(new TensorShape(batchSize, nBranchAction))
        };
        var outputTensor = new TensorProxy()
        {
            valueType = TensorProxy.TensorType.FloatingPoint,
            shape = new long[] { batchSize, 1 },
            data = m_Allocator.Alloc(new TensorShape(batchSize, 1))
        };
        try
        {
            // Slice this branch's probabilities out of the concatenated tensor.
            for (var batchIndex = 0; batchIndex < batchSize; batchIndex++)
            {
                for (var branchActionIndex = 0; branchActionIndex < nBranchAction; branchActionIndex++)
                {
                    actionProbs.data[batchIndex, branchActionIndex] =
                        tensorProxy.data[batchIndex, startActionIndices[actionIndex] + branchActionIndex];
                }
            }
            Eval(actionProbs, outputTensor, m_Multinomial);
            for (var ii = 0; ii < batchSize; ii++)
            {
                actionValues[ii, actionIndex] = outputTensor.data[ii, 0];
            }
        }
        finally
        {
            // Dispose even if Eval throws, so allocator memory is never leaked.
            actionProbs.data.Dispose();
            outputTensor.data.Dispose();
        }
    }
    // Copy the sampled actions back to each known agent's buffer.
    for (var agentIndex = 0; agentIndex < agentIds.Count; agentIndex++)
    {
        // TryGetValue avoids the ContainsKey + indexer double lookup.
        if (lastActions.TryGetValue(agentIds[agentIndex], out var actionVal))
        {
            if (actionVal == null)
            {
                actionVal = new float[m_ActionSize.Length];
                lastActions[agentIds[agentIndex]] = actionVal;
            }
            for (var j = 0; j < m_ActionSize.Length; j++)
            {
                actionVal[j] = actionValues[agentIndex, j];
            }
        }
    }
}