/// <summary>
/// Sends one environment step to the Python side over <c>network</c> and
/// returns the reply that comes back.
/// </summary>
/// <param name="environmentSymbol">Status symbol of the environment; written first in the outgoing message.</param>
/// <param name="state">Observation values; written right after the symbol.</param>
/// <param name="reward">Reward for the step; written at position <c>observationDimension + 1</c>.</param>
/// <param name="done">Episode-finished flag; transmitted as 1f (true) or 0f (false).</param>
/// <returns>
/// An <c>ActionResponse</c> holding the agent symbol, the action values and the
/// received text message. The <c>actions</c> member of this controller is
/// handed out directly (no copy is made here).
/// </returns>
public override ActionResponse GetActionResponse(int environmentSymbol, float[] state, float reward, bool done)
{
    // The flag travels as a float like every other entry of the message.
    float doneFlag;
    if (done)
    {
        doneFlag = 1f;
    }
    else
    {
        doneFlag = 0f;
    }

    // Outgoing message layout: (symbol, *state, reward, done).
    network.SetFloatInBuffer(environmentSymbol, 0);
    network.SetArrayFloatsInBuffer(state, 1);
    network.SetFloatInBuffer(reward, observationDimension + 1);
    network.SetFloatInBuffer(doneFlag, observationDimension + 2);
    network.Send();

    // Incoming message: one status symbol followed by the action values, plus a string.
    network.ReceiveFloatsAndString(1 + actionDimension, receiveBuffer, ref receivedMessage);

    // The symbol arrives as a float; round it before converting to the enum.
    int symbolCode = Mathf.RoundToInt(receiveBuffer[0]);

    // Everything after the first entry of the receive buffer is an action value.
    int actionCount = receiveBuffer.Length - 1;
    for (int k = 0; k < actionCount; k++)
    {
        actions[k] = receiveBuffer[k + 1];
    }

    return new ActionResponse
    {
        symbol = (AgentActionSymbol)symbolCode,
        actions = actions,
        msg = receivedMessage
    };
}
/// <summary>
/// Constructor to use for training, i.e. when Python starts everything.
/// Creates the UDP link, caches the environment's observation and action
/// dimensions, allocates the send buffer and sends an intro message
/// announcing both dimensions to Python.
/// </summary>
/// <param name="environment">Environment whose observation/action dimensions are queried.</param>
/// <param name="portSend">First port argument handed to <c>UDPNetwork</c>.</param>
/// <param name="portReceive">Second port argument handed to <c>UDPNetwork</c>.</param>
public PythonAgentController(Environment environment, int portSend, int portReceive)
{
    // Size in bytes of each dimension field in the intro message.
    const int DimensionSize = sizeof(int);

    this.environment = environment;
    network = new UDPNetwork(portSend, portReceive);

    observationDimension = environment.ObservationDimension();
    actionDimension = environment.ActionDimension();

    // NOTE(review): GetActionResponse writes floats at positions 0 through
    // observationDimension + 2 (symbol, state, reward, done). If this size is
    // in bytes, 1 + 4 + 4 * observationDimension looks too small for that
    // layout - confirm the unit expected by AllocateSendBuffer.
    network.AllocateSendBuffer(1 + 4 + 4 * observationDimension);

    /* Send intro network msg to Python (8 bytes)
     * 1. observation dimension (int = 4 bytes)
     * 2. action dimension (int = 4 bytes)
     * Bytes are in the order produced by BitConverter.GetBytes, i.e. the
     * byte order of the machine this runs on.
     */
    byte[] obsdimBytes = BitConverter.GetBytes(observationDimension);
    byte[] actdimBytes = BitConverter.GetBytes(actionDimension);

    byte[] introMsg = new byte[2 * DimensionSize];
    Buffer.BlockCopy(obsdimBytes, 0, introMsg, 0, DimensionSize);
    Buffer.BlockCopy(actdimBytes, 0, introMsg, DimensionSize, DimensionSize);

    network.Send(introMsg);
}