/// <summary>
/// Runs <c>LegacyDiscreteActionOutputApplier</c> over a 2x5 input tensor for two agents
/// (ids 0 and 1) with a discrete action spec of two branches (sizes 2 and 3), and checks
/// the discrete actions written into each agent's <c>ActionBuffers</c>.
/// </summary>
public void ApplyDiscreteActionOutputLegacy()
{
    var actionSpec = ActionSpec.MakeDiscrete(2, 3);

    // Ten values in a 2x5 tensor. Presumably one row per agent, laid out as branch 0
    // (2 entries) followed by branch 1 (3 entries) -- TODO confirm against the applier.
    var inputTensor = new TensorProxy()
    {
        shape = new long[] { 2, 5 },
        data = new Tensor(
            2,
            5,
            new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f })
    };

    var alloc = new TensorCachingAllocator();
    try
    {
        // NOTE(review): the second argument (0) looks like a sampling seed -- confirm.
        var applier = new LegacyDiscreteActionOutputApplier(actionSpec, 0, alloc);
        var agentIds = new List<int>() { 0, 1 };

        // Dictionary from AgentId to Action
        var actionDict = new Dictionary<int, ActionBuffers>()
        {
            { 0, ActionBuffers.Empty },
            { 1, ActionBuffers.Empty }
        };

        applier.Apply(inputTensor, agentIds, actionDict);

        // Expected value goes first and the observed value second, so that a failure
        // message reports "expected"/"but was" the right way round.
        Assert.AreEqual(1, actionDict[0].DiscreteActions[0]);
        Assert.AreEqual(1, actionDict[0].DiscreteActions[1]);

        Assert.AreEqual(1, actionDict[1].DiscreteActions[0]);
        Assert.AreEqual(2, actionDict[1].DiscreteActions[1]);
    }
    finally
    {
        // Release the allocator even when an assertion above throws.
        alloc.Dispose();
    }
}
/// <summary>
/// Applies a continuous and a legacy discrete output applier to the same action dictionary
/// for two agents (ids 0 and 1), using an action spec with 3 continuous actions and two
/// discrete branches (sizes 2 and 3), then checks both the continuous and the discrete
/// values stored for each agent.
/// </summary>
public void ApplyHybridActionOutputLegacy()
{
    var actionSpec = new ActionSpec(3, new[] { 2, 3 });

    // 2x3 tensor: the assertions below expect row 0 (1, 2, 3) for agent 0 and
    // row 1 (4, 5, 6) for agent 1.
    var continuousInputTensor = new TensorProxy()
    {
        shape = new long[] { 2, 3 },
        data = new Tensor(2, 3, new float[] { 1, 2, 3, 4, 5, 6 })
    };

    // The declared shape mirrors the 2x5 data tensor (ten values). Presumably one row per
    // agent with branch 0 (2 entries) followed by branch 1 (3 entries) -- TODO confirm.
    var discreteInputTensor = new TensorProxy()
    {
        shape = new long[] { 2, 5 },
        data = new Tensor(
            2,
            5,
            new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f })
    };

    var continuousApplier = new ContinuousActionOutputApplier(actionSpec);
    var alloc = new TensorCachingAllocator();
    try
    {
        // NOTE(review): the second argument (0) looks like a sampling seed -- confirm.
        var discreteApplier = new LegacyDiscreteActionOutputApplier(actionSpec, 0, alloc);
        var agentIds = new List<int>() { 0, 1 };

        // Dictionary from AgentId to Action
        var actionDict = new Dictionary<int, ActionBuffers>()
        {
            { 0, ActionBuffers.Empty },
            { 1, ActionBuffers.Empty }
        };

        continuousApplier.Apply(continuousInputTensor, agentIds, actionDict);
        discreteApplier.Apply(discreteInputTensor, agentIds, actionDict);

        // Expected value goes first and the observed value second, so that a failure
        // message reports "expected"/"but was" the right way round.
        Assert.AreEqual(1f, actionDict[0].ContinuousActions[0]);
        Assert.AreEqual(2f, actionDict[0].ContinuousActions[1]);
        Assert.AreEqual(3f, actionDict[0].ContinuousActions[2]);
        Assert.AreEqual(1, actionDict[0].DiscreteActions[0]);
        Assert.AreEqual(1, actionDict[0].DiscreteActions[1]);

        Assert.AreEqual(4f, actionDict[1].ContinuousActions[0]);
        Assert.AreEqual(5f, actionDict[1].ContinuousActions[1]);
        Assert.AreEqual(6f, actionDict[1].ContinuousActions[2]);
        Assert.AreEqual(1, actionDict[1].DiscreteActions[0]);
        Assert.AreEqual(2, actionDict[1].DiscreteActions[1]);
    }
    finally
    {
        // Release the allocator even when an assertion above throws.
        alloc.Dispose();
    }
}
/// <summary>
/// Feeds <c>LegacyDiscreteActionOutputApplier</c> a 2x5 tensor of log-probabilities in which
/// exactly one entry per branch is large and the rest are very small, then checks that the
/// discrete action stored for each agent and branch is the index of the large entry.
/// </summary>
public void TestDiscreteApply()
{
    const int firstAgentId = 42;
    const int secondAgentId = 1337;
    const float unlikely = -1000.0f;
    const float likely = -1.0f;

    // Two discrete branches, of sizes 3 and 2.
    var spec = ActionSpec.MakeDiscrete(3, 2);

    var branchLogProbs = new TensorProxy
    {
        data = new Tensor(
            2,
            5,
            new[]
            {
                unlikely, unlikely, likely, // Agent 0, branch 0
                unlikely, likely,           // Agent 0, branch 1
                likely, unlikely, unlikely, // Agent 1, branch 0
                likely, unlikely,           // Agent 1, branch 1
            }),
        valueType = TensorProxy.TensorType.FloatingPoint
    };

    // No allocator is supplied here (null).
    // NOTE(review): 2020 looks like a sampling seed -- confirm.
    var discreteApplier = new LegacyDiscreteActionOutputApplier(spec, 2020, null);
    var ids = new List<int> { firstAgentId, secondAgentId };
    var buffersByAgent = new Dictionary<int, ActionBuffers>
    {
        { firstAgentId, new ActionBuffers(spec) },
        { secondAgentId, new ActionBuffers(spec) }
    };

    discreteApplier.Apply(branchLogProbs, ids, buffersByAgent);

    var firstAgentActions = buffersByAgent[firstAgentId].DiscreteActions;
    Assert.AreEqual(2, firstAgentActions[0]);
    Assert.AreEqual(1, firstAgentActions[1]);

    var secondAgentActions = buffersByAgent[secondAgentId].DiscreteActions;
    Assert.AreEqual(0, secondAgentActions[0]);
    Assert.AreEqual(0, secondAgentActions[1]);
}