// Runs LegacyDiscreteActionOutputApplier.Apply on a 2 x 5 input tensor for two agents
        // and checks the discrete action written for each of the two branches of each agent.
        // NOTE(review): the second constructor argument (0) is presumably the sampling seed the
        // expected values below depend on — confirm against the applier.
        public void ApplyDiscreteActionOutputLegacy()
        {
            var actionSpec  = ActionSpec.MakeDiscrete(2, 3);
            var inputTensor = new TensorProxy()
            {
                shape = new long[] { 2, 5 },
                data  = new Tensor(
                    2,
                    5,
                    new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f })
            };
            var alloc = new TensorCachingAllocator();

            // try/finally so the allocator is released even when an assertion throws.
            try
            {
                var applier = new LegacyDiscreteActionOutputApplier(actionSpec, 0, alloc);

                var agentIds = new List<int>() { 0, 1 };

                // Dictionary from AgentId to Action
                var actionDict = new Dictionary<int, ActionBuffers>()
                {
                    { 0, ActionBuffers.Empty }, { 1, ActionBuffers.Empty }
                };

                applier.Apply(inputTensor, agentIds, actionDict);

                // Assert.AreEqual takes (expected, actual); keeping that order makes
                // failure messages report the right value as "expected".
                Assert.AreEqual(1, actionDict[0].DiscreteActions[0]);
                Assert.AreEqual(1, actionDict[0].DiscreteActions[1]);

                Assert.AreEqual(1, actionDict[1].DiscreteActions[0]);
                Assert.AreEqual(2, actionDict[1].DiscreteActions[1]);
            }
            finally
            {
                alloc.Dispose();
            }
        }
        // Applies a continuous tensor (2 x 3) and a discrete tensor (2 x 5 data) to the same
        // action dictionary for two agents, then checks that both the continuous values and
        // the discrete branch actions were written for each agent.
        // NOTE(review): the second constructor argument (0) of the discrete applier is
        // presumably the sampling seed the expected discrete values depend on — confirm.
        public void ApplyHybridActionOutputLegacy()
        {
            var actionSpec            = new ActionSpec(3, new[] { 2, 3 });
            var continuousInputTensor = new TensorProxy()
            {
                shape = new long[] { 2, 3 },
                data  = new Tensor(2, 3, new float[] { 1, 2, 3, 4, 5, 6 })
            };
            var discreteInputTensor = new TensorProxy()
            {
                // NOTE(review): declared shape { 2, 8 } does not match the 2 x 5 data tensor
                // below; left unchanged because the applier's use of `shape` is not visible
                // here — confirm whether { 2, 5 } was intended.
                shape = new long[] { 2, 8 },
                data  = new Tensor(
                    2,
                    5,
                    new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f })
            };
            var continuousApplier = new ContinuousActionOutputApplier(actionSpec);
            var alloc             = new TensorCachingAllocator();

            // try/finally so the allocator is released even when an assertion throws.
            try
            {
                var discreteApplier = new LegacyDiscreteActionOutputApplier(actionSpec, 0, alloc);

                var agentIds = new List<int>() { 0, 1 };

                // Dictionary from AgentId to Action
                var actionDict = new Dictionary<int, ActionBuffers>()
                {
                    { 0, ActionBuffers.Empty }, { 1, ActionBuffers.Empty }
                };

                continuousApplier.Apply(continuousInputTensor, agentIds, actionDict);
                discreteApplier.Apply(discreteInputTensor, agentIds, actionDict);

                // Assert.AreEqual takes (expected, actual); keeping that order makes
                // failure messages report the right value as "expected".
                Assert.AreEqual(1, actionDict[0].ContinuousActions[0]);
                Assert.AreEqual(2, actionDict[0].ContinuousActions[1]);
                Assert.AreEqual(3, actionDict[0].ContinuousActions[2]);
                Assert.AreEqual(1, actionDict[0].DiscreteActions[0]);
                Assert.AreEqual(1, actionDict[0].DiscreteActions[1]);

                Assert.AreEqual(4, actionDict[1].ContinuousActions[0]);
                Assert.AreEqual(5, actionDict[1].ContinuousActions[1]);
                Assert.AreEqual(6, actionDict[1].ContinuousActions[2]);
                Assert.AreEqual(1, actionDict[1].DiscreteActions[0]);
                Assert.AreEqual(2, actionDict[1].DiscreteActions[1]);
            }
            finally
            {
                alloc.Dispose();
            }
        }
        // Example no. 3
        // 0
        // Feeds the legacy discrete applier a log-probability tensor in which exactly one
        // entry per branch is large, and expects the index of that entry to be written as
        // the discrete action for every branch of both agents.
        public void TestDiscreteApply()
        {
            const float unlikely = -1000.0f;
            const float likely   = -1.0f;
            const int   agentA   = 42;
            const int   agentB   = 1337;

            var spec = ActionSpec.MakeDiscrete(3, 2);

            // Two rows of five values: each row is branch 0 (3 entries) then branch 1 (2 entries).
            var rawLogProbs = new[]
            {
                unlikely, unlikely, likely,     // Agent 0, branch 0
                unlikely, likely,               // Agent 0, branch 1
                likely, unlikely, unlikely,     // Agent 1, branch 0
                likely, unlikely,               // Agent 1, branch 1
            };
            var logProbs = new TensorProxy
            {
                data      = new Tensor(2, 5, rawLogProbs),
                valueType = TensorProxy.TensorType.FloatingPoint
            };

            var applier = new LegacyDiscreteActionOutputApplier(spec, 2020, null);
            var ids     = new List<int> { agentA, agentB };
            var buffers = new Dictionary<int, ActionBuffers>
            {
                { agentA, new ActionBuffers(spec) },
                { agentB, new ActionBuffers(spec) }
            };

            applier.Apply(logProbs, ids, buffers);

            // First agent: the large entries sit at index 2 (branch 0) and index 1 (branch 1).
            Assert.AreEqual(2, buffers[agentA].DiscreteActions[0]);
            Assert.AreEqual(1, buffers[agentA].DiscreteActions[1]);

            // Second agent: the large entries sit at index 0 in both branches.
            Assert.AreEqual(0, buffers[agentB].DiscreteActions[0]);
            Assert.AreEqual(0, buffers[agentB].DiscreteActions[1]);
        }