Пример #1
0
        /// <summary>
        /// Drives <c>EpsilonGreedyWithContext</c> with a 10-action
        /// <c>VariableActionTestContext</c>, a <c>TestPolicy</c> over that context type,
        /// and an <c>EpsilonGreedyExplorer</c> constructed with epsilon 0.
        /// </summary>
        public void EpsilonGreedyFixedActionUsingVariableActionInterface()
        {
            int actionCount = 10;

            // Construction order is kept: policy, then context, then explorer.
            var defaultPolicy = new TestPolicy<VariableActionTestContext>();
            var context = new VariableActionTestContext(actionCount);
            var epsilonGreedy = new EpsilonGreedyExplorer(0f);

            EpsilonGreedyWithContext(actionCount, context, defaultPolicy, epsilonGreedy);
        }
Пример #2
0
        /// <summary>
        /// Drives <c>GenericWithContext</c> with a 10-action
        /// <c>VariableActionTestContext</c> (Id 100), a <c>GenericExplorer</c>,
        /// and a <c>TestScorer</c> built from (1, action count).
        /// </summary>
        public void GenericFixedActionUsingVariableActionInterface()
        {
            int actionCount = 10;

            var actionScorer = new TestScorer<VariableActionTestContext>(1, actionCount);
            var context = new VariableActionTestContext(actionCount) { Id = 100 };
            var genericExplorer = new GenericExplorer();

            GenericWithContext(actionCount, context, genericExplorer, actionScorer);
        }
Пример #3
0
        /// <summary>
        /// Drives <c>TauFirstWithContext</c> with a 10-action
        /// <c>VariableActionTestContext</c> (Id 100), a <c>TestPolicy</c> over that
        /// context type, and a <c>TauFirstExplorer</c> constructed with tau 0.
        /// </summary>
        public void TauFirstFixedActionUsingVariableActionInterface()
        {
            int actionCount = 10;

            // Kept as a typed local (not inlined) so the same constructor overload is chosen.
            int explorationRounds = 0;

            var context = new VariableActionTestContext(actionCount) { Id = 100 };
            var defaultPolicy = new TestPolicy<VariableActionTestContext>();
            var tauFirst = new TauFirstExplorer(explorationRounds);

            TauFirstWithContext(actionCount, context, defaultPolicy, tauFirst);
        }
Пример #4
0
        /// <summary>
        /// Builds an array of 10-action <c>VariableActionTestContext</c> instances
        /// (Ids 0 .. count-1) and passes it, together with a <c>SoftmaxExplorer</c>
        /// (lambda 0.5) and a <c>TestScorer</c> built from (1, action count), to
        /// <c>SoftmaxWithContext</c>.
        /// </summary>
        public void SoftmaxFixedActionUsingVariableActionInterface()
        {
            int actionCount = 10;
            int coverTarget = 100;
            float softmaxLambda = 0.5f;
            float coverScale = 5;

            var actionScorer = new TestScorer<VariableActionTestContext>(1, actionCount);
            var softmax = new SoftmaxExplorer(softmaxLambda);

            // Number of decisions: n*ln(n) + ln(cover/n)*scale*n, truncated to int.
            // With the values above this evaluates to 138.
            double baseTerm = actionCount * Math.Log((double)actionCount);
            double coverTerm = Math.Log((double)coverTarget / actionCount) * coverScale * actionCount;
            int decisionCount = (int)(baseTerm + coverTerm);

            var contextBatch = new VariableActionTestContext[decisionCount];
            for (int index = 0; index < contextBatch.Length; index++)
            {
                // Each context's Id is its position in the batch.
                contextBatch[index] = new VariableActionTestContext(actionCount) { Id = index };
            }

            SoftmaxWithContext(actionCount, softmax, actionScorer, contextBatch);
        }