Пример #1
0
        /// <summary>
        /// Drives <c>GenericWithContext</c> with a <c>VariableActionTestContext</c>
        /// built for a fixed count of 10 actions (context Id 100), a
        /// <c>GenericExplorer</c>, and a <c>TestScorer</c> constructed with (1, 10).
        /// </summary>
        public void GenericFixedActionUsingVariableActionInterface()
        {
            int actionCount = 10;

            var policyScorer = new TestScorer <VariableActionTestContext>(1, actionCount);

            // The context is created through the variable-action constructor,
            // but it is given the same action count as the scorer.
            var context = new VariableActionTestContext(actionCount);
            context.Id = 100;

            var genericExplorer = new GenericExplorer();

            GenericWithContext(actionCount, context, genericExplorer, policyScorer);
        }
Пример #2
0
        /// <summary>
        /// Drives <c>GenericWithContext</c> with a <c>RegularTestContext</c>
        /// (Id 100), a <c>GenericExplorer</c>, and a <c>TestScorer</c>
        /// constructed with (1, 10), using 10 actions.
        /// </summary>
        public void Generic()
        {
            int actionCount = 10;

            var policyScorer = new TestScorer <RegularTestContext>(1, actionCount);

            var context = new RegularTestContext();
            context.Id = 100;

            var genericExplorer = new GenericExplorer();

            GenericWithContext(actionCount, context, genericExplorer, policyScorer);
        }
Пример #3
0
        /// <summary>
        /// Builds a batch of <c>RegularTestContext</c> instances with sequential
        /// Ids and passes them, together with a <c>SoftmaxExplorer</c>
        /// (lambda 0.5) and a <c>TestScorer</c> constructed with (1, 10), to
        /// <c>SoftmaxWithContext</c>.
        /// </summary>
        public void Softmax()
        {
            int   numActions      = 10;
            int   numActionsCover = 100;
            float lambda          = 0.5f;
            float C               = 5;

            var scorer   = new TestScorer <RegularTestContext>(1, numActions);
            var explorer = new SoftmaxExplorer(lambda);

            // Number of decisions: n*ln(n) + ln(cover/n)*C*n, truncated to an
            // unsigned integer.
            double baseTerm  = numActions * Math.Log(numActions * 1.0);
            double coverTerm = Math.Log(numActionsCover * 1.0 / numActions) * C * numActions;
            uint   numDecisions = (uint)(baseTerm + coverTerm);

            var contexts = new RegularTestContext[numDecisions];

            // Each context's Id is its index in the array.
            for (int index = 0; index < contexts.Length; index++)
            {
                var context = new RegularTestContext();
                context.Id      = index;
                contexts[index] = context;
            }

            SoftmaxWithContext(numActions, explorer, scorer, contexts);
        }
Пример #4
0
        /// <summary>
        /// Builds a batch of <c>VariableActionTestContext</c> instances (each
        /// constructed with the same count of 10 actions, with sequential Ids)
        /// and passes them, together with a <c>SoftmaxExplorer</c> (lambda 0.5)
        /// and a <c>TestScorer</c> constructed with (1, 10), to
        /// <c>SoftmaxWithContext</c>.
        /// </summary>
        public void SoftmaxFixedActionUsingVariableActionInterface()
        {
            int   numActions      = 10;
            int   numActionsCover = 100;
            float lambda          = 0.5f;
            float C               = 5;

            var scorer   = new TestScorer <VariableActionTestContext>(1, numActions);
            var explorer = new SoftmaxExplorer(lambda);

            // Number of decisions: n*ln(n) + ln(cover/n)*C*n, truncated to an int.
            double baseTerm  = numActions * Math.Log(numActions * 1.0);
            double coverTerm = Math.Log(numActionsCover * 1.0 / numActions) * C * numActions;
            int    numDecisions = (int)(baseTerm + coverTerm);

            var contexts = new VariableActionTestContext[numDecisions];

            // Each context's Id is its index in the array.
            for (int index = 0; index < contexts.Length; index++)
            {
                var context = new VariableActionTestContext(numActions);
                context.Id      = index;
                contexts[index] = context;
            }

            SoftmaxWithContext(numActions, explorer, scorer, contexts);
        }
Пример #5
0
 /// <summary>
 /// Creates the aligner with default <c>WordPairAlignerSettings</c> passed to
 /// the base constructor and a <c>TestScorer</c> built from the given pool.
 /// </summary>
 /// <param name="segmentPool">Segment pool handed to the <c>TestScorer</c> constructor.</param>
 public TestWordAligner(SegmentPool segmentPool)
     : base(new WordPairAlignerSettings())
 {
     var scorer = new TestScorer(segmentPool);

     _scorer = scorer;
 }
Пример #6
0
        /// <summary>
        /// Exercises a <c>SoftmaxExplorer</c> (lambda 0.5) with a
        /// <c>TestScorer</c> created with <c>uniform: false</c>.
        /// First records three decisions and checks that each recorded
        /// probability differs from the uniform value 1/numActions and that the
        /// recorded context Ids are 100, 101, 102 in order. Then disables
        /// exploration and checks that 1000 further decisions all return the
        /// 1-based index of the highest score reported by the scorer for a
        /// context with Id 100.
        /// </summary>
        public void SoftmaxScores()
        {
            const int firstContextId    = 100;
            const int recordedDecisions = 3;

            int   numActions = 10;
            float lambda     = 0.5f;
            var   recorder   = new TestRecorder <RegularTestContext>();
            var   scorer     = new TestScorer <RegularTestContext>(1, numActions, uniform: false);
            var   explorer   = new SoftmaxExplorer(lambda);

            var mwtt = MwtExplorer.Create("mwt", numActions, recorder, explorer, scorer);

            Random rand = new Random();

            // Record decisions for contexts 100, 101, 102 under random unique keys.
            for (int i = 0; i < recordedDecisions; i++)
            {
                mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext {
                    Id = firstContextId + i
                });
            }

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(recordedDecisions, interactions.Count);

            for (int i = 0; i < interactions.Count; i++)
            {
                // Scores are not equal therefore probabilities should not be uniform.
                // The reference (not-expected) value goes first, the observed value second.
                Assert.AreNotEqual(1.0f / numActions, interactions[i].Probability);
                Assert.AreEqual(firstContextId + i, interactions[i].Context.Id);
            }

            // Verify that policy action is chosen all the time
            RegularTestContext context = new RegularTestContext {
                Id = firstContextId
            };
            List <float> scores = scorer.MapContext(context).Value.ToList();

            // Seed with negative infinity so the first score always wins the
            // initial comparison; seeding with 0 would leave the invalid action 0
            // selected whenever no score is strictly positive.
            float maxScore           = float.NegativeInfinity;
            int   highestScoreAction = 0;

            for (int i = 0; i < scores.Count; i++)
            {
                if (maxScore < scores[i])
                {
                    maxScore           = scores[i];
                    highestScoreAction = i + 1;     // actions are 1-based
                }
            }

            explorer.EnableExplore(false);
            for (int i = 0; i < 1000; i++)
            {
                int chosenAction = mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext {
                    Id = i
                });
                Assert.AreEqual(highestScoreAction, chosenAction);
            }
        }
Пример #7
0
        /// <summary>
        /// Runs a <c>GenericExplorer</c> over a list of contexts using a
        /// <c>TestScorer</c> selected by <paramref name="policyType"/>, and
        /// appends the resulting <c>StringRecorder</c> output to a file.
        /// </summary>
        /// <typeparam name="TContext">
        /// Context type. When it implements <c>IVariableActionContext</c> the
        /// explorer is created with a <c>VariableActionProvider</c>; otherwise
        /// it is created with the fixed <paramref name="numActions"/>.
        /// </typeparam>
        /// <param name="appId">Application id passed to <c>MwtExplorer.Create</c>.</param>
        /// <param name="policyType">
        /// 0 selects the fixed all-equal scorer (reads integer <c>"Score"</c>
        /// from <paramref name="configPolicy"/>); 1 selects the
        /// integer-progression scorer (reads integer <c>"Start"</c>). Any other
        /// value makes the method return without doing anything.
        /// </param>
        /// <param name="configPolicy">JSON object holding the scorer configuration.</param>
        /// <param name="numActions">Number of actions given to the scorer and, for fixed-action contexts, to the explorer.</param>
        /// <param name="experimentalUnitIdList">Unique keys, one per decision; its length drives the loop.</param>
        /// <param name="contextList">Contexts, indexed in parallel with <paramref name="experimentalUnitIdList"/>.</param>
        /// <param name="outputFile">Path of the file the recording is appended to.</param>
        static void ExploreGeneric <TContext>
        (
            string appId,
            int policyType,
            JToken configPolicy,
            int numActions,
            string[] experimentalUnitIdList,
            TContext[] contextList,
            string outputFile
        )
        {
            // The two supported policies differ only in how the scorer is built.
            TestScorer <TContext> scorer;

            switch (policyType)
            {
            case 0:     // fixed all-equal scorer
                scorer = new TestScorer <TContext>(configPolicy["Score"].Value <int>(), numActions);
                break;

            case 1:     // integer-progression scorer
                scorer = new TestScorer <TContext>(configPolicy["Start"].Value <int>(), numActions, uniform: false);
                break;

            default:    // unrecognized policy types are silently ignored
                return;
            }

            var recorder = new StringRecorder <TContext>();
            var explorer = new GenericExplorer();

            bool isVariableActionContext = typeof(IVariableActionContext).IsAssignableFrom(typeof(TContext));

            var mwt = isVariableActionContext ?
                      MwtExplorer.Create(appId, new VariableActionProvider <TContext>(), recorder, explorer, scorer) :
                      MwtExplorer.Create(appId, numActions, recorder, explorer, scorer);

            for (int i = 0; i < experimentalUnitIdList.Length; i++)
            {
                mwt.ChooseAction(experimentalUnitIdList[i], contextList[i]);
            }

            File.AppendAllText(outputFile, recorder.GetRecording());
        }