/// <summary>
/// Runs the shared <c>EndToEnd</c> routine against a
/// <c>TauFirstExplorer</c> configured with tau = 5 and 10 actions.
/// </summary>
public void EndToEndTauFirst()
{
    const uint actionCount = 10;
    const uint tauValue = 5;

    // Construction order is kept as-is: recorder, policy, MwtExplorer, explorer.
    var testRecorder = new TestRecorder<SimpleContext>();
    var defaultPolicy = new TestSimplePolicy();
    var mwt = new MwtExplorer<SimpleContext>("mwt", testRecorder);
    var tauFirst = new TauFirstExplorer<SimpleContext>(defaultPolicy, tauValue, actionCount);

    EndToEnd(mwt, tauFirst, testRecorder);
}
/// <summary>
/// Runs the shared <c>EndToEnd</c> routine against an
/// <c>EpsilonGreedyExplorer</c> configured with epsilon = 0.5 and 10 actions.
/// </summary>
public void EndToEndEpsilonGreedy()
{
    const uint actionCount = 10;
    const float explorationRate = 0.5f;

    // Construction order is kept as-is: recorder, MwtExplorer, policy, explorer.
    var testRecorder = new TestRecorder<SimpleContext>();
    var mwt = new MwtExplorer<SimpleContext>("mwt", testRecorder);
    var defaultPolicy = new TestSimplePolicy();
    var epsilonGreedy = new EpsilonGreedyExplorer<SimpleContext>(defaultPolicy, explorationRate, actionCount);

    EndToEnd(mwt, epsilonGreedy, testRecorder);
}
/// <summary>
/// Runs the shared <c>EndToEnd</c> routine against a
/// <c>BaggingExplorer</c> built from 2 <c>TestSimplePolicy</c> instances
/// (one per bag) and 10 actions.
/// </summary>
public void EndToEndBagging()
{
    const uint actionCount = 10;
    const uint bagCount = 2;

    var testRecorder = new TestRecorder<SimpleContext>();

    // One fresh policy per bag; the array length equals bagCount.
    var bagPolicies = new TestSimplePolicy[bagCount];
    for (int slot = 0; slot < bagPolicies.Length; slot++)
    {
        bagPolicies[slot] = new TestSimplePolicy();
    }

    var mwt = new MwtExplorer<SimpleContext>("mwt", testRecorder);
    var bagging = new BaggingExplorer<SimpleContext>(bagPolicies, bagCount, actionCount);

    EndToEnd(mwt, bagging, testRecorder);
}