/// <summary>
/// Runs the shared <c>GenericWithContext</c> scenario with ten actions, a
/// <c>GenericExplorer</c>, and a single <c>VariableActionTestContext</c> (Id 100)
/// constructed with that same action count.
/// </summary>
public void GenericFixedActionUsingVariableActionInterface()
{
    int actionCount = 10;

    // Construction order mirrors the scenario setup: scorer, context, explorer.
    var testScorer = new TestScorer<VariableActionTestContext>(1, actionCount);
    var context = new VariableActionTestContext(actionCount)
    {
        Id = 100
    };
    var genericExplorer = new GenericExplorer();

    GenericWithContext(actionCount, context, genericExplorer, testScorer);
}
/// <summary>
/// Runs the shared <c>GenericWithContext</c> scenario with ten actions, a
/// <c>GenericExplorer</c>, and a single <c>RegularTestContext</c> whose Id is 100.
/// </summary>
public void Generic()
{
    int actionCount = 10;

    var testScorer = new TestScorer<RegularTestContext>(1, actionCount);
    var context = new RegularTestContext
    {
        Id = 100
    };
    var genericExplorer = new GenericExplorer();

    GenericWithContext(actionCount, context, genericExplorer, testScorer);
}
/// <summary>
/// Runs the shared <c>SoftmaxWithContext</c> scenario with ten actions and a
/// <c>SoftmaxExplorer</c> (lambda 0.5) over a batch of <c>RegularTestContext</c>
/// instances whose Ids are 0..numDecisions-1.
/// </summary>
public void Softmax()
{
    int numActions = 10;
    float lambda = 0.5f;
    int numActionsCover = 100;
    float C = 5;

    var scorer = new TestScorer<RegularTestContext>(1, numActions);
    var explorer = new SoftmaxExplorer(lambda);

    // Number of decisions to simulate:
    //   numActions * ln(numActions) + ln(numActionsCover / numActions) * C * numActions
    // which truncates to 138 for the constants above. Declared as int (not uint) so
    // it matches the int loop index and the sibling variable-action test.
    int numDecisions = (int)(numActions * Math.Log(numActions * 1.0)
        + Math.Log(numActionsCover * 1.0 / numActions) * C * numActions);

    var contexts = new RegularTestContext[numDecisions];
    for (int i = 0; i < numDecisions; i++)
    {
        contexts[i] = new RegularTestContext { Id = i };
    }

    SoftmaxWithContext(numActions, explorer, scorer, contexts);
}
/// <summary>
/// Runs the shared <c>SoftmaxWithContext</c> scenario with ten actions and a
/// <c>SoftmaxExplorer</c> (lambda 0.5), feeding it <c>VariableActionTestContext</c>
/// instances that are each constructed with the same fixed action count.
/// </summary>
public void SoftmaxFixedActionUsingVariableActionInterface()
{
    int actionCount = 10;
    float softmaxLambda = 0.5f;
    int coverCount = 100;
    float scale = 5;

    var testScorer = new TestScorer<VariableActionTestContext>(1, actionCount);
    var softmaxExplorer = new SoftmaxExplorer(softmaxLambda);

    // Batch size: actionCount * ln(actionCount) + ln(coverCount / actionCount) * scale * actionCount,
    // truncated toward zero (138 for the constants above).
    double batchSize = actionCount * Math.Log(actionCount * 1.0)
        + Math.Log(coverCount * 1.0 / actionCount) * scale * actionCount;
    var batch = new VariableActionTestContext[(int)batchSize];

    // Context Ids follow the array index.
    for (int index = 0; index < batch.Length; index++)
    {
        batch[index] = new VariableActionTestContext(actionCount)
        {
            Id = index
        };
    }

    SoftmaxWithContext(actionCount, softmaxExplorer, testScorer, batch);
}
/// <summary>
/// Creates the aligner, passing a newly constructed <c>WordPairAlignerSettings</c>
/// to the base class and storing a <c>TestScorer</c> built from the given pool.
/// </summary>
/// <param name="segmentPool">Segment pool handed to the <c>TestScorer</c> constructor.</param>
public TestWordAligner(SegmentPool segmentPool)
    : base(new WordPairAlignerSettings())
{
    var scorer = new TestScorer(segmentPool);
    _scorer = scorer;
}
/// <summary>
/// Exercises <c>SoftmaxExplorer</c> with a non-uniform <c>TestScorer</c>.
/// First checks that three recorded interactions carry the expected context Ids
/// and a probability different from 1/numActions; then disables exploration and
/// checks that the highest-scoring action is chosen on every one of 1000 calls.
/// </summary>
public void SoftmaxScores()
{
    int numActions = 10;
    float lambda = 0.5f;

    var recorder = new TestRecorder<RegularTestContext>();
    var scorer = new TestScorer<RegularTestContext>(1, numActions, uniform: false);
    var explorer = new SoftmaxExplorer(lambda);
    var mwtt = MwtExplorer.Create("mwt", numActions, recorder, explorer, scorer);

    // Random is only used to produce distinct unique-key strings for each decision.
    Random rand = new Random();
    mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext() { Id = 100 });
    mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext() { Id = 101 });
    mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext() { Id = 102 });

    var interactions = recorder.GetAllInteractions();
    Assert.AreEqual(3, interactions.Count);

    for (int i = 0; i < interactions.Count; i++)
    {
        // Scores are not equal, therefore probabilities should not be uniform.
        // Arguments are (notExpected, actual) so a failure message reads correctly.
        Assert.AreNotEqual(1.0f / numActions, interactions[i].Probability);
        Assert.AreEqual(100 + i, interactions[i].Context.Id);
    }

    // Verify that the policy action is chosen all the time once exploration is off.
    RegularTestContext context = new RegularTestContext { Id = 100 };
    List<float> scores = scorer.MapContext(context).Value.ToList();

    // Argmax over the scores; actions are 1-based. Starting from negative infinity
    // (rather than 0) keeps the result valid even if no score is positive. The
    // strict comparison keeps the first maximum on ties.
    float maxScore = float.NegativeInfinity;
    int highestScoreAction = 0;
    for (int i = 0; i < scores.Count; i++)
    {
        if (maxScore < scores[i])
        {
            maxScore = scores[i];
            highestScoreAction = i + 1;
        }
    }

    explorer.EnableExplore(false);
    for (int i = 0; i < 1000; i++)
    {
        int chosenAction = mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext() { Id = i });
        Assert.AreEqual(highestScoreAction, chosenAction);
    }
}
/// <summary>
/// Drives an <c>MwtExplorer</c> with a <c>GenericExplorer</c> over the supplied
/// unit-id/context pairs and appends the recorder's output to a file.
/// </summary>
/// <typeparam name="TContext">
/// Context type. When it implements <c>IVariableActionContext</c> the explorer is
/// created with a <c>VariableActionProvider</c>; otherwise with <paramref name="numActions"/>.
/// </typeparam>
/// <param name="appId">Application id passed to <c>MwtExplorer.Create</c>.</param>
/// <param name="policyType">
/// 0 = scorer built from <c>configPolicy["Score"]</c>;
/// 1 = scorer built from <c>configPolicy["Start"]</c> with <c>uniform: false</c>.
/// Any other value makes the method return without doing anything.
/// </param>
/// <param name="configPolicy">JSON configuration holding the scorer's integer parameter.</param>
/// <param name="numActions">Number of actions for the scorer (and for the fixed-action explorer).</param>
/// <param name="experimentalUnitIdList">Unique keys, one per decision.</param>
/// <param name="contextList">Contexts, indexed in parallel with <paramref name="experimentalUnitIdList"/>.</param>
/// <param name="outputFile">Path of the file the recording is appended to.</param>
static void ExploreGeneric<TContext>(
    string appId,
    int policyType,
    JToken configPolicy,
    int numActions,
    string[] experimentalUnitIdList,
    TContext[] contextList,
    string outputFile)
{
    var recorder = new StringRecorder<TContext>();
    bool isVariableActionContext = typeof(IVariableActionContext).IsAssignableFrom(typeof(TContext));

    // The policy types differ only in how the scorer is configured; everything
    // after this switch is shared.
    TestScorer<TContext> scorer;
    switch (policyType)
    {
        case 0: // fixed all-equal scorer
            scorer = new TestScorer<TContext>(configPolicy["Score"].Value<int>(), numActions);
            break;

        case 1: // integer-progression scorer
            scorer = new TestScorer<TContext>(configPolicy["Start"].Value<int>(), numActions, uniform: false);
            break;

        default:
            // Unrecognized policy types are ignored: nothing is explored or written.
            return;
    }

    var explorer = new GenericExplorer();
    var mwt = isVariableActionContext
        ? MwtExplorer.Create(appId, new VariableActionProvider<TContext>(), recorder, explorer, scorer)
        : MwtExplorer.Create(appId, numActions, recorder, explorer, scorer);

    for (int i = 0; i < experimentalUnitIdList.Length; i++)
    {
        mwt.ChooseAction(experimentalUnitIdList[i], contextList[i]);
    }

    File.AppendAllText(outputFile, recorder.GetRecording());
}