/// <summary>
/// Checks that the model exposed by <c>DecisionServiceLocal.Model</c> is refreshed
/// according to <c>ModelUpdateInterval</c>: first after every completed example,
/// then (once the interval is set to 2) only after every second completed example.
/// </summary>
public async Task TestDSLocalModelUpdate()
{
    string vwArgs = "--cb_explore_adf --epsilon 0.2 --cb_type dr -q ::";
    DecisionServiceLocal<FoodContext> dsLocal = new DecisionServiceLocal<FoodContext>(vwArgs, 1, TimeSpan.MaxValue);
    var context = new FoodContext { Actions = new int[] { 1, 2, 3 }, UserLocation = "HealthyTown" };
    string guid1 = Guid.NewGuid().ToString();
    string guid2 = Guid.NewGuid().ToString();

    // Generate interactions and ensure the model updates at the right frequency
    // (updates every example initially).
    byte[] prevModel = dsLocal.Model;
    await dsLocal.ChooseActionAsync(guid1, context, 1);
    dsLocal.ReportRewardAndComplete(1.0f, guid1);
    Assert.IsTrue(
        !dsLocal.Model.SequenceEqual(prevModel),
        "Model should change after one completed example when the update interval is 1.");

    // Set the model to update every two examples.
    prevModel = dsLocal.Model;
    dsLocal.ModelUpdateInterval = 2;

    // First example of the pair: the model must stay untouched.
    await dsLocal.ChooseActionAsync(guid1, context, 1);
    dsLocal.ReportRewardAndComplete(1.0f, guid1);
    Assert.IsTrue(
        dsLocal.Model.SequenceEqual(prevModel),
        "Model should not change after the first of two examples when the update interval is 2.");

    // Second example of the pair: the reward must be reported against the event that was
    // just created (guid2), otherwise that event is never completed.
    await dsLocal.ChooseActionAsync(guid2, context, 1);
    dsLocal.ReportRewardAndComplete(2.0f, guid2);
    Assert.IsTrue(
        !dsLocal.Model.SequenceEqual(prevModel),
        "Model should change after the second completed example when the update interval is 2.");
}
/// <summary>
/// Drives a single <c>DecisionServiceLocal</c> instance from several threads at once,
/// rewarding only action 2, and checks that the share of events on which action 2 was
/// chosen reaches at least 90% of the exploit fraction (1 - epsilon).
/// </summary>
public void TestDSLocalConcurrent()
{
    const float Eps = 0.2f;
    const int NumThreads = 16;
    const int NumEventsPerThread = 25;

    // Format epsilon with the invariant culture: the default ToString() would emit "0,2"
    // under comma-decimal cultures and produce a malformed argument string.
    string vwArgs = "--cb_explore_adf --epsilon " + Eps.ToString(System.Globalization.CultureInfo.InvariantCulture);
    DecisionServiceLocal<SimpleADFContext> dsLocal = new DecisionServiceLocal<SimpleADFContext>(vwArgs, 1, TimeSpan.MaxValue);
    var context = new SimpleADFContext { Id = "Shared", Actions = new int[] { 1, 2, 3 } };

    List<Thread> threads = new List<Thread>(NumThreads);

    // One counter slot per thread, so workers never write to the same element.
    int[] targetActionCnts = new int[NumThreads];
    int idCounter = 0;

    for (int i = 0; i < NumThreads; i++)
    {
        threads.Add(new Thread(() =>
        {
            // Claim a unique slot index atomically; thread start order is not deterministic.
            int id = Interlocked.Increment(ref idCounter) - 1;
            Console.WriteLine("in thread {0}", id);

            for (int j = 0; j < NumEventsPerThread; j++)
            {
                string guid = Guid.NewGuid().ToString();
                int action = dsLocal.ChooseActionAsync(guid, context, 1).Result;
                dsLocal.ReportRewardAndComplete((action == 2) ? 1.0f : 0.0f, guid);
                if (action == 2)
                {
                    targetActionCnts[id]++;
                }
            }
        }));
    }

    foreach (Thread t in threads)
    {
        t.Start();
    }

    foreach (Thread t in threads)
    {
        t.Join();
    }

    int targetTotal = targetActionCnts.Sum();
    int eventTotal = NumThreads * NumEventsPerThread;

    // Since the model is updated after each datapoint, we expect most exploit predictions
    // (1 - Eps) to be the middle action, but allow for some slack.
    Console.WriteLine("Sum of target is {0}, total is {1}", targetTotal, eventTotal);
    Assert.IsTrue(targetTotal * 1.0 / eventTotal >= (1 - Eps) * 0.9);
}
/// <summary>
/// Feeds the same stream of events to a strongly-typed <c>DecisionServiceLocal</c> and to a
/// JSON-string-typed one, rewarding only action 2, and checks that each of them picks
/// action 2 on at least 90% of the exploit fraction (1 - epsilon) of the events.
/// </summary>
public void TestDSLocalModelLearning()
{
    const int NumEvents = 100;
    const float Eps = 0.2f;

    // Format epsilon with the invariant culture: the default ToString() would emit "0,2"
    // under comma-decimal cultures and produce a malformed argument string.
    string vwArgs = "--cb_explore_adf --epsilon " + Eps.ToString(System.Globalization.CultureInfo.InvariantCulture);

    // Test both generic class and json string typed versions of DS local
    DecisionServiceLocal<SimpleADFContext> dsLocal = new DecisionServiceLocal<SimpleADFContext>(vwArgs, 1, TimeSpan.MaxValue);
    DecisionServiceLocal<string> dsLocalJson = new DecisionServiceLocal<string>(vwArgs, 1, TimeSpan.MaxValue);
    var context = new SimpleADFContext { Id = "Shared", Actions = new int[] { 1, 2, 3 } };

    // The context object is not modified in this method, so its JSON form is built once.
    string contextJson = JsonConvert.SerializeObject(context);

    int targetActionCnt = 0;
    int targetActionJsonCnt = 0;

    // Generate interactions and reward the model for the middle action only (learning the
    // lowest/highest can be done even with bad featurization, which we want to catch).
    for (int i = 0; i < NumEvents; i++)
    {
        // The same key is used for both instances; they are separate objects.
        string guid = Guid.NewGuid().ToString();

        // Test generic class type
        int action = dsLocal.ChooseActionAsync(guid, context, 1).Result;
        dsLocal.ReportRewardAndComplete((action == 2) ? 1.0f : 0.0f, guid);
        if (action == 2)
        {
            targetActionCnt++;
        }

        // Test json string type
        int actionJson = dsLocalJson.ChooseActionAsync(guid, contextJson, 1).Result;

        // TODO: The examples should look identical to VW, so predictions should be identical
        // (i.e. action == actionJson); that assertion is not enabled yet.
        dsLocalJson.ReportRewardAndComplete((actionJson == 2) ? 1.0f : 0.0f, guid);
        if (actionJson == 2)
        {
            targetActionJsonCnt++;
        }
    }

    // Since the model is updated after each datapoint, we expect most exploit predictions
    // (1 - Eps) to be the middle action, but allow for some slack.
    Assert.IsTrue(
        targetActionCnt * 1.0 / NumEvents >= (1 - Eps) * 0.9,
        "Typed-context model chose the rewarded action too rarely.");
    Assert.IsTrue(
        targetActionJsonCnt * 1.0 / NumEvents >= (1 - Eps) * 0.9,
        "JSON-context model chose the rewarded action too rarely.");
}