/// <summary>
/// Posts one context built from <c>features[featureIndex]</c>, optionally reports a reward for the
/// returned action, and tallies the observed feature/action pair in <c>freq</c>.
/// </summary>
/// <param name="deployment">Deployment passed through to the interaction helpers.</param>
/// <param name="client">Not used by this method; kept so existing callers keep compiling.</param>
/// <param name="featureIndex">Index into <c>features</c> selecting the feature value to send.</param>
/// <param name="sendReward">When <c>false</c>, no reward is reported (and no random number is drawn).</param>
/// <returns>1 when a reward was reported for this event, otherwise 0.</returns>
private int SendEvents(DecisionServiceDeployment deployment, WebClient client, int featureIndex, bool sendReward = true)
{
    const float reward = 2.0F;

    string contextString = $"{{a: \"{features[featureIndex]}\"}}";
    var responseJObj = InteractionParts1and2(deployment, contextType, contextString);
    int action = (int)responseJObj["Action"];

    // The action that earns a reward for each feature:
    // Feature | Action
    //    A   -->   1
    //    B   -->   2
    //    C   -->   3
    //    D   -->   4
    var expectedEvents = 0;

    // Only report a reward when the action matches the feature, and even then only in 75% of the cases.
    // The random draw stays in the middle of the condition so that the sequence consumed from rnd
    // is the same as before (no draw when sendReward is false).
    if (sendReward && rnd.NextDouble() < .75 && action - 1 == featureIndex)
    {
        InteractionPart3(deployment, responseJObj, reward);
        expectedEvents = 1;
    }

    var stat = string.Format("'{0}' '{1}' ", features[featureIndex], action);

    // TryGetValue leaves count at 0 for an unseen key, so the first observation is stored as 1
    // (previously it was stored as 0, undercounting every pair by one).
    int count;
    freq.TryGetValue(stat, out count);
    freq[stat] = count + 1;

    return expectedEvents;
}
/// <summary>
/// POSTs <paramref name="contextString"/> to <c>{ManagementCenterUrl}/API/{contextType}</c> using the
/// <c>wc</c> field and parses the UTF-8 response body as JSON.
/// </summary>
/// <param name="deployment">Supplies the management center base URL.</param>
/// <param name="contextType">Last path segment of the request URL.</param>
/// <param name="contextString">Request body to upload.</param>
/// <returns>The parsed JSON response.</returns>
public JObject InteractionParts1and2(DecisionServiceDeployment deployment, string contextType, string contextString)
{
    string contextUri = string.Format(CultureInfo.InvariantCulture, "{0}/API/{1}", deployment.ManagementCenterUrl, contextType);

    // Encode the request as UTF-8, matching how the response is decoded below. ASCII encoding would
    // silently replace every non-ASCII character with '?'; for pure ASCII payloads the bytes are identical.
    byte[] context = System.Text.Encoding.UTF8.GetBytes(contextString);

    var response = wc.UploadData(contextUri, "POST", context);
    var utf8response = System.Text.Encoding.UTF8.GetString(response);

    return JObject.Parse(utf8response);
}
/// <summary>
/// POSTs <paramref name="reward"/> to <c>{ManagementCenterUrl}/API/reward/?eventId={EventId}</c> using the
/// <c>wc</c> field, where the event id is read from the <c>EventId</c> property of
/// <paramref name="responseJObj"/>.
/// </summary>
/// <param name="deployment">Supplies the management center base URL.</param>
/// <param name="responseJObj">JSON object carrying the <c>EventId</c> to reward.</param>
/// <param name="reward">Reward value sent as the request body.</param>
/// <returns>The response body decoded as UTF-8.</returns>
public string InteractionPart3(DecisionServiceDeployment deployment, JObject responseJObj, float reward)
{
    string eventID = (string)responseJObj["EventId"];
    string rewardUri = string.Format(CultureInfo.InvariantCulture, "{0}/API/reward/?eventId={1}", deployment.ManagementCenterUrl, eventID);

    // Format with the invariant culture so the wire format does not depend on the machine's locale
    // (the parameterless ToString() would emit e.g. "0,5" under cultures with a comma decimal separator).
    string rewardString = reward.ToString(CultureInfo.InvariantCulture);
    byte[] rewardBytes = System.Text.Encoding.ASCII.GetBytes(rewardString);

    var response = wc.UploadData(rewardUri, "POST", rewardBytes);

    return System.Text.Encoding.UTF8.GetString(response);
}
/// <summary>
/// Configures the decision service, then runs the food-context upload twice: the first pass must
/// score below the initial exploration epsilon, the second pass must score above 0.8.
/// </summary>
/// <param name="deployment">Deployment under test.</param>
public void E2ERankerStochasticRewards(DecisionServiceDeployment deployment)
{
    const float startingEpsilon = .5f;

    // Set up the decision service configuration.
    deployment.ConfigureDecisionService(
        trainArguments: "--cb_explore_adf --cb_type dr -q :: --epsilon 0.2",
        initialExplorationEpsilon: startingEpsilon);

    // Capture the settings URL before waiting for the trainer, in the same order as always.
    string settingsUrl = deployment.SettingsUrl;
    deployment.OnlineTrainerWaitForStartup();

    float firstPassCorrect = UploadFoodContextData(deployment, settingsUrl, firstPass: true);
    Assert.IsTrue(firstPassCorrect < startingEpsilon);

    float secondPassCorrect = UploadFoodContextData(deployment, settingsUrl, firstPass: false);
    Assert.IsTrue(secondPassCorrect > .8f);
}
/// <summary>
/// Two-phase end-to-end test.
/// Exploration: sends events through a decision service client and expects all 16 feature/action
/// pairs to appear, each with a relative frequency below 0.08.
/// Exploitation: after a two minute delay, downloads the model, makes 1024 <c>ChooseAction</c> calls and
/// expects every observed feature/action pair to have a relative frequency within 0.1 of 0.25.
/// </summary>
/// <param name="deployment">Deployment under test.</param>
public async Task SimplePolicyTest(DecisionServiceDeployment deployment)
{
    // The test only passes when the model download succeeds within the first 30 attempts, so the
    // retry loop stops there instead of continuing with attempts that could never make it pass.
    const int maxModelDownloadAttempts = 30;

    deployment.OnlineTrainerWaitForStartup();
    deployment.ConfigureDecisionService("--cb_explore 4 --epsilon 0", initialExplorationEpsilon: 1, isExplorationEnabled: true); // 4 Actions

    // why does this need to be different from default?
    var config = new DecisionServiceConfiguration(deployment.SettingsUrl)
    {
        InteractionUploadConfiguration = new BatchingConfiguration { MaxEventCount = 64 },
        ObservationUploadConfiguration = new BatchingConfiguration { MaxEventCount = 64 },
        PollingForModelPeriod = TimeSpan.FromMinutes(5)
    };
    config.InteractionUploadConfiguration.ErrorHandler += JoinServiceBatchConfiguration_ErrorHandler;
    config.InteractionUploadConfiguration.SuccessHandler += JoinServiceBatchConfiguration_SuccessHandler;

    this.features = new string[] { "a", "b", "c", "d" };
    this.freq = new Dictionary<string, int>();
    this.rnd = new Random(123);

    deployment.OnlineTrainerReset();

    {
        var expectedEvents = 0;
        using (var client = Microsoft.Research.MultiWorldTesting.ClientLibrary.DecisionService.Create<MyContext>(config))
        {
            for (int i = 0; i < 50; i++)
            {
                // SendEvents(client, 128) is an overload defined elsewhere in the class.
                expectedEvents += SendEvents(client, 128);
            }
        }

        // TODO: flush doesn't work
        // Assert.AreEqual(expectedEvents, this.eventCount);
    }

    // 4 actions times 4 feature values
    Assert.AreEqual(4 * 4, freq.Keys.Count);

    Console.WriteLine("Exploration");
    var total = freq.Values.Sum();
    foreach (var k in freq.Keys.OrderBy(key => key))
    {
        var f = freq[k] / (float)total;
        Assert.IsTrue(f < 0.08);
        Console.WriteLine("{0} | {1}", k, f);
    }

    freq.Clear();

    await Task.Delay(TimeSpan.FromMinutes(2));

    // TODO: update eps: 0
    using (var client = Microsoft.Research.MultiWorldTesting.ClientLibrary.DecisionService.Create<MyContext>(config))
    {
        var modelDownloaded = false;
        for (int attempt = 0; attempt < maxModelDownloadAttempts && !modelDownloaded; attempt++)
        {
            try
            {
                // Await instead of blocking with .Wait() inside an async method.
                await client.DownloadModelAndUpdate(System.Threading.CancellationToken.None);
                modelDownloaded = true;
            }
            catch (Exception)
            {
                // Download failed; pause for a second and retry.
                await Task.Delay(TimeSpan.FromSeconds(1));
            }
        }

        Assert.IsTrue(modelDownloaded, "Unable to download model");

        for (int i = 0; i < 1024; i++)
        {
            var key = Guid.NewGuid().ToString();
            var featureIndex = i % features.Length;
            var action = client.ChooseAction(key, new MyContext { Feature = features[featureIndex] });
            var stat = string.Format("'{0}' '{1}' ", features[featureIndex], action);

            // TryGetValue leaves count at 0 for an unseen key, so the first observation is stored
            // as 1 (previously it was stored as 0, undercounting every pair by one).
            int count;
            freq.TryGetValue(stat, out count);
            freq[stat] = count + 1;
        }
    }

    Console.WriteLine("Exploitation");
    total = freq.Values.Sum();
    foreach (var k in freq.Keys.OrderBy(key => key))
    {
        var f = freq[k] / (float)total;
        Assert.AreEqual(0.25f, f, 0.1);
        Console.WriteLine("{0} | {1}", k, f);
    }
}
/// <summary>
/// Runs 10000 ranking requests per location against the decision service, reports a simulated
/// reward for each one, and measures how often the top-ranked action is the expected one
/// (action 3 for "HealthyTown", action 2 for "LessHealthyTown").
/// </summary>
/// <param name="deployment">Deployment under test; its online trainer is reset on the first pass.</param>
/// <param name="settingsBlobUri">Settings URL used to build the decision service configuration.</param>
/// <param name="firstPass">
/// When <c>true</c>, sets the model polling period to <see cref="TimeSpan.MinValue"/> and resets the
/// trainer; when <c>false</c>, sleeps 10 seconds after creating the client before sending requests.
/// </param>
/// <returns>Fraction of requests whose top-ranked action was the expected one.</returns>
private float UploadFoodContextData(DecisionServiceDeployment deployment, string settingsBlobUri, bool firstPass)
{
    var serviceConfig = new DecisionServiceConfiguration(settingsBlobUri);
    if (firstPass)
    {
        // NOTE(review): presumably TimeSpan.MinValue turns model polling off - confirm against
        // DecisionServiceConfiguration.
        serviceConfig.PollingForModelPeriod = TimeSpan.MinValue;
        deployment.OnlineTrainerReset();
    }

    using (var service = DecisionService.Create<FoodContext>(serviceConfig))
    {
        if (!firstPass)
        {
            Thread.Sleep(10000);
        }

        string uniqueKey = "scratch-key-gal";
        string[] locations = { "HealthyTown", "LessHealthyTown" };

        // NOTE(review): string.GetHashCode() is not guaranteed to be stable across runtimes or
        // processes, so this seed (and the simulated sequence) may differ between environments.
        var rg = new Random(uniqueKey.GetHashCode());

        int numActions = 3; // ["Hamburger deal 1", "Hamburger deal 2" (better), "Salad deal"]
        int counterCorrect = 0;
        int counterTotal = 0;

        // number of iterations
        for (int i = 0; i < 10000 * locations.Length; i++)
        {
            // randomly select a location
            string location = locations[rg.Next(0, locations.Length)];
            bool isHealthyTown = location.Equals("HealthyTown");

            string key = uniqueKey + Guid.NewGuid().ToString();

            var currentContext = new FoodContext
            {
                UserLocation = location,
                Actions = Enumerable.Range(1, numActions).ToArray()
            };

            int[] action = service.ChooseRanking(key, currentContext);
            int topAction = action[0];
            counterTotal += 1;

            // We expect healthy town to get salad and unhealthy town to get the second burger (action 2)
            int expectedAction = isHealthyTown ? 3 : 2;
            if (topAction == expectedAction)
            {
                counterCorrect += 1;
            }

            float reward = 0;
            double currentRand = rg.NextDouble();
            if (isHealthyTown)
            {
                // for healthy town, buy burger 1 with probability 0.1, burger 2 with probability 0.15, salad with probability 0.6
                if ((topAction == 1 && currentRand < 0.1) ||
                    (topAction == 2 && currentRand < 0.15) ||
                    (topAction == 3 && currentRand < 0.6))
                {
                    reward = 10;
                }
            }
            else
            {
                // for unhealthy town, buy burger 1 with probability 0.4, burger 2 with probability 0.6, salad with probability 0.2
                if ((topAction == 1 && currentRand < 0.4) ||
                    (topAction == 2 && currentRand < 0.6) ||
                    (topAction == 3 && currentRand < 0.2))
                {
                    reward = 10;
                }
            }

            service.ReportReward(reward, key);

            System.Threading.Thread.Sleep(1);
        }

        return (float)counterCorrect / counterTotal;
    }
}