/// <summary>
        /// Sends one context built from <c>features[featureIndex]</c>, optionally reports a reward for the
        /// returned action, and records the observed feature/action pair in <c>freq</c>.
        /// </summary>
        /// <param name="deployment">Deployment passed through to the interaction helpers.</param>
        /// <param name="client">Not referenced by this method's body; the helpers it calls use the <c>wc</c> field.</param>
        /// <param name="featureIndex">Index into <c>features</c> selecting the context value to send.</param>
        /// <param name="sendReward">When false, no reward is ever reported for this event.</param>
        /// <returns>1 if a reward was reported for this event, otherwise 0.</returns>
        private int SendEvents(DecisionServiceDeployment deployment, WebClient client, int featureIndex, bool sendReward = true)
        {
            const float reward = 2.0F;

            var    expectedEvents = 0;
            string contextString  = $"{{a: \"{features[featureIndex]}\"}}";
            var    responseJObj   = InteractionParts1and2(deployment, contextType, contextString);
            int    action         = (int)responseJObj["Action"];

            // Feature | Action
            //    A   -->  1
            //    B   -->  2
            //    C   -->  3
            //    D   -->  4
            // A reward is reported only when the action matches the feature per the table above,
            // and even then only in roughly 75% of the cases (the random draw below).
            // The draw is evaluated before the action check, so it consumes one random number
            // whenever sendReward is true.
            if (sendReward && rnd.NextDouble() < .75 && action - 1 == featureIndex)
            {
                InteractionPart3(deployment, responseJObj, reward);
                expectedEvents = 1;
            }

            var stat = string.Format("'{0}' '{1}' ", features[featureIndex], action);
            int count;

            // TryGetValue leaves count at 0 for an unseen key, so the first observation is stored as 1
            // and every later one increments the stored value.
            freq.TryGetValue(stat, out count);
            freq[stat] = count + 1;

            return(expectedEvents);
        }
        /// <summary>
        /// POSTs <paramref name="contextString"/> to <c>{ManagementCenterUrl}/API/{contextType}</c> using the
        /// <c>wc</c> client and parses the response body as JSON.
        /// </summary>
        /// <param name="deployment">Supplies the <c>ManagementCenterUrl</c> base address.</param>
        /// <param name="contextType">Path segment appended after <c>/API/</c>.</param>
        /// <param name="contextString">Request body to upload.</param>
        /// <returns>The response body parsed into a <see cref="JObject"/>.</returns>
        public JObject InteractionParts1and2(DecisionServiceDeployment deployment, string contextType, string contextString)
        {
            string contextUri = string.Format(CultureInfo.InvariantCulture, "{0}/API/{1}", deployment.ManagementCenterUrl, contextType);

            // Encode the request as UTF-8, matching how the response is decoded below. ASCII encoding
            // would silently turn every non-ASCII character into '?'; for pure-ASCII payloads the
            // bytes produced are identical.
            byte[]  context      = System.Text.Encoding.UTF8.GetBytes(contextString);
            var     response     = wc.UploadData(contextUri, "POST", context);
            var     utf8response = System.Text.Encoding.UTF8.GetString(response);
            JObject responseJObj = JObject.Parse(utf8response);

            return(responseJObj);
        }
        /// <summary>
        /// Reports a reward for a previously returned decision by POSTing it to
        /// <c>{ManagementCenterUrl}/API/reward/?eventId={EventId}</c> using the <c>wc</c> client.
        /// </summary>
        /// <param name="deployment">Supplies the <c>ManagementCenterUrl</c> base address.</param>
        /// <param name="responseJObj">Earlier response; its <c>"EventId"</c> value identifies the event.</param>
        /// <param name="reward">Reward value sent as the request body.</param>
        /// <returns>The response body decoded as UTF-8 text.</returns>
        public string InteractionPart3(DecisionServiceDeployment deployment, JObject responseJObj, float reward)
        {
            string eventID   = (string)responseJObj["EventId"];
            string rewardUri = string.Format(CultureInfo.InvariantCulture, "{0}/API/reward/?eventId={1}", deployment.ManagementCenterUrl, eventID);

            // Format with the invariant culture so the decimal separator is always '.', regardless of
            // the culture of the thread running the test (e.g. "2,5" under de-DE).
            string rewardString = reward.ToString(CultureInfo.InvariantCulture);

            byte[] rewardBytes  = System.Text.Encoding.ASCII.GetBytes(rewardString);
            var    response     = wc.UploadData(rewardUri, "POST", rewardBytes);
            string utf8response = System.Text.Encoding.UTF8.GetString(response);

            return(utf8response);
        }
Exemple #4
0
        /// <summary>
        /// Runs the food-context workload twice against <paramref name="deployment"/>: the first pass must
        /// score below the initial exploration epsilon, the second pass must score above 0.8.
        /// </summary>
        /// <param name="deployment">Deployment to configure and exercise.</param>
        public void E2ERankerStochasticRewards(DecisionServiceDeployment deployment)
        {
            // Exploration epsilon the service starts with; also the upper bound asserted for pass one.
            var startingEpsilon = .5f;

            deployment.ConfigureDecisionService(
                trainArguments: "--cb_explore_adf --cb_type dr -q :: --epsilon 0.2",
                initialExplorationEpsilon: startingEpsilon);

            // Read the settings URL before waiting for the trainer, as both passes share it.
            string settingsUrl = deployment.SettingsUrl;
            deployment.OnlineTrainerWaitForStartup();

            float firstPassAccuracy = UploadFoodContextData(deployment, settingsUrl, firstPass: true);
            Assert.IsTrue(firstPassAccuracy < startingEpsilon);

            float secondPassAccuracy = UploadFoodContextData(deployment, settingsUrl, firstPass: false);
            Assert.IsTrue(secondPassAccuracy > .8f);
        }
Exemple #5
0
        /// <summary>
        /// Exercises a four-action policy in two phases and checks the feature/action frequency table
        /// collected in <c>freq</c> after each one: an exploration phase driven by <c>SendEvents</c>,
        /// then a phase that downloads the model and calls <c>ChooseAction</c> directly.
        /// </summary>
        /// <param name="deployment">Deployment that is configured, reset and queried by the test.</param>
        /// <returns>A task that completes once both phases have been asserted.</returns>
        public async Task SimplePolicyTest(DecisionServiceDeployment deployment)
        {
            // Shared by the model-download retry loop and the assertion that follows it. Retrying
            // beyond the asserted limit could never make the test pass, so the loop stops here.
            const int maxDownloadAttempts = 30;

            deployment.OnlineTrainerWaitForStartup();

            deployment.ConfigureDecisionService("--cb_explore 4 --epsilon 0", initialExplorationEpsilon: 1, isExplorationEnabled: true);

            // 4 Actions
            // why does this need to be different from default?
            var config = new DecisionServiceConfiguration(deployment.SettingsUrl)
            {
                InteractionUploadConfiguration = new BatchingConfiguration
                {
                    MaxEventCount = 64
                },
                ObservationUploadConfiguration = new BatchingConfiguration
                {
                    MaxEventCount = 64
                },
                PollingForModelPeriod = TimeSpan.FromMinutes(5)
            };

            config.InteractionUploadConfiguration.ErrorHandler   += JoinServiceBatchConfiguration_ErrorHandler;
            config.InteractionUploadConfiguration.SuccessHandler += JoinServiceBatchConfiguration_SuccessHandler;
            this.features = new string[] { "a", "b", "c", "d" };
            this.freq     = new Dictionary <string, int>();
            this.rnd      = new Random(123);

            deployment.OnlineTrainerReset();

            // Phase 1: exploration.
            var expectedEvents = 0;
            using (var client = Microsoft.Research.MultiWorldTesting.ClientLibrary.DecisionService.Create <MyContext>(config))
            {
                for (int batch = 0; batch < 50; batch++)
                {
                    expectedEvents += SendEvents(client, 128);
                    // Thread.Sleep(500);
                }
            }

            // TODO: flush doesn't work
            // Assert.AreEqual(expectedEvents, this.eventCount);

            // 4 actions times 4 feature values
            Assert.AreEqual(4 * 4, freq.Keys.Count);

            Console.WriteLine("Exploration");
            var total = freq.Values.Sum();

            foreach (var k in freq.Keys.OrderBy(k => k))
            {
                // Every feature/action pair must account for less than 8% of the observations.
                var f = freq[k] / (float)total;
                Assert.IsTrue(f < 0.08);
                Console.WriteLine("{0} | {1}", k, f);
            }

            freq.Clear();

            await Task.Delay(TimeSpan.FromMinutes(2));

            // Phase 2: download the model, then query the client directly.
            // TODO: update eps: 0
            using (var client = Microsoft.Research.MultiWorldTesting.ClientLibrary.DecisionService.Create <MyContext>(config))
            {
                int attempt;
                for (attempt = 0; attempt < maxDownloadAttempts; attempt++)
                {
                    try
                    {
                        // Awaited rather than blocked on with Wait(), since this method is already async.
                        await client.DownloadModelAndUpdate(System.Threading.CancellationToken.None);
                        break;
                    }
                    catch (Exception)
                    {
                        // Any failure is treated as "model not ready yet": pause one second and retry.
                        await Task.Delay(TimeSpan.FromSeconds(1));
                    }
                }

                // attempt only reaches the limit when every download attempt threw.
                Assert.IsTrue(attempt < maxDownloadAttempts, "Unable to download model");

                for (int i = 0; i < 1024; i++)
                {
                    var key = Guid.NewGuid().ToString();

                    // Cycle through the feature values so each is queried equally often.
                    var featureIndex = i % features.Length;

                    var action = client.ChooseAction(key, new MyContext {
                        Feature = features[featureIndex]
                    });

                    var stat = string.Format("'{0}' '{1}' ", features[featureIndex], action);
                    int count;

                    // TryGetValue leaves count at 0 for an unseen key, so the first observation is
                    // stored as 1 and every later one increments the stored value.
                    freq.TryGetValue(stat, out count);
                    freq[stat] = count + 1;
                }
            }

            Console.WriteLine("Exploitation");
            total = freq.Values.Sum();
            foreach (var k in freq.Keys.OrderBy(k => k))
            {
                // Every recorded feature/action pair must make up 25% (+/- 10 points) of the observations.
                var f = freq[k] / (float)total;
                Assert.AreEqual(0.25f, f, 0.1);
                Console.WriteLine("{0} | {1}", k, f);
            }
        }
Exemple #6
0
        /// <summary>
        /// Simulates customers in two locations choosing among three food deals: asks the decision
        /// service for a ranking per customer, reports a stochastic reward for the top-ranked action,
        /// and measures how often that action was the expected one for the location.
        /// </summary>
        /// <param name="deployment">Deployment whose online trainer is reset when <paramref name="firstPass"/> is true.</param>
        /// <param name="settingsBlobUri">Settings URL used to build the service configuration.</param>
        /// <param name="firstPass">
        /// True for the initial pass (trainer reset, <c>PollingForModelPeriod</c> set to <c>TimeSpan.MinValue</c>);
        /// false for a follow-up pass, which sleeps 10 seconds before sending events.
        /// </param>
        /// <returns>Fraction of decisions whose top-ranked action matched the expected action.</returns>
        private float UploadFoodContextData(DecisionServiceDeployment deployment, string settingsBlobUri, bool firstPass)
        {
            var serviceConfig = new DecisionServiceConfiguration(settingsBlobUri);

            if (firstPass)
            {
                // NOTE(review): presumably TimeSpan.MinValue turns model polling off for this pass — confirm.
                serviceConfig.PollingForModelPeriod = TimeSpan.MinValue;
                deployment.OnlineTrainerReset();
            }

            using (var service = DecisionService.Create <FoodContext>(serviceConfig))
            {
                if (!firstPass)
                {
                    // NOTE(review): presumably gives the client time to pick up a trained model — confirm.
                    Thread.Sleep(10000);
                }

                string   uniqueKey = "scratch-key-gal";
                string[] locations = { "HealthyTown", "LessHealthyTown" };

                // NOTE(review): string.GetHashCode() is not guaranteed stable across runtimes or
                // processes, so this seed may not give a reproducible sequence everywhere.
                var rg = new Random(uniqueKey.GetHashCode());

                int numActions = 3; // ["Hamburger deal 1", "Hamburger deal 2" (better), "Salad deal"]

                int counterCorrect = 0;
                int counterTotal   = 0;

                // number of iterations
                for (int i = 0; i < 10000 * locations.Length; i++)
                {
                    // randomly select a location
                    string location      = locations[rg.Next(0, locations.Length)];
                    bool   isHealthyTown = location.Equals("HealthyTown");

                    string key = uniqueKey + Guid.NewGuid().ToString();

                    var currentContext = new FoodContext
                    {
                        UserLocation = location,
                        Actions      = Enumerable.Range(1, numActions).ToArray()
                    };

                    int[] action    = service.ChooseRanking(key, currentContext);
                    int   topAction = action[0];

                    counterTotal += 1;

                    // We expect healthy town to get salad (action 3) and unhealthy town to get the
                    // second burger (action 2). Only these two locations exist, so anything that is
                    // not HealthyTown is LessHealthyTown.
                    int expectedAction = isHealthyTown ? 3 : 2;
                    if (topAction == expectedAction)
                    {
                        counterCorrect += 1;
                    }

                    // Purchase probability per location and top action:
                    //   healthy town:   burger 1 -> 0.1, burger 2 -> 0.15, salad -> 0.6
                    //   unhealthy town: burger 1 -> 0.4, burger 2 -> 0.6,  salad -> 0.2
                    double purchaseProbability;
                    switch (topAction)
                    {
                        case 1:
                            purchaseProbability = isHealthyTown ? 0.1 : 0.4;
                            break;
                        case 2:
                            purchaseProbability = isHealthyTown ? 0.15 : 0.6;
                            break;
                        case 3:
                            purchaseProbability = isHealthyTown ? 0.6 : 0.2;
                            break;
                        default:
                            // Any other action never results in a purchase.
                            purchaseProbability = 0;
                            break;
                    }

                    // One draw per iteration, taken unconditionally so the random sequence does not
                    // depend on which action was chosen.
                    double currentRand = rg.NextDouble();
                    float  reward      = currentRand < purchaseProbability ? 10 : 0;

                    service.ReportReward(reward, key);

                    // Brief pause between events.
                    Thread.Sleep(1);
                }
                return((float)counterCorrect / counterTotal);
            }
        }