Пример #1
0
        /// <summary>
        /// Converts the JSON example in the request body into its Vowpal Wabbit string representation.
        /// </summary>
        /// <remarks>
        /// The Authorization header must equal the "UserToken" app setting. The application metadata is
        /// (re-)downloaded when none is cached or the cached copy is older than one minute.
        /// </remarks>
        /// <returns>
        /// 401 Unauthorized when the Authorization header is missing, empty or does not match the configured
        /// token; otherwise 200 OK carrying the Vowpal Wabbit string of the parsed example.
        /// </returns>
        public async Task <HttpResponseMessage> Post()
        {
            var header = this.Request.Headers.SingleOrDefault(x => x.Key == "Authorization");

            // A missing header (default KeyValuePair, Value == null) or a header without any value is
            // answered with 401 just like a wrong token, instead of escaping as an unhandled exception.
            var userToken = header.Value == null ? null : header.Value.FirstOrDefault();

            // NOTE(review): plain string comparison of a secret is not constant-time.
            if (string.IsNullOrWhiteSpace(userToken) || userToken != ConfigurationManager.AppSettings["UserToken"])
            {
                return Request.CreateResponse(HttpStatusCode.Unauthorized);
            }

            if (this.metaData == null || lastDownload + TimeSpan.FromMinutes(1) < DateTime.Now)
            {
                var url = ConfigurationManager.AppSettings["DecisionServiceSettingsUrl"];
                this.metaData = ApplicationMetadataUtil.DownloadMetadata <ApplicationClientMetadata>(url);
                lastDownload  = DateTime.Now;
            }

            using (var vw = new VowpalWabbit(new VowpalWabbitSettings(metaData.TrainArguments)
            {
                EnableStringExampleGeneration = true,
                EnableStringFloatCompact = true
            }))
            using (var serializer = new VowpalWabbitJsonSerializer(vw))
            // The readers over the request body are disposed deterministically once parsing is done.
            using (var bodyReader = new StreamReader(await Request.Content.ReadAsStreamAsync()))
            using (var jsonReader = new JsonTextReader(bodyReader))
            using (var example = serializer.ParseAndCreate(jsonReader))
            {
                return Request.CreateResponse(HttpStatusCode.OK, example.VowpalWabbitString);
            }
        }
Пример #2
0
        /// <summary>
        /// Authenticates the request, parses its body as a JSON example and returns the resulting
        /// Vowpal Wabbit string wrapped in a JSON object.
        /// </summary>
        /// <returns>
        /// A JSON result with a <c>VWExample</c> property on success; a 401 status result when an
        /// <see cref="UnauthorizedAccessException"/> is thrown; a 500 status result for any other exception
        /// (which is also reported to telemetry).
        /// </returns>
        public ActionResult Validate()
        {
            try
            {
                APIUtil.Authenticate(this.Request);

                // Refresh the metadata when nothing is cached yet or the cached copy is older than a minute.
                if (this.metaData == null || lastDownload + TimeSpan.FromMinutes(1) < DateTime.Now)
                {
                    this.metaData = ApplicationMetadataUtil.DownloadMetadata <ApplicationClientMetadata>(APIUtil.GetSettingsUrl());
                    lastDownload = DateTime.Now;
                }

                var body = APIUtil.ReadBody(this.Request);
                var vwSettings = new VowpalWabbitSettings(metaData.TrainArguments)
                {
                    EnableStringExampleGeneration = true,
                    EnableStringFloatCompact = true
                };

                using (var vw = new VowpalWabbit(vwSettings))
                using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
                using (var parsed = jsonSerializer.ParseAndCreate(body))
                {
                    return Json(new { VWExample = parsed.VowpalWabbitString });
                }
            }
            catch (UnauthorizedAccessException unauthorized)
            {
                return new HttpStatusCodeResult(HttpStatusCode.Unauthorized, unauthorized.Message);
            }
            catch (Exception failure)
            {
                new TelemetryClient().TrackException(failure);
                return new HttpStatusCodeResult(HttpStatusCode.InternalServerError, failure.ToString());
            }
        }
Пример #3
0
        /// <summary>
        /// Parses two contexts built from the same vector (ids 1 and 2) in order and validates both
        /// resulting examples. The reference resolver callback must never fire in this scenario.
        /// </summary>
        public void TestJsonDict()
        {
            var sharedVector = new float[] { 1, 2, 3 };

            var referenceResolver = new RefResolve();
            var jsonSettings = new JsonSerializerSettings();
            jsonSettings.ReferenceResolverProvider = () => referenceResolver;

            var firstContext = new Context(sharedVector, 1, jsonSettings);
            var secondContext = new Context(sharedVector, 2, jsonSettings);

            using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableStringExampleGeneration = true }))
            // Any callback from the resolver fails the test.
            using (var resolver = new VowpalWabbitJsonReferenceResolver(unexpected => Assert.Fail()))
            using (var firstSerializer = new VowpalWabbitJsonSerializer(vw, resolver))
            using (var firstExample = firstSerializer.ParseAndCreate(firstContext.JSON))
            using (var secondSerializer = new VowpalWabbitJsonSerializer(vw, resolver))
            using (var secondExample = secondSerializer.ParseAndCreate(secondContext.JSON))
            using (var validator = new VowpalWabbitExampleJsonValidator())
            {
                validator.Validate("| Id:1 :1 :2 :3", firstExample);
                validator.Validate(firstContext.VW, firstExample);
                validator.Validate("| Id:2 :1 :2 :3", secondExample);
                validator.Validate(secondContext.VW, secondExample);
            }
        }
Пример #4
0
        /// <summary>
        /// Parses the JSON context, requests action probabilities from Vowpal Wabbit and wraps them in a policy decision.
        /// </summary>
        /// <param name="vw">The Vowpal Wabbit instance used for parsing and prediction.</param>
        /// <param name="context">The JSON context to predict for.</param>
        /// <returns>A decision holding the 1-based actions with their scores, plus a state carrying the model id.</returns>
        protected override PolicyDecision <ActionProbability[]> MapContext(VowpalWabbit vw, string context)
        {
            using (var serializer = new VowpalWabbitJsonSerializer(vw))
            using (VowpalWabbitExampleCollection examples = serializer.ParseAndCreate(context))
            {
                if (this.developmentMode)
                {
                    Trace.TraceInformation("Example Context: '{0}'", examples.VowpalWabbitString);
                }

                var scores = examples.Predict(VowpalWabbitPredictionType.ActionProbabilities);

                // VW reports 0-based actions; shift them by one for the decision.
                var actionProbabilities =
                    (from score in scores
                     select new ActionProbability
                     {
                         Action = (int)(score.Action + 1),
                         Probability = score.Score
                     }).ToArray();

                return PolicyDecision.Create(actionProbabilities, new VWState { ModelId = vw.Native.ID });
            }
        }
 /// <summary>
 /// Parses the example supplied by <paramref name="reader"/> and learns from it.
 /// </summary>
 /// <param name="reader">JSON reader supplying the example to learn.</param>
 /// <param name="label">
 /// Optional label. When given, it takes precedence over a "_label" property found in <paramref name="reader"/>;
 /// when null, the "_label" property read from <paramref name="reader"/> is used as label.
 /// </param>
 /// <param name="index">For multi-line examples, the optional index of the example the label applies to.</param>
 public void Learn(JsonReader reader, ILabel label = null, int?index = null)
 {
     using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
     using (var examples = jsonSerializer.ParseAndCreate(reader, label, index))
     {
         examples.Learn();
     }
 }
 /// <summary>
 /// Parses the example supplied by <paramref name="reader"/> and returns the prediction for it.
 /// </summary>
 /// <typeparam name="TPrediction">Type of the prediction produced by <paramref name="predictionFactory"/>.</typeparam>
 /// <param name="reader">JSON reader supplying the example to predict for.</param>
 /// <param name="predictionFactory">Factory creating the prediction. See <see cref="VowpalWabbitPredictionType"/>.</param>
 /// <param name="label">
 /// Optional label. When given, it takes precedence over a "_label" property found in <paramref name="reader"/>;
 /// when null, the "_label" property read from <paramref name="reader"/> is used as label.
 /// </param>
 /// <param name="index">For multi-line examples, the optional index of the example the label applies to.</param>
 /// <returns>The prediction created by <paramref name="predictionFactory"/> for the parsed example.</returns>
 public TPrediction Predict <TPrediction>(JsonReader reader, IVowpalWabbitPredictionFactory <TPrediction> predictionFactory, ILabel label = null, int?index = null)
 {
     using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
     using (var examples = jsonSerializer.ParseAndCreate(reader, label, index))
     {
         return examples.Predict(predictionFactory);
     }
 }
 /// <summary>
 /// Parses the JSON example, learns from it and returns the prediction for it.
 /// </summary>
 /// <typeparam name="TPrediction">Type of the prediction produced by <paramref name="predictionFactory"/>.</typeparam>
 /// <param name="json">The example to learn, as a JSON string.</param>
 /// <param name="predictionFactory">Factory creating the prediction. See <see cref="VowpalWabbitPredictionType"/>.</param>
 /// <param name="label">
 /// Optional label. When given, it takes precedence over a "_label" property found in <paramref name="json"/>;
 /// when null, the "_label" property read from <paramref name="json"/> is used as label.
 /// </param>
 /// <param name="index">For multi-line examples, the optional index of the example the label applies to.</param>
 /// <returns>The prediction for the given <paramref name="json"/>.</returns>
 public TPrediction Learn <TPrediction>(string json, IVowpalWabbitPredictionFactory <TPrediction> predictionFactory, ILabel label = null, int?index = null)
 {
     using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
     using (var examples = jsonSerializer.ParseAndCreate(json, label, index))
     {
         return examples.Learn(predictionFactory);
     }
 }
Пример #8
0
 /// <summary>
 /// Parses <paramref name="json"/> into an example and validates it against the expected
 /// Vowpal Wabbit string <paramref name="line"/>.
 /// </summary>
 /// <param name="line">The expected example in Vowpal Wabbit string format.</param>
 /// <param name="json">The JSON example to parse.</param>
 /// <param name="labelComparator">Optional label comparator forwarded to the example validation.</param>
 /// <param name="label">Optional label forwarded to both parsing and validation.</param>
 public void Validate(string line, string json, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null)
 {
     using (var serializer = new VowpalWabbitJsonSerializer(this.vw))
     using (var parsed = serializer.ParseAndCreate(json, label))
     {
         this.Validate(line, parsed, labelComparator, label);
     }
 }
Пример #9
0
        /// <summary>
        /// Validates a multi-line JSON example against the expected Vowpal Wabbit string lines.
        /// </summary>
        /// <param name="lines">Expected examples in Vowpal Wabbit string format, one per example (shared example first, if any).</param>
        /// <param name="jsonReader">Reader supplying the JSON to parse; it must yield a multi-line example collection.</param>
        /// <param name="labelComparator">Optional comparator used when diffing the examples.</param>
        /// <param name="label">Optional label passed to the JSON parser.</param>
        /// <param name="index">Optional example index passed to the JSON parser.</param>
        /// <param name="extension">Optional extension registered with the JSON serializer before parsing.</param>
        public void Validate(string[] lines, JsonReader jsonReader, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null, int?index = null, VowpalWabbitJsonExtension extension = null)
        {
            var expected = new VowpalWabbitExample[lines.Count()];

            try
            {
                // Parse every expected line up front; the finally block releases whatever got created.
                var position = 0;
                foreach (var line in lines)
                {
                    expected[position++] = this.vw.ParseLine(line);
                }

                using (var serializer = new VowpalWabbitJsonSerializer(this.vw))
                {
                    // extension are not supported with native JSON parsing
                    if (extension != null)
                    {
                        serializer.RegisterExtension(extension);
                    }

                    using (var parsed = (VowpalWabbitMultiLineExampleCollection)serializer.ParseAndCreate(jsonReader, label, index))
                    {
                        // Flatten the collection: the shared example (when present) goes first.
                        var actual = new List <VowpalWabbitExample>();
                        if (parsed.SharedExample != null)
                        {
                            actual.Add(parsed.SharedExample);
                        }
                        actual.AddRange(parsed.Examples);

                        Assert.AreEqual(expected.Length, actual.Count);

                        for (var n = 0; n < expected.Length; n++)
                        {
                            // Round-trip: re-parse the string generated from the JSON example.
                            using (var reparsed = this.vw.ParseLine(actual[n].VowpalWabbitString))
                            {
                                var diff = expected[n].Diff(this.vw, actual[n], labelComparator);
                                Assert.IsNull(diff, diff + " generated string: '" + actual[n].VowpalWabbitString + "'");

                                diff = expected[n].Diff(this.vw, reparsed, labelComparator);
                                Assert.IsNull(diff, diff);
                            }
                        }
                    }
                }
            }
            finally
            {
                foreach (var created in expected.Where(e => e != null))
                {
                    created.Dispose();
                }
            }
        }
Пример #10
0
        /// <summary>
        /// Parses the JSON example and returns the prediction for it.
        /// </summary>
        /// <typeparam name="TPrediction">Type of the prediction produced by <paramref name="predictionFactory"/>.</typeparam>
        /// <param name="json">The example to predict for, as a JSON string. Must not be null.</param>
        /// <param name="predictionFactory">Factory creating the prediction. See <see cref="VowpalWabbitPredictionType"/>.</param>
        /// <param name="label">
        /// Optional label. When given, it takes precedence over a "_label" property found in <paramref name="json"/>;
        /// when null, the "_label" property read from <paramref name="json"/> is used as label.
        /// </param>
        /// <param name="index">For multi-line examples, the optional index of the example the label applies to.</param>
        /// <returns>The prediction created by <paramref name="predictionFactory"/> for the parsed example.</returns>
        public TPrediction Predict <TPrediction>(string json, IVowpalWabbitPredictionFactory <TPrediction> predictionFactory, ILabel label = null, int?index = null)
        {
            Contract.Requires(json != null);

            using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
            using (var examples = jsonSerializer.ParseAndCreate(json, label, index))
            {
                return examples.Predict(predictionFactory);
            }
        }
Пример #11
0
        /// <summary>
        /// Parses the example supplied by <paramref name="reader"/> and runs a prediction on it,
        /// discarding the result.
        /// </summary>
        /// <param name="reader">JSON reader supplying the example to predict for. Must not be null.</param>
        /// <param name="label">
        /// Optional label. When given, it takes precedence over a "_label" property found in <paramref name="reader"/>;
        /// when null, the "_label" property read from <paramref name="reader"/> is used as label.
        /// </param>
        /// <param name="index">For multi-line examples, the optional index of the example the label applies to.</param>
        public void Predict(JsonReader reader, ILabel label = null, int?index = null)
        {
            Contract.Requires(reader != null);

            using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
            using (var examples = jsonSerializer.ParseAndCreate(reader, label, index))
            {
                examples.Predict();
            }
        }
        /// <summary>
        /// Parses a small single-namespace JSON document and checks the extracted namespace and features.
        /// Also shows how namespaces and features of single- and multi-line examples can be enumerated.
        /// </summary>
        public void TestJsonFeatureExtraction()
        {
            string json = "{\"ns1\":{\"location\":\"New York\", \"f2\":3.4}}";

            using (var vw = new VowpalWabbit("-b 3 --noconstant"))
            using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
            using (var parsed = jsonSerializer.ParseAndCreate(json))
            {
                // The document above is expected to yield a single-line example.
                var single = parsed as VowpalWabbitSingleLineExampleCollection;
                Assert.IsNotNull(single);
                if (single != null)
                {
                    // Dump every namespace index with its feature index/value pairs.
                    foreach (var featureSpace in single.Example)
                    {
                        Console.WriteLine(featureSpace.Index);

                        foreach (var f in featureSpace)
                        {
                            Console.WriteLine("{0}:{1}", f.FeatureIndex, f.X);
                        }
                    }

                    var namespaces = single.Example.ToArray();
                    Assert.AreEqual(1, namespaces.Length);
                    Assert.AreEqual((byte)'n', namespaces[0].Index);

                    var expectedFeatures = new[]
                    {
                        new VowpalWabbitFeature(single.Example, 1, 12),
                        new VowpalWabbitFeature(single.Example, 3.4f, 28)
                    };
                    CollectionAssert.AreEqual(expectedFeatures, namespaces[0].ToArray());
                }

                // for documentation purpose only
                var multi = parsed as VowpalWabbitMultiLineExampleCollection;
                Assert.IsNull(multi);
                if (multi != null)
                {
                    foreach (var line in multi.Examples)
                    {
                        foreach (var featureSpace in line)
                        {
                            Console.WriteLine(featureSpace.Index);

                            foreach (var f in featureSpace)
                            {
                                Console.WriteLine("{0}:{1}", f.FeatureIndex, f.X);
                            }
                        }
                    }
                }
            }
        }
Пример #13
0
        /// <summary>
        /// Chooses the action for a given context by parsing it as JSON and running a
        /// cost-sensitive prediction.
        /// This implementation should be thread-safe if multithreading is needed.
        /// </summary>
        /// <param name="vw">The Vowpal Wabbit instance to use.</param>
        /// <param name="context">A user-defined context for the decision, in JSON format.</param>
        /// <returns>A decision holding the predicted action (converted to <c>int</c>) and a state carrying the id of the model used.</returns>
        protected override PolicyDecision <int> MapContext(VowpalWabbit vw, string context)
        {
            using (var serializer = new VowpalWabbitJsonSerializer(vw))
            using (VowpalWabbitExampleCollection examples = serializer.ParseAndCreate(context))
            {
                var chosenAction = (int)examples.Predict(VowpalWabbitPredictionType.CostSensitive);

                return PolicyDecision.Create(chosenAction, new VWState { ModelId = vw.ID });
            }
        }
Пример #14
0
        /// <summary>
        /// Determines the actions for a given context by parsing it as JSON and requesting
        /// action probabilities from Vowpal Wabbit.
        /// This implementation should be thread-safe if multithreading is needed.
        /// </summary>
        /// <param name="vw">The Vowpal Wabbit instance to use.</param>
        /// <param name="context">A user-defined context for the decision, in JSON format.</param>
        /// <returns>A decision holding the predicted actions (1-based, in the order returned by the prediction)
        /// and a state carrying the id of the model used.</returns>
        protected override PolicyDecision <int[]> MapContext(VowpalWabbit vw, string context)
        {
            using (var serializer = new VowpalWabbitJsonSerializer(vw))
            using (VowpalWabbitExampleCollection examples = serializer.ParseAndCreate(context))
            {
                ActionScore[] scores = examples.Predict(VowpalWabbitPredictionType.ActionProbabilities);

                // VW reports 0-based actions; shift them by one for the decision.
                var oneBasedActions = (from score in scores select (int)score.Action + 1).ToArray();

                return PolicyDecision.Create(oneBasedActions, new VWState { ModelId = vw.ID });
            }
        }
Пример #15
0
 /// <summary>
 /// Once <c>sinceLastUpdate</c> has reached <c>ModelUpdateInterval</c>, learns from all completed events
 /// in the log, then saves the resulting model to memory and hands it to <c>UpdateModel</c>.
 /// Does nothing when the interval has not been reached or the object has already been disposed.
 /// </summary>
 private void updateModelMaybe()
 {
     if (sinceLastUpdate >= ModelUpdateInterval)
     {
         // Locking at this level ensures a batch of events is processed completely before
         // the next batch (finer locking would allow interleaving, violating time order).
         lock (this.vwLock)
         {
             // Exit gracefully if the object has been disposed
             if (vwDisposed)
             {
                 return;
             }
             foreach (var dp in log.FlushCompleteEvents())
             {
                 // Build the contextual bandit label from the first chosen action, the negated reward
                 // (used as cost) and the first probability of the explorer state.
                 uint action = (uint)((int[])dp.InteractData.Value)[0];
                 var  label  = new ContextualBanditLabel(action, -dp.Reward, ((GenericTopSlotExplorerState)dp.InteractData.ExplorerState).Probabilities[0]);
                 // String (json) contexts need to be handled specially, since the C# interface
                 // does not currently handle the CB label properly
                 if (typeof(TContext) == typeof(string))
                 {
                     // Manually insert the CB label fields into the context.
                     // The fields are inserted at character index 1 — presumably right after the opening
                     // '{' of the JSON object; verify that contexts always start with '{'.
                     string labelStr = string.Format(CultureInfo.InvariantCulture, "\"_label_Action\":{0},\"_label_Cost\":{1},\"_label_Probability\":{2},\"_labelIndex\":{3},",
                                                     label.Action, label.Cost, label.Probability, label.Action - 1);
                     string context = ((string)dp.InteractData.Context).Insert(1, labelStr);
                     using (var vwSerializer = new VowpalWabbitJsonSerializer(vwJson.Native))
                         using (VowpalWabbitExampleCollection vwExample = vwSerializer.ParseAndCreate(context))
                         {
                             vwExample.Learn();
                         }
                 }
                 else
                 {
                     // Typed contexts: the label is applied to the example at the 0-based action index.
                     vw.Learn((TContext)dp.InteractData.Context, label, index: (int)label.Action - 1);
                 }
             }
             using (MemoryStream currModel = new MemoryStream())
             {
                 // Save from the same native instance that was used for learning above.
                 VowpalWabbit vwNative = (typeof(TContext) == typeof(string)) ? vwJson.Native : vw.Native;
                 vwNative.SaveModel(currModel);
                 // Rewind so UpdateModel reads the model from the beginning of the stream.
                 currModel.Position = 0;
                 this.UpdateModel(currModel);
                 sinceLastUpdate = 0;
             }
         }
     }
 }
Пример #16
0
        /// <summary>
        /// Validates the example parsed from <paramref name="json"/> against the expected Vowpal Wabbit
        /// string <paramref name="line"/>, optionally repeating the check with the native JSON parser.
        /// </summary>
        /// <param name="line">The expected example in Vowpal Wabbit string format.</param>
        /// <param name="json">The JSON example to parse.</param>
        /// <param name="labelComparator">Optional label comparator forwarded to the example validation.</param>
        /// <param name="label">Optional label forwarded to parsing and validation.</param>
        /// <param name="enableNativeJsonValidation">
        /// When true, <paramref name="json"/> is additionally parsed via <c>vw.ParseJson</c>, which must yield
        /// exactly one example; that example is validated without checking its Vowpal Wabbit string.
        /// </param>
        public void Validate(string line, string json, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null, bool enableNativeJsonValidation = true)
        {
            using (var serializer = new VowpalWabbitJsonSerializer(this.vw))
            using (var parsed = serializer.ParseAndCreate(json, label))
            {
                this.Validate(line, parsed, labelComparator, label);

                if (!enableNativeJsonValidation)
                {
                    return;
                }

                var nativeExamples = this.vw.ParseJson(json);
                Assert.AreEqual(1, nativeExamples.Count);

                using (var nativeCollection = new VowpalWabbitSingleLineExampleCollection(this.vw, nativeExamples[0]))
                {
                    this.Validate(line, nativeCollection, labelComparator, label, validateVowpalWabbitString: false);
                }
            }
        }
Пример #17
0
        /// <summary>
        /// Parses <paramref name="value"/> as a JSON example and writes its Vowpal Wabbit string
        /// representation through the base writer.
        /// </summary>
        /// <param name="value">The JSON example to convert.</param>
        /// <exception cref="InvalidDataException">Thrown when no example could be created from <paramref name="value"/>.</exception>
        public override void WriteLine(string value)
        {
            using (var serializer = new VowpalWabbitJsonSerializer(vw))
            using (var parsed = serializer.ParseAndCreate(value))
            {
                if (parsed == null)
                {
                    throw new InvalidDataException($"Invalid example: {value}");
                }

                var text = parsed.VowpalWabbitString;

                // Multi-line examples get an additional newline appended.
                base.WriteLine(parsed is VowpalWabbitMultiLineExampleCollection ? text + "\n" : text);
            }
        }
Пример #18
0
        /// <summary>
        /// Parses the second context before the first one: the second parse yields no example at first,
        /// parsing the first context then triggers the resolver callback, and the delayed serializer
        /// is used to create and validate the second example.
        /// </summary>
        public void TestJsonDictReverse()
        {
            var sharedVector = new float[] { 1, 2, 3 };

            var referenceResolver = new RefResolve();
            var jsonSettings = new JsonSerializerSettings();
            jsonSettings.ReferenceResolverProvider = () => referenceResolver;

            var firstContext = new Context(sharedVector, 1, jsonSettings);
            var secondContext = new Context(sharedVector, 2, jsonSettings);

            // Set by the resolver callback once the pending example can be completed.
            VowpalWabbitJsonSerializer pendingSerializer = null;

            using (var validator = new VowpalWabbitExampleJsonValidator())
            using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableStringExampleGeneration = true }))
            using (var resolver = new VowpalWabbitJsonReferenceResolver(completed => pendingSerializer = completed))
            {
                var secondSerializer = new VowpalWabbitJsonSerializer(vw, resolver);
                var secondExample = secondSerializer.ParseAndCreate(secondContext.JSON);

                // incomplete data
                Assert.IsNull(secondExample);

                // triggers example2 completion
                using (var firstSerializer = new VowpalWabbitJsonSerializer(vw, resolver))
                using (var firstExample = firstSerializer.ParseAndCreate(firstContext.JSON))
                {
                    validator.Validate("| Id:1 :1 :2 :3", firstExample);
                }

                Assert.IsNotNull(pendingSerializer);

                using (var completedExample = pendingSerializer.CreateExamples())
                {
                    validator.Validate("| Id:2 :1 :2 :3", completedExample);
                }

                pendingSerializer.Dispose();
            }
        }
        /// <summary>
        /// Serializes <paramref name="example"/> through the string, native and factory serializers (and,
        /// when JSON feature discovery is configured, through JSON) and checks every result against the
        /// example parsed from <paramref name="line"/>.
        /// </summary>
        /// <param name="line">The expected example in Vowpal Wabbit string format.</param>
        /// <param name="example">The user example to serialize.</param>
        /// <param name="label">Optional label; selects the label comparator used for the diffs.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="label"/> is of an unsupported type.</exception>
        public void Validate(string line, TExample example, ILabel label = null)
        {
            // No comparator for a missing or shared label; otherwise pick one by label type.
            IVowpalWabbitLabelComparator comparator = null;
            if (!(label == null || label == SharedLabel.Instance))
            {
                if (label is SimpleLabel)
                {
                    comparator = VowpalWabbitLabelComparator.Simple;
                }
                else if (label is ContextualBanditLabel)
                {
                    comparator = VowpalWabbitLabelComparator.ContextualBandit;
                }
                else
                {
                    throw new ArgumentException("Label type not supported: " + label.GetType());
                }
            }

            using (var stringContext = new VowpalWabbitMarshalContext(this.vw.Native))
            using (var nativeContext = new VowpalWabbitMarshalContext(this.vwNative.Native))
            {
                // validate string serializer
                this.serializer(stringContext, example, label);
                this.serializerNative(nativeContext, example, label);

                // natively parsed string example compared against:
                // (1) natively build example
                // (2) string serialized & natively parsed string example
                using (var expected = this.vw.Native.ParseLine(line))
                using (var fromGeneratedString = this.vw.Native.ParseLine(stringContext.StringExample.ToString()))
                using (var fromNativeBuilder = nativeContext.ExampleBuilder.CreateExample())
                using (var fromFactory = this.factorySerializer.Serialize(example, label))
                {
                    var diff = expected.Diff(this.vw.Native, fromGeneratedString, comparator);
                    Assert.IsNull(diff, diff + " generated string: '" + stringContext.StringExample + "'");

                    diff = expected.Diff(this.vw.Native, fromNativeBuilder, comparator);
                    Assert.IsNull(diff, diff);

                    if (!expected.IsNewLine)
                    {
                        Assert.IsFalse(string.IsNullOrEmpty(fromFactory.VowpalWabbitString));
                        Assert.IsFalse(string.IsNullOrEmpty(this.factorySerializer.SerializeToString(example, label)));
                    }

                    if (this.vw.Native.Settings.FeatureDiscovery == VowpalWabbitFeatureDiscovery.Json)
                    {
                        // Round-trip through JSON and compare the parsed example as well.
                        var jsonText = JsonConvert.SerializeObject(example);

                        using (var jsonSerializer = new VowpalWabbitJsonSerializer(this.vw.Native))
                        using (var jsonCollection = jsonSerializer.ParseAndCreate(jsonText, label))
                        {
                            var fromJson = ((VowpalWabbitSingleLineExampleCollection)jsonCollection).Example;

                            diff = expected.Diff(this.vw.Native, fromJson, comparator);
                            Assert.IsNull(diff, diff + "\njson: '" + jsonText + "'");
                        }
                    }
                }
            }
        }
Пример #20
0
        /// <summary>
        /// Pipeline stage 1: parses the JSON of <paramref name="data"/> into a Vowpal Wabbit example and
        /// extracts the event properties <c>_eventid</c>, <c>_timestamp</c>, <c>_ProbabilityOfDrop</c>,
        /// <c>_p</c> and <c>_a</c> into <paramref name="data"/>.
        /// </summary>
        /// <param name="data">The pipeline item carrying the raw JSON; it is populated in place.</param>
        /// <returns>
        /// <paramref name="data"/> when an example was created. Nothing when parsing failed (the exception is
        /// reported to telemetry) or when the example could not be created yet (parser returned null).
        /// </returns>
        private IEnumerable <PipelineData> Stage1_Deserialize(PipelineData data)
        {
            try
            {
                using (var jsonReader = new JsonTextReader(new StringReader(data.JSON)))
                {
                    //jsonReader.FloatParser = Util.ReadDoubleString;
                    // jsonReader.ArrayPool = pool;

                    VowpalWabbitJsonSerializer vwJsonSerializer = null;
                    try
                    {
                        vwJsonSerializer = new VowpalWabbitJsonSerializer(this.trainer.VowpalWabbit, this.trainer.ReferenceResolver);

                        vwJsonSerializer.RegisterExtension((state, property) =>
                        {
                            if (TryExtractProperty(state, property, "_eventid", JsonToken.String, reader => data.EventId = (string)reader.Value))
                            {
                                return true;
                            }
                            else if (TryExtractProperty(state, property, "_timestamp", JsonToken.Date, reader => data.Timestamp = (DateTime)reader.Value))
                            {
                                return true;
                            }
                            // reader.Value is a boxed object; a direct (float) cast only succeeds for a boxed
                            // float and throws InvalidCastException for a boxed double, so convert instead.
                            else if (TryExtractProperty(state, property, "_ProbabilityOfDrop", JsonToken.Float, reader => data.ProbabilityOfDrop = Convert.ToSingle(reader.Value ?? 0f)))
                            {
                                return true;
                            }
                            else if (TryExtractArrayProperty <float>(state, property, "_p", arr => data.Probabilities = arr))
                            {
                                return true;
                            }
                            else if (TryExtractArrayProperty <int>(state, property, "_a", arr => data.Actions = arr))
                            {
                                return true;
                            }

                            return false;
                        });

                        data.Example = vwJsonSerializer.ParseAndCreate(jsonReader);

                        // Two-argument constructor: the one-argument overload treats its string as the
                        // parameter name, not as the message.
                        if (data.Probabilities == null)
                        {
                            throw new ArgumentNullException("data", "Missing probabilities (_p)");
                        }
                        if (data.Actions == null)
                        {
                            throw new ArgumentNullException("data", "Missing actions (_a)");
                        }

                        if (data.Example == null)
                        {
                            // unable to create example due to missing data
                            // will be trigger later
                            // Attach the pipeline item itself: data.Example is null in this branch, so
                            // storing it would leave the delayed serializer without any context.
                            vwJsonSerializer.UserContext = data;
                            // make sure the serialize is not deallocated
                            vwJsonSerializer = null;
                        }
                    }
                    finally
                    {
                        if (vwJsonSerializer != null)
                        {
                            vwJsonSerializer.Dispose();
                        }
                    }

                    performanceCounters.Stage1_JSON_DeserializePerSec.Increment();

                    // delayed
                    if (data.Example == null)
                    {
                        this.performanceCounters.Feature_Requests_Pending.Increment();
                        yield break;
                    }
                }
            }
            catch (Exception ex)
            {
                this.telemetry.TrackException(ex, new Dictionary <string, string> {
                    { "JSON", data.JSON }
                });

                this.performanceCounters.Stage2_Faulty_Examples_Total.Increment();
                this.performanceCounters.Stage2_Faulty_ExamplesPerSec.Increment();

                yield break;
            }

            yield return data;
        }
        /// <summary>
        /// Parses the same example from a VW string and from JSON and asserts that both agree.
        /// </summary>
        /// <param name="line">The example in VW string format.</param>
        /// <param name="json">The example in JSON format.</param>
        /// <param name="labelComparator">Optional comparator forwarded to <c>Diff</c>.</param>
        /// <param name="label">Optional label forwarded to the JSON serializer.</param>
        public void Validate(string line, string json, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null)
        {
            using (var expected = this.vw.ParseLine(line))
            using (var serializer = new VowpalWabbitJsonSerializer(this.vw))
            using (var fromJson = (VowpalWabbitSingleLineExampleCollection)serializer.ParseAndCreate(json, label))
            using (var reparsed = this.vw.ParseLine(fromJson.Example.VowpalWabbitString))
            {
                // 1) The example built from JSON must not differ from the reference example.
                var jsonDiff = expected.Diff(this.vw, fromJson.Example, labelComparator);
                Assert.IsNull(jsonDiff, $"{jsonDiff} generated string: '{fromJson.VowpalWabbitString}'");

                // 2) Neither must the example obtained by re-parsing the string generated from JSON.
                var reparsedDiff = expected.Diff(this.vw, reparsed, labelComparator);
                Assert.IsNull(reparsedDiff, reparsedDiff);
            }
        }
Пример #22
0
        //private class Event
        //{
        //    internal VowpalWabbitExampleCollection Example;

        //    internal string Line;

        //    internal int LineNr;

        //    internal ActionScore[] Prediction;
        //}

        /// <summary>
        /// Train VW on offline data.
        /// </summary>
        /// <param name="arguments">Base arguments.</param>
        /// <param name="inputFile">Path to input file. It is read line by line and every line is parsed as one JSON example.</param>
        /// <param name="predictionFile">
        /// Name of the output prediction file. If null, <paramref name="inputFile"/> + ".prediction" is used.
        /// One JSON object (line number, actions, scores) is written per successfully learned example.
        /// </param>
        /// <param name="reloadInterval">
        /// The TimeSpan interval to reload model. The interval is measured on the "_timestamp" property
        /// of the examples; if null, the model is never reloaded.
        /// </param>
        /// <param name="learningRate">
        /// Learning rate must be specified here otherwise on Reload it will be reset.
        /// </param>
        /// <param name="cacheFilePrefix">
        /// The prefix of the cache file name to use. A running index is appended that starts at 0 and is
        /// incremented on every reload, for example: prefix = "test" gives "test-0.vw.cache", "test-1.vw.cache", ...
        /// If none or null, the input file name is used, e.g. "input.dataset" gives "input-0.vw.cache".
        /// !!! IMPORTANT !!!: Always use a new cache name if a different dataset or reload interval is used.
        /// </param>
        /// <remarks>
        /// Both learning rates and cache file are added to initial training arguments as well as Reload arguments.
        /// Lines that fail to parse or learn are skipped (best effort) and produce no prediction line.
        /// </remarks>
        public static void Train(string arguments, string inputFile, string predictionFile = null, TimeSpan? reloadInterval = null, float? learningRate = null, string cacheFilePrefix = null)
        {
            // The value is placed on the VW command line, so it must be formatted with the invariant
            // culture; the current culture could emit a decimal comma (e.g. "0,5").
            var learningArgs = learningRate == null
                ? string.Empty
                : " -l " + learningRate.Value.ToString(System.Globalization.CultureInfo.InvariantCulture);

            int cacheIndex = 0;
            var cacheArgs  = (Func<int, string>)(i => $" --cache_file {cacheFilePrefix ?? Path.GetFileNameWithoutExtension(inputFile)}-{i}.vw.cache");

            using (var reader = new StreamReader(inputFile))
            using (var prediction = new StreamWriter(predictionFile ?? inputFile + ".prediction"))
            using (var vw = new VowpalWabbit(new VowpalWabbitSettings(arguments + learningArgs + cacheArgs(cacheIndex++))
            {
                Verbose = true
            }))
            {
                string    line;
                int       lineNr          = 0;
                int       invalidExamples = 0;
                DateTime? lastTimestamp   = null;

                while ((line = reader.ReadLine()) != null)
                {
                    try
                    {
                        // Set by the "_timestamp" extension below while the line is being parsed.
                        bool reload = false;
                        using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
                        {
                            if (reloadInterval != null)
                            {
                                jsonSerializer.RegisterExtension((state, property) =>
                                {
                                    if (property.Equals("_timestamp", StringComparison.Ordinal))
                                    {
                                        var eventTimestamp = state.Reader.ReadAsDateTime();
                                        if (lastTimestamp == null)
                                        {
                                            // first timestamp seen: start of the first interval
                                            lastTimestamp = eventTimestamp;
                                        }
                                        else if (lastTimestamp + reloadInterval < eventTimestamp)
                                        {
                                            // interval elapsed: reload after this example and start a new interval
                                            reload        = true;
                                            lastTimestamp = eventTimestamp;
                                        }

                                        return true;
                                    }

                                    return false;
                                });
                            }

                            using (var example = jsonSerializer.ParseAndCreate(line))
                            {
                                var pred = example.Learn(VowpalWabbitPredictionType.ActionScore);

                                prediction.WriteLine(JsonConvert.SerializeObject(
                                    new
                                    {
                                        nr  = lineNr,
                                        @as = pred.Select(x => x.Action),
                                        p   = pred.Select(x => x.Score)
                                    }));
                            }

                            // Reload only after the example has been learned and disposed; every reload
                            // gets a fresh cache file index.
                            if (reload)
                            {
                                vw.Reload(learningArgs + cacheArgs(cacheIndex++));
                            }
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort: a faulty line is counted and skipped so that the remaining
                        // input is still processed. NOTE(review): the counter is currently not reported.
                        invalidExamples++;
                    }

                    lineNr++;
                }
            }

            // A parallel TPL Dataflow variant (concurrent JSON deserialization feeding a single-threaded
            // learn block and a prediction writer block) was tried here and removed again:
            // it leaked memory and gave not much gain.
        }
Пример #23
0
        /// <summary>
        /// Parses JSON examples that reference shared data from multiple threads, using a
        /// <c>VowpalWabbitJsonReferenceResolver</c>, and checks that every example is eventually
        /// validated exactly once - either directly after parsing or in the resolver callback.
        /// </summary>
        public void TestJsonDictThreading()
        {
            var referenceResolver = new RefResolve();
            var serializerSettings = new JsonSerializerSettings
            {
                ReferenceResolverProvider = () => referenceResolver
            };

            var random   = new Random(123);
            var contexts = new List<Context>();
            var nextId   = 0;

            // 10 distinct data arrays ...
            for (var group = 0; group < 10; group++)
            {
                var sharedData = Enumerable.Range(1, 5).Select(_ => (float)random.Next(10)).ToArray();

                // ... each of them referenced by 5 contexts
                for (var copy = 0; copy < 5; copy++)
                {
                    var context = new Context(sharedData, nextId, serializerSettings);
                    nextId++;
                    contexts.Add(context);
                }
            }

            for (var round = 0; round < 4; round++)
            {
                Permute(contexts, random);

                // degrees of parallelism 1, 2 and 3
                foreach (var degree in Enumerable.Range(1, 3))
                {
                    var validated = 0;

                    using (var vw = new VowpalWabbit(new VowpalWabbitSettings
                    {
                        EnableStringExampleGeneration  = true,
                        EnableThreadSafeExamplePooling = true
                    }))
                    using (var resolver = new VowpalWabbitJsonReferenceResolver(delayed =>
                    {
                        // Callback path: create the example from the serializer handed in,
                        // validate it and release the serializer.
                        using (var created = delayed.CreateExamples())
                        {
                            ValidateExample(created, (Context)delayed.UserContext);
                        }

                        delayed.Dispose();

                        Interlocked.Increment(ref validated);
                    }))
                    {
                        var options = new ParallelOptions { MaxDegreeOfParallelism = degree };

                        Parallel.ForEach(Partitioner.Create(0, contexts.Count), options, range =>
                        {
                            for (var index = range.Item1; index < range.Item2; index++)
                            {
                                var context    = contexts[index];
                                var serializer = new VowpalWabbitJsonSerializer(vw, resolver)
                                {
                                    UserContext = context
                                };

                                var parsed = serializer.ParseAndCreate(context.JSON);

                                if (parsed != null)
                                {
                                    // Direct path: the example was available right away.
                                    ValidateExample(parsed, context);

                                    parsed.Dispose();
                                    serializer.Dispose();

                                    Interlocked.Increment(ref validated);
                                }

                                // Otherwise the example is not ready yet; the serializer is deliberately
                                // not disposed here (presumably it is handed to the resolver callback above).
                            }
                        });
                    }

                    Assert.AreEqual(contexts.Count, validated);
                }
            }
        }
Пример #24
0
        /// <summary>
        /// Converts JSON examples (one per line) read from <paramref name="reader"/> into VW string
        /// format and writes them to <paramref name="writer"/>.
        /// </summary>
        /// <param name="reader">Source of the JSON lines. Nothing is written if it is empty.</param>
        /// <param name="writer">Destination of the VW strings; one <c>WriteLine</c> call per example.</param>
        /// <remarks>
        /// The first line decides the VW arguments: "--cb_explore_adf" if it has a "_multi" property,
        /// "--cb_explore" otherwise. Conversion runs in parallel, writing is single-threaded.
        /// The method returns only after all converted lines have been handed to the writer.
        /// A line for which no example can be created faults the pipeline; the fault surfaces
        /// from the final wait.
        /// </remarks>
        public static void Convert(StreamReader reader, StreamWriter writer)
        {
            var line = reader.ReadLine();

            if (line == null)
            {
                return;
            }

            // Peek at the first example to choose between the multi-line (ADF) and single-line mode.
            var jExample = JObject.Parse(line);
            var settings = jExample.Properties().Any(p => p.Name == "_multi") ? "--cb_explore_adf" : "--cb_explore";

            int lineNr = 1;

            using (var vw = new VowpalWabbit(new VowpalWabbitSettings(settings)
            {
                EnableStringExampleGeneration = true,
                EnableStringFloatCompact = true,
                EnableThreadSafeExamplePooling = true
            }))
            {
                // Stage 1: JSON line -> VW string (parallel).
                var serializeBlock = new TransformBlock<Tuple<string, int>, string>(
                    l =>
                    {
                        using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
                        using (var example = jsonSerializer.ParseAndCreate(l.Item1))
                        {
                            if (example == null)
                            {
                                throw new InvalidDataException($"Invalid example in line {l.Item2}: '{l.Item1}'");
                            }

                            var str = example.VowpalWabbitString;
                            if (example is VowpalWabbitMultiLineExampleCollection)
                            {
                                // extra newline: together with WriteLine this yields an empty line
                                // after each multi-line example
                                str += "\n";
                            }

                            return str;
                        }
                    },
                    new ExecutionDataflowBlockOptions
                    {
                        BoundedCapacity        = 1024,
                        MaxDegreeOfParallelism = 8
                    });

                // Stage 2: write the strings (single-threaded).
                var writeBlock = new ActionBlock<string>(
                    l => writer.WriteLine(l),
                    new ExecutionDataflowBlockOptions
                    {
                        MaxDegreeOfParallelism = 1,
                        BoundedCapacity        = 128
                    });

                serializeBlock.LinkTo(writeBlock, new DataflowLinkOptions
                {
                    PropagateCompletion = true
                });

                var input = serializeBlock.AsObserver();

                // The first line was already read above, hence do/while.
                do
                {
                    input.OnNext(Tuple.Create(line, lineNr));
                    lineNr++;
                } while ((line = reader.ReadLine()) != null);

                input.OnCompleted();

                // Wait for the conversion stage first so that its faults surface exactly as before ...
                serializeBlock.Completion.Wait();

                // ... then for the writer stage: when the conversion stage completes, lines may still
                // be queued in writeBlock, and returning early would let the caller dispose the
                // writer (and vw) while output is still pending.
                writeBlock.Completion.Wait();
            }
        }
Пример #25
0
        /// <summary>
        /// Pipeline stage 1: parses <c>data.JSON</c> into a VW example and extracts the
        /// "_eventid" and "_timestamp" properties into <paramref name="data"/>.
        /// </summary>
        /// <param name="data">The pipeline item; its JSON is read, EventId/Timestamp/Example are written.</param>
        /// <returns>
        /// A sequence containing <paramref name="data"/> if the example could be created. The sequence is
        /// empty if the example is delayed (the serializer returned null) or if parsing failed; failures
        /// are reported to telemetry and the faulty-example counters instead of being thrown.
        /// </returns>
        private IEnumerable<PipelineData> Stage1_Deserialize(PipelineData data)
        {
            try
            {
                using (var jsonReader = new JsonTextReader(new StringReader(data.JSON)))
                {
                    //jsonReader.FloatParser = Util.ReadDoubleString;
                    // jsonReader.ArrayPool = pool;

                    VowpalWabbitJsonSerializer vwJsonSerializer = null;
                    try
                    {
                        vwJsonSerializer = new VowpalWabbitJsonSerializer(this.trainer.VowpalWabbit, this.trainer.ReferenceResolver);

                        vwJsonSerializer.RegisterExtension((state, property) =>
                        {
                            if (property.Equals("_eventid", StringComparison.OrdinalIgnoreCase))
                            {
                                // Reject both "no more tokens" and "token of the wrong type".
                                if (!state.Reader.Read() || state.Reader.TokenType != JsonToken.String)
                                {
                                    throw new VowpalWabbitJsonException(state.Reader, "Expected string");
                                }
                                data.EventId = (string)state.Reader.Value;

                                return true;
                            }
                            else if (property.Equals("_timestamp", StringComparison.OrdinalIgnoreCase))
                            {
                                if (!state.Reader.Read() || state.Reader.TokenType != JsonToken.Date)
                                {
                                    throw new VowpalWabbitJsonException(state.Reader, "Expected date");
                                }
                                data.Timestamp = (DateTime)state.Reader.Value;

                                // The value token has been consumed, so report the property as handled
                                // (same as for "_eventid").
                                return true;
                            }

                            return false;
                        });

                        data.Example = vwJsonSerializer.ParseAndCreate(jsonReader);

                        if (data.Example == null)
                        {
                            // unable to create example due to missing data
                            // will be triggered later
                            // NOTE(review): data.Example is null at this point, so this assigns null;
                            // presumably `data` was intended - confirm against the consumer of UserContext.
                            vwJsonSerializer.UserContext = data.Example;
                            // make sure the serializer is not disposed in the finally block below
                            vwJsonSerializer = null;
                        }
                    }
                    finally
                    {
                        if (vwJsonSerializer != null)
                        {
                            vwJsonSerializer.Dispose();
                        }
                    }

                    performanceCounters.Stage1_JSON_DeserializePerSec.Increment();

                    // delayed
                    if (data.Example == null)
                    {
                        this.performanceCounters.Feature_Requests_Pending.Increment();
                        yield break;
                    }
                }
            }
            catch (Exception ex)
            {
                // Any failure marks the example as faulty; the raw JSON is attached for diagnosis.
                this.telemetry.TrackException(ex, new Dictionary<string, string> {
                    { "JSON", data.JSON }
                });

                this.performanceCounters.Stage2_Faulty_Examples_Total.Increment();
                this.performanceCounters.Stage2_Faulty_ExamplesPerSec.Increment();

                yield break;
            }

            yield return data;
        }