/// <summary>
/// Converts the JSON example found in the request body into its Vowpal Wabbit string form.
/// </summary>
/// <remarks>
/// The first value of the "Authorization" header must equal the "UserToken" app setting.
/// The application metadata is (re-)downloaded from the "DecisionServiceSettingsUrl" app setting
/// when none is cached yet or the cached copy is older than one minute.
/// </remarks>
/// <returns>
/// An OK response carrying the VW string, or an Unauthorized response if the token is blank
/// or does not match the configured one.
/// </returns>
/// <exception cref="UnauthorizedAccessException">The request has no "Authorization" header.</exception>
public async Task<HttpResponseMessage> Post()
{
    var header = this.Request.Headers.SingleOrDefault(x => x.Key == "Authorization");

    // SingleOrDefault yields a default KeyValuePair (null Value) when the header is absent
    if (header.Value == null)
    {
        throw new UnauthorizedAccessException("AuthorizationToken missing");
    }

    var userToken = header.Value.First();
    if (string.IsNullOrWhiteSpace(userToken) || userToken != ConfigurationManager.AppSettings["UserToken"])
    {
        return Request.CreateResponse(HttpStatusCode.Unauthorized);
    }

    if (this.metaData == null || lastDownload + TimeSpan.FromMinutes(1) < DateTime.Now)
    {
        var url = ConfigurationManager.AppSettings["DecisionServiceSettingsUrl"];
        this.metaData = ApplicationMetadataUtil.DownloadMetadata<ApplicationClientMetadata>(url);
        lastDownload = DateTime.Now;
    }

    var vwSettings = new VowpalWabbitSettings(metaData.TrainArguments)
    {
        EnableStringExampleGeneration = true,
        EnableStringFloatCompact = true
    };

    using (var vw = new VowpalWabbit(vwSettings))
    using (var serializer = new VowpalWabbitJsonSerializer(vw))
    // The readers are now disposed deterministically; previously they were left to the GC.
    using (var bodyReader = new StreamReader(await Request.Content.ReadAsStreamAsync()))
    using (var jsonReader = new JsonTextReader(bodyReader))
    using (var example = serializer.ParseAndCreate(jsonReader))
    {
        return Request.CreateResponse(HttpStatusCode.OK, example.VowpalWabbitString);
    }
}
/// <summary>
/// Authenticates the request, parses its body as a VW JSON example and returns the
/// generated Vowpal Wabbit string as JSON (property "VWExample").
/// </summary>
/// <returns>
/// The JSON result on success; an Unauthorized status when authentication throws
/// <see cref="UnauthorizedAccessException"/>; an InternalServerError status (after tracking
/// the exception through telemetry) for any other failure.
/// </returns>
public ActionResult Validate()
{
    try
    {
        APIUtil.Authenticate(this.Request);

        // refresh the cached metadata if there is none or it is older than one minute
        var metadataIsStale = this.metaData == null || lastDownload + TimeSpan.FromMinutes(1) < DateTime.Now;
        if (metadataIsStale)
        {
            var settingsUrl = APIUtil.GetSettingsUrl();
            this.metaData = ApplicationMetadataUtil.DownloadMetadata<ApplicationClientMetadata>(settingsUrl);
            lastDownload = DateTime.Now;
        }

        var body = APIUtil.ReadBody(this.Request);

        var vwSettings = new VowpalWabbitSettings(metaData.TrainArguments)
        {
            EnableStringExampleGeneration = true,
            EnableStringFloatCompact = true
        };

        using (var vw = new VowpalWabbit(vwSettings))
        {
            using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
            {
                using (var parsed = jsonSerializer.ParseAndCreate(body))
                {
                    return Json(new { VWExample = parsed.VowpalWabbitString });
                }
            }
        }
    }
    catch (UnauthorizedAccessException authFailure)
    {
        return new HttpStatusCodeResult(HttpStatusCode.Unauthorized, authFailure.Message);
    }
    catch (Exception failure)
    {
        new TelemetryClient().TrackException(failure);
        return new HttpStatusCodeResult(HttpStatusCode.InternalServerError, failure.ToString());
    }
}
/// <summary>
/// Serializes two contexts that share the same float vector through a common JSON reference
/// resolver and validates both resulting examples. The VW reference resolver's callback
/// fails the test if it is ever invoked.
/// </summary>
public void TestJsonDict()
{
    var sharedVector = new float[] { 1, 2, 3 };

    var refResolver = new RefResolve();
    var jsonSettings = new JsonSerializerSettings { ReferenceResolverProvider = () => refResolver };

    var firstContext = new Context(sharedVector, 1, jsonSettings);
    var secondContext = new Context(sharedVector, 2, jsonSettings);

    using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableStringExampleGeneration = true }))
    using (var vwResolver = new VowpalWabbitJsonReferenceResolver(delayed => Assert.Fail()))
    using (var firstSerializer = new VowpalWabbitJsonSerializer(vw, vwResolver))
    using (var firstExample = firstSerializer.ParseAndCreate(firstContext.JSON))
    using (var secondSerializer = new VowpalWabbitJsonSerializer(vw, vwResolver))
    using (var secondExample = secondSerializer.ParseAndCreate(secondContext.JSON))
    using (var jsonValidator = new VowpalWabbitExampleJsonValidator())
    {
        jsonValidator.Validate("| Id:1 :1 :2 :3", firstExample);
        jsonValidator.Validate(firstContext.VW, firstExample);

        jsonValidator.Validate("| Id:2 :1 :2 :3", secondExample);
        jsonValidator.Validate(secondContext.VW, secondExample);
    }
}
/// <summary>
/// Parses the JSON <paramref name="context"/>, asks VW for action probabilities and maps
/// them to <see cref="ActionProbability"/> entries with 1-based actions.
/// </summary>
/// <param name="vw">The Vowpal Wabbit instance used for parsing and prediction.</param>
/// <param name="context">The JSON context to predict for.</param>
/// <returns>The policy decision holding the action probabilities and the model id.</returns>
protected override PolicyDecision<ActionProbability[]> MapContext(VowpalWabbit vw, string context)
{
    using (var serializer = new VowpalWabbitJsonSerializer(vw))
    {
        using (VowpalWabbitExampleCollection example = serializer.ParseAndCreate(context))
        {
            if (this.developmentMode)
            {
                Trace.TraceInformation("Example Context: '{0}'", example.VowpalWabbitString);
            }

            var scores = example.Predict(VowpalWabbitPredictionType.ActionProbabilities);

            // VW multi-label predictions are 0-based, hence the +1
            var actionProbabilities =
                (from score in scores
                 select new ActionProbability
                 {
                     Action = (int)(score.Action + 1),
                     Probability = score.Score
                 }).ToArray();

            return PolicyDecision.Create(actionProbabilities, new VWState { ModelId = vw.Native.ID });
        }
    }
}
/// <summary>
/// Parses a single example from <paramref name="reader"/> and learns from it.
/// </summary>
/// <param name="reader">JSON reader supplying the example to learn.</param>
/// <param name="label">
/// Optional label. When given it takes precedence over the "_label" property found in
/// <paramref name="reader"/>; when null the "_label" property is used instead.
/// </param>
/// <param name="index">For multi-line examples, the optional index of the example the label applies to.</param>
public void Learn(JsonReader reader, ILabel label = null, int? index = null)
{
    using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
    {
        using (var parsed = jsonSerializer.ParseAndCreate(reader, label, index))
        {
            parsed.Learn();
        }
    }
}
/// <summary>
/// Parses a single example from <paramref name="reader"/> and returns the prediction for it.
/// </summary>
/// <typeparam name="TPrediction">The type of prediction produced.</typeparam>
/// <param name="reader">JSON reader supplying the example to predict for.</param>
/// <param name="predictionFactory">Factory producing the prediction. See <see cref="VowpalWabbitPredictionType"/>.</param>
/// <param name="label">
/// Optional label. When given it takes precedence over the "_label" property found in
/// <paramref name="reader"/>; when null the "_label" property is used instead.
/// </param>
/// <param name="index">For multi-line examples, the optional index of the example the label applies to.</param>
/// <returns>The prediction obtained through <paramref name="predictionFactory"/>.</returns>
public TPrediction Predict<TPrediction>(JsonReader reader, IVowpalWabbitPredictionFactory<TPrediction> predictionFactory, ILabel label = null, int? index = null)
{
    TPrediction prediction;

    using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
    {
        using (var parsed = jsonSerializer.ParseAndCreate(reader, label, index))
        {
            prediction = parsed.Predict(predictionFactory);
        }
    }

    return prediction;
}
/// <summary>
/// Parses the example given as JSON text, learns from it and returns the prediction for it.
/// </summary>
/// <typeparam name="TPrediction">The type of prediction produced.</typeparam>
/// <param name="json">JSON text of the example to learn.</param>
/// <param name="predictionFactory">Factory producing the prediction. See <see cref="VowpalWabbitPredictionType"/>.</param>
/// <param name="label">
/// Optional label. When given it takes precedence over the "_label" property found in
/// <paramref name="json"/>; when null the "_label" property is used instead.
/// </param>
/// <param name="index">For multi-line examples, the optional index of the example the label applies to.</param>
/// <returns>The prediction for the given <paramref name="json"/>.</returns>
public TPrediction Learn<TPrediction>(string json, IVowpalWabbitPredictionFactory<TPrediction> predictionFactory, ILabel label = null, int? index = null)
{
    TPrediction prediction;

    using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
    {
        using (var parsed = jsonSerializer.ParseAndCreate(json, label, index))
        {
            prediction = parsed.Learn(predictionFactory);
        }
    }

    return prediction;
}
/// <summary>
/// Parses <paramref name="json"/> into an example and validates it against the VW string
/// <paramref name="line"/> by delegating to the example-based Validate overload.
/// </summary>
/// <param name="line">The expected example in VW string format.</param>
/// <param name="json">The example in JSON format.</param>
/// <param name="labelComparator">Optional comparator forwarded to the example-based overload.</param>
/// <param name="label">Optional label used while parsing and forwarded to the example-based overload.</param>
public void Validate(string line, string json, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null)
{
    using (var serializer = new VowpalWabbitJsonSerializer(this.vw))
    {
        using (var parsed = serializer.ParseAndCreate(json, label))
        {
            this.Validate(line, parsed, labelComparator, label);
        }
    }
}
/// <summary>
/// Validates a multi-line JSON example against the expected VW string lines.
/// </summary>
/// <remarks>
/// Every JSON-derived example (shared example first, if present) is compared with the example
/// parsed from the corresponding entry of <paramref name="lines"/>, both directly and after
/// round-tripping through its generated VW string.
/// </remarks>
/// <param name="lines">Expected examples in VW string format.</param>
/// <param name="jsonReader">Reader supplying the JSON multi-line example.</param>
/// <param name="labelComparator">Optional comparator passed to the example diff.</param>
/// <param name="label">Optional label passed to the JSON serializer.</param>
/// <param name="index">Optional index passed to the JSON serializer.</param>
/// <param name="extension">Optional extension registered with the JSON serializer before parsing.</param>
public void Validate(string[] lines, JsonReader jsonReader, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null, int? index = null, VowpalWabbitJsonExtension extension = null)
{
    var expected = new VowpalWabbitExample[lines.Count()];

    try
    {
        for (var n = 0; n < lines.Length; n++)
        {
            expected[n] = this.vw.ParseLine(lines[n]);
        }

        using (var serializer = new VowpalWabbitJsonSerializer(this.vw))
        {
            if (extension != null)
            {
                // extension are not supported with native JSON parsing
                serializer.RegisterExtension(extension);
            }

            using (var parsed = (VowpalWabbitMultiLineExampleCollection)serializer.ParseAndCreate(jsonReader, label, index))
            {
                // flatten into: [shared example (optional)], example 0, example 1, ...
                var actual = new List<VowpalWabbitExample>();
                if (parsed.SharedExample != null)
                {
                    actual.Add(parsed.SharedExample);
                }

                actual.AddRange(parsed.Examples);

                Assert.AreEqual(expected.Length, actual.Count);

                for (var n = 0; n < expected.Length; n++)
                {
                    using (var roundTripped = this.vw.ParseLine(actual[n].VowpalWabbitString))
                    {
                        var diff = expected[n].Diff(this.vw, actual[n], labelComparator);
                        Assert.IsNull(diff, diff + " generated string: '" + actual[n].VowpalWabbitString + "'");

                        diff = expected[n].Diff(this.vw, roundTripped, labelComparator);
                        Assert.IsNull(diff, diff);
                    }
                }
            }
        }
    }
    finally
    {
        // entries stay null if parsing a line threw before they were filled in
        foreach (var parsedLine in expected.Where(e => e != null))
        {
            parsedLine.Dispose();
        }
    }
}
/// <summary>
/// Parses the example given as JSON text and returns the prediction for it.
/// </summary>
/// <typeparam name="TPrediction">The type of prediction produced.</typeparam>
/// <param name="json">JSON text of the example to predict for; required to be non-null by contract.</param>
/// <param name="predictionFactory">Factory producing the prediction. See <see cref="VowpalWabbitPredictionType"/>.</param>
/// <param name="label">
/// Optional label. When given it takes precedence over the "_label" property found in
/// <paramref name="json"/>; when null the "_label" property is used instead.
/// </param>
/// <param name="index">For multi-line examples, the optional index of the example the label applies to.</param>
/// <returns>The prediction obtained through <paramref name="predictionFactory"/>.</returns>
public TPrediction Predict<TPrediction>(string json, IVowpalWabbitPredictionFactory<TPrediction> predictionFactory, ILabel label = null, int? index = null)
{
    Contract.Requires(json != null);

    TPrediction prediction;

    using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
    {
        using (var parsed = jsonSerializer.ParseAndCreate(json, label, index))
        {
            prediction = parsed.Predict(predictionFactory);
        }
    }

    return prediction;
}
/// <summary>
/// Parses a single example from <paramref name="reader"/> and runs a prediction on it,
/// discarding the result.
/// </summary>
/// <param name="reader">JSON reader supplying the example; required to be non-null by contract.</param>
/// <param name="label">
/// Optional label. When given it takes precedence over the "_label" property found in
/// <paramref name="reader"/>; when null the "_label" property is used instead.
/// </param>
/// <param name="index">For multi-line examples, the optional index of the example the label applies to.</param>
public void Predict(JsonReader reader, ILabel label = null, int? index = null)
{
    Contract.Requires(reader != null);

    using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
    {
        using (var parsed = jsonSerializer.ParseAndCreate(reader, label, index))
        {
            parsed.Predict();
        }
    }
}
/// <summary>
/// Parses a small single-line JSON example and checks the extracted namespace and features:
/// exactly one namespace indexed by 'n' holding the two expected features.
/// Also demonstrates how a multi-line result would be enumerated.
/// </summary>
public void TestJsonFeatureExtraction()
{
    string json = "{\"ns1\":{\"location\":\"New York\", \"f2\":3.4}}";

    using (var vw = new VowpalWabbit("-b 3 --noconstant"))
    using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
    using (var parsed = jsonSerializer.ParseAndCreate(json))
    {
        var single = parsed as VowpalWabbitSingleLineExampleCollection;
        Assert.IsNotNull(single);

        if (single != null)
        {
            // dump every namespace and feature to the console
            foreach (var featureSpace in single.Example)
            {
                Console.WriteLine(featureSpace.Index);

                foreach (var f in featureSpace)
                {
                    Console.WriteLine("{0}:{1}", f.FeatureIndex, f.X);
                }
            }

            var namespaces = single.Example.ToArray();
            Assert.AreEqual(1, namespaces.Length);
            Assert.AreEqual((byte)'n', namespaces[0].Index);

            var expectedFeatures = new[]
            {
                new VowpalWabbitFeature(single.Example, 1, 12),
                new VowpalWabbitFeature(single.Example, 3.4f, 28)
            };
            CollectionAssert.AreEqual(expectedFeatures, namespaces[0].ToArray());
        }

        // for documentation purpose only
        var multi = parsed as VowpalWabbitMultiLineExampleCollection;
        Assert.IsNull(multi);

        if (multi != null)
        {
            foreach (var line in multi.Examples)
            {
                foreach (var featureSpace in line)
                {
                    Console.WriteLine(featureSpace.Index);

                    foreach (var f in featureSpace)
                    {
                        Console.WriteLine("{0}:{1}", f.FeatureIndex, f.X);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Chooses the action for the given JSON context using a cost-sensitive VW prediction.
/// </summary>
/// <param name="vw">The Vowpal Wabbit instance used for parsing and prediction.</param>
/// <param name="context">The user-defined context, as JSON, to decide on.</param>
/// <returns>
/// The decision holding the predicted action (cast to int) together with a state carrying
/// the id of the model that produced it.
/// </returns>
protected override PolicyDecision<int> MapContext(VowpalWabbit vw, string context)
{
    using (var serializer = new VowpalWabbitJsonSerializer(vw))
    {
        using (VowpalWabbitExampleCollection example = serializer.ParseAndCreate(context))
        {
            return PolicyDecision.Create(
                (int)example.Predict(VowpalWabbitPredictionType.CostSensitive),
                new VWState { ModelId = vw.ID });
        }
    }
}
/// <summary>
/// Produces the action ranking for the given JSON context from VW's action-probability prediction.
/// </summary>
/// <param name="vw">The Vowpal Wabbit instance used for parsing and prediction.</param>
/// <param name="context">The user-defined context, as JSON, to decide on.</param>
/// <returns>
/// The decision holding the predicted actions shifted to 1-based indices, together with a
/// state carrying the id of the model that produced them.
/// </returns>
protected override PolicyDecision<int[]> MapContext(VowpalWabbit vw, string context)
{
    using (var serializer = new VowpalWabbitJsonSerializer(vw))
    {
        using (VowpalWabbitExampleCollection example = serializer.ParseAndCreate(context))
        {
            ActionScore[] scores = example.Predict(VowpalWabbitPredictionType.ActionProbabilities);

            // VW multi-label predictions are 0-based
            var oneBasedActions =
                (from score in scores
                 select (int)score.Action + 1).ToArray();

            return PolicyDecision.Create(oneBasedActions, new VWState { ModelId = vw.ID });
        }
    }
}
/// <summary>
/// Once <c>sinceLastUpdate</c> has reached <c>ModelUpdateInterval</c>: learns from all completed
/// events in the log, saves the resulting model to memory, hands it to <c>UpdateModel</c>
/// and resets the counter. Does nothing if the interval has not been reached or the object is disposed.
/// </summary>
private void updateModelMaybe()
{
    if (!(sinceLastUpdate >= ModelUpdateInterval))
    {
        return;
    }

    // Locking at this level ensures a batch of events is processed completely before
    // the next batch (finer locking would allow interleaving, violating time order)
    lock (this.vwLock)
    {
        // Exit gracefully if the object has been disposed
        if (vwDisposed)
        {
            return;
        }

        var contextIsJson = typeof(TContext) == typeof(string);

        foreach (var evt in log.FlushCompleteEvents())
        {
            uint chosenAction = (uint)((int[])evt.InteractData.Value)[0];
            var cbLabel = new ContextualBanditLabel(
                chosenAction,
                -evt.Reward,
                ((GenericTopSlotExplorerState)evt.InteractData.ExplorerState).Probabilities[0]);

            if (!contextIsJson)
            {
                vw.Learn((TContext)evt.InteractData.Context, cbLabel, index: (int)cbLabel.Action - 1);
                continue;
            }

            // String (json) contexts need to be handled specially, since the C# interface
            // does not currently handle the CB label properly:
            // manually insert the CB label fields right after the first character of the context
            string labelFields = string.Format(
                CultureInfo.InvariantCulture,
                "\"_label_Action\":{0},\"_label_Cost\":{1},\"_label_Probability\":{2},\"_labelIndex\":{3},",
                cbLabel.Action,
                cbLabel.Cost,
                cbLabel.Probability,
                cbLabel.Action - 1);
            string labelledContext = ((string)evt.InteractData.Context).Insert(1, labelFields);

            using (var serializer = new VowpalWabbitJsonSerializer(vwJson.Native))
            {
                using (VowpalWabbitExampleCollection example = serializer.ParseAndCreate(labelledContext))
                {
                    example.Learn();
                }
            }
        }

        using (var modelStream = new MemoryStream())
        {
            // save from whichever native instance was used for learning above
            VowpalWabbit native = contextIsJson ? vwJson.Native : vw.Native;
            native.SaveModel(modelStream);

            modelStream.Position = 0;
            this.UpdateModel(modelStream);

            sinceLastUpdate = 0;
        }
    }
}
/// <summary>
/// Validates <paramref name="json"/> against the VW string <paramref name="line"/> using the
/// managed JSON serializer and, optionally, the native JSON parser as well.
/// </summary>
/// <param name="line">The expected example in VW string format.</param>
/// <param name="json">The example in JSON format.</param>
/// <param name="labelComparator">Optional comparator forwarded to the example-based overload.</param>
/// <param name="label">Optional label used while parsing and forwarded to the example-based overload.</param>
/// <param name="enableNativeJsonValidation">
/// When true, <paramref name="json"/> is additionally parsed natively; exactly one example is
/// expected and validated without checking its VW string.
/// </param>
public void Validate(string line, string json, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null, bool enableNativeJsonValidation = true)
{
    using (var serializer = new VowpalWabbitJsonSerializer(this.vw))
    {
        using (var managedExample = serializer.ParseAndCreate(json, label))
        {
            this.Validate(line, managedExample, labelComparator, label);

            if (!enableNativeJsonValidation)
            {
                return;
            }

            var nativeExamples = this.vw.ParseJson(json);
            Assert.AreEqual(1, nativeExamples.Count);

            using (var nativeExample = new VowpalWabbitSingleLineExampleCollection(this.vw, nativeExamples[0]))
            {
                this.Validate(line, nativeExample, labelComparator, label, validateVowpalWabbitString: false);
            }
        }
    }
}
/// <summary>
/// Interprets <paramref name="value"/> as a JSON example and writes its Vowpal Wabbit string
/// to the underlying writer. Multi-line examples get an extra trailing newline appended.
/// </summary>
/// <param name="value">The JSON example to convert.</param>
/// <exception cref="InvalidDataException">The serializer produced no example for <paramref name="value"/>.</exception>
public override void WriteLine(string value)
{
    using (var serializer = new VowpalWabbitJsonSerializer(vw))
    {
        using (var parsed = serializer.ParseAndCreate(value))
        {
            if (parsed == null)
            {
                throw new InvalidDataException($"Invalid example: {value}");
            }

            var isMultiLine = parsed is VowpalWabbitMultiLineExampleCollection;
            var vwText = isMultiLine
                ? parsed.VowpalWabbitString + "\n"
                : parsed.VowpalWabbitString;

            base.WriteLine(vwText);
        }
    }
}
/// <summary>
/// Parses the context whose shared data is not yet available first (expected to yield no
/// example), then the context that supplies the data. The resolver callback hands back the
/// delayed serializer, from which the postponed example is created and validated.
/// </summary>
public void TestJsonDictReverse()
{
    var sharedVector = new float[] { 1, 2, 3 };

    var refResolver = new RefResolve();
    var jsonSettings = new JsonSerializerSettings { ReferenceResolverProvider = () => refResolver };

    var firstContext = new Context(sharedVector, 1, jsonSettings);
    var secondContext = new Context(sharedVector, 2, jsonSettings);

    VowpalWabbitJsonSerializer pendingSerializer = null;

    using (var jsonValidator = new VowpalWabbitExampleJsonValidator())
    using (var vw = new VowpalWabbit(new VowpalWabbitSettings { EnableStringExampleGeneration = true }))
    using (var vwResolver = new VowpalWabbitJsonReferenceResolver(s => pendingSerializer = s))
    {
        // not wrapped in using: it is handed back through the resolver callback and disposed below
        var secondSerializer = new VowpalWabbitJsonSerializer(vw, vwResolver);
        var secondExample = secondSerializer.ParseAndCreate(secondContext.JSON);

        // incomplete data
        Assert.IsNull(secondExample);

        // triggers example2 completion
        using (var firstSerializer = new VowpalWabbitJsonSerializer(vw, vwResolver))
        {
            using (var firstExample = firstSerializer.ParseAndCreate(firstContext.JSON))
            {
                jsonValidator.Validate("| Id:1 :1 :2 :3", firstExample);
            }
        }

        Assert.IsNotNull(pendingSerializer);

        using (var completedSecond = pendingSerializer.CreateExamples())
        {
            jsonValidator.Validate("| Id:2 :1 :2 :3", completedSecond);
        }

        pendingSerializer.Dispose();
    }
}
/// <summary>
/// Validates that <paramref name="example"/> serializes to something equivalent to the VW
/// string <paramref name="line"/> through every available serialization path.
/// </summary>
/// <param name="line">The expected example in VW string format.</param>
/// <param name="example">The user example to serialize.</param>
/// <param name="label">
/// Optional label. Null or the shared label means no label comparison; simple and contextual
/// bandit labels select the matching comparator.
/// </param>
/// <exception cref="ArgumentException">The label type is neither simple nor contextual bandit.</exception>
public void Validate(string line, TExample example, ILabel label = null)
{
    IVowpalWabbitLabelComparator labelComparer = null;
    if (label != null && label != SharedLabel.Instance)
    {
        if (label is SimpleLabel)
        {
            labelComparer = VowpalWabbitLabelComparator.Simple;
        }
        else if (label is ContextualBanditLabel)
        {
            labelComparer = VowpalWabbitLabelComparator.ContextualBandit;
        }
        else
        {
            throw new ArgumentException("Label type not supported: " + label.GetType());
        }
    }

    using (var stringContext = new VowpalWabbitMarshalContext(this.vw.Native))
    using (var nativeContext = new VowpalWabbitMarshalContext(this.vwNative.Native))
    {
        // validate string serializer
        this.serializer(stringContext, example, label);
        this.serializerNative(nativeContext, example, label);

        // natively parsed string example compared against:
        // (1) natively build example
        // (2) string serialized & natively parsed string example
        using (var expected = this.vw.Native.ParseLine(line))
        using (var fromGeneratedString = this.vw.Native.ParseLine(stringContext.StringExample.ToString()))
        using (var builtNatively = nativeContext.ExampleBuilder.CreateExample())
        using (var fromFactory = this.factorySerializer.Serialize(example, label))
        {
            var diff = expected.Diff(this.vw.Native, fromGeneratedString, labelComparer);
            Assert.IsNull(diff, diff + " generated string: '" + stringContext.StringExample + "'");

            diff = expected.Diff(this.vw.Native, builtNatively, labelComparer);
            Assert.IsNull(diff, diff);

            if (!expected.IsNewLine)
            {
                Assert.IsFalse(string.IsNullOrEmpty(fromFactory.VowpalWabbitString));
                Assert.IsFalse(string.IsNullOrEmpty(this.factorySerializer.SerializeToString(example, label)));
            }

            var usesJsonDiscovery = this.vw.Native.Settings.FeatureDiscovery == VowpalWabbitFeatureDiscovery.Json;
            if (usesJsonDiscovery)
            {
                // additionally round-trip through JSON and compare the parsed example
                var jsonText = JsonConvert.SerializeObject(example);

                using (var jsonSerializer = new VowpalWabbitJsonSerializer(this.vw.Native))
                using (var jsonParsed = jsonSerializer.ParseAndCreate(jsonText, label))
                {
                    var jsonSingle = ((VowpalWabbitSingleLineExampleCollection)jsonParsed).Example;

                    diff = expected.Diff(this.vw.Native, jsonSingle, labelComparer);
                    Assert.IsNull(diff, diff + "\njson: '" + jsonText + "'");
                }
            }
        }
    }
}
/// <summary>
/// Pipeline stage 1: deserializes <c>data.JSON</c> into a VW example and extracts the event
/// metadata properties ("_eventid", "_timestamp", "_ProbabilityOfDrop", "_p", "_a").
/// </summary>
/// <param name="data">The pipeline item; its metadata fields and <c>Example</c> are populated here.</param>
/// <returns>
/// <paramref name="data"/> when an example was created. Nothing when the example is delayed
/// (pending counter incremented) or when any exception occurred (exception tracked with the
/// JSON payload and the faulty-example counters incremented).
/// </returns>
private IEnumerable<PipelineData> Stage1_Deserialize(PipelineData data)
{
    try
    {
        using (var jsonReader = new JsonTextReader(new StringReader(data.JSON)))
        {
            //jsonReader.FloatParser = Util.ReadDoubleString;
            // jsonReader.ArrayPool = pool;
            VowpalWabbitJsonSerializer vwJsonSerializer = null;
            try
            {
                vwJsonSerializer = new VowpalWabbitJsonSerializer(this.trainer.VowpalWabbit, this.trainer.ReferenceResolver);

                vwJsonSerializer.RegisterExtension((state, property) =>
                {
                    if (TryExtractProperty(state, property, "_eventid", JsonToken.String, reader => data.EventId = (string)reader.Value))
                    {
                        return true;
                    }

                    if (TryExtractProperty(state, property, "_timestamp", JsonToken.Date, reader => data.Timestamp = (DateTime)reader.Value))
                    {
                        return true;
                    }

                    // Json.NET boxes float tokens as double (or decimal), never float, so a direct
                    // (float) unboxing cast throws InvalidCastException; convert instead.
                    if (TryExtractProperty(state, property, "_ProbabilityOfDrop", JsonToken.Float, reader => data.ProbabilityOfDrop = Convert.ToSingle(reader.Value ?? 0f)))
                    {
                        return true;
                    }

                    if (TryExtractArrayProperty<float>(state, property, "_p", arr => data.Probabilities = arr))
                    {
                        return true;
                    }

                    if (TryExtractArrayProperty<int>(state, property, "_a", arr => data.Actions = arr))
                    {
                        return true;
                    }

                    return false;
                });

                data.Example = vwJsonSerializer.ParseAndCreate(jsonReader);

                // Use the (paramName, message) overload: the single-string overload treats its
                // argument as the parameter name, which garbles the resulting message.
                if (data.Probabilities == null)
                {
                    throw new ArgumentNullException("_p", "Missing probabilities (_p)");
                }

                if (data.Actions == null)
                {
                    throw new ArgumentNullException("_a", "Missing actions (_a)");
                }

                if (data.Example == null)
                {
                    // unable to create example due to missing data
                    // will be trigger later
                    // NOTE(review): data.Example is null in this branch, so this assigns null;
                    // presumably 'data' was intended - confirm against the reference resolver callback.
                    vwJsonSerializer.UserContext = data.Example;

                    // make sure the serialize is not deallocated
                    vwJsonSerializer = null;
                }
            }
            finally
            {
                if (vwJsonSerializer != null)
                {
                    vwJsonSerializer.Dispose();
                }
            }

            performanceCounters.Stage1_JSON_DeserializePerSec.Increment();

            // delayed
            if (data.Example == null)
            {
                this.performanceCounters.Feature_Requests_Pending.Increment();
                yield break;
            }
        }
    }
    catch (Exception ex)
    {
        this.telemetry.TrackException(ex, new Dictionary<string, string> { { "JSON", data.JSON } });

        this.performanceCounters.Stage2_Faulty_Examples_Total.Increment();
        this.performanceCounters.Stage2_Faulty_ExamplesPerSec.Increment();

        yield break;
    }

    yield return data;
}
/// <summary>
/// Validates a single-line JSON example against the VW string <paramref name="line"/>: the
/// JSON-derived example, and the example re-parsed from its generated VW string, must both
/// show no difference to the example parsed from <paramref name="line"/>.
/// </summary>
/// <param name="line">The expected example in VW string format.</param>
/// <param name="json">The example in JSON format.</param>
/// <param name="labelComparator">Optional comparator passed to the example diff.</param>
/// <param name="label">Optional label passed to the JSON serializer.</param>
public void Validate(string line, string json, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null)
{
    using (var expected = this.vw.ParseLine(line))
    using (var serializer = new VowpalWabbitJsonSerializer(this.vw))
    using (var parsed = (VowpalWabbitSingleLineExampleCollection)serializer.ParseAndCreate(json, label))
    using (var roundTripped = this.vw.ParseLine(parsed.Example.VowpalWabbitString))
    {
        var againstJson = expected.Diff(this.vw, parsed.Example, labelComparator);
        Assert.IsNull(againstJson, againstJson + " generated string: '" + parsed.VowpalWabbitString + "'");

        var againstRoundTrip = expected.Diff(this.vw, roundTripped, labelComparator);
        Assert.IsNull(againstRoundTrip, againstRoundTrip);
    }
}
/// <summary>
/// Train VW on offline data.
/// </summary>
/// <param name="arguments">Base arguments.</param>
/// <param name="inputFile">Path to input file; each line is parsed as one JSON example.</param>
/// <param name="predictionFile">
/// Name of the output prediction file. If null, <paramref name="inputFile"/> + ".prediction" is used.
/// </param>
/// <param name="reloadInterval">
/// The TimeSpan interval to reload model, measured on the "_timestamp" property of the examples.
/// If null, the model is never reloaded.
/// </param>
/// <param name="learningRate">
/// Learning rate must be specified here otherwise on Reload it will be reset.
/// </param>
/// <param name="cacheFilePrefix">
/// The prefix of the cache file name to use. For example: prefix = "test" => "test.vw.cache"
/// If none or null, the input file name is used, e.g. "input.dataset" => "input.vw.cache"
/// !!! IMPORTANT !!!: Always use a new cache name if a different dataset or reload interval is used.
/// </param>
/// <remarks>
/// Both learning rates and cache file are added to initial training arguments as well as Reload arguments.
/// Lines that fail to parse or learn are counted and skipped.
/// </remarks>
public static void Train(string arguments, string inputFile, string predictionFile = null, TimeSpan? reloadInterval = null, float? learningRate = null, string cacheFilePrefix = null)
{
    // Format culture-invariantly so the argument does not depend on the current culture's
    // decimal separator (e.g. "0,5" under de-DE).
    var learningArgs = learningRate == null
        ? string.Empty
        : string.Format(System.Globalization.CultureInfo.InvariantCulture, " -l {0}", learningRate.Value);

    int cacheIndex = 0;
    var cacheArgs = (Func<int, string>)(i => $" --cache_file {cacheFilePrefix ?? Path.GetFileNameWithoutExtension(inputFile)}-{i}.vw.cache");

    using (var reader = new StreamReader(inputFile))
    using (var prediction = new StreamWriter(predictionFile ?? inputFile + ".prediction"))
    using (var vw = new VowpalWabbit(new VowpalWabbitSettings(arguments + learningArgs + cacheArgs(cacheIndex++)) { Verbose = true }))
    {
        string line;
        int lineNr = 0;
        int invalidExamples = 0;
        DateTime? lastTimestamp = null;

        while ((line = reader.ReadLine()) != null)
        {
            try
            {
                bool reload = false;
                using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
                {
                    if (reloadInterval != null)
                    {
                        // Watch the event timestamps and request a reload once more than
                        // reloadInterval has elapsed since the last recorded timestamp.
                        jsonSerializer.RegisterExtension((state, property) =>
                        {
                            if (property.Equals("_timestamp", StringComparison.Ordinal))
                            {
                                var eventTimestamp = state.Reader.ReadAsDateTime();

                                if (lastTimestamp == null)
                                {
                                    lastTimestamp = eventTimestamp;
                                }
                                else if (lastTimestamp + reloadInterval < eventTimestamp)
                                {
                                    reload = true;
                                    lastTimestamp = eventTimestamp;
                                }

                                return true;
                            }

                            return false;
                        });
                    }

                    using (var example = jsonSerializer.ParseAndCreate(line))
                    {
                        var pred = example.Learn(VowpalWabbitPredictionType.ActionScore);

                        prediction.WriteLine(JsonConvert.SerializeObject(
                            new
                            {
                                nr = lineNr,
                                @as = pred.Select(x => x.Action),
                                p = pred.Select(x => x.Score)
                            }));
                    }

                    if (reload)
                    {
                        vw.Reload(learningArgs + cacheArgs(cacheIndex++));
                    }
                }
            }
            catch (Exception)
            {
                // best effort: a faulty line is skipped, training continues with the next one
                invalidExamples++;
            }

            lineNr++;
        }
    }

    // NOTE: an earlier TPL Dataflow variant (parallel deserialize -> learn -> write predictions)
    // was dropped from here; according to the original author it leaked memory and brought
    // not much gain.
}
/// <summary>
/// Stress test for JSON reference resolution across threads: 10 data vectors, each referenced
/// by 5 contexts, are shuffled and parsed in parallel with 1 to 3 degrees of parallelism.
/// Every example must be validated exactly once, either immediately after parsing or later
/// through the resolver callback.
/// </summary>
public void TestJsonDictThreading()
{
    var refResolver = new RefResolve();
    var jsonSettings = new JsonSerializerSettings { ReferenceResolverProvider = () => refResolver };

    var rnd = new Random(123);
    var contexts = new List<Context>();

    var nextId = 0;

    // different reference objects
    for (int group = 0; group < 10; group++)
    {
        var data = Enumerable.Range(1, 5).Select(_ => (float)rnd.Next(10)).ToArray();

        // referencing the same data
        for (int copy = 0; copy < 5; copy++)
        {
            contexts.Add(new Context(data, nextId++, jsonSettings));
        }
    }

    for (int round = 0; round < 4; round++)
    {
        Permute(contexts, rnd);

        for (int maxDegreeOfParallelism = 1; maxDegreeOfParallelism < 4; maxDegreeOfParallelism++)
        {
            var examplesFound = 0;
            var vwSettings = new VowpalWabbitSettings
            {
                EnableStringExampleGeneration = true,
                EnableThreadSafeExamplePooling = true
            };

            using (var vw = new VowpalWabbit(vwSettings))
            using (var vwResolver = new VowpalWabbitJsonReferenceResolver(delayed =>
            {
                // delayed example: its referenced data has become available
                using (var completed = delayed.CreateExamples())
                {
                    ValidateExample(completed, (Context)delayed.UserContext);
                }

                delayed.Dispose();

                Interlocked.Increment(ref examplesFound);
            }))
            {
                var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism };

                Parallel.ForEach(
                    Partitioner.Create(0, contexts.Count),
                    parallelOptions,
                    range =>
                    {
                        for (int k = range.Item1; k < range.Item2; k++)
                        {
                            var ctx = contexts[k];
                            var serializer = new VowpalWabbitJsonSerializer(vw, vwResolver) { UserContext = ctx };

                            var example = serializer.ParseAndCreate(ctx.JSON);

                            // example not ready yet: the resolver callback takes over (incl. disposal)
                            if (example != null)
                            {
                                ValidateExample(example, ctx);

                                example.Dispose();
                                serializer.Dispose();

                                Interlocked.Increment(ref examplesFound);
                            }
                        }
                    });
            }

            Assert.AreEqual(contexts.Count, examplesFound);
        }
    }
}
/// <summary>
/// Converts a stream of JSON examples (one per line) into Vowpal Wabbit string format.
/// </summary>
/// <remarks>
/// The first line decides the VW mode: "--cb_explore_adf" if it has a "_multi" property,
/// "--cb_explore" otherwise. Lines are serialized in parallel and written by a single writer;
/// multi-line examples are followed by an extra newline. An empty input writes nothing.
/// The method returns only after all converted lines have been handed to <paramref name="writer"/>.
/// </remarks>
/// <param name="reader">Source of JSON examples, one per line.</param>
/// <param name="writer">Destination for the VW strings.</param>
/// <exception cref="AggregateException">
/// A line could not be converted (inner <see cref="InvalidDataException"/>) or processing failed.
/// </exception>
public static void Convert(StreamReader reader, StreamWriter writer)
{
    var line = reader.ReadLine();

    if (line == null)
    {
        return;
    }

    var jExample = JObject.Parse(line);

    var settings = jExample.Properties().Any(p => p.Name == "_multi")
        ? "--cb_explore_adf"
        : "--cb_explore";

    int lineNr = 1;
    using (var vw = new VowpalWabbit(new VowpalWabbitSettings(settings)
    {
        EnableStringExampleGeneration = true,
        EnableStringFloatCompact = true,
        EnableThreadSafeExamplePooling = true
    }))
    {
        var serializeBlock = new TransformBlock<Tuple<string, int>, string>(
            l =>
            {
                using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
                using (var example = jsonSerializer.ParseAndCreate(l.Item1))
                {
                    if (example == null)
                    {
                        throw new InvalidDataException($"Invalid example in line {l.Item2}: '{l.Item1}'");
                    }

                    var str = example.VowpalWabbitString;
                    if (example is VowpalWabbitMultiLineExampleCollection)
                    {
                        str += "\n";
                    }

                    return str;
                }
            },
            new ExecutionDataflowBlockOptions
            {
                BoundedCapacity = 1024,
                MaxDegreeOfParallelism = 8
            });

        var writeBlock = new ActionBlock<string>(
            l => writer.WriteLine(l),
            new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 1, BoundedCapacity = 128 });
        serializeBlock.LinkTo(writeBlock, new DataflowLinkOptions { PropagateCompletion = true });

        var input = serializeBlock.AsObserver();

        do
        {
            input.OnNext(Tuple.Create(line, lineNr));
            lineNr++;
        } while ((line = reader.ReadLine()) != null);

        input.OnCompleted();

        // Surfaces serialization failures with the same exception shape as before.
        serializeBlock.Completion.Wait();

        // The serializer completing only means its output was handed to writeBlock; wait for the
        // writer too, otherwise we may return (and the caller may close the writer) while
        // lines are still queued.
        writeBlock.Completion.Wait();
    }
}
/// <summary>
/// Pipeline stage 1: deserializes <c>data.JSON</c> into a VW example and extracts the
/// "_eventid" and "_timestamp" properties into <paramref name="data"/>.
/// </summary>
/// <param name="data">The pipeline item; its <c>EventId</c>, <c>Timestamp</c> and <c>Example</c> are populated here.</param>
/// <returns>
/// <paramref name="data"/> when an example was created. Nothing when the example is delayed
/// (pending counter incremented) or when any exception occurred (exception tracked with the
/// JSON payload and the faulty-example counters incremented).
/// </returns>
private IEnumerable<PipelineData> Stage1_Deserialize(PipelineData data)
{
    try
    {
        using (var jsonReader = new JsonTextReader(new StringReader(data.JSON)))
        {
            //jsonReader.FloatParser = Util.ReadDoubleString;
            // jsonReader.ArrayPool = pool;
            VowpalWabbitJsonSerializer vwJsonSerializer = null;
            try
            {
                vwJsonSerializer = new VowpalWabbitJsonSerializer(this.trainer.VowpalWabbit, this.trainer.ReferenceResolver);

                vwJsonSerializer.RegisterExtension((state, property) =>
                {
                    if (property.Equals("_eventid", StringComparison.OrdinalIgnoreCase))
                    {
                        // Reject when there is no value token OR it has the wrong type
                        // (the former '&&' let wrongly typed tokens through to the cast below).
                        if (!state.Reader.Read() || state.Reader.TokenType != JsonToken.String)
                        {
                            throw new VowpalWabbitJsonException(state.Reader, "Expected string");
                        }

                        data.EventId = (string)state.Reader.Value;
                        return true;
                    }

                    if (property.Equals("_timestamp", StringComparison.OrdinalIgnoreCase))
                    {
                        if (!state.Reader.Read() || state.Reader.TokenType != JsonToken.Date)
                        {
                            throw new VowpalWabbitJsonException(state.Reader, "Expected date");
                        }

                        data.Timestamp = (DateTime)state.Reader.Value;

                        // The value token has been consumed, so the property must be reported as
                        // handled, exactly like "_eventid" above.
                        return true;
                    }

                    return false;
                });

                data.Example = vwJsonSerializer.ParseAndCreate(jsonReader);

                if (data.Example == null)
                {
                    // unable to create example due to missing data
                    // will be trigger later
                    // NOTE(review): data.Example is null in this branch, so this assigns null;
                    // presumably 'data' was intended - confirm against the reference resolver callback.
                    vwJsonSerializer.UserContext = data.Example;

                    // make sure the serialize is not deallocated
                    vwJsonSerializer = null;
                }
            }
            finally
            {
                if (vwJsonSerializer != null)
                {
                    vwJsonSerializer.Dispose();
                }
            }

            performanceCounters.Stage1_JSON_DeserializePerSec.Increment();

            // delayed
            if (data.Example == null)
            {
                this.performanceCounters.Feature_Requests_Pending.Increment();
                yield break;
            }
        }
    }
    catch (Exception ex)
    {
        this.telemetry.TrackException(ex, new Dictionary<string, string> { { "JSON", data.JSON } });

        this.performanceCounters.Stage2_Faulty_Examples_Total.Increment();
        this.performanceCounters.Stage2_Faulty_ExamplesPerSec.Increment();

        yield break;
    }

    yield return data;
}