/// <summary>
/// Asserts that the examples produced from <paramref name="json"/> are equivalent to the examples
/// parsed from the string-format <paramref name="lines"/>.
/// </summary>
/// <remarks>
/// Each JSON-derived example is checked twice: directly against the corresponding string example,
/// and again after re-parsing its generated <c>VowpalWabbitString</c>.
/// </remarks>
/// <param name="lines">Expected examples in string format, one per line.</param>
/// <param name="json">JSON document that is cast to a multi-line example collection after parsing.</param>
/// <param name="labelComparator">Optional comparator passed through to <c>Diff</c>.</param>
/// <param name="label">Optional label passed through to <c>ParseAndCreate</c>.</param>
/// <param name="index">Optional index passed through to <c>ParseAndCreate</c>.</param>
/// <param name="extension">Optional extension registered on the JSON serializer before parsing.</param>
public void Validate(string[] lines, string json, IVowpalWabbitLabelComparator labelComparator = null, ILabel label = null, int? index = null, VowpalWabbitJsonExtension extension = null)
{
    // Use the array's Length property rather than the LINQ Count() extension.
    VowpalWabbitExample[] strExamples = new VowpalWabbitExample[lines.Length];

    try
    {
        for (int i = 0; i < lines.Length; i++)
        {
            strExamples[i] = this.vw.ParseLine(lines[i]);
        }

        using (var jsonSerializer = new VowpalWabbitJsonSerializer(this.vw))
        {
            if (extension != null)
            {
                jsonSerializer.RegisterExtension(extension);
            }

            using (var jsonExample = (VowpalWabbitMultiLineExampleCollection)jsonSerializer.ParseAndCreate(json, label, index))
            {
                // Flatten to a single list: the shared example (if any) first, then the per-line examples.
                var jsonExamples = new List<VowpalWabbitExample>();

                if (jsonExample.SharedExample != null)
                {
                    jsonExamples.Add(jsonExample.SharedExample);
                }

                jsonExamples.AddRange(jsonExample.Examples);

                Assert.AreEqual(strExamples.Length, jsonExamples.Count);

                for (int i = 0; i < strExamples.Length; i++)
                {
                    // Round trip: re-parse the string generated for the JSON example.
                    using (var strJsonExample = this.vw.ParseLine(jsonExamples[i].VowpalWabbitString))
                    {
                        var diff = strExamples[i].Diff(this.vw, jsonExamples[i], labelComparator);
                        Assert.IsNull(diff, diff + " generated string: '" + jsonExamples[i].VowpalWabbitString + "'");

                        diff = strExamples[i].Diff(this.vw, strJsonExample, labelComparator);
                        Assert.IsNull(diff, diff);
                    }
                }
            }
        }
    }
    finally
    {
        // Entries stay null if ParseLine threw part-way through the first loop.
        foreach (var ex in strExamples)
        {
            if (ex != null)
            {
                ex.Dispose();
            }
        }
    }
}
/// <summary>
/// Verifies that <c>_label_cost</c> / <c>_label_probability</c> properties in JSON end up on the
/// contextual bandit label of the created example, for both a single-line and a <c>_multi</c> document,
/// and that a registered extension receives the <c>_eventid</c> property.
/// </summary>
public void TestJsonLabelExtraction()
{
    using (var vw = new VowpalWabbit("--cb_adf --rank_all"))
    {
        // Single-line document with a custom "_eventid" property handled by an extension.
        using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
        {
            string eventId = null;

            jsonSerializer.RegisterExtension((state, property) =>
            {
                // Expected value goes first so a failure message reads correctly.
                Assert.AreEqual("_eventid", property);
                Assert.IsTrue(state.Reader.Read());

                eventId = (string)state.Reader.Value;
                return true;
            });

            jsonSerializer.Parse("{\"_eventid\":\"abc123\",\"a\":1,\"_label_cost\":-1,\"_label_probability\":0.3}");

            Assert.AreEqual("abc123", eventId);

            using (var examples = jsonSerializer.CreateExamples())
            {
                var single = examples as VowpalWabbitSingleLineExampleCollection;
                Assert.IsNotNull(single);

                var label = single.Example.Label as ContextualBanditLabel;
                Assert.IsNotNull(label);

                Assert.AreEqual(-1, label.Cost);
                Assert.AreEqual(0.3, label.Probability, 0.0001);
            }
        }

        // Multi-line document: "_labelindex":1 selects the second example as the labelled one.
        using (var jsonSerializer = new VowpalWabbitJsonSerializer(vw))
        {
            jsonSerializer.Parse("{\"_multi\":[{\"_text\":\"w1 w2\", \"a\":{\"x\":1}}, {\"_text\":\"w2 w3\"}], \"_labelindex\":1, \"_label_cost\":-1, \"_label_probability\":0.3}");

            using (var examples = jsonSerializer.CreateExamples())
            {
                var multi = examples as VowpalWabbitMultiLineExampleCollection;
                Assert.IsNotNull(multi);

                Assert.AreEqual(2, multi.Examples.Length);

                var label = multi.Examples[0].Label as ContextualBanditLabel;
                // Fail with an assertion rather than a NullReferenceException if the label type differs.
                Assert.IsNotNull(label);
                Assert.AreEqual(0, label.Cost);
                Assert.AreEqual(0, label.Probability);

                label = multi.Examples[1].Label as ContextualBanditLabel;
                Assert.IsNotNull(label);
                Assert.AreEqual(-1, label.Cost);
                Assert.AreEqual(0.3, label.Probability, 0.0001);
            }
        }
    }
}
/// <summary>
/// Pipeline stage 1: parses <c>data.JSON</c> into an example and stores it, together with the
/// <c>_eventid</c> and <c>_timestamp</c> properties, on <paramref name="data"/>.
/// </summary>
/// <param name="data">Pipeline item whose <c>JSON</c> is read and whose <c>EventId</c>, <c>Timestamp</c> and <c>Example</c> are set.</param>
/// <returns>
/// A sequence containing <paramref name="data"/> when an example was created. The sequence is empty
/// when <c>ParseAndCreate</c> returned null (creation deferred) or when any exception occurred; in the
/// latter case the exception is reported to telemetry and the faulty-example counters are incremented.
/// </returns>
private IEnumerable<PipelineData> Stage1_Deserialize(PipelineData data)
{
    try
    {
        using (var jsonReader = new JsonTextReader(new StringReader(data.JSON)))
        {
            //jsonReader.FloatParser = Util.ReadDoubleString;
            // jsonReader.ArrayPool = pool;

            VowpalWabbitJsonSerializer vwJsonSerializer = null;

            try
            {
                vwJsonSerializer = new VowpalWabbitJsonSerializer(this.trainer.VowpalWabbit, this.trainer.ReferenceResolver);

                vwJsonSerializer.RegisterExtension((state, property) =>
                {
                    if (property.Equals("_eventid", StringComparison.OrdinalIgnoreCase))
                    {
                        // Reject when there is no further token OR the token has the wrong type.
                        // (The previous '&&' let a wrong-typed token through to the cast below.)
                        if (!state.Reader.Read() || state.Reader.TokenType != JsonToken.String)
                        {
                            throw new VowpalWabbitJsonException(state.Reader, "Expected string");
                        }

                        data.EventId = (string)state.Reader.Value;
                        return true;
                    }

                    if (property.Equals("_timestamp", StringComparison.OrdinalIgnoreCase))
                    {
                        if (!state.Reader.Read() || state.Reader.TokenType != JsonToken.Date)
                        {
                            throw new VowpalWabbitJsonException(state.Reader, "Expected date");
                        }

                        data.Timestamp = (DateTime)state.Reader.Value;

                        // The value token has been consumed, so report the property as handled,
                        // consistent with the "_eventid" branch above.
                        return true;
                    }

                    return false;
                });

                data.Example = vwJsonSerializer.ParseAndCreate(jsonReader);

                if (data.Example == null)
                {
                    // unable to create example due to missing data
                    // will be triggered later
                    // NOTE(review): data.Example is null in this branch, so this assigns null to
                    // UserContext - presumably 'data' was intended; confirm before changing.
                    vwJsonSerializer.UserContext = data.Example;

                    // make sure the serializer is not disposed by the finally block below
                    vwJsonSerializer = null;
                }
            }
            finally
            {
                if (vwJsonSerializer != null)
                {
                    vwJsonSerializer.Dispose();
                }
            }

            this.performanceCounters.Stage1_JSON_DeserializePerSec.Increment();

            // delayed
            if (data.Example == null)
            {
                this.performanceCounters.Feature_Requests_Pending.Increment();
                yield break;
            }
        }
    }
    catch (Exception ex)
    {
        // Attach the offending JSON so the failure can be diagnosed from telemetry.
        this.telemetry.TrackException(ex, new Dictionary<string, string> { { "JSON", data.JSON } });

        this.performanceCounters.Stage2_Faulty_Examples_Total.Increment();
        this.performanceCounters.Stage2_Faulty_ExamplesPerSec.Increment();

        yield break;
    }

    yield return data;
}