/// <summary>
/// Pipeline stage 1: parses the JSON carried by <paramref name="data"/> into a VowpalWabbit example.
/// </summary>
/// <param name="data">
/// The pipeline item whose <c>JSON</c> is parsed. Its <c>Example</c> is assigned here, and its
/// <c>EventId</c> / <c>Timestamp</c> are filled from the "_eventid" / "_timestamp" properties when present.
/// </param>
/// <returns>
/// A sequence containing <paramref name="data"/> when an example was created. The sequence is empty when
/// parsing threw (the exception is reported to telemetry together with the JSON) or when
/// <c>ParseAndCreate</c> returned <c>null</c>.
/// </returns>
private IEnumerable<PipelineData> Stage1_Deserialize(PipelineData data)
{
    try
    {
        using (var jsonReader = new JsonTextReader(new StringReader(data.JSON)))
        {
            //jsonReader.FloatParser = Util.ReadDoubleString;
            // jsonReader.ArrayPool = pool;

            VowpalWabbitJsonSerializer vwJsonSerializer = null;
            try
            {
                vwJsonSerializer = new VowpalWabbitJsonSerializer(this.trainer.VowpalWabbit, this.trainer.ReferenceResolver);

                vwJsonSerializer.RegisterExtension((state, property) =>
                {
                    if (property.Equals("_eventid", StringComparison.OrdinalIgnoreCase))
                    {
                        // Fail when the document ends here OR the value has the wrong token type.
                        // (With '&&' a successful Read() skipped the type check entirely.)
                        if (!state.Reader.Read() || state.Reader.TokenType != JsonToken.String)
                        {
                            throw new VowpalWabbitJsonException(state.Reader, "Expected string");
                        }

                        data.EventId = (string)state.Reader.Value;
                        return true;
                    }
                    else if (property.Equals("_timestamp", StringComparison.OrdinalIgnoreCase))
                    {
                        if (!state.Reader.Read() || state.Reader.TokenType != JsonToken.Date)
                        {
                            throw new VowpalWabbitJsonException(state.Reader, "Expected date");
                        }

                        data.Timestamp = (DateTime)state.Reader.Value;

                        // NOTE(review): unlike "_eventid", this branch falls through to "return false"
                        // after consuming the value - confirm against the serializer that this is intended.
                    }

                    return false;
                });

                data.Example = vwJsonSerializer.ParseAndCreate(jsonReader);

                if (data.Example == null)
                {
                    // unable to create example due to missing data
                    // will be trigger later
                    // NOTE(review): data.Example is null at this point, so UserContext is set to null -
                    // verify whether "data" was meant here.
                    vwJsonSerializer.UserContext = data.Example;

                    // make sure the serialize is not deallocated
                    vwJsonSerializer = null;
                }
            }
            finally
            {
                // Skipped when the reference was cleared above to keep the serializer alive.
                if (vwJsonSerializer != null)
                {
                    vwJsonSerializer.Dispose();
                }
            }

            performanceCounters.Stage1_JSON_DeserializePerSec.Increment();

            // delayed
            if (data.Example == null)
            {
                this.performanceCounters.Feature_Requests_Pending.Increment();
                yield break;
            }
        }
    }
    catch (Exception ex)
    {
        this.telemetry.TrackException(ex, new Dictionary<string, string> { { "JSON", data.JSON } });

        this.performanceCounters.Stage2_Faulty_Examples_Total.Increment();
        this.performanceCounters.Stage2_Faulty_ExamplesPerSec.Increment();

        yield break;
    }

    // "yield return" is not allowed inside a try block that has a catch clause, so it lives out here.
    yield return data;
}
/// <summary>
/// Learns from a single deserialized example, updates counters and partition state, and
/// disposes the example afterwards.
/// </summary>
/// <param name="example">
/// The pipeline item to learn from. Its <c>Example</c> is disposed before this method returns,
/// whether or not learning succeeded.
/// </param>
/// <returns>
/// A <see cref="TrainerResult"/> describing the learned example, or <c>null</c> when any step threw
/// (the exception is reported to telemetry and the faulty-example counters are incremented).
/// </returns>
public TrainerResult Learn(PipelineData example)
{
    try
    {
        if (this.settings.EnableExampleTracing)
        {
            this.telemetry.TrackTrace(
                "Example",
                SeverityLevel.Verbose,
                new Dictionary<string, string>
                {
                    { "ID", example.EventId },
                    { "VW", example.Example.VowpalWabbitString },
                    { "JSON", example.JSON }
                });
        }

        // First contextual-bandit label that has a non-zero probability or cost.
        var label = example.Example.Labels
            .OfType<ContextualBanditLabel>()
            .FirstOrDefault(l => l.Probability != 0f || l.Cost != 0);

        // A missing label is only reported; learning still proceeds.
        if (label == null)
        {
            this.telemetry.TrackTrace($"Unable to find valid label for event '{example.EventId}'", SeverityLevel.Warning);
        }

        var progressivePrediction = example.Example.Learn(VowpalWabbitPredictionType.ActionScore, this.vw);

        //if (this.vwAllReduce != null)
        //{
        //    this.vwAllReduce.Post(vw =>
        //    {
        //        var actions = example.Example.Learn(VowpalWabbitPredictionType.Multilabel, vw);

        //        PerformanceCounters.Instance.ExamplesLearnedTotal.Increment();
        //        PerformanceCounters.Instance.ExamplesLearnedSec.Increment();
        //        PerformanceCounters.Instance.FeaturesLearnedSec.IncrementBy((long)example.Example.NumberOfFeatures);

        //        example.Example.Dispose();
        //    });
        //}

        // record event id for reproducibility
        this.trackbackList.Add(example.EventId);

        this.perfCounters.Stage2_Learn_Total.Increment();
        this.perfCounters.Stage2_Learn_ExamplesPerSec.Increment();
        this.perfCounters.Stage2_Learn_FeaturesPerSec.IncrementBy((long)example.Example.NumberOfFeatures);

        // measure latency
        // NOTE(review): dividing by ticks-per-millisecond (not per-second) makes the raw value 1000x
        // larger than a plain Stopwatch-tick conversion - presumably so the counter reads in
        // milliseconds; confirm against the counter definition.
        var latency = DateTime.UtcNow - example.Timestamp;
        var performanceCounterTicks = latency.Ticks * Stopwatch.Frequency / TimeSpan.TicksPerMillisecond;
        this.perfCounters.AverageExampleLatency.IncrementBy(performanceCounterTicks);
        this.perfCounters.AverageExampleLatencyBase.Increment();

        // update partition state
        // Both the key and the stored value must be present; the original tested PartitionKey twice.
        if (example.PartitionKey != null && example.Offset != null)
        {
            this.state.Partitions[example.PartitionKey] = example.Offset;
            // this.state.PartitionsDateTime[eventHubExample.PartitionKey] = eventHubExample.Offset;
        }

        return new TrainerResult(example.Actions, example.Probabilities)
        {
            Label = label,
            ProgressivePrediction = progressivePrediction,
            PartitionKey = example.PartitionKey,
            Latency = latency,
            ProbabilityOfDrop = example.ProbabilityOfDrop
        };
    }
    catch (Exception ex)
    {
        this.telemetry.TrackException(ex);
        this.perfCounters.Stage2_Faulty_ExamplesPerSec.Increment();
        this.perfCounters.Stage2_Faulty_Examples_Total.Increment();
        return null;
    }
    finally
    {
        // Null-safe so that a null argument cannot throw from here after the catch already handled it.
        example?.Example?.Dispose();
    }
}
/// <summary>
/// Pipeline stage 0: reads every event body as UTF-8 text, one line at a time, and sends each
/// line to the deserialize block wrapped in a <c>PipelineData</c>.
/// </summary>
/// <param name="context">Supplies the partition id (<c>context.Lease.PartitionId</c>) stamped on each item.</param>
/// <param name="messages">The events to split; each one is processed independently.</param>
/// <returns>A task that completes once all events have been read and their lines sent.</returns>
/// <remarks>
/// An exception raised while handling one event is reported to telemetry and does not stop the
/// remaining events from being processed.
/// </remarks>
internal async Task Stage0_Split(PartitionContext context, IEnumerable<EventData> messages)
{
    foreach (EventData message in messages)
    {
        try
        {
            using (var body = message.GetBodyStream())
            using (var reader = new StreamReader(body, Encoding.UTF8))
            {
                while (true)
                {
                    string json = await reader.ReadLineAsync();
                    if (json == null)
                    {
                        // End of the event body.
                        break;
                    }

                    var item = new PipelineData
                    {
                        JSON = json,
                        PartitionKey = context.Lease.PartitionId,
                        Offset = message.Offset
                    };

                    // TODO: ArrayBuffer to avoid string allocation...
                    // also just send char ref + offset + length
                    var accepted = await this.deserializeBlock.SendAsync(item);
                    if (!accepted)
                    {
                        this.telemetry.TrackTrace("Failed to enqueue data");
                    }
                }

                // Stream position after the last line is used as the size of this event.
                var bytesRead = body.Position;
                this.performanceCounters.Stage0_IncomingBytesPerSec.IncrementBy(bytesRead);
                this.performanceCounters.Stage0_Batches_Size.IncrementBy(bytesRead);
                this.performanceCounters.Stage0_Batches_SizeBase.Increment();
            }

            this.performanceCounters.Stage0_BatchesPerSec.Increment();
            this.performanceCounters.Stage0_Batches_Total.Increment();
        }
        catch (Exception error)
        {
            this.telemetry.TrackException(error);
        }
    }
}