/// <summary>
        /// Pipeline stage 1: parses <c>data.JSON</c> into a VowpalWabbit example and stores it in <c>data.Example</c>.
        /// </summary>
        /// <remarks>
        /// While parsing, the <c>_eventid</c> and <c>_timestamp</c> JSON properties are captured into
        /// <c>data.EventId</c> and <c>data.Timestamp</c>. This method is an iterator, so none of the work
        /// happens until the returned sequence is enumerated.
        /// </remarks>
        /// <param name="data">The pipeline item carrying the raw JSON; it is updated in place.</param>
        /// <returns>
        /// A sequence containing <paramref name="data"/> when an example was created. The sequence is empty
        /// when <c>ParseAndCreate</c> returned null (the example is delayed) or when any step threw; in the
        /// latter case the exception is tracked together with the JSON and the faulty-example counters are incremented.
        /// </returns>
        private IEnumerable<PipelineData> Stage1_Deserialize(PipelineData data)
        {
            try
            {
                using (var jsonReader = new JsonTextReader(new StringReader(data.JSON)))
                {
                    //jsonReader.FloatParser = Util.ReadDoubleString;
                    // jsonReader.ArrayPool = pool;

                    VowpalWabbitJsonSerializer vwJsonSerializer = null;
                    try
                    {
                        vwJsonSerializer = new VowpalWabbitJsonSerializer(this.trainer.VowpalWabbit, this.trainer.ReferenceResolver);

                        vwJsonSerializer.RegisterExtension((state, property) =>
                        {
                            if (property.Equals("_eventid", StringComparison.OrdinalIgnoreCase))
                            {
                                // Fail when the document ends here OR when the value is not a string.
                                // (Combining the two tests with && skipped the type check whenever Read() succeeded.)
                                if (!state.Reader.Read() || state.Reader.TokenType != JsonToken.String)
                                    throw new VowpalWabbitJsonException(state.Reader, "Expected string");
                                data.EventId = (string)state.Reader.Value;

                                return true;
                            }

                            if (property.Equals("_timestamp", StringComparison.OrdinalIgnoreCase))
                            {
                                // Same rule as above: end of input or a non-date token is an error.
                                if (!state.Reader.Read() || state.Reader.TokenType != JsonToken.Date)
                                    throw new VowpalWabbitJsonException(state.Reader, "Expected date");
                                data.Timestamp = (DateTime)state.Reader.Value;

                                // The value token has been consumed, so report the property as handled,
                                // exactly like the _eventid branch does.
                                return true;
                            }

                            // Not one of our properties: the reader was not advanced.
                            return false;
                        });

                        data.Example = vwJsonSerializer.ParseAndCreate(jsonReader);

                        if (data.Example == null)
                        {
                            // unable to create example due to missing data
                            // will be triggered later
                            // NOTE(review): data.Example is null on this path, so UserContext is being set to null.
                            // Confirm whether `data` itself was meant to be stored here.
                            vwJsonSerializer.UserContext = data.Example;
                            // make sure the serializer is not disposed by the finally block below
                            vwJsonSerializer = null;
                        }
                    }
                    finally
                    {
                        if (vwJsonSerializer != null)
                            vwJsonSerializer.Dispose();
                    }

                    this.performanceCounters.Stage1_JSON_DeserializePerSec.Increment();

                    // delayed: nothing to hand to the next stage yet
                    if (data.Example == null)
                    {
                        this.performanceCounters.Feature_Requests_Pending.Increment();
                        yield break;
                    }
                }
            }
            catch (Exception ex)
            {
                // Attach the offending JSON so the failure can be reproduced from telemetry.
                this.telemetry.TrackException(ex, new Dictionary<string, string> { { "JSON", data.JSON } });

                this.performanceCounters.Stage2_Faulty_Examples_Total.Increment();
                this.performanceCounters.Stage2_Faulty_ExamplesPerSec.Increment();

                yield break;
            }

            yield return data;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Learns from a single example, updates performance counters and partition state,
        /// and disposes <c>example.Example</c> before returning.
        /// </summary>
        /// <param name="example">The pipeline item whose <c>Example</c> is learned from.</param>
        /// <returns>
        /// A <see cref="TrainerResult"/> carrying the selected label, the progressive prediction, the partition key,
        /// the measured latency and the drop probability; or <c>null</c> when any step threw, in which case the
        /// exception is tracked and the faulty-example counters are incremented.
        /// </returns>
        public TrainerResult Learn(PipelineData example)
        {
            try
            {
                if (this.settings.EnableExampleTracing)
                    this.telemetry.TrackTrace(
                        "Example",
                        SeverityLevel.Verbose,
                        new Dictionary<string, string>
                        {
                            { "ID", example.EventId },
                            { "VW", example.Example.VowpalWabbitString },
                            { "JSON", example.JSON }
                        });

                // First contextual bandit label that has a non-zero probability or a non-zero cost.
                var label = example.Example.Labels
                    .OfType<ContextualBanditLabel>()
                    .FirstOrDefault(l => l.Probability != 0f || l.Cost != 0);

                // A missing label is only reported; learning still proceeds.
                if (label == null)
                    this.telemetry.TrackTrace($"Unable to find valid label for event '{example.EventId}'", SeverityLevel.Warning);

                var progressivePrediction = example.Example.Learn(VowpalWabbitPredictionType.ActionScore, this.vw);

                //if (this.vwAllReduce != null)
                //{
                //    this.vwAllReduce.Post(vw =>
                //    {
                //        var actions = example.Example.Learn(VowpalWabbitPredictionType.Multilabel, vw);

                //        PerformanceCounters.Instance.ExamplesLearnedTotal.Increment();
                //        PerformanceCounters.Instance.ExamplesLearnedSec.Increment();
                //        PerformanceCounters.Instance.FeaturesLearnedSec.IncrementBy((long)example.Example.NumberOfFeatures);

                //        example.Example.Dispose();
                //    });
                //}

                // record event id for reproducibility
                this.trackbackList.Add(example.EventId);

                this.perfCounters.Stage2_Learn_Total.Increment();
                this.perfCounters.Stage2_Learn_ExamplesPerSec.Increment();
                this.perfCounters.Stage2_Learn_FeaturesPerSec.IncrementBy((long)example.Example.NumberOfFeatures);

                // measure latency
                const int TimeSpanTicksPerMillisecond = 10000;

                var latency = DateTime.UtcNow - example.Timestamp;
                // Latency in milliseconds multiplied by the Stopwatch frequency.
                // NOTE(review): the product latency.Ticks * Stopwatch.Frequency can exceed the range of long for
                // very large latencies (for example when Timestamp was never set) — confirm whether that matters.
                var performanceCounterTicks =
                    latency.Ticks * Stopwatch.Frequency / TimeSpanTicksPerMillisecond;
                this.perfCounters.AverageExampleLatency.IncrementBy(performanceCounterTicks);
                this.perfCounters.AverageExampleLatencyBase.Increment();

                // update partition state
                // NOTE(review): Offset is stored as-is, even when it is null — confirm whether it should be checked too.
                if (example.PartitionKey != null)
                {
                    this.state.Partitions[example.PartitionKey] = example.Offset;
                    // this.state.PartitionsDateTime[eventHubExample.PartitionKey] = eventHubExample.Offset;
                }

                return new TrainerResult(example.Actions, example.Probabilities)
                {
                    Label = label,
                    ProgressivePrediction = progressivePrediction,
                    PartitionKey = example.PartitionKey,
                    Latency = latency,
                    ProbabilityOfDrop = example.ProbabilityOfDrop
                };
            }
            catch (Exception ex)
            {
                this.telemetry.TrackException(ex);
                this.perfCounters.Stage2_Faulty_ExamplesPerSec.Increment();
                this.perfCounters.Stage2_Faulty_Examples_Total.Increment();
                return null;
            }
            finally
            {
                // Null-safe on both levels: a null `example` has already been handled by the catch block above,
                // and dereferencing it here would throw a second exception out of the finally block.
                example?.Example?.Dispose();
            }
        }
        /// <summary>
        /// Pipeline stage 0: reads every event body line by line and sends each line,
        /// wrapped in a <c>PipelineData</c>, to the deserialize block.
        /// </summary>
        /// <remarks>
        /// Each event is handled independently: an exception raised while processing one event is
        /// reported to telemetry and the loop moves on to the next event.
        /// </remarks>
        /// <param name="context">Partition context; its lease's partition id becomes each item's partition key.</param>
        /// <param name="messages">The events to split.</param>
        /// <returns>A task that completes after all events have been processed.</returns>
        internal async Task Stage0_Split(PartitionContext context, IEnumerable<EventData> messages)
        {
            foreach (EventData message in messages)
            {
                try
                {
                    using (var body = message.GetBodyStream())
                    using (var reader = new StreamReader(body, Encoding.UTF8))
                    {
                        for (;;)
                        {
                            var json = await reader.ReadLineAsync();
                            if (json == null)
                                break; // end of the event body

                            var item = new PipelineData
                            {
                                JSON = json,
                                PartitionKey = context.Lease.PartitionId,
                                Offset = message.Offset
                            };

                            // TODO: ArrayBuffer to avoid string allocation...
                            // also just send char ref + offset + length
                            var accepted = await this.deserializeBlock.SendAsync(item);
                            if (accepted)
                                continue;

                            this.telemetry.TrackTrace("Failed to enqueue data");
                        }

                        // The stream position after the last line is used as the size of this event.
                        var bytesConsumed = body.Position;
                        this.performanceCounters.Stage0_IncomingBytesPerSec.IncrementBy(bytesConsumed);
                        this.performanceCounters.Stage0_Batches_Size.IncrementBy(bytesConsumed);
                        this.performanceCounters.Stage0_Batches_SizeBase.Increment();
                    }

                    // Only events that were read without an exception count as processed batches.
                    this.performanceCounters.Stage0_BatchesPerSec.Increment();
                    this.performanceCounters.Stage0_Batches_Total.Increment();
                }
                catch (Exception failure)
                {
                    this.telemetry.TrackException(failure);
                }
            }
        }