Exemple #1
0
        /// <summary>
        /// Builds the evaluation pipeline: a dataflow block running <c>OfflineEvaluate</c> whose output is
        /// grouped by policy name, batched, and handed to <c>UploadEvaluation</c>.
        /// </summary>
        /// <param name="settings">Trainer settings; provides the evaluation event hub connection string.</param>
        internal EvalOperation(OnlineTrainerSettingsInternal settings)
        {
            this.telemetry = new TelemetryClient();

            // client used for the evaluation event hub
            this.evalEventHubClient = EventHubClient.CreateFromConnectionString(settings.EvalEventHubConnectionString);

            // the explicitly typed delegate selects the IEnumerable-returning constructor overload
            Func<object, IEnumerable<EvalData>> evaluate = this.OfflineEvaluate;
            var blockOptions = new ExecutionDataflowBlockOptions
            {
                MaxDegreeOfParallelism = 4,
                BoundedCapacity = 1024
            };
            this.evalBlock = new TransformManyBlock<object, EvalData>(evaluate, blockOptions);

            // report how the block finished, including the exception when it faulted
            this.evalBlock.Completion.ContinueWith(completion =>
            {
                this.telemetry.TrackTrace($"Stage 3 - Evaluation pipeline completed: {completion.Status}");
                if (completion.IsFaulted)
                {
                    this.telemetry.TrackException(completion.Exception);
                }
            });

            // Batch output together to match EventHub throughput while keeping latency at 5 seconds at most:
            // each policy group is cut into 5 second windows, and every window is buffered using the
            // UTF-8 byte count of the JSON payload (Buffer is a project helper; 245 * 1024 is presumably
            // its size limit in bytes - confirm against the helper).
            var uploadsPerPolicy = this.evalBlock.AsObservable()
                .GroupBy(evalData => evalData.PolicyName)
                .Select(policyGroup => policyGroup
                    .Window(TimeSpan.FromSeconds(5))
                    .Select(window => window.Buffer(245 * 1024, item => Encoding.UTF8.GetByteCount(item.JSON)))
                    .SelectMany(batches => batches)
                    .Subscribe(this.UploadEvaluation));

            this.evalBlockDisposable = uploadsPerPolicy.Publish().Connect();
        }
        /// <summary>
        /// Builds the evaluation pipeline: a dataflow block running <c>OfflineEvaluate</c> whose output is
        /// grouped by partition key, batched, and handed to <c>UploadEvaluation</c>.
        /// </summary>
        /// <param name="settings">Trainer settings; provides the evaluation event hub connection string.</param>
        /// <param name="performanceCounters">Performance counters kept by this instance.</param>
        internal EvalOperation(OnlineTrainerSettingsInternal settings, PerformanceCounters performanceCounters)
        {
            this.performanceCounters = performanceCounters;
            this.telemetry = new TelemetryClient();

            // client used for the evaluation event hub
            this.evalEventHubClient = EventHubClient.CreateFromConnectionString(settings.EvalEventHubConnectionString);

            // the explicitly typed delegate selects the IEnumerable-returning constructor overload
            Func<object, IEnumerable<EvalData>> evaluate = this.OfflineEvaluate;
            var blockOptions = new ExecutionDataflowBlockOptions
            {
                MaxDegreeOfParallelism = 4,
                BoundedCapacity = 1024
            };
            this.evalBlock = new TransformManyBlock<object, EvalData>(evaluate, blockOptions);

            // hand the block's completion task to the Trace helper under the stage 4 label
            this.evalBlock.Completion.Trace(this.telemetry, "Stage 4 - Evaluation pipeline");

            // Batch output together to match EventHub throughput while keeping latency at 1 second at most:
            // each partition-key group is cut into 1 second windows, and every window is buffered using the
            // UTF-8 byte count of the JSON payload (Buffer is a project helper; 245 * 1024 is presumably
            // its size limit in bytes - confirm against the helper).
            var uploadsPerPartition = this.evalBlock.AsObservable()
                .GroupBy(evalData => evalData.PartitionKey)
                .Select(partitionGroup => partitionGroup
                    .Window(TimeSpan.FromSeconds(1))
                    .Select(window => window.Buffer(245 * 1024, item => Encoding.UTF8.GetByteCount(item.JSON)))
                    .SelectMany(batches => batches)
                    .Subscribe(this.UploadEvaluation));

            this.evalBlockDisposable = uploadsPerPartition.Publish().Connect();
        }
        /// <summary>
        /// Wires up offline evaluation: results of <c>OfflineEvaluate</c> flow through a dataflow block,
        /// are grouped per policy name, batched, and passed on to <c>UploadEvaluation</c>.
        /// </summary>
        /// <param name="settings">Trainer settings; provides the evaluation event hub connection string.</param>
        internal EvalOperation(OnlineTrainerSettingsInternal settings)
        {
            this.telemetry = new TelemetryClient();

            // event hub client for the evaluation output
            this.evalEventHubClient = EventHubClient.CreateFromConnectionString(settings.EvalEventHubConnectionString);

            // typed delegate disambiguates the TransformManyBlock constructor overloads
            Func<object, IEnumerable<EvalData>> offlineEvaluate = this.OfflineEvaluate;
            var executionOptions = new ExecutionDataflowBlockOptions
            {
                MaxDegreeOfParallelism = 4,
                BoundedCapacity = 1024
            };
            this.evalBlock = new TransformManyBlock<object, EvalData>(offlineEvaluate, executionOptions);

            // trace the final task status and record the exception if the block faulted
            this.evalBlock.Completion.ContinueWith(finished =>
            {
                this.telemetry.TrackTrace($"Stage 3 - Evaluation pipeline completed: {finished.Status}");
                if (finished.IsFaulted)
                {
                    this.telemetry.TrackException(finished.Exception);
                }
            });

            // Batch output together to match EventHub throughput while keeping latency at 5 seconds at most.
            // Windows are opened every 5 seconds per policy; Buffer is a project helper that receives the
            // UTF-8 byte count of each JSON payload (245 * 1024 is presumably a byte limit - confirm).
            var batchedUploads = this.evalBlock.AsObservable()
                .GroupBy(result => result.PolicyName)
                .Select(perPolicy => perPolicy
                    .Window(TimeSpan.FromSeconds(5))
                    .Select(slice => slice.Buffer(245 * 1024, entry => Encoding.UTF8.GetByteCount(entry.JSON)))
                    .SelectMany(buffers => buffers)
                    .Subscribe(this.UploadEvaluation));

            this.evalBlockDisposable = batchedUploads.Publish().Connect();
        }
        /// <summary>
        /// Rebuilds the trainer settings from the cloud configuration, validates the connection strings,
        /// and triggers a restart of the train processor host.
        /// </summary>
        /// <remarks>
        /// Returns immediately when no metadata is available. The restart task is started but not awaited.
        /// </remarks>
        /// <exception cref="InvalidDataException">
        /// Thrown when the joined or evaluation event hub connection string cannot be parsed, when the
        /// evaluation connection string has no EntityPath, or when the storage connection string is invalid.
        /// </exception>
        public void RestartTrainProcessorHost()
        {
            if (this.metaData == null)
            {
                return;
            }

            var settings = new OnlineTrainerSettingsInternal
            {
                StorageConnectionString        = CloudConfigurationManager.GetSetting("StorageConnectionString"),
                JoinedEventHubConnectionString = CloudConfigurationManager.GetSetting("JoinedEventHubConnectionString"),
                EvalEventHubConnectionString   = CloudConfigurationManager.GetSetting("EvalEventHubConnectionString"),
                Metadata         = this.metaData,
                CheckpointPolicy = ParseCheckpointPolicy(),
                // make sure we ignore previous events
                EventHubStartDateTimeUtc = DateTime.UtcNow
            };

            bool enableExampleTracing;

            // only override the default when the configured value is a valid boolean
            if (bool.TryParse(CloudConfigurationManager.GetSetting("EnableExampleTracing"), out enableExampleTracing))
            {
                settings.EnableExampleTracing = enableExampleTracing;
            }

            ServiceBusConnectionStringBuilder serviceBusConnectionStringBuilder;

            // the joined connection string is only checked for being parseable
            try
            {
                serviceBusConnectionStringBuilder = new ServiceBusConnectionStringBuilder(settings.JoinedEventHubConnectionString);
            }
            catch (Exception e)
            {
                // keep the original exception as inner exception so the root cause is not lost
                throw new InvalidDataException(
                    $"Invalid JoinedEventHubConnectionString '{settings.JoinedEventHubConnectionString}' found in settings: {e.Message}",
                    e);
            }

            try
            {
                serviceBusConnectionStringBuilder = new ServiceBusConnectionStringBuilder(settings.EvalEventHubConnectionString);
            }
            catch (Exception e)
            {
                // the message names the actual setting key (EvalEventHubConnectionString)
                throw new InvalidDataException(
                    $"Invalid EvalEventHubConnectionString '{settings.EvalEventHubConnectionString}' found in settings: {e.Message}",
                    e);
            }

            // at this point the builder holds the evaluation connection string
            if (string.IsNullOrEmpty(serviceBusConnectionStringBuilder.EntityPath))
            {
                throw new InvalidDataException($"Invalid EvalEventHubConnectionString '{settings.EvalEventHubConnectionString}' found in settings: EntityPath missing");
            }

            CloudStorageAccount cloudStorageAccount;

            if (!CloudStorageAccount.TryParse(settings.StorageConnectionString, out cloudStorageAccount))
            {
                throw new InvalidDataException($"Invalid StorageConnectionString '{settings.StorageConnectionString}' found in settings");
            }

            // fire and forget
            var task = this.trainProcessorHost.Restart(settings);
        }
Exemple #5
0
        //private VowpalWabbitThreadedLearning vwAllReduce;

        /// <summary>
        /// Creates a learner that keeps the supplied settings, callback and performance counters and
        /// opens a blob client for the configured storage account.
        /// </summary>
        /// <param name="settings">Trainer settings; provides the storage connection string.</param>
        /// <param name="delayedExampleCallback">Callback stored for later use with a <c>VowpalWabbitJsonSerializer</c>.</param>
        /// <param name="perfCounters">Performance counters stored on this instance.</param>
        internal Learner(OnlineTrainerSettingsInternal settings, Action<VowpalWabbitJsonSerializer> delayedExampleCallback, PerformanceCounters perfCounters)
        {
            // collaborators handed in by the caller
            this.settings = settings;
            this.delayedExampleCallback = delayedExampleCallback;
            this.perfCounters = perfCounters;

            // state owned by this instance
            this.telemetry = new TelemetryClient();
            this.trackbackList = new List<string>();

            var storageAccount = CloudStorageAccount.Parse(settings.StorageConnectionString);
            this.blobClient = storageAccount.CreateCloudBlobClient();
        }
 /// <summary>
 /// Restarts the trainer with the supplied settings by running <c>RestartInternalAsync</c>
 /// through <c>SafeExecute</c>.
 /// </summary>
 /// <param name="settings">Settings passed on to the restart.</param>
 public async Task Restart(OnlineTrainerSettingsInternal settings) =>
     await this.SafeExecute(async () => await this.RestartInternalAsync(settings));
        /// <summary>
        /// Creates the learner, the event processor factory and the event processor host for the given
        /// settings, registers the factory with the host, and starts the performance counter timer.
        /// </summary>
        /// <param name="settings">Settings used to configure the trainer and the event hub host.</param>
        /// <param name="state">Optional state passed to <c>FreshStart</c>.</param>
        /// <param name="model">Optional model; when supplied the trainer performs a fresh start with it.</param>
        private async Task StartInternalAsync(OnlineTrainerSettingsInternal settings, OnlineTrainerState state = null, byte[] model = null)
        {
            this.LastStartDateTimeUtc = DateTime.UtcNow;
            this.perfCounters = new PerformanceCounters(settings.Metadata.ApplicationID);

            // trainer
            this.trainer = new Learner(settings, this.DelayedExampleCallback, this.perfCounters);

            // resume from stored state only when no fresh start is forced and no model was handed in
            var resumeFromStoredState = !settings.ForceFreshStart && model == null;
            if (resumeFromStoredState)
            {
                await this.trainer.FindAndResumeFromState();
            }
            else
            {
                this.trainer.FreshStart(state, model);
            }

            // factory
            this.trainProcessorFactory = new TrainEventProcessorFactory(settings, this.trainer, this.perfCounters);

            // host: the entity path is taken out of the connection string and passed separately as the hub name
            var connectionBuilder = new ServiceBusConnectionStringBuilder(settings.JoinedEventHubConnectionString);
            var eventHubPath = connectionBuilder.EntityPath;
            connectionBuilder.EntityPath = string.Empty;
            var connectionStringWithoutEntityPath = connectionBuilder.ToString();

            this.eventProcessorHost = new EventProcessorHost(
                settings.Metadata.ApplicationID,
                eventHubPath,
                EventHubConsumerGroup.DefaultGroupName,
                connectionStringWithoutEntityPath,
                settings.StorageConnectionString);

            var processorOptions = new EventProcessorOptions { InitialOffsetProvider = this.InitialOffsetProvider };
            await this.eventProcessorHost.RegisterEventProcessorFactoryAsync(this.trainProcessorFactory, processorOptions);

            // don't perform too often
            var updateInterval = TimeSpan.FromMilliseconds(500);
            this.perfUpdater = new SafeTimer(updateInterval, this.UpdatePerformanceCounters);

            var startupProperties = new Dictionary<string, string>
            {
                { "CheckpointPolicy", settings.CheckpointPolicy.ToString() },
                { "VowpalWabbit", settings.Metadata.TrainArguments },
                { "ExampleTracing", settings.EnableExampleTracing.ToString() }
            };

            this.telemetry.TrackTrace("OnlineTrainer started", SeverityLevel.Information, startupProperties);
        }
        /// <summary>
        /// Stops the trainer, resets the event hub start time to the current UTC time, and starts the
        /// trainer again with the supplied settings.
        /// </summary>
        /// <param name="settings">Settings passed to <c>StartInternalAsync</c>.</param>
        private async Task RestartInternalAsync(OnlineTrainerSettingsInternal settings)
        {
            this.telemetry.TrackTrace("Online Trainer restarting", SeverityLevel.Information);

            await this.StopInternalAsync();

            // taken after the stop has finished so that previous events are ignored
            var restartTimeUtc = DateTime.UtcNow;
            this.eventHubStartDateTimeUtc = restartTimeUtc;

            await this.StartInternalAsync(settings);
        }
Exemple #9
0
        /// <summary>
        /// Builds the training pipeline: deserialization feeds learning, whose output is routed to the
        /// evaluation operation (<c>TrainerResult</c>), to the checkpoint block (<c>CheckpointData</c>),
        /// or discarded.
        /// </summary>
        /// <param name="settings">Trainer settings; passed on to the evaluation operation.</param>
        /// <param name="trainer">Learner whose <c>Checkpoint</c> method backs the checkpoint block.</param>
        /// <param name="performanceCounters">Performance counters shared with the evaluation operation.</param>
        /// <exception cref="ArgumentNullException">Thrown when any argument is <c>null</c>.</exception>
        internal TrainEventProcessorFactory(OnlineTrainerSettingsInternal settings, Learner trainer, PerformanceCounters performanceCounters)
        {
            if (settings == null)
            {
                throw new ArgumentNullException(nameof(settings));
            }

            if (trainer == null)
            {
                throw new ArgumentNullException(nameof(trainer));
            }

            if (performanceCounters == null)
            {
                throw new ArgumentNullException(nameof(performanceCounters));
            }

            this.trainer             = trainer;
            this.performanceCounters = performanceCounters;

            this.telemetry = new TelemetryClient();
            this.telemetry.Context.Component.Version = GetType().Assembly.GetName().Version.ToString();

            this.evalOperation    = new EvalOperation(settings, performanceCounters);
            this.latencyOperation = new LatencyOperation();

            this.deserializeBlock = new TransformManyBlock<PipelineData, PipelineData>(
                (Func<PipelineData, IEnumerable<PipelineData>>)this.Stage1_Deserialize,
                new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = 4, // Math.Max(2, Environment.ProcessorCount - 1),
                    BoundedCapacity        = 1024
                });
            this.deserializeBlock.Completion.Trace(this.telemetry, "Stage 1 - Deserialization");

            // learning is limited to a single item at a time
            this.learnBlock = new TransformManyBlock<object, object>(
                (Func<object, IEnumerable<object>>)this.Stage2_ProcessEvent,
                new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = 1,
                    BoundedCapacity        = 1024
                });
            this.learnBlock.Completion.Trace(this.telemetry, "Stage 2 - Learning");

            // trigger checkpoint checking every second
            this.checkpointTrigger = Observable.Interval(TimeSpan.FromSeconds(1))
                .Select(_ => new CheckpointEvaluateTriggerEvent())
                .Subscribe(this.learnBlock.AsObserver());

            this.checkpointBlock = new ActionBlock<object>(
                this.trainer.Checkpoint,
                new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = 1,
                    BoundedCapacity        = 4
                });

            // trace the checkpoint block itself; the learn block is already traced as stage 2
            this.checkpointBlock.Completion.Trace(this.telemetry, "Stage 3 - CheckPointing");

            // setup pipeline
            this.deserializeBlock.LinkTo(
                this.learnBlock,
                new DataflowLinkOptions { PropagateCompletion = true });

            this.learnBlock.LinkTo(
                this.evalOperation.TargetBlock,
                new DataflowLinkOptions { PropagateCompletion = true },
                obj => obj is TrainerResult);

            this.learnBlock.LinkTo(
                this.checkpointBlock,
                new DataflowLinkOptions { PropagateCompletion = true },
                obj => obj is CheckpointData);

            // consume all unmatched
            this.learnBlock.LinkTo(DataflowBlock.NullTarget<object>());
        }
        /// <summary>
        /// Rebuilds the trainer settings from the cloud configuration, validates the connection strings,
        /// and triggers a restart of the train processor host.
        /// </summary>
        /// <remarks>
        /// Returns immediately when no metadata is available. The restart task is started but not awaited.
        /// </remarks>
        /// <exception cref="InvalidDataException">
        /// Thrown when the joined or evaluation event hub connection string cannot be parsed, when the
        /// evaluation connection string has no EntityPath, or when the storage connection string is invalid.
        /// </exception>
        public void RestartTrainProcessorHost()
        {
            if (this.metaData == null)
            {
                return;
            }

            var settings = new OnlineTrainerSettingsInternal
            {
                StorageConnectionString = CloudConfigurationManager.GetSetting("StorageConnectionString"),
                JoinedEventHubConnectionString = CloudConfigurationManager.GetSetting("JoinedEventHubConnectionString"),
                EvalEventHubConnectionString = CloudConfigurationManager.GetSetting("EvalEventHubConnectionString"),
                Metadata = this.metaData,
                CheckpointPolicy = ParseCheckpointPolicy()
            };

            // only override the default when the configured value is a valid boolean
            bool enableExampleTracing;
            if (bool.TryParse(CloudConfigurationManager.GetSetting("EnableExampleTracing"), out enableExampleTracing))
            {
                settings.EnableExampleTracing = enableExampleTracing;
            }

            // the joined connection string is only checked for being parseable
            ServiceBusConnectionStringBuilder serviceBusConnectionStringBuilder;
            try
            {
                serviceBusConnectionStringBuilder = new ServiceBusConnectionStringBuilder(settings.JoinedEventHubConnectionString);
            }
            catch (Exception e)
            {
                // keep the original exception as inner exception so the root cause is not lost
                throw new InvalidDataException(
                    $"Invalid JoinedEventHubConnectionString '{settings.JoinedEventHubConnectionString}' found in settings: {e.Message}",
                    e);
            }

            try
            {
                serviceBusConnectionStringBuilder = new ServiceBusConnectionStringBuilder(settings.EvalEventHubConnectionString);
            }
            catch (Exception e)
            {
                // the message names the actual setting key (EvalEventHubConnectionString)
                throw new InvalidDataException(
                    $"Invalid EvalEventHubConnectionString '{settings.EvalEventHubConnectionString}' found in settings: {e.Message}",
                    e);
            }

            // at this point the builder holds the evaluation connection string
            if (string.IsNullOrEmpty(serviceBusConnectionStringBuilder.EntityPath))
            {
                throw new InvalidDataException($"Invalid EvalEventHubConnectionString '{settings.EvalEventHubConnectionString}' found in settings: EntityPath missing");
            }

            CloudStorageAccount cloudStorageAccount;
            if (!CloudStorageAccount.TryParse(settings.StorageConnectionString, out cloudStorageAccount))
            {
                throw new InvalidDataException($"Invalid StorageConnectionString '{settings.StorageConnectionString}' found in settings");
            }

            // fire and forget
            var task = this.trainProcessorHost.Restart(settings);
        }
        /// <summary>
        /// Builds the training pipeline: deserialization feeds learning, whose output is routed to the
        /// evaluation operation (<c>TrainerResult</c>), to the checkpoint block (<c>CheckpointData</c>),
        /// or discarded.
        /// </summary>
        /// <param name="settings">Trainer settings; passed on to the evaluation operation.</param>
        /// <param name="trainer">Learner whose <c>Checkpoint</c> method backs the checkpoint block.</param>
        /// <param name="performanceCounters">Performance counters stored on this instance.</param>
        /// <exception cref="ArgumentNullException">Thrown when any argument is <c>null</c>.</exception>
        internal TrainEventProcessorFactory(OnlineTrainerSettingsInternal settings, Learner trainer, PerformanceCounters performanceCounters)
        {
            if (settings == null)
            {
                throw new ArgumentNullException(nameof(settings));
            }

            if (trainer == null)
            {
                throw new ArgumentNullException(nameof(trainer));
            }

            if (performanceCounters == null)
            {
                throw new ArgumentNullException(nameof(performanceCounters));
            }

            this.trainer = trainer;
            this.performanceCounters = performanceCounters;

            this.telemetry = new TelemetryClient();
            this.telemetry.Context.Component.Version = GetType().Assembly.GetName().Version.ToString();

            this.evalOperation = new EvalOperation(settings);
            this.latencyOperation = new LatencyOperation();

            this.deserializeBlock = new TransformManyBlock<PipelineData, PipelineData>(
                (Func<PipelineData, IEnumerable<PipelineData>>)this.Stage1_Deserialize,
                new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = 4, // Math.Max(2, Environment.ProcessorCount - 1),
                    BoundedCapacity = 1024
                });
            this.deserializeBlock.Completion.Trace(this.telemetry, "Stage 1 - Deserialization");

            // learning is limited to a single item at a time
            this.learnBlock = new TransformManyBlock<object, object>(
                (Func<object, IEnumerable<object>>)this.Stage2_ProcessEvent,
                new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = 1,
                    BoundedCapacity = 1024
                });
            this.learnBlock.Completion.Trace(this.telemetry, "Stage 2 - Learning");

            // trigger checkpoint checking every second
            this.checkpointTrigger = Observable.Interval(TimeSpan.FromSeconds(1))
                .Select(_ => new CheckpointEvaluateTriggerEvent())
                .Subscribe(this.learnBlock.AsObserver());

            this.checkpointBlock = new ActionBlock<object>(
                this.trainer.Checkpoint,
                new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = 1,
                    BoundedCapacity = 4
                });

            // trace the checkpoint block itself; the learn block is already traced as stage 2
            this.checkpointBlock.Completion.Trace(this.telemetry, "Stage 3 - CheckPointing");

            // setup pipeline
            this.deserializeBlock.LinkTo(
                this.learnBlock,
                new DataflowLinkOptions { PropagateCompletion = true });

            this.learnBlock.LinkTo(
                this.evalOperation.TargetBlock,
                new DataflowLinkOptions { PropagateCompletion = true },
                obj => obj is TrainerResult);

            this.learnBlock.LinkTo(
                this.checkpointBlock,
                new DataflowLinkOptions { PropagateCompletion = true },
                obj => obj is CheckpointData);

            // consume all unmatched
            this.learnBlock.LinkTo(DataflowBlock.NullTarget<object>());
        }