/// <summary>
/// Builds the evaluation stage: an Event Hub client created from the eval connection string,
/// a dataflow block that runs <c>OfflineEvaluate</c>, and a reactive subscription that hands
/// batched results to <c>UploadEvaluation</c>.
/// </summary>
/// <param name="settings">Trainer settings; only <c>EvalEventHubConnectionString</c> is read here.</param>
internal EvalOperation(OnlineTrainerSettingsInternal settings)
{
    // Limit handed to Buffer together with a UTF-8 byte-count selector
    // (presumably the maximum batch payload in bytes - confirm against Buffer's definition).
    const int batchSizeLimit = 245 * 1024;

    this.telemetry = new TelemetryClient();
    this.evalEventHubClient = EventHubClient.CreateFromConnectionString(settings.EvalEventHubConnectionString);

    Func<object, IEnumerable<EvalData>> evaluate = this.OfflineEvaluate;
    var blockOptions = new ExecutionDataflowBlockOptions
    {
        MaxDegreeOfParallelism = 4,
        BoundedCapacity = 1024
    };
    this.evalBlock = new TransformManyBlock<object, EvalData>(evaluate, blockOptions);

    // Report how the block finished, including the exception when it faulted.
    this.evalBlock.Completion.ContinueWith(completion =>
    {
        this.telemetry.TrackTrace($"Stage 3 - Evaluation pipeline completed: {completion.Status}");

        if (completion.IsFaulted)
        {
            this.telemetry.TrackException(completion.Exception);
        }
    });

    // Per policy name: open 5 second windows, buffer each window and upload the resulting batches.
    // The original author's intent is to match Event Hub throughput with at most 5 seconds latency.
    var uploadsPerPolicy = this.evalBlock
        .AsObservable()
        .GroupBy(data => data.PolicyName)
        .Select(policyGroup => policyGroup
            .Window(TimeSpan.FromSeconds(5))
            .Select(window => window.Buffer(batchSizeLimit, data => Encoding.UTF8.GetByteCount(data.JSON)))
            .SelectMany(batches => batches)
            .Subscribe(this.UploadEvaluation));

    this.evalBlockDisposable = uploadsPerPolicy.Publish().Connect();
}
/// <summary>
/// Builds the evaluation stage: stores the performance counters, creates the Event Hub client
/// from the eval connection string, sets up the dataflow block that runs <c>OfflineEvaluate</c>,
/// and subscribes <c>UploadEvaluation</c> to the batched output.
/// </summary>
/// <param name="settings">Trainer settings; only <c>EvalEventHubConnectionString</c> is read here.</param>
/// <param name="performanceCounters">Performance counters kept on this instance.</param>
internal EvalOperation(OnlineTrainerSettingsInternal settings, PerformanceCounters performanceCounters)
{
    // Limit handed to Buffer together with a UTF-8 byte-count selector
    // (presumably the maximum batch payload in bytes - confirm against Buffer's definition).
    const int batchSizeLimit = 245 * 1024;

    this.performanceCounters = performanceCounters;
    this.telemetry = new TelemetryClient();
    this.evalEventHubClient = EventHubClient.CreateFromConnectionString(settings.EvalEventHubConnectionString);

    Func<object, IEnumerable<EvalData>> evaluate = this.OfflineEvaluate;
    var evalBlockOptions = new ExecutionDataflowBlockOptions
    {
        MaxDegreeOfParallelism = 4,
        BoundedCapacity = 1024
    };
    this.evalBlock = new TransformManyBlock<object, EvalData>(evaluate, evalBlockOptions);
    this.evalBlock.Completion.Trace(this.telemetry, "Stage 4 - Evaluation pipeline");

    // Group by partition key, open 1 second windows per group, buffer each window and upload
    // the resulting batches. The original author's intent is to match Event Hub throughput
    // with at most 1 second latency.
    var evaluations = this.evalBlock.AsObservable();
    var byPartition = evaluations.GroupBy(data => data.PartitionKey);
    var uploadsPerPartition = byPartition.Select(partition => partition
        .Window(TimeSpan.FromSeconds(1))
        .Select(window => window.Buffer(batchSizeLimit, data => Encoding.UTF8.GetByteCount(data.JSON)))
        .SelectMany(batches => batches)
        .Subscribe(this.UploadEvaluation));

    this.evalBlockDisposable = uploadsPerPartition.Publish().Connect();
}
/// <summary>
/// Sets up evaluation: telemetry, the Event Hub client for evaluation output, the dataflow block
/// executing <c>OfflineEvaluate</c>, and the subscription that passes batches to <c>UploadEvaluation</c>.
/// </summary>
/// <param name="settings">Trainer settings; only <c>EvalEventHubConnectionString</c> is read here.</param>
internal EvalOperation(OnlineTrainerSettingsInternal settings)
{
    this.telemetry = new TelemetryClient();

    // evaluation pipeline
    this.evalEventHubClient = EventHubClient.CreateFromConnectionString(settings.EvalEventHubConnectionString);

    var dataflowOptions = new ExecutionDataflowBlockOptions
    {
        MaxDegreeOfParallelism = 4,
        BoundedCapacity = 1024
    };
    Func<object, IEnumerable<EvalData>> offlineEvaluate = this.OfflineEvaluate;
    this.evalBlock = new TransformManyBlock<object, EvalData>(offlineEvaluate, dataflowOptions);

    // Trace the final status of the block; a fault is additionally tracked as an exception.
    this.evalBlock.Completion.ContinueWith(finished =>
    {
        this.telemetry.TrackTrace($"Stage 3 - Evaluation pipeline completed: {finished.Status}");

        if (finished.IsFaulted)
        {
            this.telemetry.TrackException(finished.Exception);
        }
    });

    // Batch the output per policy name using 5 second windows; the second Buffer argument
    // measures each item by the UTF-8 byte count of its JSON.
    var windowLength = TimeSpan.FromSeconds(5);
    var groupedByPolicy = this.evalBlock.AsObservable().GroupBy(item => item.PolicyName);
    var subscriptions = groupedByPolicy.Select(policy => policy
        .Window(windowLength)
        .Select(slice => slice.Buffer(245 * 1024, item => Encoding.UTF8.GetByteCount(item.JSON)))
        .SelectMany(batch => batch)
        .Subscribe(this.UploadEvaluation));

    this.evalBlockDisposable = subscriptions.Publish().Connect();
}
/// <summary>
/// Rebuilds the trainer settings from cloud configuration, validates the connection strings
/// and asks the train processor host to restart with them.
/// </summary>
/// <remarks>
/// Returns without doing anything when <c>metaData</c> is null. The restart itself is not awaited.
/// </remarks>
/// <exception cref="InvalidDataException">
/// A connection string cannot be parsed, an Event Hub connection string has no EntityPath,
/// or the storage connection string is invalid.
/// </exception>
public void RestartTrainProcessorHost()
{
    if (this.metaData == null)
    {
        return;
    }

    var settings = new OnlineTrainerSettingsInternal
    {
        StorageConnectionString = CloudConfigurationManager.GetSetting("StorageConnectionString"),
        JoinedEventHubConnectionString = CloudConfigurationManager.GetSetting("JoinedEventHubConnectionString"),
        EvalEventHubConnectionString = CloudConfigurationManager.GetSetting("EvalEventHubConnectionString"),
        Metadata = this.metaData,
        CheckpointPolicy = ParseCheckpointPolicy(),
        // make sure we ignore previous events
        EventHubStartDateTimeUtc = DateTime.UtcNow
    };

    bool enableExampleTracing;
    if (bool.TryParse(CloudConfigurationManager.GetSetting("EnableExampleTracing"), out enableExampleTracing))
    {
        settings.EnableExampleTracing = enableExampleTracing;
    }

    // Validate everything up front: a restart stops the running trainer before starting the
    // new one, so bad settings should be rejected before the restart is requested.
    ServiceBusConnectionStringBuilder joinedConnectionStringBuilder;
    try
    {
        joinedConnectionStringBuilder = new ServiceBusConnectionStringBuilder(settings.JoinedEventHubConnectionString);
    }
    catch (Exception e)
    {
        // keep the original exception as inner exception so the root cause is not lost
        throw new InvalidDataException($"Invalid JoinedEventHubConnectionString '{settings.JoinedEventHubConnectionString}' found in settings: {e.Message}", e);
    }

    // the processor host takes the joined event hub name from EntityPath
    if (string.IsNullOrEmpty(joinedConnectionStringBuilder.EntityPath))
    {
        throw new InvalidDataException($"Invalid JoinedEventHubConnectionString '{settings.JoinedEventHubConnectionString}' found in settings: EntityPath missing");
    }

    ServiceBusConnectionStringBuilder evalConnectionStringBuilder;
    try
    {
        evalConnectionStringBuilder = new ServiceBusConnectionStringBuilder(settings.EvalEventHubConnectionString);
    }
    catch (Exception e)
    {
        throw new InvalidDataException($"Invalid EvalEventHubConnectionString '{settings.EvalEventHubConnectionString}' found in settings: {e.Message}", e);
    }

    if (string.IsNullOrEmpty(evalConnectionStringBuilder.EntityPath))
    {
        throw new InvalidDataException($"Invalid EvalEventHubConnectionString '{settings.EvalEventHubConnectionString}' found in settings: EntityPath missing");
    }

    CloudStorageAccount cloudStorageAccount;
    if (!CloudStorageAccount.TryParse(settings.StorageConnectionString, out cloudStorageAccount))
    {
        throw new InvalidDataException($"Invalid StorageConnectionString '{settings.StorageConnectionString}' found in settings");
    }

    // fire and forget
    var task = this.trainProcessorHost.Restart(settings);
}
//private VowpalWabbitThreadedLearning vwAllReduce;

/// <summary>
/// Creates a learner: stores the supplied collaborators, creates a telemetry client and an empty
/// trackback list, and opens a blob client for the configured storage account.
/// </summary>
/// <param name="settings">Trainer settings; <c>StorageConnectionString</c> is parsed here.</param>
/// <param name="delayedExampleCallback">Callback stored for later use with delayed examples.</param>
/// <param name="perfCounters">Performance counters stored on this instance.</param>
internal Learner(OnlineTrainerSettingsInternal settings, Action<VowpalWabbitJsonSerializer> delayedExampleCallback, PerformanceCounters perfCounters)
{
    // collaborators handed in by the caller
    this.settings = settings;
    this.delayedExampleCallback = delayedExampleCallback;
    this.perfCounters = perfCounters;

    // state owned by this instance
    this.telemetry = new TelemetryClient();
    this.trackbackList = new List<string>();

    // CloudStorageAccount.Parse throws on a malformed connection string
    var storageAccount = CloudStorageAccount.Parse(settings.StorageConnectionString);
    this.blobClient = storageAccount.CreateCloudBlobClient();
}
/// <summary>
/// Restarts the trainer with the given settings, running the restart through <c>SafeExecute</c>.
/// </summary>
/// <param name="settings">Settings passed on to <c>RestartInternalAsync</c>.</param>
/// <returns>A task that completes when <c>SafeExecute</c> has finished.</returns>
public async Task Restart(OnlineTrainerSettingsInternal settings)
{
    await this.SafeExecute(async () =>
    {
        await this.RestartInternalAsync(settings);
    });
}
/// <summary>
/// Starts the online trainer: creates performance counters, the learner, the event processor
/// factory and the event processor host, then starts the performance counter timer and
/// traces the start-up.
/// </summary>
/// <param name="settings">Trainer settings used for every component created here.</param>
/// <param name="state">Optional trainer state handed to <c>FreshStart</c>.</param>
/// <param name="model">Optional model; when supplied the learner is started fresh with it.</param>
/// <returns>A task that completes once the host is registered and the start-up trace is sent.</returns>
private async Task StartInternalAsync(OnlineTrainerSettingsInternal settings, OnlineTrainerState state = null, byte[] model = null)
{
    this.LastStartDateTimeUtc = DateTime.UtcNow;
    this.perfCounters = new PerformanceCounters(settings.Metadata.ApplicationID);

    // setup trainer
    this.trainer = new Learner(settings, this.DelayedExampleCallback, this.perfCounters);

    // Resume only when no fresh start is forced and no model was supplied.
    var resumeFromState = !settings.ForceFreshStart && model == null;
    if (resumeFromState)
    {
        await this.trainer.FindAndResumeFromState();
    }
    else
    {
        this.trainer.FreshStart(state, model);
    }

    // setup factory
    this.trainProcessorFactory = new TrainEventProcessorFactory(settings, this.trainer, this.perfCounters);

    // setup host: the event hub name is taken from EntityPath, which is then blanked so the
    // remaining connection string is passed without it
    var connectionStringBuilder = new ServiceBusConnectionStringBuilder(settings.JoinedEventHubConnectionString);
    var eventHubName = connectionStringBuilder.EntityPath;
    connectionStringBuilder.EntityPath = string.Empty;
    var connectionStringWithoutEntityPath = connectionStringBuilder.ToString();

    this.eventProcessorHost = new EventProcessorHost(
        settings.Metadata.ApplicationID,
        eventHubName,
        EventHubConsumerGroup.DefaultGroupName,
        connectionStringWithoutEntityPath,
        settings.StorageConnectionString);

    var processorOptions = new EventProcessorOptions
    {
        InitialOffsetProvider = this.InitialOffsetProvider
    };
    await this.eventProcessorHost.RegisterEventProcessorFactoryAsync(this.trainProcessorFactory, processorOptions);

    // don't perform too often
    var updateInterval = TimeSpan.FromMilliseconds(500);
    this.perfUpdater = new SafeTimer(updateInterval, this.UpdatePerformanceCounters);

    var startupProperties = new Dictionary<string, string>
    {
        { "CheckpointPolicy", settings.CheckpointPolicy.ToString() },
        { "VowpalWabbit", settings.Metadata.TrainArguments },
        { "ExampleTracing", settings.EnableExampleTracing.ToString() }
    };
    this.telemetry.TrackTrace("OnlineTrainer started", SeverityLevel.Information, startupProperties);
}
/// <summary>
/// Stops the trainer, records the restart time and starts the trainer again with the given settings.
/// </summary>
/// <param name="settings">Settings passed to <c>StartInternalAsync</c>.</param>
/// <returns>A task that completes when the trainer has been started again.</returns>
private async Task RestartInternalAsync(OnlineTrainerSettingsInternal settings)
{
    this.telemetry.TrackTrace("Online Trainer restarting", SeverityLevel.Information);

    // shut the current instance down before anything else
    await this.StopInternalAsync();

    // Remember when the restart happened; per the original author this is what makes
    // sure previous events are ignored.
    var restartTimeUtc = DateTime.UtcNow;
    this.eventHubStartDateTimeUtc = restartTimeUtc;

    await this.StartInternalAsync(settings);
}
/// <summary>
/// Wires up the training pipeline: deserialization, learning, checkpointing and evaluation.
/// </summary>
/// <param name="settings">Trainer settings; forwarded to the evaluation operation.</param>
/// <param name="trainer">Learner whose <c>Checkpoint</c> method backs the checkpoint block.</param>
/// <param name="performanceCounters">Performance counters; stored and forwarded to the evaluation operation.</param>
/// <exception cref="ArgumentNullException">Any of the arguments is null.</exception>
internal TrainEventProcessorFactory(OnlineTrainerSettingsInternal settings, Learner trainer, PerformanceCounters performanceCounters)
{
    if (settings == null)
    {
        throw new ArgumentNullException(nameof(settings));
    }

    if (trainer == null)
    {
        throw new ArgumentNullException(nameof(trainer));
    }

    if (performanceCounters == null)
    {
        throw new ArgumentNullException(nameof(performanceCounters));
    }

    this.trainer = trainer;
    this.performanceCounters = performanceCounters;

    this.telemetry = new TelemetryClient();
    this.telemetry.Context.Component.Version = GetType().Assembly.GetName().Version.ToString();

    this.evalOperation = new EvalOperation(settings, performanceCounters);
    this.latencyOperation = new LatencyOperation();

    // Stage 1: deserialization, up to 4 items in parallel
    this.deserializeBlock = new TransformManyBlock<PipelineData, PipelineData>(
        (Func<PipelineData, IEnumerable<PipelineData>>)this.Stage1_Deserialize,
        new ExecutionDataflowBlockOptions
        {
            MaxDegreeOfParallelism = 4, // Math.Max(2, Environment.ProcessorCount - 1),
            BoundedCapacity = 1024
        });
    this.deserializeBlock.Completion.Trace(this.telemetry, "Stage 1 - Deserialization");

    // Stage 2: learning, one item at a time
    this.learnBlock = new TransformManyBlock<object, object>(
        (Func<object, IEnumerable<object>>)this.Stage2_ProcessEvent,
        new ExecutionDataflowBlockOptions
        {
            MaxDegreeOfParallelism = 1,
            BoundedCapacity = 1024
        });
    this.learnBlock.Completion.Trace(this.telemetry, "Stage 2 - Learning");

    // trigger checkpoint checking every second
    this.checkpointTrigger = Observable.Interval(TimeSpan.FromSeconds(1))
        .Select(_ => new CheckpointEvaluateTriggerEvent())
        .Subscribe(this.learnBlock.AsObserver());

    // Stage 3: checkpointing, one item at a time
    this.checkpointBlock = new ActionBlock<object>(
        this.trainer.Checkpoint,
        new ExecutionDataflowBlockOptions
        {
            MaxDegreeOfParallelism = 1,
            BoundedCapacity = 4
        });

    // Trace the checkpoint block itself; the learn block is already traced as stage 2.
    this.checkpointBlock.Completion.Trace(this.telemetry, "Stage 3 - CheckPointing");

    // setup pipeline
    this.deserializeBlock.LinkTo(
        this.learnBlock,
        new DataflowLinkOptions { PropagateCompletion = true });

    // trainer results go to evaluation
    this.learnBlock.LinkTo(
        this.evalOperation.TargetBlock,
        new DataflowLinkOptions { PropagateCompletion = true },
        obj => obj is TrainerResult);

    // checkpoint data goes to the checkpoint block
    this.learnBlock.LinkTo(
        this.checkpointBlock,
        new DataflowLinkOptions { PropagateCompletion = true },
        obj => obj is CheckpointData);

    // consume all unmatched
    this.learnBlock.LinkTo(DataflowBlock.NullTarget<object>());
}
/// <summary>
/// Rebuilds the trainer settings from cloud configuration, validates the connection strings
/// and asks the train processor host to restart with them.
/// </summary>
/// <remarks>
/// Returns without doing anything when <c>metaData</c> is null. The restart itself is not awaited.
/// </remarks>
/// <exception cref="InvalidDataException">
/// A connection string cannot be parsed, an Event Hub connection string has no EntityPath,
/// or the storage connection string is invalid.
/// </exception>
public void RestartTrainProcessorHost()
{
    if (this.metaData == null)
    {
        return;
    }

    var settings = new OnlineTrainerSettingsInternal
    {
        StorageConnectionString = CloudConfigurationManager.GetSetting("StorageConnectionString"),
        JoinedEventHubConnectionString = CloudConfigurationManager.GetSetting("JoinedEventHubConnectionString"),
        EvalEventHubConnectionString = CloudConfigurationManager.GetSetting("EvalEventHubConnectionString"),
        Metadata = this.metaData,
        CheckpointPolicy = ParseCheckpointPolicy()
    };

    bool enableExampleTracing;
    if (bool.TryParse(CloudConfigurationManager.GetSetting("EnableExampleTracing"), out enableExampleTracing))
    {
        settings.EnableExampleTracing = enableExampleTracing;
    }

    // Reject bad settings before the restart is requested, so the failure surfaces to the
    // caller instead of inside the un-awaited restart task.
    ServiceBusConnectionStringBuilder joinedConnectionStringBuilder;
    try
    {
        joinedConnectionStringBuilder = new ServiceBusConnectionStringBuilder(settings.JoinedEventHubConnectionString);
    }
    catch (Exception e)
    {
        // keep the original exception as inner exception so the root cause is not lost
        throw new InvalidDataException($"Invalid JoinedEventHubConnectionString '{settings.JoinedEventHubConnectionString}' found in settings: {e.Message}", e);
    }

    // the joined connection string must name its event hub, just like the eval one below
    if (string.IsNullOrEmpty(joinedConnectionStringBuilder.EntityPath))
    {
        throw new InvalidDataException($"Invalid JoinedEventHubConnectionString '{settings.JoinedEventHubConnectionString}' found in settings: EntityPath missing");
    }

    ServiceBusConnectionStringBuilder evalConnectionStringBuilder;
    try
    {
        evalConnectionStringBuilder = new ServiceBusConnectionStringBuilder(settings.EvalEventHubConnectionString);
    }
    catch (Exception e)
    {
        throw new InvalidDataException($"Invalid EvalEventHubConnectionString '{settings.EvalEventHubConnectionString}' found in settings: {e.Message}", e);
    }

    if (string.IsNullOrEmpty(evalConnectionStringBuilder.EntityPath))
    {
        throw new InvalidDataException($"Invalid EvalEventHubConnectionString '{settings.EvalEventHubConnectionString}' found in settings: EntityPath missing");
    }

    CloudStorageAccount cloudStorageAccount;
    if (!CloudStorageAccount.TryParse(settings.StorageConnectionString, out cloudStorageAccount))
    {
        throw new InvalidDataException($"Invalid StorageConnectionString '{settings.StorageConnectionString}' found in settings");
    }

    // fire and forget
    var task = this.trainProcessorHost.Restart(settings);
}
/// <summary>
/// Wires up the training pipeline: deserialization, learning, checkpointing and evaluation.
/// </summary>
/// <param name="settings">Trainer settings; forwarded to the evaluation operation.</param>
/// <param name="trainer">Learner whose <c>Checkpoint</c> method backs the checkpoint block.</param>
/// <param name="performanceCounters">Performance counters stored on this instance.</param>
/// <exception cref="ArgumentNullException">Any of the arguments is null.</exception>
internal TrainEventProcessorFactory(OnlineTrainerSettingsInternal settings, Learner trainer, PerformanceCounters performanceCounters)
{
    if (settings == null)
    {
        throw new ArgumentNullException(nameof(settings));
    }

    if (trainer == null)
    {
        throw new ArgumentNullException(nameof(trainer));
    }

    if (performanceCounters == null)
    {
        throw new ArgumentNullException(nameof(performanceCounters));
    }

    this.trainer = trainer;
    this.performanceCounters = performanceCounters;

    this.telemetry = new TelemetryClient();
    this.telemetry.Context.Component.Version = GetType().Assembly.GetName().Version.ToString();

    this.evalOperation = new EvalOperation(settings);
    this.latencyOperation = new LatencyOperation();

    // Stage 1: deserialization, up to 4 items in parallel
    this.deserializeBlock = new TransformManyBlock<PipelineData, PipelineData>(
        (Func<PipelineData, IEnumerable<PipelineData>>)this.Stage1_Deserialize,
        new ExecutionDataflowBlockOptions
        {
            MaxDegreeOfParallelism = 4, // Math.Max(2, Environment.ProcessorCount - 1),
            BoundedCapacity = 1024
        });
    this.deserializeBlock.Completion.Trace(this.telemetry, "Stage 1 - Deserialization");

    // Stage 2: learning, one item at a time
    this.learnBlock = new TransformManyBlock<object, object>(
        (Func<object, IEnumerable<object>>)this.Stage2_ProcessEvent,
        new ExecutionDataflowBlockOptions
        {
            MaxDegreeOfParallelism = 1,
            BoundedCapacity = 1024
        });
    this.learnBlock.Completion.Trace(this.telemetry, "Stage 2 - Learning");

    // trigger checkpoint checking every second
    this.checkpointTrigger = Observable.Interval(TimeSpan.FromSeconds(1))
        .Select(_ => new CheckpointEvaluateTriggerEvent())
        .Subscribe(this.learnBlock.AsObserver());

    // Stage 3: checkpointing, one item at a time
    this.checkpointBlock = new ActionBlock<object>(
        this.trainer.Checkpoint,
        new ExecutionDataflowBlockOptions
        {
            MaxDegreeOfParallelism = 1,
            BoundedCapacity = 4
        });

    // Trace the checkpoint block itself; the learn block is already traced as stage 2.
    this.checkpointBlock.Completion.Trace(this.telemetry, "Stage 3 - CheckPointing");

    // setup pipeline
    this.deserializeBlock.LinkTo(
        this.learnBlock,
        new DataflowLinkOptions { PropagateCompletion = true });

    // trainer results go to evaluation
    this.learnBlock.LinkTo(
        this.evalOperation.TargetBlock,
        new DataflowLinkOptions { PropagateCompletion = true },
        obj => obj is TrainerResult);

    // checkpoint data goes to the checkpoint block
    this.learnBlock.LinkTo(
        this.checkpointBlock,
        new DataflowLinkOptions { PropagateCompletion = true },
        obj => obj is CheckpointData);

    // consume all unmatched
    this.learnBlock.LinkTo(DataflowBlock.NullTarget<object>());
}