/// <summary>
/// Reads message ids (one numeric id per line) from <c>config.Ids</c>, downloads each
/// message and writes Id/Date/Author/Message rows to the CSV file at <c>config.Out</c>
/// (overwritten, UTF-8 without BOM).
/// NOTE(review): the body is fully synchronous and blocks via Wait(); the returned task
/// is always already completed. The cancellation token is not observed — confirm whether
/// cancellation should abort the download.
/// </summary>
protected override Task Execute(CancellationToken token)
{
    log.LogInformation("Downloading message...");
    // One id per line; long.Parse throws FormatException on malformed input.
    var downloadMessages = File.ReadLines(config.Ids).Select(long.Parse).ToArray();
    log.LogInformation("Total messages to download: {0}", downloadMessages.Length);
    var cred = auth.Authenticate();
    var extractor = new MessageCleanup();
    var monitor = new PerformanceMonitor(downloadMessages.Length);
    // append:false => overwrite any existing output; UTF8Encoding(false) => no BOM.
    using (var streamWriter = new StreamWriter(config.Out, false, new UTF8Encoding(false)))
    using (var csvDataTarget = new CsvWriter(streamWriter))
    {
        // CSV header row.
        csvDataTarget.WriteField("Id");
        csvDataTarget.WriteField("Date");
        csvDataTarget.WriteField("Author");
        csvDataTarget.WriteField("Message");
        csvDataTarget.NextRecord();
        Auth.ExecuteOperationWithCredentials(
            cred,
            () =>
            {
                // Log throughput every 30 seconds while the download runs.
                using (Observable.Interval(TimeSpan.FromSeconds(30)).Subscribe(item => log.LogInformation(monitor.ToString())))
                {
                    downloader.Download(downloadMessages)
                        .ToObservable()
                        .Select(
                            item =>
                            {
                                try
                                {
                                    // Field order must match the header written above.
                                    csvDataTarget.WriteField(item.Id);
                                    csvDataTarget.WriteField(item.CreatedAt);
                                    csvDataTarget.WriteField(item.CreatedBy.Id);
                                    var text = item.Text;
                                    if (config.Clean)
                                    {
                                        text = extractor.Cleanup(text);
                                    }

                                    csvDataTarget.WriteField(text);
                                    csvDataTarget.NextRecord();
                                    monitor.Increment();
                                }
                                catch (Exception e)
                                {
                                    // Best effort: one bad message must not abort the batch.
                                    log.LogError(e, "Error");
                                }

                                return(item);
                            })
                        .LastOrDefaultAsync()
                        .Wait(); // Block until the whole sequence completes.
                }
            });
    }

    return(Task.CompletedTask);
}
/// <summary>
/// Streams the contents of <paramref name="files"/> through a TPL Dataflow pipeline:
/// raw string chunks are buffered, deserialized in parallel into <c>TweetDTO</c> chunks
/// and finally handed to <c>Deserialized</c>. Per-file failures are logged and skipped;
/// the method returns once the whole pipeline has drained.
/// </summary>
private async Task Process(string[] files)
{
    var monitor = new PerformanceMonitor(files.Length);

    // Report throughput every 30 seconds while the pipeline is busy.
    using (Observable.Interval(TimeSpan.FromSeconds(30)).Subscribe(item => log.LogInformation(monitor.ToString())))
    {
        var buffer = new BufferBlock<ProcessingChunk<string>>(
            new DataflowBlockOptions { BoundedCapacity = 1000000 });

        var parser = new TransformBlock<ProcessingChunk<string>, ProcessingChunk<TweetDTO>>(
            json => new ProcessingChunk<TweetDTO>(
                json.FileName,
                json.ChunkId,
                json.TotalChunks,
                jsonConvert.DeserializeObject<TweetDTO>(json.Data)),
            new ExecutionDataflowBlockOptions
            {
                BoundedCapacity = 2,
                MaxDegreeOfParallelism = Environment.ProcessorCount
            });

        var sink = new ActionBlock<ProcessingChunk<TweetDTO>>(
            Deserialized,
            new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = Environment.ProcessorCount });

        // Completion (and faults) flow downstream automatically.
        var propagate = new DataflowLinkOptions { PropagateCompletion = true };
        buffer.LinkTo(parser, propagate);
        parser.LinkTo(sink, propagate);

        foreach (var file in files)
        {
            try
            {
                var data = fileLoader.Load(file);
                for (var chunk = 0; chunk < data.Length; chunk++)
                {
                    await buffer.SendAsync(new ProcessingChunk<string>(file, chunk, data.Length, data[chunk])).ConfigureAwait(false);
                }

                monitor.Increment();
            }
            catch (Exception ex)
            {
                // A single unreadable file must not stop the batch.
                log.LogError(ex, "Failed");
            }
        }

        buffer.Complete();
        await Task.WhenAll(buffer.Completion, sink.Completion).ConfigureAwait(false);
    }
}
/// <summary>
/// Monitors twitter for the configured word lists and appends matching messages to the
/// CSV at <c>config.Out</c> as Id/Topic/Type/Text rows (UTF-8 without BOM, append mode).
/// NOTE(review): the body blocks via Wait() and the returned task is always already
/// completed; the cancellation token is not observed — confirm that is intended.
/// </summary>
protected override Task Execute(CancellationToken token)
{
    log.LogInformation("Starting twitter monitoring...");
    SetupWords();
    // Let the twitter client transparently wait whenever the API rate limit is hit.
    RateLimit.RateLimitTrackerMode = RateLimitTrackerMode.TrackAndAwait;
    var cleanup = new MessageCleanup();
    // 100000 appears to be only a progress-reporting target, not a hard message cap — TODO confirm.
    var monitor = new PerformanceMonitor(100000);
    var cred = auth.Authenticate();
    // Report throughput every 30 seconds; append:true => keep any existing output rows.
    using (Observable.Interval(TimeSpan.FromSeconds(30)).Subscribe(item => log.LogInformation(monitor.ToString())))
    using (var streamWriter = new StreamWriter(config.Out, true, new UTF8Encoding(false)))
    using (var csvDataTarget = new CsvWriter(streamWriter))
    {
        Auth.ExecuteOperationWithCredentials(
            cred,
            () =>
            {
                var enrichments = Enrichment().ToArray();
                foreach (var enrichment in enrichments)
                {
                    enrichment.Discovery.BatchSize = 5;
                    // Share the processed set across all discovery instances so the same
                    // message is not handled twice by different enrichments.
                    enrichment.Discovery.AddProcessed(enrichments.SelectMany(p => p.Discovery.Processed).ToArray());
                    enrichment.Discovery.Process()
                        .ToObservable()
                        .ObserveOn(TaskPoolScheduler.Default)
                        .Select(
                            x =>
                            {
                                var text = cleanup.Cleanup(x.Message.Text).Replace("\r\n", " ");
                                if (!CanInclude(text, enrichment.Type))
                                {
                                    return(x);
                                }

                                // Strip all non-ASCII characters before writing.
                                text = Regex.Replace(text, @"[^\u0000-\u007F]+", string.Empty);
                                // NOTE(review): ObserveOn moves these writes onto pool threads
                                // while csvDataTarget/streamWriter are shared across iterations —
                                // verify the writer is only touched by one thread at a time.
                                csvDataTarget.WriteField(x.Message.Id);
                                csvDataTarget.WriteField(x.Topic);
                                csvDataTarget.WriteField(enrichment.Type);
                                csvDataTarget.WriteField(text);
                                csvDataTarget.NextRecord();
                                // Flush per record so output survives an abrupt shutdown.
                                streamWriter.Flush();
                                monitor.Increment();
                                return(x);
                            })
                        .Wait(); // Block until this enrichment's stream completes.
                }
            });
    }

    return(Task.CompletedTask);
}
/// <summary>
/// Verifies the monitor's progress string: the initial 0/9 state, a mixed sequence of
/// manual counts and increments (6/9), and that manual counting can grow the total
/// past its initial value (16/16).
/// </summary>
public void Construct()
{
    // Fresh instance: nothing processed yet out of the expected 9.
    Assert.AreEqual("Processed: 0/9 Operations per second: 0", instance.ToString());

    var round = 0;
    while (round < 5)
    {
        instance.ManualyCount();
        instance.Increment();
        round++;
    }

    instance.Increment();
    // Only compare the prefix — the rate portion of the string is timing-dependent.
    Assert.AreEqual("Processed: 6/9 Opera", instance.ToString().Substring(0, 20));

    var counted = 0;
    while (counted < 10)
    {
        instance.ManualyCount();
        counted++;
    }

    for (var done = 0; done < 10; done++)
    {
        instance.Increment();
    }

    Assert.AreEqual("Processed: 16/16 Ope", instance.ToString().Substring(0, 20));
}
/// <summary>
/// Runs sentiment testing over the documents in <paramref name="message"/> and streams
/// batched results back to <paramref name="target"/>, finishing with a
/// <see cref="CompletedMessage"/>. The lexicon is built from the request's custom
/// dictionary and/or the named domain lexicon; an optional user model directory is
/// resolved through storage.
/// </summary>
/// <param name="target">Connection to write result and completion messages to.</param>
/// <param name="message">Incoming request; must carry at most 500 documents.</param>
/// <param name="token">Cancellation token flowed into the outgoing writes.</param>
/// <exception cref="ArgumentNullException">When <paramref name="message"/> is null.</exception>
/// <exception cref="Exception">When there are no documents or more than 500.</exception>
/// <exception cref="ApplicationException">When the requested model directory is missing.</exception>
public async Task Process(IConnectionContext target, SentimentMessage message, CancellationToken token)
{
    if (message == null)
    {
        throw new ArgumentNullException(nameof(message));
    }

    var request = message.Request;
    if (request?.Documents == null)
    {
        throw new Exception("Nothing to process");
    }

    if (request.Documents.Length > 500)
    {
        throw new Exception("Too many documents. Maximum is 500");
    }

    var completed = new CompletedMessage();
    try
    {
        var monitor = new PerformanceMonitor(request.Documents.Length);
        // Report progress every 10 seconds while documents are being processed.
        using (Observable.Interval(TimeSpan.FromSeconds(10))
                         .Subscribe(item => logger.LogInformation(monitor.ToString())))
        {
            // Request-supplied dictionary takes priority; a domain lexicon can be merged
            // underneath it when AdjustDomain is set (or used alone when no dictionary).
            ISentimentDataHolder lexicon = default;
            if (request.Dictionary != null && request.Dictionary.Count > 0)
            {
                logger.LogInformation("Creating custom dictionary with {0} words", request.Dictionary.Count);
                lexicon = SentimentDataHolder.Load(request.Dictionary.Select(item =>
                    new WordSentimentValueData(item.Key, new SentimentValueData(item.Value))));
            }

            if ((lexicon == null || request.AdjustDomain) && !string.IsNullOrEmpty(request.Domain))
            {
                logger.LogInformation("Using Domain dictionary [{0}]", request.Domain);
                var previous = lexicon;
                lexicon = lexiconLoader.GetLexicon(request.Domain);
                if (previous != null)
                {
                    // Merge so the request-specific words override domain defaults.
                    lexicon.Merge(previous);
                }
            }

            string modelLocation = null;
            if (!string.IsNullOrEmpty(request.Model))
            {
                logger.LogInformation("Using model path: {0}", request.Model);
                modelLocation = storage.GetLocation(target.Connection.User, request.Model, ServiceConstants.Model);
                if (!Directory.Exists(modelLocation))
                {
                    throw new ApplicationException($"Can't find model {request.Model}");
                }
            }

            using (var scope = provider.CreateScope())
            {
                var container = scope.ServiceProvider.GetService<ISessionContainer>();
                container.Context.NGram = 3;
                container.Context.ExtractAttributes = request.Emotions;
                var client = container.GetTesting(modelLocation);
                var converter = scope.ServiceProvider.GetService<IDocumentConverter>();
                client.Init();
                client.Pipeline.ResetMonitor();
                if (lexicon != null)
                {
                    client.Lexicon = lexicon;
                }

                // Stream results back in batches of up to 10 documents or every 5 seconds,
                // whichever comes first.
                await client.Process(request.Documents.Select(item => converter.Convert(item, request.CleanText))
                                                      .ToObservable())
                    .Select(item =>
                    {
                        monitor.Increment();
                        return item;
                    })
                    .Buffer(TimeSpan.FromSeconds(5), 10, scheduler)
                    .Select(async item =>
                    {
                        var result = new ResultMessage<Document> { Data = item.Select(x => x.Processed).ToArray() };
                        await target.Write(result, token).ConfigureAwait(false);
                        return Unit.Default;
                    })
                    .Merge();
            }

            logger.LogInformation("Completed with final performance: {0}", monitor);
            completed.Message = "Testing Completed";
            await target.Write(completed, token).ConfigureAwait(false);
        }
    }
    catch (Exception e)
    {
        // BUG FIX: IsError must be set BEFORE the message is written; previously it was
        // assigned after the Write call, so the client always received a completion
        // message with IsError == false even on failure.
        completed.IsError = true;
        completed.Message = e.Message;
        await target.Write(completed, token).ConfigureAwait(false);
        throw;
    }
}