Ejemplo n.º 1
0
        /// <summary>
        /// Downloads the messages whose ids are listed (one per line) in the file at <c>config.Ids</c>
        /// and writes them as CSV rows (Id, Date, Author, Message) to the file at <c>config.Out</c>.
        /// </summary>
        /// <param name="token">Cancellation token; it is not observed by this implementation.</param>
        /// <returns>An already completed task; all work is done synchronously before returning.</returns>
        protected override Task Execute(CancellationToken token)
        {
            log.LogInformation("Downloading message...");
            var downloadMessages = File.ReadLines(config.Ids).Select(long.Parse).ToArray();

            log.LogInformation("Total messages to download: {0}", downloadMessages.Length);
            var cred      = auth.Authenticate();
            var extractor = new MessageCleanup();
            var monitor   = new PerformanceMonitor(downloadMessages.Length);

            // The output file is overwritten (append: false) and encoded as UTF-8 without a BOM.
            using (var streamWriter = new StreamWriter(config.Out, false, new UTF8Encoding(false)))
            using (var csvDataTarget = new CsvWriter(streamWriter))
            {
                // Header row.
                csvDataTarget.WriteField("Id");
                csvDataTarget.WriteField("Date");
                csvDataTarget.WriteField("Author");
                csvDataTarget.WriteField("Message");
                csvDataTarget.NextRecord();

                Auth.ExecuteOperationWithCredentials(
                    cred,
                    () =>
                    {
                        // Log progress every 30 seconds for as long as the download is running.
                        using (Observable.Interval(TimeSpan.FromSeconds(30)).Subscribe(item => log.LogInformation(monitor.ToString())))
                        {
                            downloader.Download(downloadMessages)
                                .ToObservable()
                                .Select(
                                    item =>
                                    {
                                        try
                                        {
                                            // Resolve every value BEFORE touching the writer. If reading a
                                            // property or cleaning the text throws, the error is logged and
                                            // no half-written record is left behind to be merged with the
                                            // fields of the next message.
                                            var id        = item.Id;
                                            var createdAt = item.CreatedAt;
                                            var authorId  = item.CreatedBy.Id;
                                            var text      = item.Text;
                                            if (config.Clean)
                                            {
                                                text = extractor.Cleanup(text);
                                            }

                                            csvDataTarget.WriteField(id);
                                            csvDataTarget.WriteField(createdAt);
                                            csvDataTarget.WriteField(authorId);
                                            csvDataTarget.WriteField(text);
                                            csvDataTarget.NextRecord();
                                            monitor.Increment();
                                        }
                                        catch (Exception e)
                                        {
                                            // Best effort: a failing message is logged and skipped.
                                            log.LogError(e, "Error");
                                        }

                                        return item;
                                    })
                                // LastOrDefaultAsync guarantees one element, so Wait() does not throw
                                // when nothing was downloaded.
                                .LastOrDefaultAsync()
                                .Wait();
                        }
                    });
            }

            return Task.CompletedTask;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads every file, pushes each loaded entry through a dataflow pipeline
        /// (buffer -> JSON deserialization -> <c>Deserialized</c>) and waits for the pipeline to drain.
        /// </summary>
        /// <param name="files">Paths handed to <c>fileLoader.Load</c>, one by one.</param>
        /// <returns>A task that completes when the buffer and the final block have completed.</returns>
        private async Task Process(string[] files)
        {
            var progress       = new PerformanceMonitor(files.Length);
            var reportInterval = TimeSpan.FromSeconds(30);

            // Progress is logged periodically until the pipeline has finished.
            using (Observable.Interval(reportInterval).Subscribe(tick => log.LogInformation(progress.ToString())))
            {
                // Stage 1: bounded buffer of raw entries.
                var bufferOptions = new DataflowBlockOptions { BoundedCapacity = 1000000 };
                var source        = new BufferBlock<ProcessingChunk<string>>(bufferOptions);

                // Stage 2: parallel deserialization with a small input queue.
                var parseOptions = new ExecutionDataflowBlockOptions
                {
                    BoundedCapacity        = 2,
                    MaxDegreeOfParallelism = Environment.ProcessorCount
                };
                var parser = new TransformBlock<ProcessingChunk<string>, ProcessingChunk<TweetDTO>>(
                    raw => new ProcessingChunk<TweetDTO>(
                        raw.FileName,
                        raw.ChunkId,
                        raw.TotalChunks,
                        jsonConvert.DeserializeObject<TweetDTO>(raw.Data)),
                    parseOptions);

                // Stage 3: parallel consumer of the deserialized entries.
                var sinkOptions = new ExecutionDataflowBlockOptions
                {
                    MaxDegreeOfParallelism = Environment.ProcessorCount
                };
                var sink = new ActionBlock<ProcessingChunk<TweetDTO>>(Deserialized, sinkOptions);

                // Completion flows from the buffer down to the final block.
                source.LinkTo(parser, new DataflowLinkOptions { PropagateCompletion = true });
                parser.LinkTo(sink, new DataflowLinkOptions { PropagateCompletion = true });

                foreach (var path in files)
                {
                    try
                    {
                        var entries = fileLoader.Load(path);
                        var index   = 0;
                        while (index < entries.Length)
                        {
                            var chunk = new ProcessingChunk<string>(path, index, entries.Length, entries[index]);
                            await source.SendAsync(chunk).ConfigureAwait(false);
                            index++;
                        }

                        progress.Increment();
                    }
                    catch (Exception ex)
                    {
                        // A failing file is logged and the remaining files are still processed.
                        log.LogError(ex, "Failed");
                    }
                }

                source.Complete();
                await Task.WhenAll(source.Completion, sink.Completion).ConfigureAwait(false);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs every enrichment's discovery and appends each accepted message to the CSV file at
        /// <c>config.Out</c> as a row of message id, topic, enrichment type and cleaned text.
        /// </summary>
        /// <param name="token">Cancellation token; it is not observed by this implementation.</param>
        /// <returns>An already completed task; all work is done synchronously before returning.</returns>
        protected override Task Execute(CancellationToken token)
        {
            log.LogInformation("Starting twitter monitoring...");
            SetupWords();
            RateLimit.RateLimitTrackerMode = RateLimitTrackerMode.TrackAndAwait;
            var cleanup = new MessageCleanup();
            var monitor = new PerformanceMonitor(100000);
            var cred    = auth.Authenticate();

            // Progress is logged every 30 seconds; the output file is opened in append mode
            // and encoded as UTF-8 without a BOM.
            using (Observable.Interval(TimeSpan.FromSeconds(30)).Subscribe(item => log.LogInformation(monitor.ToString())))
            using (var streamWriter = new StreamWriter(config.Out, true, new UTF8Encoding(false)))
            using (var csvDataTarget = new CsvWriter(streamWriter))
            {
                Auth.ExecuteOperationWithCredentials(
                    cred,
                    () =>
                    {
                        var enrichments = Enrichment().ToArray();
                        foreach (var enrichment in enrichments)
                        {
                            enrichment.Discovery.BatchSize = 5;

                            // Hand this discovery everything already processed by any enrichment.
                            enrichment.Discovery.AddProcessed(enrichments.SelectMany(p => p.Discovery.Processed).ToArray());
                            enrichment.Discovery.Process()
                                .ToObservable()
                                .ObserveOn(TaskPoolScheduler.Default)
                                .Select(
                                    x =>
                                    {
                                        var text = cleanup.Cleanup(x.Message.Text).Replace("\r\n", " ");
                                        if (!CanInclude(text, enrichment.Type))
                                        {
                                            return x;
                                        }

                                        // Drop every non-ASCII character before the text is written.
                                        text = Regex.Replace(text, @"[^\u0000-\u007F]+", string.Empty);
                                        csvDataTarget.WriteField(x.Message.Id);
                                        csvDataTarget.WriteField(x.Topic);
                                        csvDataTarget.WriteField(enrichment.Type);
                                        csvDataTarget.WriteField(text);
                                        csvDataTarget.NextRecord();

                                        // Flush after every record so written rows reach the file immediately.
                                        streamWriter.Flush();
                                        monitor.Increment();
                                        return x;
                                    })
                                // Wait() throws InvalidOperationException on an empty sequence.
                                // LastOrDefaultAsync guarantees one element, so an enrichment that
                                // discovers nothing no longer aborts the remaining enrichments.
                                .LastOrDefaultAsync()
                                .Wait();
                        }
                    });
            }

            return Task.CompletedTask;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Checks the text reported by the monitor initially, after a mix of manual counts and
        /// increments, and after a further batch of manual counts followed by increments.
        /// </summary>
        public void Construct()
        {
            // Only the leading part of the text is compared once operations have run,
            // because the operations-per-second figure is not asserted.
            const int prefixLength = 20;

            Assert.AreEqual("Processed: 0/9 Operations per second: 0", instance.ToString());

            var round = 0;
            while (round < 5)
            {
                instance.ManualyCount();
                instance.Increment();
                round++;
            }

            instance.Increment();
            var afterFirstPhase = instance.ToString().Substring(0, prefixLength);
            Assert.AreEqual("Processed: 6/9 Opera", afterFirstPhase);

            // Ten manual counts first, then ten increments.
            for (var counted = 0; counted != 10; counted++)
            {
                instance.ManualyCount();
            }

            for (var incremented = 0; incremented != 10; incremented++)
            {
                instance.Increment();
            }

            var afterSecondPhase = instance.ToString().Substring(0, prefixLength);
            Assert.AreEqual("Processed: 16/16 Ope", afterSecondPhase);
        }
        /// <summary>
        /// Runs sentiment testing over the documents of <paramref name="message"/> and streams the
        /// processed documents back through <paramref name="target"/>, followed by a completion message.
        /// </summary>
        /// <param name="target">Connection the result and completion messages are written to.</param>
        /// <param name="message">Message carrying the request (documents, dictionary, domain, model).</param>
        /// <param name="token">Cancellation token passed to every write on <paramref name="target"/>.</param>
        /// <returns>A task that completes once the completion message has been written.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="message"/> is null.</exception>
        /// <exception cref="Exception">
        /// The request has no documents or more than 500 documents. Any failure during processing is
        /// also rethrown after an error completion message has been written to <paramref name="target"/>.
        /// </exception>
        public async Task Process(IConnectionContext target, SentimentMessage message, CancellationToken token)
        {
            if (message == null)
            {
                throw new ArgumentNullException(nameof(message));
            }

            var request = message.Request;

            if (request?.Documents == null)
            {
                throw new Exception("Nothing to process");
            }

            if (request.Documents.Length > 500)
            {
                throw new Exception("Too many documents. Maximum is 500");
            }

            var completed = new CompletedMessage();

            try
            {
                var monitor = new PerformanceMonitor(request.Documents.Length);

                // Progress is logged every 10 seconds while the request is being processed.
                using (Observable.Interval(TimeSpan.FromSeconds(10))
                       .Subscribe(item => logger.LogInformation(monitor.ToString())))
                {
                    ISentimentDataHolder lexicon = default;

                    // A dictionary supplied with the request becomes the custom lexicon.
                    if (request.Dictionary != null &&
                        request.Dictionary.Count > 0)
                    {
                        logger.LogInformation("Creating custom dictionary with {0} words", request.Dictionary.Count);

                        lexicon = SentimentDataHolder.Load(
                            request.Dictionary.Select(
                                item => new WordSentimentValueData(item.Key, new SentimentValueData(item.Value))));
                    }

                    // The domain lexicon is used when there is no custom one, or when the request asks
                    // to adjust the domain; in the latter case the custom lexicon is merged into it.
                    if ((lexicon == null || request.AdjustDomain) &&
                        !string.IsNullOrEmpty(request.Domain))
                    {
                        logger.LogInformation("Using Domain dictionary [{0}]", request.Domain);
                        var previous = lexicon;
                        lexicon = lexiconLoader.GetLexicon(request.Domain);
                        if (previous != null)
                        {
                            lexicon.Merge(previous);
                        }
                    }

                    string modelLocation = null;

                    if (!string.IsNullOrEmpty(request.Model))
                    {
                        logger.LogInformation("Using model path: {0}", request.Model);
                        modelLocation = storage.GetLocation(target.Connection.User, request.Model, ServiceConstants.Model);

                        if (!Directory.Exists(modelLocation))
                        {
                            throw new ApplicationException($"Can't find model {request.Model}");
                        }
                    }

                    using (var scope = provider.CreateScope())
                    {
                        var container = scope.ServiceProvider.GetService<ISessionContainer>();
                        container.Context.NGram             = 3;
                        container.Context.ExtractAttributes = request.Emotions;

                        var client    = container.GetTesting(modelLocation);
                        var converter = scope.ServiceProvider.GetService<IDocumentConverter>();
                        client.Init();
                        client.Pipeline.ResetMonitor();

                        if (lexicon != null)
                        {
                            client.Lexicon = lexicon;
                        }

                        // Processed documents are sent back in batches: a batch is closed after
                        // 10 items or 5 seconds, whichever comes first.
                        await client.Process(request.Documents.Select(item => converter.Convert(item, request.CleanText))
                                             .ToObservable())
                            .Select(item =>
                            {
                                monitor.Increment();
                                return item;
                            })
                            .Buffer(TimeSpan.FromSeconds(5), 10, scheduler)
                            .Select(async item =>
                            {
                                var result = new ResultMessage<Document>
                                {
                                    Data = item.Select(x => x.Processed).ToArray()
                                };
                                await target.Write(result, token).ConfigureAwait(false);
                                return Unit.Default;
                            })
                            .Merge();
                    }

                    logger.LogInformation("Completed with final performance: {0}", monitor);
                    completed.Message = "Testing Completed";
                    await target.Write(completed, token).ConfigureAwait(false);
                }
            }
            catch (Exception e)
            {
                // Mark the message as an error BEFORE it is written; otherwise the client receives
                // the failure text with IsError still false.
                completed.Message = e.Message;
                completed.IsError = true;
                await target.Write(completed, token).ConfigureAwait(false);
                throw;
            }
        }