コード例 #1
0
        /// <summary>
        /// Downloads the messages whose ids are listed (one per line) in <c>config.Ids</c> and
        /// writes them as CSV records (Id, Date, Author, Message) to <c>config.Out</c>,
        /// overwriting any existing file. Progress is logged every 30 seconds.
        /// </summary>
        /// <param name="token">Cancellation token; it is not observed by this implementation.</param>
        /// <returns>An already completed task; all work is done synchronously before returning.</returns>
        protected override Task Execute(CancellationToken token)
        {
            log.LogInformation("Downloading message...");

            // Blank lines are skipped so that a stray empty line in the id file
            // does not abort the whole run with a FormatException.
            var downloadMessages = File.ReadLines(config.Ids)
                                       .Where(line => !string.IsNullOrWhiteSpace(line))
                                       .Select(long.Parse)
                                       .ToArray();

            log.LogInformation("Total messages to download: {0}", downloadMessages.Length);
            var cred      = auth.Authenticate();
            var extractor = new MessageCleanup();
            var monitor   = new PerformanceMonitor(downloadMessages.Length);

            using (var streamWriter = new StreamWriter(config.Out, false, new UTF8Encoding(false)))
                using (var csvDataTarget = new CsvWriter(streamWriter))
                {
                    // Header record.
                    csvDataTarget.WriteField("Id");
                    csvDataTarget.WriteField("Date");
                    csvDataTarget.WriteField("Author");
                    csvDataTarget.WriteField("Message");
                    csvDataTarget.NextRecord();

                    Auth.ExecuteOperationWithCredentials(
                        cred,
                        () =>
                    {
                        // Periodic progress report; the subscription ends when the download finishes.
                        using (Observable.Interval(TimeSpan.FromSeconds(30)).Subscribe(item => log.LogInformation(monitor.ToString())))
                        {
                            downloader.Download(downloadMessages)
                            .ToObservable()
                            .Select(
                                item =>
                            {
                                try
                                {
                                    // Resolve every value BEFORE touching the writer. If any of these
                                    // throws, no field has been written yet, so a failed message
                                    // cannot leave a partial row that merges with the next record.
                                    var id        = item.Id;
                                    var createdAt = item.CreatedAt;
                                    var authorId  = item.CreatedBy.Id;
                                    var text      = item.Text;
                                    if (config.Clean)
                                    {
                                        text = extractor.Cleanup(text);
                                    }

                                    csvDataTarget.WriteField(id);
                                    csvDataTarget.WriteField(createdAt);
                                    csvDataTarget.WriteField(authorId);
                                    csvDataTarget.WriteField(text);
                                    csvDataTarget.NextRecord();
                                    monitor.Increment();
                                }
                                catch (Exception e)
                                {
                                    // Best effort: a single bad message is logged and skipped.
                                    log.LogError(e, "Error");
                                }

                                return(item);
                            })
                            // LastOrDefaultAsync keeps Wait() from throwing when nothing was downloaded.
                            .LastOrDefaultAsync()
                            .Wait();
                        }
                    });
                }

            return(Task.CompletedTask);
        }
コード例 #2
0
        /// <summary>
        /// Adds the dictionary, tokenizer, tagger, word-list, cleanup, extractor and
        /// memory-cache registrations to the container. Every registration is a singleton.
        /// </summary>
        /// <param name="services">Collection that receives the registrations.</param>
        /// <returns>The same <paramref name="services"/> instance that was passed in.</returns>
        public IServiceCollection ConfigureServices(IServiceCollection services)
        {
            services.AddSingleton<IWordsDictionary, BasicEnglishDictionary>();

            // The NRC dictionary is loaded inside the factory, before it is handed out.
            services.AddSingleton<INRCDictionary>(
                provider =>
                {
                    var nrc = new NRCDictionary();
                    nrc.Load();
                    return nrc;
                });

            services.AddSingleton<ISentenceTokenizerFactory, SentenceTokenizerFactory>();
            services.AddSingleton<IPOSTagger, NaivePOSTagger>();

            // Both interfaces below are resolved through the single BNCList registration.
            services.AddSingleton<BNCList>();
            services.AddSingleton<IPosTagResolver>(provider => provider.GetService<BNCList>());
            services.AddSingleton<IWordFrequencyList>(provider => provider.GetService<BNCList>());

            services.AddSingleton(provider => WordTypeResolver.Instance);

            // Message cleanup with cash-tag cleaning and lower-casing switched off.
            services.AddSingleton<IMessageCleanup>(
                provider => new MessageCleanup
                {
                    CleanCashTags = false,
                    LowerCase     = false
                });

            services.AddSingleton<IRawTextExtractor, RawWordExtractor>();
            services.AddSingleton<IMemoryCache>(provider => new MemoryCache(new MemoryCacheOptions()));

            return services;
        }
コード例 #3
0
ファイル: EnrichCommand.cs プロジェクト: hnjm/TwitterMonitor
        /// <summary>
        /// Runs every enrichment returned by <c>Enrichment()</c> and appends each accepted
        /// message to the CSV file at <c>config.Out</c> as (message id, topic, enrichment type,
        /// text). Progress is logged every 30 seconds.
        /// </summary>
        /// <param name="token">Cancellation token; it is not observed by this implementation.</param>
        /// <returns>An already completed task; all work is done synchronously before returning.</returns>
        protected override Task Execute(CancellationToken token)
        {
            log.LogInformation("Starting twitter monitoring...");
            SetupWords();
            RateLimit.RateLimitTrackerMode = RateLimitTrackerMode.TrackAndAwait;
            var cleanup = new MessageCleanup();
            var monitor = new PerformanceMonitor(100000);
            var cred    = auth.Authenticate();

            // The output file is opened in append mode (second StreamWriter argument is true).
            using (Observable.Interval(TimeSpan.FromSeconds(30)).Subscribe(item => log.LogInformation(monitor.ToString())))
                using (var streamWriter = new StreamWriter(config.Out, true, new UTF8Encoding(false)))
                    using (var csvDataTarget = new CsvWriter(streamWriter))
                    {
                        Auth.ExecuteOperationWithCredentials(
                            cred,
                            () =>
                        {
                            var enrichments = Enrichment().ToArray();
                            foreach (var enrichment in enrichments)
                            {
                                enrichment.Discovery.BatchSize = 5;

                                // Re-collected on every iteration, so items processed by the
                                // enrichments that already ran are passed to this one.
                                enrichment.Discovery.AddProcessed(enrichments.SelectMany(p => p.Discovery.Processed).ToArray());
                                enrichment.Discovery.Process()
                                .ToObservable()
                                .ObserveOn(TaskPoolScheduler.Default)
                                .Select(
                                    x =>
                                {
                                    var text = cleanup.Cleanup(x.Message.Text).Replace("\r\n", " ");
                                    if (!CanInclude(text, enrichment.Type))
                                    {
                                        return(x);
                                    }

                                    // Strip every run of non-ASCII characters before writing.
                                    text = Regex.Replace(text, @"[^\u0000-\u007F]+", string.Empty);
                                    csvDataTarget.WriteField(x.Message.Id);
                                    csvDataTarget.WriteField(x.Topic);
                                    csvDataTarget.WriteField(enrichment.Type);
                                    csvDataTarget.WriteField(text);
                                    csvDataTarget.NextRecord();
                                    streamWriter.Flush();
                                    monitor.Increment();
                                    return(x);
                                })
                                // Wait() on its own throws InvalidOperationException when the
                                // sequence is empty; LastOrDefaultAsync makes an enrichment that
                                // discovers nothing complete normally instead of aborting the run.
                                .LastOrDefaultAsync()
                                .Wait();
                            }
                        });
                    }

            return(Task.CompletedTask);
        }
コード例 #4
0
 /// <summary>
 /// Stores the result of <c>CreateMessageCleanup()</c> in <c>instance</c>.
 /// </summary>
 public void Setup() => instance = CreateMessageCleanup();